Spaces:

Hushh
/

hushh-valet-chat

Sleeping

Omkar008 committed on Jul 4

Commit

5df9c1c

•

1 Parent(s): 21c8bfc

Update services/utils.py

Files changed (1) hide show

services/utils.py CHANGED Viewed

@@ -49,11 +49,11 @@ def openai_response(model:OpenAI,input:str):
 def strcuture_document_data(raw_text:str)->dict:
     try:
         model_name = "gpt-3.5-turbo-instruct"
         temperature = 0.0
-        model = OpenAI(model_name=model_name, temperature=temperature, max_tokens=800)
         # doc_query = (
         #     "Extract and return strictly a JSON object containing only the  following keys strictly : brand , total_cost , location , no_of_items , purchase_category,brand_category , Date ."
@@ -96,7 +96,7 @@ def strcuture_document_data(raw_text:str)->dict:
 def ensure_token_limit(text, model='gpt-3.5-turbo-instruct', max_tokens=4096):
     # Initialize the tokenizer for the specific model
-    tokenizer = tiktoken.get_encoding(model)
     # Tokenize the text
     tokens = tokenizer.encode(text)
@@ -106,6 +106,9 @@ def ensure_token_limit(text, model='gpt-3.5-turbo-instruct', max_tokens=4096):
         # Truncate the text to the maximum token limit
         truncated_tokens = tokens[:max_tokens]
         truncated_text = tokenizer.decode(truncated_tokens)
         return truncated_text
     else:
         return text

 def strcuture_document_data(raw_text:str)->dict:
+    raw_text = ensure_token_limit(raw_text)
     try:
         model_name = "gpt-3.5-turbo-instruct"
         temperature = 0.0
+        model = OpenAI(model_name=model_name, temperature=temperature, max_tokens=256)
         # doc_query = (
         #     "Extract and return strictly a JSON object containing only the  following keys strictly : brand , total_cost , location , no_of_items , purchase_category,brand_category , Date ."
 def ensure_token_limit(text, model='gpt-3.5-turbo-instruct', max_tokens=4096):
     # Initialize the tokenizer for the specific model
+    tokenizer = tiktoken.encoding_for_model(model)
     # Tokenize the text
     tokens = tokenizer.encode(text)
         # Truncate the text to the maximum token limit
         truncated_tokens = tokens[:max_tokens]
         truncated_text = tokenizer.decode(truncated_tokens)
+        with open("token.txt","a") as file :
+            file.write(truncated_text)
+        print(truncated_text)
         return truncated_text
     else:
         return text