Spaces:
Sleeping
Sleeping
Update services/utils.py
Browse files- services/utils.py +6 -3
services/utils.py
CHANGED
@@ -49,11 +49,11 @@ def openai_response(model:OpenAI,input:str):
|
|
49 |
|
50 |
|
51 |
def strcuture_document_data(raw_text:str)->dict:
|
52 |
-
|
53 |
try:
|
54 |
model_name = "gpt-3.5-turbo-instruct"
|
55 |
temperature = 0.0
|
56 |
-
model = OpenAI(model_name=model_name, temperature=temperature, max_tokens=
|
57 |
|
58 |
# doc_query = (
|
59 |
# "Extract and return strictly a JSON object containing only the following keys strictly : brand , total_cost , location , no_of_items , purchase_category,brand_category , Date ."
|
@@ -96,7 +96,7 @@ def strcuture_document_data(raw_text:str)->dict:
|
|
96 |
|
97 |
def ensure_token_limit(text, model='gpt-3.5-turbo-instruct', max_tokens=4096):
|
98 |
# Initialize the tokenizer for the specific model
|
99 |
-
tokenizer = tiktoken.
|
100 |
|
101 |
# Tokenize the text
|
102 |
tokens = tokenizer.encode(text)
|
@@ -106,6 +106,9 @@ def ensure_token_limit(text, model='gpt-3.5-turbo-instruct', max_tokens=4096):
|
|
106 |
# Truncate the text to the maximum token limit
|
107 |
truncated_tokens = tokens[:max_tokens]
|
108 |
truncated_text = tokenizer.decode(truncated_tokens)
|
|
|
|
|
|
|
109 |
return truncated_text
|
110 |
else:
|
111 |
return text
|
|
|
49 |
|
50 |
|
51 |
def strcuture_document_data(raw_text:str)->dict:
|
52 |
+
raw_text = ensure_token_limit(raw_text)
|
53 |
try:
|
54 |
model_name = "gpt-3.5-turbo-instruct"
|
55 |
temperature = 0.0
|
56 |
+
model = OpenAI(model_name=model_name, temperature=temperature, max_tokens=256)
|
57 |
|
58 |
# doc_query = (
|
59 |
# "Extract and return strictly a JSON object containing only the following keys strictly : brand , total_cost , location , no_of_items , purchase_category,brand_category , Date ."
|
|
|
96 |
|
97 |
def ensure_token_limit(text, model='gpt-3.5-turbo-instruct', max_tokens=4096):
|
98 |
# Initialize the tokenizer for the specific model
|
99 |
+
tokenizer = tiktoken.encoding_for_model(model)
|
100 |
|
101 |
# Tokenize the text
|
102 |
tokens = tokenizer.encode(text)
|
|
|
106 |
# Truncate the text to the maximum token limit
|
107 |
truncated_tokens = tokens[:max_tokens]
|
108 |
truncated_text = tokenizer.decode(truncated_tokens)
|
109 |
+
with open("token.txt","a") as file :
|
110 |
+
file.write(truncated_text)
|
111 |
+
print(truncated_text)
|
112 |
return truncated_text
|
113 |
else:
|
114 |
return text
|