Omkar008 committed on
Commit
5df9c1c
1 Parent(s): 21c8bfc

Update services/utils.py

Browse files
Files changed (1) hide show
  1. services/utils.py +6 -3
services/utils.py CHANGED
@@ -49,11 +49,11 @@ def openai_response(model:OpenAI,input:str):
49
 
50
 
51
  def strcuture_document_data(raw_text:str)->dict:
52
-
53
  try:
54
  model_name = "gpt-3.5-turbo-instruct"
55
  temperature = 0.0
56
- model = OpenAI(model_name=model_name, temperature=temperature, max_tokens=800)
57
 
58
  # doc_query = (
59
  # "Extract and return strictly a JSON object containing only the following keys strictly : brand , total_cost , location , no_of_items , purchase_category,brand_category , Date ."
@@ -96,7 +96,7 @@ def strcuture_document_data(raw_text:str)->dict:
96
 
97
  def ensure_token_limit(text, model='gpt-3.5-turbo-instruct', max_tokens=4096):
98
  # Initialize the tokenizer for the specific model
99
- tokenizer = tiktoken.get_encoding(model)
100
 
101
  # Tokenize the text
102
  tokens = tokenizer.encode(text)
@@ -106,6 +106,9 @@ def ensure_token_limit(text, model='gpt-3.5-turbo-instruct', max_tokens=4096):
106
  # Truncate the text to the maximum token limit
107
  truncated_tokens = tokens[:max_tokens]
108
  truncated_text = tokenizer.decode(truncated_tokens)
 
 
 
109
  return truncated_text
110
  else:
111
  return text
 
49
 
50
 
51
  def strcuture_document_data(raw_text:str)->dict:
52
+ raw_text = ensure_token_limit(raw_text)
53
  try:
54
  model_name = "gpt-3.5-turbo-instruct"
55
  temperature = 0.0
56
+ model = OpenAI(model_name=model_name, temperature=temperature, max_tokens=256)
57
 
58
  # doc_query = (
59
  # "Extract and return strictly a JSON object containing only the following keys strictly : brand , total_cost , location , no_of_items , purchase_category,brand_category , Date ."
 
96
 
97
  def ensure_token_limit(text, model='gpt-3.5-turbo-instruct', max_tokens=4096):
98
  # Initialize the tokenizer for the specific model
99
+ tokenizer = tiktoken.encoding_for_model(model)
100
 
101
  # Tokenize the text
102
  tokens = tokenizer.encode(text)
 
106
  # Truncate the text to the maximum token limit
107
  truncated_tokens = tokens[:max_tokens]
108
  truncated_text = tokenizer.decode(truncated_tokens)
109
+ with open("token.txt","a") as file :
110
+ file.write(truncated_text)
111
+ print(truncated_text)
112
  return truncated_text
113
  else:
114
  return text