Omkar008 committed on
Commit
52267ff
1 Parent(s): 78b319c

Update services/utils.py

Browse files
Files changed (1) hide show
  1. services/utils.py +2 -11
services/utils.py CHANGED
@@ -41,15 +41,6 @@ api_key=os.getenv('OPENAI_API_KEY')
41
  # """)
42
  # Date: Optional[str] = Field(default=None, description="Specify the date of purchase in the format dd-MM-yyyy. If the date of purchase is not explicitly provided on the receipt or document, or if it cannot be accurately determined, assign the value 'null'. Ensure the date is formatted correctly as day, month, and year in two digits each.")
43
 
44
- # class Candidate(BaseModel):
45
- # brand:Optional[str]= Field(default=None , description="INSERT BRAND NAME FROM THE RECEIPT OCR TEXT. IF NOT PRESENT RETURN null")
46
- # total_cost :Optional[str]=Field(default=None , description="INSERT TOTAL COST FROM THE RECEIPT OCR TEXT(most of the times total cost is the maximum value in the OCR text). IF NOT PRESENT RETURN null")
47
- # location:Optional[str]=Field(default=None , description="INSERT LOCATION FROM THE RECEIPT OCR TEXT. IF NOT PRESENT RETURN null")
48
- # purchase_category:Optional[str]=Field(default=None , description="INSERT PURCHASE CATEGORY FROM THE RECEIPT OCR TEXT. IF NOT PRESENT RETURN null")
49
- # brand_category:Optional[str]=Field(default=None , description="""INSERT BRAND CATEGORY FROM THE RECEIPT OCR TEXT. CHOOSE CLOSEST BRAND CATEGORY BASED ON THE OCR FROM THIS ARRAY ["Fashion and Apparel","Jewelry and Watches","Beauty and Personal Care","Automobiles","Real Estate","Travel and Leisure","Culinary Services","Home and Lifestyle","Technology and Electronics","Sports and Leisure","Art and Collectibles","Health and Wellness","Stationery and Writing Instruments","Children and Baby","Pet Accessories","Financial Services","Airline Services","Accommodation Services","Beverages Services","Services"] ELSE IF NOT PRESENT RETURN null""")
50
- # Date:Optional[str]=Field(default=None , description="INSERT RECEIPT DATE FROM THE RECEIPT OCR TEXT. IF NOT PRESENT RETURN null. FORMAT: dd-mm-yyyy")
51
- # # Currency:Optional[str]=Field(default=None , description= "INSERT CURRENCY FROM THE RECEIPT OCR TEXT THAT YOU FIND.IF NOT RETURN null.")
52
-
53
 
54
  class Candidate(BaseModel):
55
  brand: Optional[str] = Field(default=None, description="INSERT BRAND NAME FROM THE RECEIPT OCR TEXT. IF NOT PRESENT RETURN null")
@@ -73,11 +64,11 @@ def strcuture_document_data(raw_text:str)->dict:
73
 
74
  # doc_query = (
75
  # "Extract and return strictly a JSON object containing only the following keys strictly : brand , total_cost , location , no_of_items , purchase_category,brand_category , Date ."
76
- # "\nReceipt Data:\n" + raw_text + "\nRemember the response should only be in JSON format very Strictly and it should have these keys brand , total_cost(Try to look for the highest value in the receipt nearby to words total cost or semantically similar words) , location , no_of_items , purchase_category,brand_category , Date , very Strictly.\n"
77
  # )
78
 
79
  doc_query= (
80
- "Extract and return strictly a JSON object containing only the following keys: brand, total_cost, location, purchase_category, brand_category, Date , currency ,filename,payment_method . Ensure that if a value is not present in the OCR text, it is returned as null."
81
  )
82
 
83
  parser = PydanticOutputParser(pydantic_object=Candidate)
 
41
  # """)
42
  # Date: Optional[str] = Field(default=None, description="Specify the date of purchase in the format dd-MM-yyyy. If the date of purchase is not explicitly provided on the receipt or document, or if it cannot be accurately determined, assign the value 'null'. Ensure the date is formatted correctly as day, month, and year in two digits each.")
43
 
 
 
 
 
 
 
 
 
 
44
 
45
  class Candidate(BaseModel):
46
  brand: Optional[str] = Field(default=None, description="INSERT BRAND NAME FROM THE RECEIPT OCR TEXT. IF NOT PRESENT RETURN null")
 
64
 
65
  # doc_query = (
66
  # "Extract and return strictly a JSON object containing only the following keys strictly : brand , total_cost , location , no_of_items , purchase_category,brand_category , Date ."
67
+ # "\nReceipt Data:\n" + raw_text + "\nRemember the response should only be in JSON format very Strictly and it should have these keys brand , total_cost(LOOK FOR THE HIGHEST VALUE IN RECEIPT OCR TEXT) , location , no_of_items , purchase_category,brand_category , Date , very Strictly.\n"
68
  # )
69
 
70
  doc_query= (
71
+ "Extract and return strictly a JSON object containing only the following keys: brand, total_cost, location, purchase_category, brand_category, Date , currency ,filename,payment_method .FOR total_cost LOOK FOR THE HIGHEST VALUE IN RECEIPT OCR TEXT. Ensure that if a value is not present in the OCR text, it is returned as null."
72
  )
73
 
74
  parser = PydanticOutputParser(pydantic_object=Candidate)