File size: 9,073 Bytes
6d14138
 
 
 
 
 
 
226d230
6d14138
e4e94ef
 
6d14138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4e94ef
205afd4
6d14138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8914c30
074e309
6d14138
24757a7
205afd4
6d14138
 
205afd4
6d14138
 
 
fd29cb3
e577340
6d14138
fd29cb3
f5c8650
6d14138
f5c8650
e577340
6d14138
205afd4
6d14138
 
205afd4
6d14138
 
205afd4
 
 
6d14138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c1c9f5
6d14138
0c1c9f5
6d14138
205afd4
6d14138
205afd4
53cbac4
6d14138
205afd4
 
6d14138
 
 
205afd4
6d14138
 
 
205afd4
6d14138
53cbac4
 
 
 
 
6d14138
205afd4
53cbac4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
import os
import pymongo
import json
from PIL import Image
import io
import re
from bson import ObjectId
from openai import OpenAI

# Set OpenAI Client
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# MongoDB connection
client = pymongo.MongoClient("mongodb+srv://consumewise_db:p123%[email protected]/?retryWrites=true&w=majority&appName=Cluster0")
db = client.consumeWise
collection = db.products

# Define the prompt that will be passed to the OpenAI API
label_reader_prompt = """
You will be provided with a set of images corresponding to a single product. These images are found printed on the packaging of the product.
Your goal will be to extract information from these images to populate the schema provided. Here is some information you will routinely encounter. Ensure that you capture complete information, especially for nutritional information and ingredients:
- Ingredients: List of ingredients in the item. They may have some percent listed in brackets. They may also have metadata or classification like Preservative (INS 211) where INS 211 forms the metadata. Structure accordingly. If ingredients have subingredients like sugar: added sugar, trans sugar, treat them as different ingredients.
- Claims: Like a mango fruit juice says contains fruit.
- Nutritional Information: This will have nutrients, serving size, and nutrients listed per serving. Extract the base value for reference.
- FSSAI License number: Extract the license number. There might be many, so store relevant ones.
- Name: Extract the name of the product.
- Brand/Manufactured By: Extract the parent company of this product.
- Serving size: This might be explicitly stated or inferred from the nutrients per serving.
"""

# Function to extract information from image URLs
def extract_information(image_links):
    print("in extract_information")
    image_message = [{"type": "image_url", "image_url": {"url": il}} for il in image_links]
    
    # Send the request to OpenAI API with the images and prompt
    response = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": label_reader_prompt},
                    *image_message,
                ],
            },
        ],
        response_format={"type": "json_schema", "json_schema": {
            "name": "label_reader",
            "schema": {
                "type": "object",
                "properties": {
                    "productName": {"type": "string"},
                    "brandName": {"type": "string"},
                    "ingredients": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "name": {"type": "string"},
                                "percent": {"type": "string"},
                                "metadata": {"type": "string"},
                            },
                            "required": ["name", "percent", "metadata"],
                            "additionalProperties": False
                        }
                    },
                    "servingSize": {
                        "type": "object",
                        "properties": {
                            "quantity": {"type": "number"},
                            "unit": {"type": "string"},
                        },
                        "required": ["quantity", "unit"],
                        "additionalProperties": False
                    },
                    "packagingSize": {
                        "type": "object",
                        "properties": {
                            "quantity": {"type": "number"},
                            "unit": {"type": "string"},
                        },
                        "required": ["quantity", "unit"],
                        "additionalProperties": False
                    },
                    "servingsPerPack": {"type": "number"},
                    "nutritionalInformation": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "name": {"type": "string"},
                                "unit": {"type": "string"},
                                "values": {
                                    "type": "array",
                                    "items": {
                                        "type": "object",
                                        "properties": {
                                            "base": {"type": "string"},
                                            "value": {"type": "number"},
                                        },
                                        "required": ["base", "value"],
                                        "additionalProperties": False
                                    }
                                },
                            },
                            "required": ["name", "unit", "values"],
                            "additionalProperties": False
                        },
                        "additionalProperties": True,
                    },
                    "fssaiLicenseNumbers": {"type": "array", "items": {"type": "number"}},
                    "claims": {"type": "array", "items": {"type": "string"}},
                    "shelfLife": {"type": "string"},
                },
                "required": [
                    "productName", "brandName", "ingredients", "servingSize",
                    "packagingSize", "servingsPerPack", "nutritionalInformation",
                    "fssaiLicenseNumbers", "claims", "shelfLife"
                ],
                "additionalProperties": False
            },
            "strict": True
        }}
    )
    
    # Extract and return the relevant response
    
    return response.choices[0].message.content

#Extract text from image
def extract_data(image_links):
    try:
        if not image_links:
            return {"error": "No image URLs provided"}
        
        # Call the extraction function
        extracted_data = extract_information(image_links)
        print(f"extracted data : {extracted_data} ")
        print(f"extracted data : {type(extracted_data)} ")
        
        if extracted_data:
            extracted_data_json = json.loads(extracted_data)
            # Store in MongoDB
            collection.insert_one(extracted_data_json)
            return extracted_data
        else:
            return {"error": "Failed to extract information"}
        
    except Exception as error:
        return {"error": str(error)}
    
    
def find_product(product_name):
    try: 
        if product_name:            
            # Split the input product name into words
            words = product_name.split()
            result = [' '.join(words[:i]) for i in range(2, len(words) + 1)]
            list_names = result + words

            # # Create a regex pattern that matches all the words (case-insensitive)
            # regex_pattern = ".*".join(words)  # This ensures all words appear in sequence
            # query = {"productName": {"$regex": re.compile(regex_pattern, re.IGNORECASE)}}
            product_list = []
            for i in list_names:
            # Find all products matching the regex pattern
                query = {"productName": {"$regex": re.compile(i, re.IGNORECASE)}}
                products = collection.find(query)
                for product in products:
                    if product['productName'] not in product_list:
                        product_list.append(product['productName'])
            
            # # Create a list of product names that match the query
            # product_list = [product['productName'] for product in products]
    
            if product_list:
                return {"products": product_list, "message": "Products found"}
            else:
                return {"products": [], "message": "No products found"}
        else:
            return {"error": "Please provide a valid product name or id"}
    except Exception as error:
        return {"error": str(error)}

    
def get_product(product_name):
    try:        
        if product_name:
            product = collection.find_one({"productName": product_name})
        else:
            return {"error": "Please provide a valid product name or id"}

        if not product:
            print("Product not found.")
            return {"error": "Product not found"}
        if product:
            product['_id'] = str(product['_id'])  # Convert ObjectId to string
            product_str = json.dumps(product, indent=4)  # Convert product to JSON string
            print(f"Found product: {product_str}")
            return product_str  # Return the product as a JSON string
        
    except Exception as error:
        return {"error": str(error)}