"""Gradio app: ask Pixtral a question about an uploaded image and log the answer.

Each request sends the image and the question to the Hyperbolic
chat-completions endpoint, stores the returned text in MongoDB together with
the supplied MRN number, and shows the text in the UI.
"""

import base64
import os
import time
import uuid
from io import BytesIO
from typing import Optional

import gradio as gr
import requests
from PIL import Image
from pymongo import MongoClient

# SECURITY: the fallback values below are live credentials that were committed
# to source control. They are kept only so existing deployments keep working;
# rotate them, supply MONGODB_URI / HYPERBOLIC_API_KEY through the environment,
# and then delete the literals.
MONGODB_URI = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://atharva2021:123@cluster0.so5reec.mongodb.net/",
)
HYPERBOLIC_API_KEY = os.environ.get(
    "HYPERBOLIC_API_KEY",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJyZzMyNzAyNEBnbWFpbC5jb20ifQ._frFve-BYZdb0Qo6FIj6xcDcxpY-6QlC2O-ToQxBjkc",
)

HYPERBOLIC_API_URL = "https://api.hyperbolic.xyz/v1/chat/completions"
PIXTRAL_MODEL = "mistralai/Pixtral-12B-2409"

# (connect, read) timeouts in seconds. Without a timeout a stalled connection
# would block the Gradio worker forever.
REQUEST_TIMEOUT = (10, 120)

MISSING_INPUT_MESSAGE = "Upload your image, enter MRN number, and enter your question."

# MongoDB setup
mongo_client = MongoClient(MONGODB_URI)
db = mongo_client['bajaj']
collection = db['client']


def encode_image(img: Image.Image) -> str:
    """Return *img* serialized as PNG and base64-encoded into a UTF-8 string."""
    buffered = BytesIO()
    img.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def _query_pixtral(base64_img: str, user_question: str) -> str:
    """Send the image and question to the API and return the text to display.

    Never raises for transport or response-format problems: every failure is
    turned into a human-readable message so the caller can show and log it.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {HYPERBOLIC_API_KEY}",
    }
    payload = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": user_question},
                    {
                        "type": "image_url",
                        # encode_image() produces PNG bytes, so the data URL
                        # must declare image/png (it used to say image/jpeg).
                        "image_url": {"url": f"data:image/png;base64,{base64_img}"},
                    },
                ],
            }
        ],
        "model": PIXTRAL_MODEL,
        "max_tokens": 2048,
        "temperature": 0.7,
        "top_p": 0.9,
    }

    try:
        response = requests.post(
            HYPERBOLIC_API_URL,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported like any other API
        # failure instead of surfacing as an unhandled traceback in the UI.
        return f"API request failed: {exc}"

    if response.status_code != 200:
        return f"API request failed: {response.status_code} - {response.text}"

    try:
        return response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError):
        # ValueError: body is not JSON. The others: 'choices' is missing,
        # empty, or not shaped like a chat-completions response.
        return "Response format is incorrect"


def chat_with_pixtral(
    uploaded_file: Optional[Image.Image],
    mrn_number: str,
    user_question: str,
) -> str:
    """Ask Pixtral about an image, store the answer in MongoDB, and return it.

    Args:
        uploaded_file: Image from the Gradio image input, or ``None`` when
            nothing was uploaded.
        mrn_number: Identifier stored alongside the result. A missing or
            whitespace-only value is treated as "not provided".
        user_question: Text sent to the model together with the image.

    Returns:
        The model's answer, an error message describing why the API call
        failed, or a prompt asking for the missing inputs. Whenever the API
        is called, the returned text (answer or error message) is also saved
        to MongoDB.
    """
    # Guard clause: both the image and a non-blank MRN are required.
    # `mrn_number or ""` keeps a None value from raising AttributeError.
    if uploaded_file is None or not (mrn_number or "").strip():
        return MISSING_INPUT_MESSAGE

    base64_img = encode_image(uploaded_file)
    # A None question would serialize to JSON null; send an empty string instead.
    assistant_response = _query_pixtral(base64_img, user_question or "")

    # Save the result to MongoDB with the specified format
    document = {
        'mrn_number': mrn_number,
        'ocr_result': assistant_response,  # This will be the OCR/API result
        'unique_id': str(uuid.uuid4()),  # Unique ID for this request
        'got_mode': "plain texts OCR",
        'timestamp': time.time(),
    }
    collection.insert_one(document)

    return assistant_response


# Gradio GUI
iface = gr.Interface(
    fn=chat_with_pixtral,
    inputs=[
        gr.Image(type="pil", label="Upload Your Image"),
        gr.Textbox(label="Enter MRN Number"),
        gr.Textbox(label="Please enter your question"),
    ],
    outputs="text",
    title="Pixtral Image Chat",
    description="Upload your Image, enter MRN number, and get insights out of the Image",
)

# Launch only when run as a script so importing this module (for reuse or
# testing) does not start a server and open a public share link.
if __name__ == "__main__":
    iface.launch(share=True)