Spaces:
Running
Running
import gradio as gr | |
import requests | |
import os | |
from PIL import Image | |
import json | |
from datetime import datetime | |
# Example data with placeholder JSON for lab_results and bank_statement | |
examples = [ | |
["bonds_table.png", "Bonds table", "[{\"instrument_name\":\"example\", \"valuation\":0}]"], | |
["lab_results.png", "Lab results", "{\"patient_name\": \"example\", \"patient_age\": \"example\", \"patient_pid\": 0, \"lab_results\": [{\"investigation\": \"example\", \"result\": 0.00, \"reference_value\": \"example\", \"unit\": \"example\"}]}"], | |
["bank_statement.png", "Bank statement", "*"] | |
] | |
# JSON data for Bonds table | |
bonds_json = [ | |
{ | |
"instrument_name": "UNITS BLACKROCK FIX INC DUB FDS PLC ISHS EUR INV GRD CP BD IDX/INST/E", | |
"valuation": 19049 | |
}, | |
{ | |
"instrument_name": "UNITS ISHARES III PLC CORE EUR GOVT BOND UCITS ETF/EUR", | |
"valuation": 83488 | |
}, | |
{ | |
"instrument_name": "UNITS ISHARES III PLC EUR CORP BOND 1-5YR UCITS ETF/EUR", | |
"valuation": 213030 | |
}, | |
{ | |
"instrument_name": "UNIT ISHARES VI PLC/JP MORGAN USD E BOND EUR HED UCITS ETF DIST/HDGD/", | |
"valuation": 32774 | |
}, | |
{ | |
"instrument_name": "UNITS XTRACKERS II SICAV/EUR HY CORP BOND UCITS ETF/-1D-/DISTR.", | |
"valuation": 23643 | |
} | |
] | |
lab_results_json = { | |
"patient_name": "Yash M. Patel", | |
"patient_age": "21 Years", | |
"patient_pid": 555, | |
"lab_results": [ | |
{ | |
"investigation": "Hemoglobin (Hb)", | |
"result": 12.5, | |
"reference_value": "13.0 - 17.0", | |
"unit": "g/dL" | |
}, | |
{ | |
"investigation": "RBC COUNT", | |
"result": 5.2, | |
"reference_value": "4.5 - 5.5", | |
"unit": "mill/cumm" | |
}, | |
{ | |
"investigation": "Packed Cell Volume (PCV)", | |
"result": 57.5, | |
"reference_value": "40 - 50", | |
"unit": "%" | |
}, | |
{ | |
"investigation": "Mean Corpuscular Volume (MCV)", | |
"result": 87.75, | |
"reference_value": "83 - 101", | |
"unit": "fL" | |
}, | |
{ | |
"investigation": "MCH", | |
"result": 27.2, | |
"reference_value": "27 - 32", | |
"unit": "pg" | |
}, | |
{ | |
"investigation": "MCHC", | |
"result": 32.8, | |
"reference_value": "32.5 - 34.5", | |
"unit": "g/dL" | |
}, | |
{ | |
"investigation": "RDW", | |
"result": 13.6, | |
"reference_value": "11.6 - 14.0", | |
"unit": "%" | |
}, | |
{ | |
"investigation": "WBC COUNT", | |
"result": 9000, | |
"reference_value": "4000-11000", | |
"unit": "cumm" | |
}, | |
{ | |
"investigation": "Neutrophils", | |
"result": 60, | |
"reference_value": "50 - 62", | |
"unit": "%" | |
}, | |
{ | |
"investigation": "Lymphocytes", | |
"result": 31, | |
"reference_value": "20 - 40", | |
"unit": "%" | |
}, | |
{ | |
"investigation": "Eosinophils", | |
"result": 1, | |
"reference_value": "00 - 06", | |
"unit": "%" | |
}, | |
{ | |
"investigation": "Monocytes", | |
"result": 7, | |
"reference_value": "00 - 10", | |
"unit": "%" | |
}, | |
{ | |
"investigation": "Basophils", | |
"result": 1, | |
"reference_value": "00 - 02", | |
"unit": "%" | |
}, | |
{ | |
"investigation": "Absolute Neutrophils", | |
"result": 6000, | |
"reference_value": "1500 - 7500", | |
"unit": "cells/mcL" | |
}, | |
{ | |
"investigation": "Absolute Lymphocytes", | |
"result": 3100, | |
"reference_value": "1300 - 3500", | |
"unit": "cells/mcL" | |
}, | |
{ | |
"investigation": "Absolute Eosinophils", | |
"result": 100, | |
"reference_value": "00 - 500", | |
"unit": "cells/mcL" | |
}, | |
{ | |
"investigation": "Absolute Monocytes", | |
"result": 700, | |
"reference_value": "200 - 950", | |
"unit": "cells/mcL" | |
}, | |
{ | |
"investigation": "Absolute Basophils", | |
"result": 100, | |
"reference_value": "00 - 300", | |
"unit": "cells/mcL" | |
}, | |
{ | |
"investigation": "Platelet Count", | |
"result": 320000, | |
"reference_value": "150000 - 410000", | |
"unit": "cumm" | |
} | |
] | |
} | |
bank_statement_json = { | |
"bank": "First Platypus Bank", | |
"address": "1234 Kings St., New York, NY 12123", | |
"account_holder": "Mary G. Orta", | |
"account_number": "1234567890123", | |
"statement_date": "3/1/2022", | |
"period_covered": "2/1/2022 - 3/1/2022", | |
"account_summary": { | |
"balance_on_march_1": "$25,032.23", | |
"total_money_in": "$10,234.23", | |
"total_money_out": "$10,532.51" | |
}, | |
"transactions": [ | |
{ | |
"date": "02/01", | |
"description": "PGD EasyPay Debit", | |
"withdrawal": "203.24", | |
"deposit": "", | |
"balance": "22,098.23" | |
}, | |
{ | |
"date": "02/02", | |
"description": "AB&B Online Payment*****", | |
"withdrawal": "71.23", | |
"deposit": "", | |
"balance": "22,027.00" | |
}, | |
{ | |
"date": "02/04", | |
"description": "Check No. 2345", | |
"withdrawal": "", | |
"deposit": "450.00", | |
"balance": "22,477.00" | |
}, | |
{ | |
"date": "02/05", | |
"description": "Payroll Direct Dep 23422342 Giants", | |
"withdrawal": "", | |
"deposit": "2,534.65", | |
"balance": "25,011.65" | |
}, | |
{ | |
"date": "02/06", | |
"description": "Signature POS Debit - TJP", | |
"withdrawal": "84.50", | |
"deposit": "", | |
"balance": "24,927.15" | |
}, | |
{ | |
"date": "02/07", | |
"description": "Check No. 234", | |
"withdrawal": "1,400.00", | |
"deposit": "", | |
"balance": "23,527.15" | |
}, | |
{ | |
"date": "02/08", | |
"description": "Check No. 342", | |
"withdrawal": "", | |
"deposit": "25.00", | |
"balance": "23,552.15" | |
}, | |
{ | |
"date": "02/09", | |
"description": "FPB AutoPay***** Credit Card", | |
"withdrawal": "456.02", | |
"deposit": "", | |
"balance": "23,096.13" | |
}, | |
{ | |
"date": "02/08", | |
"description": "Check No. 123", | |
"withdrawal": "", | |
"deposit": "25.00", | |
"balance": "23,552.15" | |
}, | |
{ | |
"date": "02/09", | |
"description": "FPB AutoPay***** Credit Card", | |
"withdrawal": "156.02", | |
"deposit": "", | |
"balance": "23,096.13" | |
}, | |
{ | |
"date": "02/08", | |
"description": "Cash Deposit", | |
"withdrawal": "", | |
"deposit": "25.00", | |
"balance": "23,552.15" | |
} | |
] | |
} | |
def run_inference(image_filepath, query, key): | |
if image_filepath is None: | |
return {"error": f"No image provided. Please upload an image before submitting."} | |
if query is None or query.strip() == "": | |
return {"error": f"No query provided. Please enter a query before submitting."} | |
if key is None or key.strip() == "": | |
return {"error": f"No Sparrow Key provided. Please enter a Sparrow Key before submitting."} | |
file_path = None | |
try: | |
# Open the uploaded image using its filepath | |
img = Image.open(image_filepath) | |
# Extract the file extension from the uploaded file | |
input_image_extension = image_filepath.split('.')[-1].lower() # Extract extension from filepath | |
# Set file extension based on the original file, otherwise default to PNG | |
if input_image_extension in ['jpg', 'jpeg', 'png']: | |
file_extension = input_image_extension | |
else: | |
file_extension = 'png' # Default to PNG if extension is unavailable or invalid | |
# Generate a unique filename using timestamp | |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") | |
filename = f"image_{timestamp}.{file_extension}" | |
# Save the image | |
img.save(filename) | |
# Get the full path of the saved image | |
file_path = os.path.abspath(filename) | |
# Prepare the REST API call | |
url = 'https://katanaml-sparrow-ml.hf.space/api/v1/sparrow-llm/inference' | |
headers = { | |
'accept': 'application/json' | |
} | |
# Open the file in binary mode and send it | |
with open(filename, "rb") as f: | |
files = { | |
'file': (filename, f, f'image/{file_extension}') | |
} | |
# Convert 'query' input to JSON string if needed | |
try: | |
# Check if the query is a wildcard '*' | |
if query.strip() == "*": | |
query_json = "*" # Directly use the wildcard as valid input | |
else: | |
# Attempt to parse the query as JSON | |
query_json = json.loads(query) # This could return any valid JSON (string, number, etc.) | |
# Ensure the parsed query is either a JSON object (dict) or a list of JSON objects | |
if not isinstance(query_json, (dict, list)): | |
return { | |
"error": "Invalid input. Only JSON objects, arrays of objects, or wildcard '*' are allowed."} | |
# If it's a list, make sure it's a list of JSON objects | |
if isinstance(query_json, list): | |
if not all(isinstance(item, dict) for item in query_json): | |
return {"error": "Invalid input. Arrays must contain only JSON objects."} | |
except json.JSONDecodeError: | |
return {"error": "Invalid JSON format in query input"} | |
data = { | |
'group_by_rows': '', | |
'agent': 'sparrow-parse', | |
'keywords': '', | |
'sparrow_key': key, | |
'update_targets': '', | |
'debug': 'false', | |
'index_name': '', | |
'types': '', | |
'fields': query_json if query_json == "*" else json.dumps(query_json), # Use wildcard as-is, or JSON | |
'options': 'huggingface,katanaml/sparrow-qwen2-vl-7b' | |
} | |
# Perform the POST request | |
response = requests.post(url, headers=headers, files=files, data=data) | |
# Process the response and return the JSON data | |
if response.status_code == 200: | |
return response.json() | |
else: | |
return {"error": f"Request failed with status code {response.status_code}", "details": response.text} | |
finally: | |
# Clean up the temporary file | |
if os.path.exists(file_path): | |
os.remove(file_path) | |
def handle_example(example_image): | |
# Find the corresponding entry in the examples array | |
for example in examples: | |
if example[0] == example_image: | |
# Return bonds_json if Bonds table is selected | |
if example_image == "bonds_table.png": | |
return example_image, bonds_json, example[2] | |
# Return lab_results_json if Lab results is selected | |
elif example_image == "lab_results.png": | |
return example_image, lab_results_json, example[2] | |
# Return bank_statement_json if Bank statement is selected | |
elif example_image == "bank_statement.png": | |
return example_image, bank_statement_json, example[2] | |
# Default return if no match found | |
return None, "No example selected.", "" | |
# Define the UI | |
with gr.Blocks(theme=gr.themes.Ocean()) as demo: | |
with gr.Tab(label="Sparrow UI"): | |
with gr.Row(): | |
with gr.Column(): | |
input_img = gr.Image(label="Input Document Image", type="filepath") | |
query_input = gr.Textbox(label="Query", placeholder="Use * to query all data or JSON schema, e.g.: [{\"instrument_name\": \"example\"}]") | |
key_input = gr.Textbox(label="Sparrow Key", type="password") | |
submit_btn = gr.Button(value="Submit", variant="primary") | |
# Radio button for selecting examples | |
example_radio = gr.Radio(label="Select Example", choices=[ex[0] for ex in examples]) | |
with gr.Column(): | |
# JSON output for structured JSON display | |
output_json = gr.JSON(label="Response (JSON)", height=900, min_height=900) | |
# Function to handle example selection | |
def on_example_select(selected_example): | |
# Handle example selection and return the image, output (text or JSON), and query | |
return handle_example(selected_example) | |
# Update image, output JSON, and query when an example is selected | |
example_radio.change(on_example_select, | |
inputs=example_radio, | |
outputs=[input_img, output_json, query_input]) | |
# When submit is clicked | |
submit_btn.click(run_inference, [input_img, query_input, key_input], [output_json]) | |
gr.Markdown( | |
""" | |
--- | |
<p style="text-align: center;"> | |
Visit <a href="https://katanaml.io/" target="_blank">Katana ML</a> for more details. | |
</p> | |
""" | |
) | |
# Launch the app | |
demo.queue(api_open=False) | |
demo.launch(debug=True) | |