aquibmoin's picture
Create app.py
78db47d verified
raw
history blame
2.16 kB
import gradio as gr
import requests
import os
import re
# Hugging Face Inference API credentials and endpoint.
# API_TOKEN must be provided via the environment (e.g. a Space secret);
# os.getenv returns None when unset, which would produce an invalid
# "Bearer None" header below — TODO confirm the secret is configured.
API_TOKEN = os.getenv('API_TOKEN')
# Hosted sentence-similarity model (NASA SMD / IBM sentence transformer).
API_URL = "https://api-inference.huggingface.co/models/nasa-impact/nasa-smd-ibm-st-v2"
# Authorization header sent with every inference request.
headers = {"Authorization": f"Bearer {API_TOKEN}"}
def query_similarity(source_sentence, sentences):
    """Score *sentences* against *source_sentence* via the HF Inference API.

    Parameters
    ----------
    source_sentence : str
        The query sentence.
    sentences : list[str]
        Candidate text chunks to compare against the query.

    Returns
    -------
    The decoded JSON response. On failure (bad token, model still
    loading, malformed payload) the API responds with a dict containing
    an "error" key rather than a similarity result.
    """
    payload = {
        "inputs": {
            "source_sentence": source_sentence,
            "sentences": sentences
        }
    }
    # Bound the request: requests has no default timeout, so a cold model
    # or a network stall would otherwise hang the Gradio handler forever.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    return response.json()
def format_output(response):
    """Format the similarity API response for display, best match first.

    Parameters
    ----------
    response : list[dict] | dict
        Expected: a list of {"sentence": ..., "score": ...} items.
        NOTE(review): the HF sentence-similarity endpoint may instead
        return a plain list of float scores — verify against the live
        API. A dict with an "error" key signals a failed request.

    Returns
    -------
    str
        One line per item, sorted by descending score, or the API error
        message when the request failed.
    """
    # Surface API errors (auth failure, model loading, bad payload)
    # instead of crashing when sorting a dict that has no score entries.
    if isinstance(response, dict) and "error" in response:
        return f"API error: {response['error']}"
    results = sorted(response, key=lambda x: x['score'], reverse=True)
    formatted_results = []
    for item in results:
        formatted_results.append(f"Sentence: {item['sentence']}, Score: {item['score']:.4f}")
    return "\n".join(formatted_results)
def split_into_chunks(text, chunk_size=100):
    """Split *text* into chunks of at most ~*chunk_size* words.

    Sentences are kept intact: the text is split on sentence-ending
    punctuation and sentences are greedily packed into chunks. A single
    sentence longer than *chunk_size* becomes its own oversized chunk.

    Parameters
    ----------
    text : str
        The document text to split.
    chunk_size : int, optional
        Approximate maximum number of words per chunk (default 100).

    Returns
    -------
    list[str]
        Non-empty chunks; an empty/whitespace-only *text* yields [].
    """
    # Robustness: the original returned [""] for empty input, sending a
    # meaningless empty chunk to the similarity API.
    if not text or not text.strip():
        return []
    sentences = re.split(r'(?<=[.!?]) +', text)  # Split text into sentences
    chunks = []
    current_chunk = []
    current_length = 0
    for sentence in sentences:
        sentence_length = len(sentence.split())
        # Flush only a non-empty chunk: the original appended an empty
        # string when the very first sentence already exceeded chunk_size.
        if current_length + sentence_length > chunk_size and current_chunk:
            chunks.append(" ".join(current_chunk))
            current_chunk = [sentence]
            current_length = sentence_length
        else:
            current_chunk.append(sentence)
            current_length += sentence_length
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
def semantic_search(query, document):
    """Run a semantic search of *query* over an uploaded document.

    Parameters
    ----------
    query : str
        The user's search query.
    document : str | file-like | object | None
        The value produced by gr.File. Depending on the Gradio version
        this is a filepath string, a file-like object with .read(), or a
        tempfile wrapper exposing the path via .name — TODO confirm
        which this Space's Gradio version delivers.

    Returns
    -------
    str
        Ranked similarity results, or a prompt when no file was given.
    """
    if document is None:
        return "Please upload a .txt file."
    # Bug fix: the original passed the Gradio file object straight into
    # split_into_chunks without ever reading its text content (the
    # read_file helper existed but was never called).
    if isinstance(document, str):
        with open(document, encoding='utf-8') as f:
            text = f.read()
    elif hasattr(document, 'read'):
        text = document.read()
        if isinstance(text, bytes):
            text = text.decode('utf-8')
    else:
        # Gradio tempfile wrapper: .name holds the on-disk path.
        with open(document.name, encoding='utf-8') as f:
            text = f.read()
    chunks = split_into_chunks(text)
    response = query_similarity(query, chunks)
    return format_output(response)
def read_file(file):
    """Return the UTF-8 text content of an uploaded file.

    Parameters
    ----------
    file : str | file-like
        Either a filesystem path (newer Gradio file components hand back
        a path string) or an object whose .read() returns bytes or str.

    Returns
    -------
    str
        The decoded text content.
    """
    if isinstance(file, str):
        with open(file, encoding='utf-8') as f:
            return f.read()
    data = file.read()
    # Bug fix: the original unconditionally called .decode('utf-8'),
    # which raises AttributeError when .read() already yields str
    # (text-mode file objects).
    return data.decode('utf-8') if isinstance(data, bytes) else data
# Define the Gradio interface: a text query plus an uploaded .txt file,
# returning the ranked similarity results as plain text.
iface = gr.Interface(
    fn=semantic_search,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your query here..."),
        gr.File(label="Upload a .txt file")
    ],
    outputs="text",
    title="Document Semantic Search",
    description="Input a query and upload a document (.txt) to find the most semantically similar paragraphs or sentences.",
    examples=[
        ["Enter a sample query here...", None]
    ]
)

# Guard the launch so importing this module (e.g. from tests or another
# entry point) does not start the web server as an import side effect.
# Hugging Face Spaces executes app.py as __main__, so behavior there is
# unchanged.
if __name__ == "__main__":
    iface.launch()