# app.py — Hugging Face Space (commit 8f9f226)
# NOTE: scraped webpage chrome ("raw / history blame / No virus / 2.76 kB")
# removed; it was not part of the program and broke the file as Python.
import gradio as gr
import requests
import os
import re
# Hugging Face Inference API configuration.
# API_TOKEN must be provided via the environment (e.g. a Space secret);
# if unset, requests are sent with "Bearer None" and will be rejected.
API_TOKEN = os.getenv('API_TOKEN')
# Hosted NASA-SMD sentence-transformer model used for similarity scoring.
API_URL = "https://api-inference.huggingface.co/models/nasa-impact/nasa-smd-ibm-st-v2"
headers = {"Authorization": f"Bearer {API_TOKEN}"}
def query_similarity(source_sentence, sentences):
    """Score `sentences` against `source_sentence` via the HF Inference API.

    Parameters
    ----------
    source_sentence : str
        The query sentence to compare against.
    sentences : list[str]
        Candidate text chunks to score.

    Returns
    -------
    tuple
        (response, sentences) where `response` is either a list of float
        similarity scores (one per candidate, same order) or a dict with
        an "error" key when the API body could not be decoded.
    """
    payload = {
        "inputs": {
            "source_sentence": source_sentence,
            "sentences": sentences
        }
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    try:
        return response.json(), sentences
    except ValueError:
        # BUG FIX: the original caught json.JSONDecodeError without ever
        # importing json, so a bad body raised NameError instead of being
        # handled. ValueError is JSONDecodeError's base class and is what
        # response.json() raises on malformed JSON — no new import needed.
        return {"error": "Failed to decode JSON response"}, sentences
def format_output(response, sentences):
    """Render similarity scores as ranked, human-readable text.

    `response` is expected to be a list of scores aligned index-for-index
    with `sentences`; any other shape is reported back verbatim.
    """
    # Guard clause: error dicts (or anything non-list) fall through here.
    if not isinstance(response, list):
        return f"Unexpected response format: {response}"
    # Pair each score with its sentence and rank best-first.
    ranked = sorted(zip(response, sentences), key=lambda pair: pair[0], reverse=True)
    rendered = [
        f"Sentence: {text.strip()}, Score: {value:.4f}\n"
        for value, text in ranked
    ]
    return "\n".join(rendered)
def split_into_chunks(text, chunk_size=100):
    """Greedily pack paragraphs into chunks of at most ~chunk_size words.

    Paragraphs (blank-line separated) are accumulated until adding the next
    one would exceed `chunk_size` words; a single paragraph longer than
    `chunk_size` becomes its own chunk.

    Parameters
    ----------
    text : str
        Document text; paragraphs are delimited by '\\n\\n'.
    chunk_size : int, optional
        Soft word-count limit per chunk (default 100).

    Returns
    -------
    list[str]
        Paragraph groups joined with single spaces.
    """
    paragraphs = text.split('\n\n')  # Split text into paragraphs
    chunks = []
    current_chunk = []
    current_length = 0
    for paragraph in paragraphs:
        paragraph_length = len(paragraph.split())
        if current_length + paragraph_length > chunk_size:
            # BUG FIX: only flush accumulated paragraphs if there are any.
            # The original unconditionally appended " ".join([]) here, so a
            # first paragraph longer than chunk_size produced a spurious
            # empty-string chunk at the front of the result.
            if current_chunk:
                chunks.append(" ".join(current_chunk))
            current_chunk = [paragraph]
            current_length = paragraph_length
        else:
            current_chunk.append(paragraph)
            current_length += paragraph_length
    # Flush whatever remains after the last paragraph.
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
def semantic_search(query, file_path):
    """Score an uploaded .txt document against `query` and return ranked chunks.

    Returns a prompt string when no file was uploaded; otherwise reads the
    file, chunks it, queries the similarity API, and formats the results.
    """
    # Guard clause: nothing to search without an uploaded file.
    if file_path is None:
        return "Please upload a .txt file."
    with open(file_path, 'r', encoding='utf-8') as handle:
        document = handle.read()
    scores, chunks = query_similarity(query, split_into_chunks(document))
    return format_output(scores, chunks)
# Define Gradio interface
# Wires the two inputs (free-text query + uploaded .txt file) into
# semantic_search; the ranked, formatted results are shown as plain text.
iface = gr.Interface(
    fn=semantic_search,
    inputs=[
        gr.Textbox(lines=2, label="Input Query", placeholder="Enter your query here..."),
        gr.File(file_types=['txt'], label="Upload a .txt file")
    ],
    outputs="text",
    title="Semantic Search with Indus-ST (demo)",
    description="Input a query and upload a document (.txt) to find the most semantically similar paragraphs or sentences."
)
# Start the web app (blocks until the server is stopped).
iface.launch()