import json
import os

import gradio as gr
import requests

API_TOKEN = os.getenv('API_TOKEN')
API_URL = "https://api-inference.huggingface.co/models/nasa-impact/nasa-smd-ibm-st-v2"
headers = {"Authorization": f"Bearer {API_TOKEN}"}


def query_similarity(source_sentence, sentences):
    """Call the Hugging Face Inference API sentence-similarity endpoint."""
    payload = {
        "inputs": {
            "source_sentence": source_sentence,
            "sentences": sentences
        }
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    # Ensure the response body is valid JSON before returning it
    try:
        return response.json(), sentences
    except json.JSONDecodeError:
        return {"error": "Failed to decode JSON response"}, sentences


def format_output(response, sentences):
    """Pair each similarity score with its sentence and return a ranked, readable list."""
    if isinstance(response, list):
        # Pair each score with its corresponding sentence
        results = list(zip(response, sentences))
        # Sort results by score in descending order
        results = sorted(results, key=lambda x: x[0], reverse=True)
        # Format the output
        formatted_results = []
        for score, sentence in results:
            formatted_results.append(f"Sentence: {sentence.strip()}, Score: {score:.4f}\n")
        return "\n".join(formatted_results)
    else:
        return f"Unexpected response format: {response}"


def split_into_chunks(text, chunk_size=100):
    """Group paragraphs into chunks of roughly `chunk_size` words each."""
    paragraphs = text.split('\n\n')  # Split text into paragraphs
    chunks = []
    current_chunk = []
    current_length = 0
    for paragraph in paragraphs:
        paragraph_length = len(paragraph.split())
        # Start a new chunk once the word budget is exceeded (skip if the chunk is still empty)
        if current_chunk and current_length + paragraph_length > chunk_size:
            chunks.append(" ".join(current_chunk))
            current_chunk = [paragraph]
            current_length = paragraph_length
        else:
            current_chunk.append(paragraph)
            current_length += paragraph_length
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks


def semantic_search(query, file_path):
    """Read the uploaded .txt file, chunk it, and rank the chunks by similarity to the query."""
    if file_path is not None:
        with open(file_path, 'r', encoding='utf-8') as file:
            document = file.read()
        chunks = split_into_chunks(document)
        response, sentences = query_similarity(query, chunks)
        return format_output(response, sentences)
    else:
        return "Please upload a .txt file."


# Define Gradio interface
iface = gr.Interface(
    fn=semantic_search,
    inputs=[
        gr.Textbox(lines=2, label="Input Query", placeholder="Enter your query here..."),
        gr.File(file_types=['.txt'], label="Upload a .txt file")
    ],
    outputs="text",
    title="Semantic Search with Indus-ST (demo)",
    description="Input a query and upload a document (.txt) to find the most semantically similar paragraphs or sentences."
)

iface.launch()