# app.py — Hugging Face Space (commit 8f9f226)
# NOTE: scraped webpage chrome ("raw / history blame / No virus / 2.76 kB")
# removed; it was not part of the program and broke the file as Python.
import gradio as gr
import requests
import os
import re
# Hugging Face Inference API configuration.
# API_TOKEN must be provided via the environment (e.g. a Space secret);
# if unset, requests are sent with "Bearer None" and will be rejected.
API_TOKEN = os.getenv('API_TOKEN')
# Hosted NASA-SMD sentence-transformer model used for similarity scoring.
API_URL = "https://api-inference.huggingface.co/models/nasa-impact/nasa-smd-ibm-st-v2"
headers = {"Authorization": f"Bearer {API_TOKEN}"}
def query_similarity(source_sentence, sentences):
    """Score `sentences` against `source_sentence` via the HF Inference API.

    Parameters
    ----------
    source_sentence : str
        The query sentence to compare against.
    sentences : list[str]
        Candidate text chunks to score.

    Returns
    -------
    tuple
        (response, sentences) where `response` is either a list of float
        similarity scores (one per candidate, same order) or a dict with
        an "error" key when the API body could not be decoded.
    """
    payload = {
        "inputs": {
            "source_sentence": source_sentence,
            "sentences": sentences
        }
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    try:
        return response.json(), sentences
    except ValueError:
        # BUG FIX: the original caught json.JSONDecodeError without ever
        # importing json, so a bad body raised NameError instead of being
        # handled. ValueError is JSONDecodeError's base class and is what
        # response.json() raises on malformed JSON — no new import needed.
        return {"error": "Failed to decode JSON response"}, sentences
def format_output(response, sentences):
    """Render similarity scores as ranked, human-readable text.

    `response` is expected to be a list of scores aligned index-for-index
    with `sentences`; any other shape is reported back verbatim.
    """
    # Guard clause: error dicts (or anything non-list) fall through here.
    if not isinstance(response, list):
        return f"Unexpected response format: {response}"
    # Pair each score with its sentence and rank best-first.
    ranked = sorted(zip(response, sentences), key=lambda pair: pair[0], reverse=True)
    rendered = [
        f"Sentence: {text.strip()}, Score: {value:.4f}\n"
        for value, text in ranked
    ]
    return "\n".join(rendered)
def split_into_chunks(text, chunk_size=100):
    """Greedily pack paragraphs into chunks of at most ~chunk_size words.

    Paragraphs (blank-line separated) are accumulated until adding the next
    one would exceed `chunk_size` words; a single paragraph longer than
    `chunk_size` becomes its own chunk.

    Parameters
    ----------
    text : str
        Document text; paragraphs are delimited by '\\n\\n'.
    chunk_size : int, optional
        Soft word-count limit per chunk (default 100).

    Returns
    -------
    list[str]
        Paragraph groups joined with single spaces.
    """
    paragraphs = text.split('\n\n')  # Split text into paragraphs
    chunks = []
    current_chunk = []
    current_length = 0
    for paragraph in paragraphs:
        paragraph_length = len(paragraph.split())
        if current_length + paragraph_length > chunk_size:
            # BUG FIX: only flush accumulated paragraphs if there are any.
            # The original unconditionally appended " ".join([]) here, so a
            # first paragraph longer than chunk_size produced a spurious
            # empty-string chunk at the front of the result.
            if current_chunk:
                chunks.append(" ".join(current_chunk))
            current_chunk = [paragraph]
            current_length = paragraph_length
        else:
            current_chunk.append(paragraph)
            current_length += paragraph_length
    # Flush whatever remains after the last paragraph.
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
def semantic_search(query, file_path):
    """Score an uploaded .txt document against `query` and return ranked chunks.

    Returns a prompt string when no file was uploaded; otherwise reads the
    file, chunks it, queries the similarity API, and formats the results.
    """
    # Guard clause: nothing to search without an uploaded file.
    if file_path is None:
        return "Please upload a .txt file."
    with open(file_path, 'r', encoding='utf-8') as handle:
        document = handle.read()
    scores, chunks = query_similarity(query, split_into_chunks(document))
    return format_output(scores, chunks)
# Define Gradio interface
# Wires the two inputs (free-text query + uploaded .txt file) into
# semantic_search; the ranked, formatted results are shown as plain text.
iface = gr.Interface(
    fn=semantic_search,
    inputs=[
        gr.Textbox(lines=2, label="Input Query", placeholder="Enter your query here..."),
        gr.File(file_types=['txt'], label="Upload a .txt file")
    ],
    outputs="text",
    title="Semantic Search with Indus-ST (demo)",
    description="Input a query and upload a document (.txt) to find the most semantically similar paragraphs or sentences."
)
# Start the web app (blocks until the server is stopped).
iface.launch()