|
import gradio as gr |
|
import requests |
|
import os |
|
import re |
|
|
|
# Hugging Face Inference API configuration.
# NOTE(review): if API_TOKEN is unset in the environment, the header below
# becomes "Bearer None" and authenticated requests will be rejected.
API_TOKEN = os.getenv('API_TOKEN')

# Hosted sentence-similarity model endpoint (nasa-impact Indus-ST v2).
API_URL = "https://api-inference.huggingface.co/models/nasa-impact/nasa-smd-ibm-st-v2"

headers = {"Authorization": f"Bearer {API_TOKEN}"}
|
|
|
def query_similarity(source_sentence, sentences):
    """Score each candidate sentence against *source_sentence* via the HF API.

    Args:
        source_sentence: The query string.
        sentences: List of candidate strings to rank.

    Returns:
        A ``(result, sentences)`` tuple where ``result`` is the decoded JSON
        response (a list of float scores on success, or an error dict), and
        ``sentences`` is echoed back so the caller can pair scores to text.
    """
    payload = {
        "inputs": {
            "source_sentence": source_sentence,
            "sentences": sentences,
        }
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    try:
        return response.json(), sentences
    except ValueError:
        # Bug fix: the original caught ``json.JSONDecodeError`` but ``json``
        # was never imported, so a malformed body raised NameError instead of
        # returning this error dict. ``response.json()`` raises a ValueError
        # subclass on undecodable bodies in every requests version.
        return {"error": "Failed to decode JSON response"}, sentences
|
|
|
def format_output(response, sentences):
    """Render API similarity scores as human-readable text.

    Args:
        response: Decoded API payload — a list of float scores on success,
            anything else (e.g. an error dict) otherwise.
        sentences: The candidate sentences, in the same order as the scores.

    Returns:
        A newline-separated report of sentences sorted by descending score,
        or a diagnostic string when ``response`` is not a list.
    """
    if not isinstance(response, list):
        return f"Unexpected response format: {response}"

    # Pair each score with its sentence, best match first.
    ranked = sorted(zip(response, sentences), key=lambda pair: pair[0], reverse=True)

    report_lines = [
        f"Sentence: {text.strip()}, Score: {score:.4f}\n"
        for score, text in ranked
    ]
    return "\n".join(report_lines)
|
|
|
def split_into_chunks(text, chunk_size=100):
    """Split *text* into chunks of roughly *chunk_size* words.

    Paragraphs (separated by blank lines) are kept intact: a paragraph is
    never split, so a single paragraph longer than *chunk_size* becomes its
    own oversized chunk.

    Args:
        text: The document to split.
        chunk_size: Soft word-count limit per chunk.

    Returns:
        List of chunk strings (paragraphs joined by a single space).
    """
    paragraphs = text.split('\n\n')
    chunks = []
    current_chunk = []
    current_length = 0

    for paragraph in paragraphs:
        paragraph_length = len(paragraph.split())
        # Bug fix: only flush when there is something to flush. The original
        # flushed unconditionally, so a first paragraph longer than
        # chunk_size produced a spurious empty-string chunk.
        if current_chunk and current_length + paragraph_length > chunk_size:
            chunks.append(" ".join(current_chunk))
            current_chunk = [paragraph]
            current_length = paragraph_length
        else:
            current_chunk.append(paragraph)
            current_length += paragraph_length

    # Flush the trailing partial chunk.
    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks
|
|
|
def semantic_search(query, file_path):
    """Rank chunks of an uploaded .txt document by similarity to *query*.

    Args:
        query: The search query entered by the user.
        file_path: Path to the uploaded text file, or None if nothing was
            uploaded.

    Returns:
        A formatted report of chunks sorted by similarity score, or a
        prompt asking the user to upload a file.
    """
    # Guard clause: nothing to search without a document.
    if file_path is None:
        return "Please upload a .txt file."

    with open(file_path, 'r', encoding='utf-8') as handle:
        document_text = handle.read()

    passages = split_into_chunks(document_text)
    api_response, searched_passages = query_similarity(query, passages)
    return format_output(api_response, searched_passages)
|
|
|
|
|
|
|
# Markdown rendered beneath the interface (passed to Gradio's `article` slot).
# This is user-facing text — keep content unchanged.
article_text = """

### Notes:

### Possible Demo Apps:

1. **Semantic Search**
2. **Document Clustering**
3. **Paraphrase Detection**
4. **Textual Entailment**
5. **Question-Answer Matching**
6. **Summarization Validation**
7. **Duplicate Detection**

"""
|
|
|
# Gradio UI: a two-line query textbox plus a .txt file upload, wired to
# semantic_search; the result is shown as plain text.
iface = gr.Interface(
    fn=semantic_search,
    inputs=[
        gr.Textbox(lines=2, label="Input Query", placeholder="Enter your query here..."),
        gr.File(file_types=['txt'], label="Upload a .txt file")
    ],
    outputs="text",
    title="Semantic Search with Indus-ST (demo)",
    description="Input a query and upload a document (.txt) to find the most semantically similar paragraphs or sentences.",
    article=article_text
)

# Start the Gradio server (blocks until shut down).
iface.launch()
|
|
|
|