aquibmoin committed on
Commit
78db47d
1 Parent(s): 35e4eff

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import os
4
+ import re
5
+
6
+ API_TOKEN = os.getenv('API_TOKEN')
7
+ API_URL = "https://api-inference.huggingface.co/models/nasa-impact/nasa-smd-ibm-st-v2"
8
+ headers = {"Authorization": f"Bearer {API_TOKEN}"}
9
+
10
def query_similarity(source_sentence, sentences):
    """Ask the hosted model to score candidate sentences against a source sentence.

    Args:
        source_sentence: Text that every candidate is compared with.
        sentences: List of candidate strings to score.

    Returns:
        The decoded JSON body of the response from ``API_URL``.

    Raises:
        requests.exceptions.Timeout: If the API does not answer within the
            timeout.
    """
    payload = {
        "inputs": {
            "source_sentence": source_sentence,
            "sentences": sentences,
        }
    }
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would freeze the UI request that triggered this call.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    return response.json()
19
+
20
def format_output(response):
    """Render similarity results as text, best match first.

    Args:
        response: Either a list of mappings, each with a ``'sentence'`` and a
            numeric ``'score'`` key, or a dict describing a failure
            (presumably what the API returns on errors, e.g.
            ``{"error": "..."}`` -- confirm against the API docs).

    Returns:
        One ``"Sentence: ..., Score: ..."`` line per result, sorted by
        descending score and joined with newlines; or an ``"API error: ..."``
        message when ``response`` is a dict.
    """
    # A dict is not a result list; sorting it would iterate its keys and fail
    # with an unhelpful TypeError, so surface the payload to the user instead.
    if isinstance(response, dict):
        return f"API error: {response.get('error', response)}"

    ranked = sorted(response, key=lambda item: item['score'], reverse=True)
    return "\n".join(
        f"Sentence: {item['sentence']}, Score: {item['score']:.4f}"
        for item in ranked
    )
26
+
27
def split_into_chunks(text, chunk_size=100):
    """Group the sentences of ``text`` into chunks of roughly ``chunk_size`` words.

    Sentences are detected by ``.``, ``!`` or ``?`` followed by spaces and are
    never split: a sentence longer than ``chunk_size`` words becomes a chunk
    of its own.

    Args:
        text: The text to split.
        chunk_size: Maximum number of whitespace-separated words per chunk.

    Returns:
        A list of non-empty chunk strings, in document order. Empty or
        whitespace-only text yields an empty list.
    """
    sentences = re.split(r'(?<=[.!?]) +', text)  # Split text into sentences
    chunks = []
    current_chunk = []
    current_length = 0

    for sentence in sentences:
        sentence_length = len(sentence.split())
        if sentence_length == 0:
            # Blank fragments (empty input, trailing spaces) carry no content.
            continue
        if current_length + sentence_length > chunk_size:
            # Flush only when something was collected; otherwise an oversized
            # first sentence would produce a spurious empty chunk.
            if current_chunk:
                chunks.append(" ".join(current_chunk))
            current_chunk = [sentence]
            current_length = sentence_length
        else:
            current_chunk.append(sentence)
            current_length += sentence_length

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks
47
+
48
def _document_text(document):
    """Return the text of ``document`` whether it is text, a path, or a file object."""
    if document is None:
        return ""
    if isinstance(document, bytes):
        return document.decode("utf-8")
    if isinstance(document, str):
        # A string naming an existing file is treated as a path; anything else
        # is taken to be the document text itself.
        if os.path.isfile(document):
            with open(document, encoding="utf-8") as handle:
                return handle.read()
        return document
    # Upload wrappers usually expose the on-disk location as ``name``; reading
    # from the path avoids depending on the handle's mode or position.
    path = getattr(document, "name", None)
    if isinstance(path, str) and os.path.isfile(path):
        with open(path, encoding="utf-8") as handle:
            return handle.read()
    data = document.read()
    return data.decode("utf-8") if isinstance(data, bytes) else data


def semantic_search(query, document):
    """Rank the chunks of a document by semantic similarity to ``query``.

    Args:
        query: The search text.
        document: The document as raw text, a path to a UTF-8 text file, or an
            uploaded-file object exposing ``name`` and/or ``read()``.
            NOTE(review): which of these the File component delivers depends
            on the Gradio version/configuration -- confirm for the deployed one.

    Returns:
        The formatted, score-sorted results, or a short message when no
        document text is available.
    """
    text = _document_text(document)
    if not text.strip():
        return "Please upload a non-empty text document."

    chunks = split_into_chunks(text)
    response = query_similarity(query, chunks)

    # The sentence-similarity endpoint is documented to return one bare score
    # per input sentence; pair each score with its chunk so format_output gets
    # the 'sentence'/'score' records it indexes.
    if isinstance(response, list) and all(
        isinstance(score, (int, float)) for score in response
    ):
        response = [
            {"sentence": chunk, "score": score}
            for chunk, score in zip(chunks, response)
        ]
    return format_output(response)
52
+
53
def read_file(file):
    """Return the text content of a file given as a path or a file-like object.

    Args:
        file: A filesystem path (``str`` or ``os.PathLike``) to a UTF-8 text
            file, or an object with a ``read()`` method returning ``bytes``
            or ``str``.

    Returns:
        The content as ``str``; bytes are decoded as UTF-8.
    """
    if isinstance(file, (str, os.PathLike)):
        with open(file, encoding="utf-8") as handle:
            return handle.read()
    data = file.read()
    # Binary-mode handles yield bytes; text-mode handles already yield str,
    # which has no .decode().
    return data.decode('utf-8') if isinstance(data, bytes) else data
56
+
57
# Build the Gradio UI: a query box and a file upload feed semantic_search,
# whose returned string is shown as plain text.
query_box = gr.Textbox(lines=2, placeholder="Enter your query here...")
document_upload = gr.File(label="Upload a .txt file")
sample_rows = [["Enter a sample query here...", None]]

iface = gr.Interface(
    fn=semantic_search,
    inputs=[query_box, document_upload],
    outputs="text",
    title="Document Semantic Search",
    description="Input a query and upload a document (.txt) to find the most semantically similar paragraphs or sentences.",
    examples=sample_rows,
)

# Start the web app.
iface.launch()
73
+