Spaces:
Sleeping
Sleeping
orionweller
committed on
Commit
•
dd10fbb
1
Parent(s):
da8206b
more options
Browse files
app.py
CHANGED
@@ -1,35 +1,34 @@
|
|
1 |
import gradio as gr
|
2 |
from pyserini.search.lucene import LuceneSearcher
|
3 |
import os
|
|
|
4 |
|
5 |
-
|
6 |
-
if not os.path.exists(
|
7 |
-
|
|
|
|
|
|
|
8 |
|
9 |
-
|
10 |
-
searcher.set_bm25(k1=0.9, b=0.4)
|
11 |
-
|
12 |
-
def search_pyserini(query):
|
13 |
try:
|
14 |
-
|
|
|
15 |
results = []
|
16 |
for i, hit in enumerate(hits):
|
17 |
doc = searcher.doc(hit.docid)
|
18 |
-
|
19 |
results.append({
|
20 |
"rank": i + 1,
|
21 |
"doc_id": hit.docid,
|
22 |
"score": hit.score,
|
23 |
-
"content":
|
24 |
})
|
25 |
-
return results
|
26 |
except Exception as e:
|
27 |
-
return
|
28 |
|
29 |
def format_results(results):
|
30 |
-
if isinstance(results, list) and len(results) > 0 and "error" in results[0]:
|
31 |
-
return f"<div class='error'>An error occurred: {results[0]['error']}</div>"
|
32 |
-
|
33 |
html = "<div class='results-container'>"
|
34 |
for result in results:
|
35 |
html += f"""
|
@@ -48,14 +47,14 @@ css = """
|
|
48 |
}
|
49 |
.results-container {
|
50 |
display: flex;
|
51 |
-
flex-
|
52 |
gap: 20px;
|
53 |
}
|
54 |
.result-item {
|
55 |
border: 1px solid #ddd;
|
56 |
border-radius: 8px;
|
57 |
padding: 15px;
|
58 |
-
width:
|
59 |
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
60 |
}
|
61 |
.result-item h3 {
|
@@ -81,6 +80,23 @@ with gr.Blocks(css=css) as iface:
|
|
81 |
gr.Markdown("# Pyserini Search Interface")
|
82 |
gr.Markdown("Enter a query to search using Pyserini with BM25 scoring (k1=0.9, b=0.4).")
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
with gr.Row():
|
85 |
query_input = gr.Textbox(
|
86 |
lines=1,
|
@@ -96,31 +112,8 @@ with gr.Blocks(css=css) as iface:
|
|
96 |
|
97 |
search_button.click(
|
98 |
fn=search_pyserini,
|
99 |
-
inputs=query_input,
|
100 |
-
outputs=output
|
101 |
-
_js="(results) => format_results(results)" # Client-side formatting
|
102 |
)
|
103 |
|
104 |
-
# Add the JavaScript function to format results
|
105 |
-
iface.load(js="""
|
106 |
-
function format_results(results) {
|
107 |
-
if (Array.isArray(results) && results.length > 0 && results[0].hasOwnProperty('error')) {
|
108 |
-
return `<div class='error'>An error occurred: ${results[0].error}</div>`;
|
109 |
-
}
|
110 |
-
|
111 |
-
let html = "<div class='results-container'>";
|
112 |
-
for (let result of results) {
|
113 |
-
html += `
|
114 |
-
<div class='result-item'>
|
115 |
-
<h3>Rank ${result.rank} (Score: ${result.score.toFixed(4)})</h3>
|
116 |
-
<p class='doc-id'>Doc ID: ${result.doc_id}</p>
|
117 |
-
<p class='content'>${result.content}</p>
|
118 |
-
</div>
|
119 |
-
`;
|
120 |
-
}
|
121 |
-
html += "</div>";
|
122 |
-
return html;
|
123 |
-
}
|
124 |
-
""")
|
125 |
-
|
126 |
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
from pyserini.search.lucene import LuceneSearcher
|
3 |
import os
|
4 |
+
import json
|
5 |
|
6 |
+
def initialize_searcher(index_name):
|
7 |
+
if not os.path.exists(index_name):
|
8 |
+
os.system(f'python -c "from pyserini.search import LuceneSearcher; LuceneSearcher.from_prebuilt_index(\'{index_name}\')"')
|
9 |
+
searcher = LuceneSearcher.from_prebuilt_index(index_name)
|
10 |
+
searcher.set_bm25(k1=0.9, b=0.4)
|
11 |
+
return searcher
|
12 |
|
13 |
+
def search_pyserini(query, top_k, index_name):
|
|
|
|
|
|
|
14 |
try:
|
15 |
+
searcher = initialize_searcher(index_name)
|
16 |
+
hits = searcher.search(query, k=top_k)
|
17 |
results = []
|
18 |
for i, hit in enumerate(hits):
|
19 |
doc = searcher.doc(hit.docid)
|
20 |
+
doc_dict = json.loads(doc.raw())
|
21 |
results.append({
|
22 |
"rank": i + 1,
|
23 |
"doc_id": hit.docid,
|
24 |
"score": hit.score,
|
25 |
+
"content": doc_dict['contents']
|
26 |
})
|
27 |
+
return format_results(results)
|
28 |
except Exception as e:
|
29 |
+
return f"<div class='error'>An error occurred: {str(e)}</div>"
|
30 |
|
31 |
def format_results(results):
|
|
|
|
|
|
|
32 |
html = "<div class='results-container'>"
|
33 |
for result in results:
|
34 |
html += f"""
|
|
|
47 |
}
|
48 |
.results-container {
|
49 |
display: flex;
|
50 |
+
flex-direction: column;
|
51 |
gap: 20px;
|
52 |
}
|
53 |
.result-item {
|
54 |
border: 1px solid #ddd;
|
55 |
border-radius: 8px;
|
56 |
padding: 15px;
|
57 |
+
width: 100%;
|
58 |
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
59 |
}
|
60 |
.result-item h3 {
|
|
|
80 |
gr.Markdown("# Pyserini Search Interface")
|
81 |
gr.Markdown("Enter a query to search using Pyserini with BM25 scoring (k1=0.9, b=0.4).")
|
82 |
|
83 |
+
with gr.Row():
|
84 |
+
index_input = gr.Textbox(
|
85 |
+
value="msmarco-passage",
|
86 |
+
lines=1,
|
87 |
+
label="Prebuilt Index Name",
|
88 |
+
placeholder="Enter the name of the prebuilt index"
|
89 |
+
)
|
90 |
+
|
91 |
+
with gr.Row():
|
92 |
+
top_k_slider = gr.Slider(
|
93 |
+
minimum=1,
|
94 |
+
maximum=100,
|
95 |
+
value=10,
|
96 |
+
step=1,
|
97 |
+
label="Number of top results to return"
|
98 |
+
)
|
99 |
+
|
100 |
with gr.Row():
|
101 |
query_input = gr.Textbox(
|
102 |
lines=1,
|
|
|
112 |
|
113 |
search_button.click(
|
114 |
fn=search_pyserini,
|
115 |
+
inputs=[query_input, top_k_slider, index_input],
|
116 |
+
outputs=output
|
|
|
117 |
)
|
118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
iface.launch()
|