Spaces:
Sleeping
Sleeping
import os | |
# set JAVA_HOME by finding it, e.g. JAVA_HOME=$(readlink -f /usr/bin/javac | sed "s:bin/javac::") | |
# print the contents of /user/lib | |
print(os.listdir("/usr/lib/jvm/default-java")) | |
os.environ["JAVA_HOME"] = "/usr/lib/jvm/default-java" | |
print(os.environ["JAVA_HOME"]) | |
import gradio as gr | |
from pyserini.search.lucene import LuceneSearcher | |
import os | |
import json | |
def initialize_searcher(index_name): | |
if not os.path.exists(index_name): | |
os.system(f'python -c "from pyserini.search import LuceneSearcher; LuceneSearcher.from_prebuilt_index(\'{index_name}\')"') | |
searcher = LuceneSearcher.from_prebuilt_index(index_name) | |
searcher.set_bm25(k1=0.9, b=0.4) | |
return searcher | |
def search_pyserini(query, top_k, index_name): | |
try: | |
searcher = initialize_searcher(index_name) | |
hits = searcher.search(query, k=top_k) | |
results = [] | |
for i, hit in enumerate(hits): | |
doc = searcher.doc(hit.docid) | |
doc_dict = json.loads(doc.raw()) | |
results.append({ | |
"rank": i + 1, | |
"doc_id": hit.docid, | |
"score": hit.score, | |
"content": doc_dict['contents'] | |
}) | |
return format_results(results) | |
except Exception as e: | |
return f"<div class='error'>An error occurred: {str(e)}</div>" | |
def format_results(results): | |
html = "<div class='results-container'>" | |
for result in results: | |
html += f""" | |
<div class='result-item'> | |
<h3>Rank {result['rank']} (Score: {result['score']:.4f})</h3> | |
<p class='doc-id'>Doc ID: {result['doc_id']}</p> | |
<p class='content'>{result['content']}</p> | |
</div> | |
""" | |
html += "</div>" | |
return html | |
css = """ | |
.gradio-container { | |
font-family: 'Arial', sans-serif; | |
} | |
.results-container { | |
display: flex; | |
flex-direction: column; | |
gap: 20px; | |
} | |
.result-item { | |
border: 1px solid #ddd; | |
border-radius: 8px; | |
padding: 15px; | |
width: 100%; | |
box-shadow: 0 2px 4px rgba(0,0,0,0.1); | |
} | |
.result-item h3 { | |
margin-top: 0; | |
color: #333; | |
} | |
.doc-id { | |
font-size: 0.9em; | |
color: #666; | |
margin-bottom: 10px; | |
} | |
.content { | |
font-size: 0.95em; | |
line-height: 1.4; | |
} | |
.error { | |
color: red; | |
font-weight: bold; | |
} | |
""" | |
with gr.Blocks(css=css) as iface: | |
gr.Markdown("# Pyserini Search Interface") | |
gr.Markdown("Enter a query to search using Pyserini with BM25 scoring (k1=0.9, b=0.4).") | |
with gr.Row(): | |
index_input = gr.Textbox( | |
value="msmarco-passage", | |
lines=1, | |
label="Prebuilt Index Name", | |
placeholder="Enter the name of the prebuilt index" | |
) | |
with gr.Row(): | |
top_k_slider = gr.Slider( | |
minimum=1, | |
maximum=100, | |
value=10, | |
step=1, | |
label="Number of top results to return" | |
) | |
with gr.Row(): | |
query_input = gr.Textbox( | |
lines=1, | |
placeholder="Enter your search query here...", | |
label="Search Query" | |
) | |
with gr.Row(): | |
search_button = gr.Button("Search", variant="primary") | |
with gr.Row(): | |
output = gr.HTML(label="Search Results") | |
search_button.click( | |
fn=search_pyserini, | |
inputs=[query_input, top_k_slider, index_input], | |
outputs=output | |
) | |
iface.launch() |