"""Gradio front end for BM25 search over Pyserini prebuilt Lucene indexes."""

import functools
import html
import json
import os

# JAVA_HOME can be located with, e.g.:
#   JAVA_HOME=$(readlink -f /usr/bin/javac | sed "s:bin/javac::")
# The directory listings below are printed to help debug the JVM location.
print(os.listdir("/usr/lib"))
print(os.listdir("/usr/lib/jvm"))
# NOTE(review): this is the directory that was just listed, i.e. possibly the
# parent of the JDK installs rather than a single JDK home -- confirm against
# the deployment image.
os.environ["JAVA_HOME"] = "/usr/lib/jvm"
print(os.environ["JAVA_HOME"])

# Imported only after JAVA_HOME is set, preserving the original ordering
# (presumably the Java bridge reads the variable at import time).
import gradio as gr  # noqa: E402
from pyserini.search.lucene import LuceneSearcher  # noqa: E402


@functools.lru_cache(maxsize=4)
def initialize_searcher(index_name):
    """Return a BM25-configured searcher for a Pyserini prebuilt index.

    The searcher is cached per index name so that repeated queries from the
    UI do not reopen the index on every click.

    Args:
        index_name: Name of a Pyserini prebuilt index, e.g. "msmarco-passage".

    Returns:
        A ``LuceneSearcher`` with BM25 parameters k1=0.9, b=0.4.

    Raises:
        ValueError: If Pyserini does not return a searcher for the name.
    """
    # The index name is user-supplied, so it must never be interpolated into
    # a shell command; the library call is used directly instead.
    searcher = LuceneSearcher.from_prebuilt_index(index_name)
    if searcher is None:
        # Guard against a missing searcher (documented for unknown index
        # names) so the user sees a clear message, not an AttributeError.
        raise ValueError(f"Unknown prebuilt index: {index_name!r}")
    searcher.set_bm25(k1=0.9, b=0.4)
    return searcher


def search_pyserini(query, top_k, index_name):
    """Run a BM25 search and return the results rendered as HTML.

    Args:
        query: Free-text search query.
        top_k: Number of hits to return; coerced to ``int`` because it comes
            from a UI slider.
        index_name: Name of the Pyserini prebuilt index to search.

    Returns:
        An HTML string: the formatted result list, or an error ``<div>`` if
        anything goes wrong while searching.
    """
    try:
        searcher = initialize_searcher(index_name)
        hits = searcher.search(query, k=int(top_k))
        results = [
            {
                "rank": rank,
                "doc_id": hit.docid,
                "score": hit.score,
                # The raw stored document is JSON with a "contents" field.
                "content": json.loads(searcher.doc(hit.docid).raw())["contents"],
            }
            for rank, hit in enumerate(hits, start=1)
        ]
        return format_results(results)
    except Exception as e:
        # UI boundary: show the failure in the page instead of crashing the
        # handler. The message is escaped because it may echo user input.
        return f"<div class='error'>An error occurred: {html.escape(str(e))}</div>"


def format_results(results):
    """Render search results as an HTML fragment.

    Args:
        results: Iterable of dicts with keys "rank", "score", "doc_id" and
            "content".

    Returns:
        An HTML string using the ``results-container`` / ``result-item`` /
        ``doc-id`` / ``content`` classes styled by the page CSS. Document ids
        and contents are HTML-escaped so they display as text.
    """
    items = [
        f"""
        <div class='result-item'>
            <h3>Rank {result['rank']} (Score: {result['score']:.4f})</h3>
            <div class='doc-id'>Doc ID: {html.escape(str(result['doc_id']))}</div>
            <div class='content'>{html.escape(str(result['content']))}</div>
        </div>
        """
        for result in results
    ]
    return "<div class='results-container'>" + "".join(items) + "</div>"


css = """
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.results-container {
    display: flex;
    flex-direction: column;
    gap: 20px;
}
.result-item {
    border: 1px solid #ddd;
    border-radius: 8px;
    padding: 15px;
    width: 100%;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.result-item h3 {
    margin-top: 0;
    color: #333;
}
.doc-id {
    font-size: 0.9em;
    color: #666;
    margin-bottom: 10px;
}
.content {
    font-size: 0.95em;
    line-height: 1.4;
}
.error {
    color: red;
    font-weight: bold;
}
"""

with gr.Blocks(css=css) as iface:
    gr.Markdown("# Pyserini Search Interface")
    gr.Markdown("Enter a query to search using Pyserini with BM25 scoring (k1=0.9, b=0.4).")

    with gr.Row():
        index_input = gr.Textbox(
            value="msmarco-passage",
            lines=1,
            label="Prebuilt Index Name",
            placeholder="Enter the name of the prebuilt index",
        )

    with gr.Row():
        top_k_slider = gr.Slider(
            minimum=1,
            maximum=100,
            value=10,
            step=1,
            label="Number of top results to return",
        )

    with gr.Row():
        query_input = gr.Textbox(
            lines=1,
            placeholder="Enter your search query here...",
            label="Search Query",
        )

    with gr.Row():
        search_button = gr.Button("Search", variant="primary")

    with gr.Row():
        output = gr.HTML(label="Search Results")

    # Event wiring must happen inside the Blocks context.
    search_button.click(
        fn=search_pyserini,
        inputs=[query_input, top_k_slider, index_input],
        outputs=output,
    )

iface.launch()