orionweller committed on
Commit
dd10fbb
1 Parent(s): da8206b

more options

Browse files
Files changed (1) hide show
  1. app.py +35 -42
app.py CHANGED
@@ -1,35 +1,34 @@
1
  import gradio as gr
2
  from pyserini.search.lucene import LuceneSearcher
3
  import os
 
4
 
5
- # Download the index if it doesn't exist
6
- if not os.path.exists('msmarco-passage'):
7
- os.system('python -c "from pyserini.search import LuceneSearcher; LuceneSearcher.from_prebuilt_index(\'msmarco-passage\')"')
 
 
 
8
 
9
- searcher = LuceneSearcher.from_prebuilt_index('msmarco-passage')
10
- searcher.set_bm25(k1=0.9, b=0.4)
11
-
12
- def search_pyserini(query):
13
  try:
14
- hits = searcher.search(query, k=10)
 
15
  results = []
16
  for i, hit in enumerate(hits):
17
  doc = searcher.doc(hit.docid)
18
- content = doc.raw()
19
  results.append({
20
  "rank": i + 1,
21
  "doc_id": hit.docid,
22
  "score": hit.score,
23
- "content": content
24
  })
25
- return results
26
  except Exception as e:
27
- return [{"error": str(e)}]
28
 
29
  def format_results(results):
30
- if isinstance(results, list) and len(results) > 0 and "error" in results[0]:
31
- return f"<div class='error'>An error occurred: {results[0]['error']}</div>"
32
-
33
  html = "<div class='results-container'>"
34
  for result in results:
35
  html += f"""
@@ -48,14 +47,14 @@ css = """
48
  }
49
  .results-container {
50
  display: flex;
51
- flex-wrap: wrap;
52
  gap: 20px;
53
  }
54
  .result-item {
55
  border: 1px solid #ddd;
56
  border-radius: 8px;
57
  padding: 15px;
58
- width: calc(50% - 10px);
59
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
60
  }
61
  .result-item h3 {
@@ -81,6 +80,23 @@ with gr.Blocks(css=css) as iface:
81
  gr.Markdown("# Pyserini Search Interface")
82
  gr.Markdown("Enter a query to search using Pyserini with BM25 scoring (k1=0.9, b=0.4).")
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  with gr.Row():
85
  query_input = gr.Textbox(
86
  lines=1,
@@ -96,31 +112,8 @@ with gr.Blocks(css=css) as iface:
96
 
97
  search_button.click(
98
  fn=search_pyserini,
99
- inputs=query_input,
100
- outputs=output,
101
- _js="(results) => format_results(results)" # Client-side formatting
102
  )
103
 
104
- # Add the JavaScript function to format results
105
- iface.load(js="""
106
- function format_results(results) {
107
- if (Array.isArray(results) && results.length > 0 && results[0].hasOwnProperty('error')) {
108
- return `<div class='error'>An error occurred: ${results[0].error}</div>`;
109
- }
110
-
111
- let html = "<div class='results-container'>";
112
- for (let result of results) {
113
- html += `
114
- <div class='result-item'>
115
- <h3>Rank ${result.rank} (Score: ${result.score.toFixed(4)})</h3>
116
- <p class='doc-id'>Doc ID: ${result.doc_id}</p>
117
- <p class='content'>${result.content}</p>
118
- </div>
119
- `;
120
- }
121
- html += "</div>";
122
- return html;
123
- }
124
- """)
125
-
126
  iface.launch()
 
1
  import gradio as gr
2
  from pyserini.search.lucene import LuceneSearcher
3
  import os
4
+ import json
5
 
6
def initialize_searcher(index_name):
    """Load a prebuilt Pyserini Lucene index and configure BM25 scoring.

    Args:
        index_name: Name of a Pyserini prebuilt index, e.g. "msmarco-passage".
            Surrounding whitespace is ignored.

    Returns:
        A ``LuceneSearcher`` with BM25 parameters k1=0.9 and b=0.4.

    Raises:
        ValueError: If ``index_name`` is not a non-empty string, or if the
            index could not be loaded.
    """
    if not isinstance(index_name, str) or not index_name.strip():
        raise ValueError("Prebuilt index name must be a non-empty string.")
    index_name = index_name.strip()

    # The index name comes from a free-text UI field, so it must never be
    # interpolated into a shell command. from_prebuilt_index() already fetches
    # the index on first use, which makes a separate download subprocess
    # unnecessary.
    searcher = LuceneSearcher.from_prebuilt_index(index_name)

    # NOTE(review): some Pyserini versions signal an unknown index name by
    # returning None instead of raising; fail with a readable message rather
    # than an AttributeError on the next line.
    if searcher is None:
        raise ValueError(f"Unable to load prebuilt index {index_name!r}.")

    searcher.set_bm25(k1=0.9, b=0.4)
    return searcher
12
 
13
def search_pyserini(query, top_k, index_name):
    """Run a BM25 search and return the results rendered as HTML.

    Args:
        query: Free-text query string.
        top_k: Number of hits to retrieve. Converted to ``int`` because UI
            sliders may deliver the value as a float.
        index_name: Name of the prebuilt index, passed to
            ``initialize_searcher``.

    Returns:
        The HTML produced by ``format_results`` for the retrieved hits, or a
        ``<div class='error'>`` fragment describing the failure. This function
        does not raise; every exception is turned into the error fragment so
        the UI always has something to display.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from html import escape

    try:
        # Validate the cheap input first, before the index is loaded.
        k = int(top_k)
        searcher = initialize_searcher(index_name)
        hits = searcher.search(query, k=k)

        results = []
        for rank, hit in enumerate(hits, start=1):
            doc = searcher.doc(hit.docid)
            # The stored raw document is parsed as JSON and its 'contents'
            # field is what gets displayed.
            doc_dict = json.loads(doc.raw())
            results.append({
                "rank": rank,
                "doc_id": hit.docid,
                "score": hit.score,
                "content": doc_dict['contents'],
            })
        return format_results(results)
    except Exception as e:
        # Exception text can echo user-supplied input (query, index name),
        # so escape it before embedding it in the HTML output.
        return f"<div class='error'>An error occurred: {escape(str(e))}</div>"
30
 
31
  def format_results(results):
 
 
 
32
  html = "<div class='results-container'>"
33
  for result in results:
34
  html += f"""
 
47
  }
48
  .results-container {
49
  display: flex;
50
+ flex-direction: column;
51
  gap: 20px;
52
  }
53
  .result-item {
54
  border: 1px solid #ddd;
55
  border-radius: 8px;
56
  padding: 15px;
57
+ width: 100%;
58
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
59
  }
60
  .result-item h3 {
 
80
  gr.Markdown("# Pyserini Search Interface")
81
  gr.Markdown("Enter a query to search using Pyserini with BM25 scoring (k1=0.9, b=0.4).")
82
 
83
+ with gr.Row():
84
+ index_input = gr.Textbox(
85
+ value="msmarco-passage",
86
+ lines=1,
87
+ label="Prebuilt Index Name",
88
+ placeholder="Enter the name of the prebuilt index"
89
+ )
90
+
91
+ with gr.Row():
92
+ top_k_slider = gr.Slider(
93
+ minimum=1,
94
+ maximum=100,
95
+ value=10,
96
+ step=1,
97
+ label="Number of top results to return"
98
+ )
99
+
100
  with gr.Row():
101
  query_input = gr.Textbox(
102
  lines=1,
 
112
 
113
  search_button.click(
114
  fn=search_pyserini,
115
+ inputs=[query_input, top_k_slider, index_input],
116
+ outputs=output
 
117
  )
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  iface.launch()