pyserini-wikipedia-kilt-doc / pyserini_wikipedia_kilt_doc.py
vwxyzjn's picture
commit changes
961c631
raw
history blame
820 Bytes
from pyserini.search.lucene import LuceneSearcher
from transformers import Tool
import json
# Open the Lucene index once at import time; `search` below reuses this
# module-level searcher for every query instead of reopening the index.
# NOTE(review): the argument is presumably a local index directory resolved
# relative to the working directory -- confirm against the deployment layout.
# Alternative kept for reference (loads the index by its prebuilt name):
# searcher = LuceneSearcher.from_prebuilt_index('wikipedia-kilt-doc')
searcher = LuceneSearcher('index-wikipedia-kilt-doc-20210421-f29307.b8ec8feb654f7aaa86f9901dc6c804a8')
def search(query):
    """Return the contents of the best-matching document for ``query``.

    Runs ``query`` against the module-level ``searcher`` requesting a single
    hit (``k=1``), parses that hit's raw payload as JSON and returns its
    ``'contents'`` field.

    Args:
        query: Query string forwarded to ``searcher.search``.

    Returns:
        The ``'contents'`` value of the top hit, or an empty string when the
        search produces no hits.
    """
    hits = searcher.search(query, k=1)
    if not hits:
        # Without this guard, indexing hits[0] raised IndexError for queries
        # that match nothing in the index.
        return ""
    top_hit = hits[0]
    return json.loads(top_hit.raw)['contents']
class PyseriniWikipediaKiltDoc(Tool):
    """Tool that looks up a text query in the Wikipedia KILT index.

    Calling an instance delegates to the module-level ``search`` function,
    which returns the contents of the single best-matching document.
    """

    name = "pyserini-wikipedia-kilt-doc"
    # The description is what an agent reads to decide how to use the tool, so
    # it must match what `search` does: one hit (k=1), not five.
    description = (
        "This is a tool that returns the contents of the top result from the "
        "Wikipedia KILT index."
    )
    inputs = ["text"]
    outputs = ["text"]

    def __call__(self, query: str):
        """Return the result of ``search(query)`` for the given query text."""
        return search(query)