"""Tool wrapper exposing a Pyserini Lucene search over the Wikipedia KILT index."""

import json

from pyserini.search.lucene import LuceneSearcher
from transformers import Tool

# Loaded once at import time and shared by every call to ``search``.
# NOTE(review): a local copy of the index was previously used via
# LuceneSearcher('index-wikipedia-kilt-doc-20210421-f29307.b8ec8feb654f7aaa86f9901dc6c804a8');
# confirm which variant deployment expects.
searcher = LuceneSearcher.from_prebuilt_index('wikipedia-kilt-doc')


def search(query: str) -> str:
    """Return the contents of the best-matching document for *query*.

    Args:
        query: Free-text query passed to the Lucene searcher.

    Returns:
        The ``contents`` field of the top hit's raw JSON record, or an empty
        string when the searcher returns no hits.
    """
    hits = searcher.search(query, k=1)
    if not hits:
        # Previously this fell through to hits[0] and raised IndexError.
        return ""
    top_hit = hits[0]
    # The raw stored document is a JSON string; only its 'contents' is exposed.
    return json.loads(top_hit.raw)['contents']


class PyseriniWikipediaKiltDoc(Tool):
    """Tool that answers a text query with the top Wikipedia KILT document."""

    name = "pyserini-wikipedia-kilt-doc"
    # Only the single best hit is returned (search uses k=1), so the
    # description must not promise more results than the tool delivers.
    description = (
        "This is a tool that returns the top result from the Wikipedia KILT index."
    )

    inputs = ["text"]
    outputs = ["text"]

    def __call__(self, query: str) -> str:
        """Run *query* through ``search`` and return the resulting text."""
        return search(query)