"""Gradio demo that turns a free-text search query into a text embedding."""

import functools

import gradio as gr
import ir_datasets
import pandas as pd
from autogluon.multimodal import MultiModalPredictor

# Hugging Face checkpoint used for feature extraction.
_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"


@functools.lru_cache(maxsize=1)
def _load_predictor(model_name: str) -> MultiModalPredictor:
    """Build the feature-extraction predictor once and reuse it afterwards.

    Constructing the predictor on every button click would re-create the
    model for each query; caching keeps a single instance per checkpoint.
    """
    return MultiModalPredictor(
        pipeline="feature_extraction",
        hyperparameters={
            "model.hf_text.checkpoint_name": model_name,
        },
    )


def text_embedding(query: str):
    """Return the embedding of a single query string.

    Args:
        query: The text typed into the query box.

    Returns:
        The entry stored under key ``"0"`` in the result of
        ``predictor.extract_embedding([query])``.
    """
    predictor = _load_predictor(_MODEL_NAME)

    # Batch variant kept from the original for reference (not wired up):
    # dataset = ir_datasets.load("beir/fiqa/dev")
    # docs_df = pd.DataFrame(dataset.docs_iter()).set_index("doc_id").sample(frac=0.001)
    # query_embedding = predictor.extract_embedding(docs_df)
    # return query_embedding["text"]

    query_embedding = predictor.extract_embedding([query])
    # NOTE(review): "0" is presumably the auto-generated column name for list
    # input -- confirm against the installed AutoGluon version.
    return query_embedding["0"]


def main():
    """Assemble the Gradio Blocks UI and launch the demo server."""
    with gr.Blocks(title="OpenSearch Demo") as demo:
        gr.Markdown("# Text Embedding for Search Queries")

        # --- Single-query section -------------------------------------
        gr.Markdown("Ask an open question!")
        with gr.Row():
            inp_single = gr.Textbox(show_label=False)
        with gr.Row():
            btn_single = gr.Button("Generate Embedding")
        with gr.Row():
            out_single = gr.DataFrame(label="Embedding", show_label=True)

        # --- Sample-dataset section -----------------------------------
        gr.Markdown("You can select one of the sample datasets for batch inference")
        with gr.Row():
            with gr.Column():
                # TODO: no click handler is attached to this button yet.
                btn_fiqa = gr.Button("fiqa")
            with gr.Column():
                # TODO: no click handler is attached to this button yet.
                btn_faiss = gr.Button("faiss")
        with gr.Row():
            out_batch = gr.DataFrame(label="Embedding", show_label=True)

        # --- File-upload section --------------------------------------
        gr.Markdown("You can also try out our batch inference by uploading a file")
        with gr.Row():
            # Named separately so it no longer shadows the batch output
            # DataFrame above (both were previously bound to `out_batch`).
            inp_file = gr.File(interactive=True)
        with gr.Row():
            btn_file = gr.Button("Generate Embedding")

        btn_single.click(fn=text_embedding, inputs=inp_single, outputs=out_single)
        # TODO: this still embeds the single-query textbox; the uploaded file
        # (`inp_file`) and `out_batch` are not used by any handler yet.
        btn_file.click(fn=text_embedding, inputs=inp_single, outputs=out_single)

    demo.launch()


if __name__ == "__main__":
    main()