# Spaces: Runtime error
import ir_datasets
import numpy as np
import pandas as pd
from autogluon.multimodal import MultiModalPredictor
# Semantic-search demo: embed a small sample of the "beir/fiqa/dev" documents
# and a single query with a sentence-transformers checkpoint, then print the
# two documents whose embeddings score highest against the query.

# Load the dataset once; a second identical load would only repeat the work.
dataset = ir_datasets.load("beir/fiqa/dev")

# Keep a tiny random fraction of the documents. No random_state is passed, so
# the sampled rows (and therefore the printed results) vary between runs.
docs_df = pd.DataFrame(dataset.docs_iter()).set_index("doc_id").sample(frac=0.0001)
# Query table indexed by query_id; not referenced again in this section.
query_df = pd.DataFrame(dataset.queries_iter()).set_index("query_id")

model_name = "sentence-transformers/all-MiniLM-L6-v2"
predictor = MultiModalPredictor(
    pipeline="feature_extraction",
    hyperparameters={
        "model.hf_text.checkpoint_name": model_name,
    },
)

# The results are indexed by key below: 'text' for the document frame and '0'
# for the single-element query list.
# NOTE(review): presumably each entry is a 2-D array (rows x embedding dim) --
# confirm against the installed AutoGluon version.
document_embedding = predictor.extract_embedding(docs_df)
query = "What happened when the dot com bubble burst?"
query_embedding = predictor.extract_embedding([query])

# Scale every vector to unit length along its last axis so that the dot
# product below is the cosine similarity.
q_norm = query_embedding['0'] / np.linalg.norm(query_embedding['0'], axis=-1, keepdims=True)
d_norm = document_embedding['text'] / np.linalg.norm(document_embedding['text'], axis=-1, keepdims=True)
scores = d_norm.dot(q_norm[0])

print(f'Question: {query}')
print()
# argsort on the negated scores orders documents from best to worst match.
# The heading shows the 1-based rank; the row position `idx` is only used to
# look the document text up (the original printed `idx` as if it were a rank).
for rank, idx in enumerate(np.argsort(-scores)[:2], start=1):
    print(f'Top {rank} result:')
    print('-----------------')
    print(docs_df['text'].iloc[idx])
    print()