# Hugging Face Space source listing (Space status at capture: Runtime error).
# File size: 1,152 bytes; revision 5e17fcf.
import ir_datasets
import pandas as pd
from autogluon.multimodal import MultiModalPredictor
import numpy as np

# Semantic-search demo: embed a sample of FiQA documents and one query with a
# sentence-transformers checkpoint, then print the documents whose embeddings
# have the highest cosine similarity to the query.

# Load the dataset once (the original script issued this call twice).
dataset = ir_datasets.load("beir/fiqa/dev")

# Keep a 0.01% random sample of the documents so embedding stays cheap.
# No random_state is set, so the sampled rows differ from run to run.
docs_df = pd.DataFrame(dataset.docs_iter()).set_index("doc_id").sample(frac=0.0001)
# Not used by the search below; kept because code outside this view may read it.
query_df = pd.DataFrame(dataset.queries_iter()).set_index("query_id")

model_name = "sentence-transformers/all-MiniLM-L6-v2"
# NOTE(review): newer AutoGluon releases appear to name this argument
# `problem_type` instead of `pipeline` -- confirm against the installed version.
predictor = MultiModalPredictor(
    pipeline="feature_extraction",
    hyperparameters={
        "model.hf_text.checkpoint_name": model_name,
    },
)

document_embedding = predictor.extract_embedding(docs_df)

query = "What happened when the dot com bubble burst?"
query_embedding = predictor.extract_embedding([query])

# L2-normalise both sides so the dot product below is the cosine similarity.
q_norm = query_embedding['0'] / np.linalg.norm(
    query_embedding['0'], axis=-1, keepdims=True
)
d_norm = document_embedding['text'] / np.linalg.norm(
    document_embedding['text'], axis=-1, keepdims=True
)
scores = d_norm.dot(q_norm[0])

print(f'Question: {query}')
print()
# argsort on the negated scores orders documents from most to least similar;
# `rank` is the 1-based position in that ordering, `idx` the row in docs_df.
for rank, idx in enumerate(np.argsort(-scores)[:2], start=1):
    print(f'Top {rank} result:')
    print('-----------------')
    print(docs_df['text'].iloc[idx])
    print()