# Pedro Gengo - Adding app (62338e3)
import faiss
import gradio as gr
import numpy as np
import pandas as pd
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer
# Width of the vectors stored in the FAISS index; queries are cut to this size.
DIM = 768

# Sentence-embedding model used to encode incoming search queries.
model = SentenceTransformer(
    "nomic-ai/nomic-embed-text-v1.5",
    trust_remote_code=True,
)
print("Model loaded successfully")

# Paper metadata. Malformed CSV rows are skipped, then only the rows that have
# both a summary and a PDF path are kept.
papers_df = pd.read_csv(
    "data/cvpr2024_papers_with_details.csv",
    index_col=None,
    on_bad_lines="skip",
)
papers_df = papers_df[
    papers_df["summary"].notna() & papers_df["pdf_path"].notna()
]
print("Data loaded successfully")

# Precomputed embeddings, added to a flat L2 index in file order.
# NOTE(review): presumably row i of the embeddings corresponds to row i of the
# filtered papers_df (results are looked up positionally) -- confirm.
with open("data/embeddings.npy", "rb") as f:
    embeddings = np.load(f)
index = faiss.IndexFlatL2(DIM)
index.add(embeddings)
print("Index loaded successfully")
def encode_query(query):
    """Embed a single query string for nearest-neighbour search.

    The query is encoded with ``model``, layer-normalised across its full
    feature width, cut down to the first ``DIM`` features and finally scaled
    to unit L2 length.

    Args:
        query: The text to embed.

    Returns:
        A 2-D torch tensor holding one row (the query) with at most ``DIM``
        columns.
    """
    # NOTE(review): the query is encoded as-is, with no task prefix. Confirm
    # this matches how the stored document embeddings were produced.
    raw = model.encode([query], convert_to_tensor=True)

    feature_width = raw.shape[1]
    layer_normed = F.layer_norm(raw, normalized_shape=(feature_width,))

    # Keep only the leading DIM features before the final L2 normalisation.
    truncated = layer_normed[:, :DIM]
    return F.normalize(truncated, p=2, dim=1)
def search_nearest_papers(query, k=5):
    """Return the title and arXiv link of the ``k`` papers closest to ``query``.

    Args:
        query: Free-text search string.
        k: Number of neighbours to retrieve. UI widgets may deliver this as a
            float, so it is coerced to an integer of at least 1.

    Returns:
        A DataFrame with the ``Title`` and ``arXiv_link`` columns of the
        matching rows of ``papers_df``, in the order returned by the index.
        It may contain fewer than ``k`` rows when the index holds fewer
        vectors.
    """
    # FAISS requires an integer neighbour count.
    k = max(1, int(k))

    query_embeddings = encode_query(query)
    # encode_query yields a torch tensor (possibly on an accelerator); the CPU
    # FAISS index expects a C-contiguous float32 NumPy array.
    if hasattr(query_embeddings, "detach"):
        query_embeddings = query_embeddings.detach().cpu().numpy()
    query_embeddings = np.ascontiguousarray(query_embeddings, dtype=np.float32)

    _, neighbour_ids = index.search(query_embeddings, k)

    # FAISS pads the result with -1 when it finds fewer than k neighbours;
    # drop the padding so iloc does not silently return the last row.
    row_positions = neighbour_ids[0]
    row_positions = row_positions[row_positions >= 0]
    return papers_df.iloc[row_positions][["Title", "arXiv_link"]]
# Gradio UI: a free-text query box and a result-count slider wired to
# search_nearest_papers; the returned DataFrame is shown as a two-column table.
demo = gr.Interface(
    search_nearest_papers,
    [
        "text",
        # step=1 keeps the slider on whole numbers, since its value is used
        # as the number of neighbours to retrieve.
        gr.Slider(1, 10, value=5, step=1, label="Number of results"),
    ],
    gr.Dataframe(
        headers=["Title", "PDF"],
    ),
    title="CVPR 2024 Paper Search",
    description=(
        "Semantic search over CVPR 2024 paper summary. "
        "This app was made using the data available on "
        "https://github.com/harpreetsahota204/CVPR-2024-Papers."
    ),
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()