"""Gradio app: semantic search over CVPR 2024 paper summaries.

At import time the script loads a sentence-transformer model, the paper
metadata CSV and a matrix of precomputed embeddings, and builds a flat
(exact) FAISS L2 index over those embeddings.  The Gradio interface then
embeds a free-text query and returns the nearest papers.
"""

import faiss
import gradio as gr
import numpy as np
import pandas as pd
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer

# Number of leading embedding dimensions kept for both the index and queries.
DIM = 768

model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
print("Model loaded successfully")

papers_df = pd.read_csv(
    "data/cvpr2024_papers_with_details.csv",
    index_col=None,
    on_bad_lines="skip",
)
# Keep only papers that have both a summary and a PDF path.
# NOTE(review): rows are later looked up *positionally* from FAISS ids, so
# this assumes embeddings.npy was generated from exactly this filtered frame,
# in the same order -- confirm against the embedding script.
papers_df = papers_df[~papers_df["summary"].isna() & ~papers_df["pdf_path"].isna()]
print("Data loaded successfully")

# FAISS wants a C-contiguous float32 matrix; coerce in case the file was saved
# with a different dtype or memory layout.
embeddings = np.ascontiguousarray(np.load("data/embeddings.npy"), dtype=np.float32)

index = faiss.IndexFlatL2(DIM)
index.add(embeddings)
print("Index loaded successfully")


def encode_query(query):
    """Embed a single query string.

    The raw model output is layer-normalised, truncated to the first ``DIM``
    dimensions and then L2-normalised.

    Args:
        query: The free-text search query.

    Returns:
        A 2-D torch tensor with one row (the query) and at most ``DIM``
        columns, each row having unit L2 norm.
    """
    # NOTE(review): the query is encoded as-is, with no task prefix.  Check
    # the model card and how the stored document embeddings were produced
    # before adding one, otherwise queries and documents would be mismatched.
    query_embeddings = model.encode([query], convert_to_tensor=True)
    query_embeddings = F.layer_norm(
        query_embeddings, normalized_shape=(query_embeddings.shape[1],)
    )
    query_embeddings = query_embeddings[:, :DIM]
    query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
    return query_embeddings


def search_nearest_papers(query, k=5):
    """Return the ``k`` papers whose embeddings are closest to ``query``.

    Args:
        query: The free-text search query.
        k: Number of results requested.  Accepts a float (a Gradio slider
            may deliver one); it is truncated to an integer and floored at 1.

    Returns:
        A DataFrame with the ``Title`` and ``arXiv_link`` columns of the
        matching rows of ``papers_df``, nearest first.  It has fewer than
        ``k`` rows when the index holds fewer than ``k`` vectors.
    """
    k = max(1, int(k))

    # The encoder returns a torch tensor that may live on an accelerator;
    # the CPU FAISS index needs a contiguous float32 NumPy array.
    query_vectors = np.ascontiguousarray(
        encode_query(query).detach().cpu().numpy(), dtype=np.float32
    )

    _distances, neighbours = index.search(query_vectors, k)

    # FAISS pads missing results with -1; without this filter ``iloc[-1]``
    # would silently return the last paper in the table.
    hits = [int(i) for i in neighbours[0] if i >= 0]
    return papers_df.iloc[hits][["Title", "arXiv_link"]]


demo = gr.Interface(
    search_nearest_papers,
    [
        "text",
        # step=1 keeps the slider on whole numbers so ``k`` is a valid count.
        gr.Slider(1, 10, value=5, step=1),
    ],
    gr.Dataframe(
        headers=["Title", "PDF"],
    ),
    title="CVPR 2024 Paper Search",
    description=(
        "Semantic search over CVPR 2024 paper summaries. This app was made "
        "using the data available on "
        "https://github.com/harpreetsahota204/CVPR-2024-Papers."
    ),
)

if __name__ == "__main__":
    demo.launch()