notabaka committed on
Commit
24ec349
1 Parent(s): 17ac084
Files changed (2) hide show
  1. app.py +6 -5
  2. requirements.txt +2 -1
app.py CHANGED
@@ -4,7 +4,7 @@ import streamlit as st
4
  import torch
5
  from transformers import AutoModelForCTC
6
  from transformers import AutoProcessor
7
- import faiss
8
  import numpy as np
9
 
10
  # Load text embeddings model (https://huggingface.co/Salesforce/SFR-Embedding-Mistral) using HF API key from environment variable "HF_KEY"
@@ -15,7 +15,7 @@ processor = AutoProcessor.from_pretrained("Salesforce/SFR-Embedding-Mistral")
15
  uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)
16
 
17
  # Create an index for storing the embeddings
18
- index = faiss.IndexFlatL2(768) # Assuming the embeddings have a dimension of 768
19
 
20
  # Implement code to embed text from selected files in vector database using the text embeddings model
21
  success = True # Assume success by default
@@ -32,11 +32,12 @@ for file in uploaded_files:
32
  embeddings = embeddings_model(**inputs).last_hidden_state.mean(dim=1)
33
  # Add the embeddings to the index
34
  try:
35
- index.add(embeddings.numpy())
 
 
36
  except Exception as e:
37
- success = False # Set success to False if an exception occurs
38
  st.write(f"Failed to add embeddings to the index: {e}")
39
- break
40
 
41
  if success:
42
  st.write("Embeddings added to the index successfully")
 
4
  import torch
5
  from transformers import AutoModelForCTC
6
  from transformers import AutoProcessor
7
+ import annoy
8
  import numpy as np
9
 
10
  # Load text embeddings model (https://huggingface.co/Salesforce/SFR-Embedding-Mistral) using HF API key from environment variable "HF_KEY"
 
15
  uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)
16
 
17
  # Create an index for storing the embeddings
18
+ index = annoy.AnnoyIndex(768, 'angular') # Assuming the embeddings have a dimension of 768
19
 
20
  # Implement code to embed text from selected files in vector database using the text embeddings model
21
  success = True # Assume success by default
 
32
  embeddings = embeddings_model(**inputs).last_hidden_state.mean(dim=1)
33
  # Add the embeddings to the index
34
  try:
35
+ for i, emb in enumerate(embeddings.numpy()):
36
+ index.add_item(i, emb)
37
+ index.build(10) # 10 trees for building the index
38
  except Exception as e:
39
+ success = False
40
  st.write(f"Failed to add embeddings to the index: {e}")
 
41
 
42
  if success:
43
  st.write("Embeddings added to the index successfully")
requirements.txt CHANGED
@@ -3,4 +3,5 @@ torch
3
  transformers
4
  librosa
5
  numpy
6
- soundfile
 
 
3
  transformers
4
  librosa
5
  numpy
6
+ soundfile
7
+ annoy