ttss
Browse files
- app.py +6 -5
- requirements.txt +2 -1
app.py
CHANGED
@@ -4,7 +4,7 @@ import streamlit as st
|
|
4 |
import torch
|
5 |
from transformers import AutoModelForCTC
|
6 |
from transformers import AutoProcessor
|
7 |
-
import
|
8 |
import numpy as np
|
9 |
|
10 |
# Load text embeddings model (https://huggingface.co/Salesforce/SFR-Embedding-Mistral) using HF API key from environment variable "HF_KEY"
|
@@ -15,7 +15,7 @@ processor = AutoProcessor.from_pretrained("Salesforce/SFR-Embedding-Mistral")
|
|
15 |
uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)
|
16 |
|
17 |
# Create an index for storing the embeddings
|
18 |
-
index =
|
19 |
|
20 |
# Implement code to embed text from selected files in vector database using the text embeddings model
|
21 |
success = True # Assume success by default
|
@@ -32,11 +32,12 @@ for file in uploaded_files:
|
|
32 |
embeddings = embeddings_model(**inputs).last_hidden_state.mean(dim=1)
|
33 |
# Add the embeddings to the index
|
34 |
try:
|
35 |
-
|
|
|
|
|
36 |
except Exception as e:
|
37 |
-
success = False
|
38 |
st.write(f"Failed to add embeddings to the index: {e}")
|
39 |
-
break
|
40 |
|
41 |
if success:
|
42 |
st.write("Embeddings added to the index successfully")
|
|
|
4 |
import torch
|
5 |
from transformers import AutoModelForCTC
|
6 |
from transformers import AutoProcessor
|
7 |
+
import annoy
|
8 |
import numpy as np
|
9 |
|
10 |
# Load text embeddings model (https://huggingface.co/Salesforce/SFR-Embedding-Mistral) using HF API key from environment variable "HF_KEY"
|
|
|
15 |
uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)
|
16 |
|
17 |
# Create an index for storing the embeddings
|
18 |
+
index = annoy.AnnoyIndex(768, 'angular') # Assumes 768-dimensional embeddings; this must match the embedding model's output size (TODO: confirm for SFR-Embedding-Mistral)
|
19 |
|
20 |
# Implement code to embed text from selected files in vector database using the text embeddings model
|
21 |
success = True # Assume success by default
|
|
|
32 |
embeddings = embeddings_model(**inputs).last_hidden_state.mean(dim=1)
|
33 |
# Add the embeddings to the index
|
34 |
try:
|
35 |
+
for i, emb in enumerate(embeddings.numpy()):
|
36 |
+
index.add_item(i, emb)
|
37 |
+
index.build(10) # 10 trees for building the index
|
38 |
except Exception as e:
|
39 |
+
success = False
|
40 |
st.write(f"Failed to add embeddings to the index: {e}")
|
|
|
41 |
|
42 |
if success:
|
43 |
st.write("Embeddings added to the index successfully")
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ torch
|
|
3 |
transformers
|
4 |
librosa
|
5 |
numpy
|
6 |
-
soundfile
|
|
|
|
3 |
transformers
|
4 |
librosa
|
5 |
numpy
|
6 |
+
soundfile
|
7 |
+
annoy
|