notabaka committed on
Commit
011c6b2
1 Parent(s): dd5c5fa
Files changed (2) hide show
  1. app.py +47 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Document Q&A app for a Hugging Face Space (text embeddings, not speech recognition).

Uploaded UTF-8 text files are embedded with Salesforce/SFR-Embedding-Mistral
and the resulting vectors are stored in an in-memory FAISS index.
"""

import os

import faiss
import numpy as np
import streamlit as st
import torch
from transformers import AutoModel, AutoTokenizer

MODEL_NAME = "Salesforce/SFR-Embedding-Mistral"
# Upper bound on tokens embedded per document; longer text is truncated.
MAX_TOKENS = 4096


@st.cache_resource
def load_embedding_model():
    """Load the tokenizer and encoder once and reuse them across reruns.

    Streamlit re-executes this script on every interaction, so without
    caching the model would be reloaded each time.

    Returns:
        A ``(tokenizer, model)`` tuple; the model is in eval mode.
    """
    # Optional Hugging Face access token; None falls back to anonymous access.
    token = os.environ.get("HF_KEY")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=token)
    model = AutoModel.from_pretrained(MODEL_NAME, token=token)
    model.eval()
    return tokenizer, model


def last_token_pool(last_hidden_state, attention_mask):
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_state: Tensor indexed as (batch, token, hidden).
        attention_mask: Tensor indexed as (batch, token); 1 marks real tokens.
    """
    batch_size = attention_mask.shape[0]
    # With left padding the final column is a real token in every row.
    if attention_mask[:, -1].sum() == batch_size:
        return last_hidden_state[:, -1]
    last_positions = attention_mask.sum(dim=1) - 1
    rows = torch.arange(batch_size, device=last_hidden_state.device)
    return last_hidden_state[rows, last_positions]


def embed_text(text, tokenizer, model):
    """Embed one document and return a float32 array of shape (1, hidden_size)."""
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=MAX_TOKENS,
    )
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state
    pooled = last_token_pool(hidden, inputs["attention_mask"])
    # Unit-length vectors make L2 ranking equivalent to cosine ranking.
    pooled = torch.nn.functional.normalize(pooled, p=2, dim=1)
    # FAISS requires C-contiguous float32 input.
    return np.ascontiguousarray(pooled.float().cpu().numpy(), dtype=np.float32)


# Use streamlit to select one or more files (currently only UTF-8 text is supported)
uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)

# Only load the model and report a result once something has been uploaded.
if uploaded_files:
    tokenizer, embeddings_model = load_embedding_model()

    # Size the index from the model instead of assuming a fixed dimension.
    index = faiss.IndexFlatL2(embeddings_model.config.hidden_size)

    success = True  # Assume success by default

    for file in uploaded_files:
        # Read the content of the file; binary formats cannot be decoded as text.
        try:
            text = file.read().decode("utf-8")
        except UnicodeDecodeError as e:
            success = False
            st.write(f"Could not read {file.name} as UTF-8 text: {e}")
            break

        # Embed the text and add the vector to the index.
        try:
            index.add(embed_text(text, tokenizer, embeddings_model))
        except Exception as e:
            success = False  # Set success to False if an exception occurs
            st.write(f"Failed to add embeddings to the index: {e}")
            break

    if success:
        st.write("Embeddings added to the index successfully")
    else:
        st.write("Operation failed")
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ torch
3
+ transformers
4
+ librosa
5
+ numpy
6
+ soundfile
7
+ faiss-cpu