import faiss
import numpy as np
import streamlit as st
import torch
from transformers import AutoModel
from transformers import AutoModelForCTC
from transformers import AutoProcessor
from transformers import AutoTokenizer

# Streamlit app: embed each uploaded UTF-8 text file with a Hugging Face
# encoder and add the resulting vector to an in-memory FAISS L2 index.

MODEL_NAME = "Salesforce/SFR-Embedding-Mistral"

# Upper bound on tokens per document. Without an explicit limit,
# `truncation=True` falls back to the tokenizer's own maximum, which may be
# effectively unbounded.
# NOTE(review): 4096 is believed to match the model card example - confirm.
MAX_TOKENS = 4096


@st.cache_resource
def _load_embedder():
    """Load the tokenizer and encoder once and reuse them across reruns.

    Streamlit re-executes the whole script on every interaction; caching
    avoids reloading the model weights each time.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # AutoModel (not AutoModelForCTC, which is a speech-recognition head)
    # returns an output object exposing `last_hidden_state`.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModel.from_pretrained(MODEL_NAME)
    return tokenizer, model


def _embed_text(text: str) -> np.ndarray:
    """Return a ``(1, hidden_size)`` float32 embedding for *text*.

    The embedding is the attention-mask-weighted mean of the final hidden
    states, so padding positions (if any) never contribute.
    """
    # No padding: a single sequence needs none, and padding to the model
    # maximum would only add positions that have to be masked out again.
    inputs = processor(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_TOKENS,
    )
    with torch.no_grad():
        hidden = embeddings_model(**inputs).last_hidden_state

    # NOTE(review): mean pooling is kept from the original script; the model
    # card may recommend last-token pooling instead - confirm before relying
    # on retrieval quality.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)

    # FAISS expects a C-contiguous float32 matrix.
    return np.ascontiguousarray(pooled.float().cpu().numpy(), dtype=np.float32)


processor, embeddings_model = _load_embedder()

uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)

# Size the index from the model itself so the vector width always matches.
index = faiss.IndexFlatL2(embeddings_model.config.hidden_size)

success = True

for file in uploaded_files or []:
    try:
        text = file.read().decode("utf-8")
    except UnicodeDecodeError as e:
        success = False
        st.write(f"Failed to decode {file.name} as UTF-8: {e}")
        break

    try:
        index.add(_embed_text(text))
    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing.
        success = False
        st.write(f"Failed to add embeddings to the index: {e}")
        break

if not uploaded_files:
    # Nothing was processed, so do not claim success.
    st.write("Upload one or more files to build the index")
elif success:
    st.write("Embeddings added to the index successfully")
else:
    st.write("Operation failed")