Spaces:

notabaka
/

ASRtest

Runtime error

App Files Files Community

ASRtest / app.py

notabaka

tst

011c6b2 9 months ago

raw

history blame

1.71 kB

	# Document Q&A app to run on a Hugging Face Space (not for automatic speech recognition)

	import faiss
	import numpy as np
	import streamlit as st
	import torch
	from transformers import AutoModel
	from transformers import AutoModelForCTC
	from transformers import AutoProcessor
	from transformers import AutoTokenizer

	# Load the text-embedding model (https://huggingface.co/Salesforce/SFR-Embedding-Mistral).
	# The checkpoint is a text encoder, not a speech model, so it is loaded with the
	# generic AutoModel (whose output exposes `last_hidden_state`, which the embedding
	# step reads) and a plain tokenizer. AutoModelForCTC only maps speech
	# architectures and cannot load this checkpoint.
	# No access token is passed to from_pretrained here.
	# NOTE(review): Streamlit re-executes the script on every interaction, so this
	# load is repeated on each rerun - consider caching it.
	EMBEDDING_MODEL_ID = "Salesforce/SFR-Embedding-Mistral"
	embeddings_model = AutoModel.from_pretrained(EMBEDDING_MODEL_ID)
	# The name `processor` is kept so the rest of the script can keep calling it.
	processor = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_ID)

	# File picker widget: the user may select several files at once.
	# No `type` filter is given, so the widget itself does not restrict file extensions.
	uploaded_files = st.file_uploader(
		label="Choose a file",
		accept_multiple_files=True,
	)

	# Create a flat L2 index for storing the embeddings.
	# The vector width is read from the loaded model's config rather than being
	# hard-coded: the index dimension must equal the width of the vectors passed
	# to index.add(), otherwise the add fails.
	index = faiss.IndexFlatL2(embeddings_model.config.hidden_size)

	# Embed the text of every uploaded file with the embeddings model and store
	# the resulting vectors in the FAISS index, then report the outcome.

	# Per-document truncation limit, in tokens.
	# NOTE(review): assumed budget - confirm against the model's supported context length.
	MAX_TOKENS = 4096

	success = True  # flipped to False as soon as any file fails

	for file in uploaded_files or []:
		# Uploads arrive as raw bytes; only UTF-8 text can be embedded here, so a
		# binary document is reported to the user instead of crashing the app.
		try:
			text = file.read().decode("utf-8")
		except UnicodeDecodeError as e:
			success = False
			st.write(f"Failed to read {file.name} as UTF-8 text: {e}")
			break

		# Tokenize the text. A single sequence needs no padding; padding to the
		# maximum length would add pad positions that the unmasked mean below
		# would average into the embedding.
		inputs = processor(
			text,
			return_tensors="pt",
			truncation=True,
			max_length=MAX_TOKENS,
		)

		# Get the embedding: mean of the last hidden state over the token axis.
		with torch.no_grad():
			embeddings = embeddings_model(**inputs).last_hidden_state.mean(dim=1)

		# Add the embedding to the index; FAISS expects float32 data on the CPU.
		try:
			index.add(embeddings.float().cpu().numpy())
		except Exception as e:
			success = False  # remember the failure for the final status message
			st.write(f"Failed to add embeddings to the index: {e}")
			break

	if not uploaded_files:
		# Nothing was processed, so do not claim that embeddings were added.
		st.write("No files uploaded yet")
	elif success:
		st.write("Embeddings added to the index successfully")
	else:
		st.write("Operation failed")