Spaces:

Ahmadkhan12
/

rag-movies-app

Runtime error

App Files Files Community

rag-movies-app / app.py

Ahmadkhan12

Create app.py

aadb485 verified 23 days ago

raw

history blame

2.56 kB

	import pandas as pd
	from sentence_transformers import SentenceTransformer
	import faiss
	import numpy as np
	import streamlit as st

	# Path is relative to the working directory the app is launched from, so the
	# CSV is expected to sit next to app.py.
	csv_file = "Hydra-Movie-Scrape.csv"


	@st.cache_data
	def _load_movies(path):
	    """Read the movie CSV at `path` and fill in missing summaries.

	    Cached because Streamlit re-executes the script on every interaction;
	    without caching the file would be parsed again each time.
	    """
	    movies = pd.read_csv(path)
	    # Rows without a summary still need some text to embed.
	    movies['Summary'] = movies['Summary'].fillna("No summary available.")
	    return movies


	@st.cache_resource
	def _load_model(name):
	    """Instantiate the SentenceTransformer `name` once and reuse it on reruns."""
	    return SentenceTransformer(name)


	try:
	    df = _load_movies(csv_file)
	except FileNotFoundError:
	    # Show a readable message in the page instead of a raw traceback.
	    st.error(f"Movie dataset not found: {csv_file}")
	    st.stop()

	# 'Summary' is the text that gets embedded and searched
	documents = df['Summary'].tolist()

	# Sentence-embedding model shared by corpus and query encoding
	model = _load_model('all-MiniLM-L6-v2')

	# Embed the corpus; the decorator caches the result between calls
	@st.cache_resource
	def create_embeddings(documents):
	    """Return the embeddings of `documents`.

	    Encoding is done with the module-level `model`, with its progress bar
	    enabled.
	    """
	    return model.encode(documents, show_progress_bar=True)

	# Generate (or fetch from cache) the embeddings for every summary
	doc_embeddings = create_embeddings(documents)

	# FAISS requires float32 input. ascontiguousarray converts in one step and
	# skips the copy when the data already has that dtype and layout, whereas
	# np.array(...).astype(...) copies the whole matrix twice.
	embedding_matrix = np.ascontiguousarray(doc_embeddings, dtype="float32")

	# Without at least one embedded row there is no second axis to size the
	# index with; stop with a readable message instead of an IndexError on
	# shape[1].
	if embedding_matrix.ndim != 2 or embedding_matrix.shape[0] == 0:
	    st.error("No movie summaries available to index.")
	    st.stop()

	# Build the FAISS L2 index over all summary embeddings
	index = faiss.IndexFlatL2(embedding_matrix.shape[1])
	index.add(embedding_matrix)

	# Look up the summaries closest to a free-text query
	def retrieve(query, top_k=10):
	    """Return the index entries of the `top_k` best matches for `query`.

	    The query is embedded with the module-level `model` and searched against
	    the module-level FAISS `index`. Only the index array for this single
	    query is returned; the distances are discarded.
	    """
	    query_vector = np.array(model.encode(query)).astype("float32")
	    # search() is given a batch, so wrap the single vector in a one-row batch
	    batch = np.array([query_vector])
	    _, neighbours = index.search(batch, top_k)
	    return neighbours[0]

	# Streamlit application layout
	st.title("Movie Dataset RAG Application")
	query = st.text_input("Ask a question about movies:")

	if st.button("Submit"):
	    # Treat whitespace-only input the same as no input.
	    question = query.strip()
	    if question:
	        entries = []
	        for idx in retrieve(question):
	            if idx == -1:  # skip entries flagged as invalid
	                continue
	            movie_details = df.iloc[idx]
	            # Markdown folds a single newline into the same paragraph, so the
	            # fields would run together on one line. Two trailing spaces
	            # before the newline force a hard line break per field.
	            entries.append("  \n".join([
	                f"Title: {movie_details['Title']}",
	                f"Year: {movie_details['Year']}",
	                f"Director: {movie_details['Director']}",
	                f"Cast: {movie_details['Cast']}",
	                f"Summary: {movie_details['Summary']}",
	            ]))

	        if entries:
	            st.write("Here are some movies that match your query:\n")
	            # A blank line between entries renders each movie as its own paragraph.
	            st.markdown("\n\n".join(entries))
	        else:
	            st.write("No relevant documents found.")
	    else:
	        st.write("Please enter a query.")