# Ask-Holy-Quran / app.py
# (Hugging Face Spaces page header — "defoxtrotalpha's picture / Update app.py /
#  176f991 verified" — pasted into the source; kept as comments so the file is
#  valid Python.)
from joblib import load
from transformers import pipeline
from pydantic import BaseModel, Field
import gradio as gr
import gdown
import os
import re
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
# --- Data acquisition: fetch the pickled verse list from Google Drive ---
# File ID extracted from the Google Drive link
file_id = '1IPhL1XBJyw6WeAqv8hMYV9T25lqw2YMH'
# Create a direct download link
drive_url = f"https://drive.google.com/uc?export=download&id={file_id}"
destination = 'quran_verses.joblib'  # local path the archive is saved to
# Download the file using gdown
gdown.download(drive_url, destination, quiet=False)
# Load quran_verses using joblib
# NOTE(review): joblib.load unpickles arbitrary code — only safe because the
# file comes from the app author's own Drive link; do not point this at
# untrusted URLs.  Presumably a list of "chapter|verse|text" strings — the
# regex in process_verse below relies on that format.
quran_verses = load(destination)
# Initialize the embedding model
# thenlper/gte-small: compact sentence-embedding model used both to build
# the FAISS index and (implicitly, via the stored index) to embed queries.
embedding_model = HuggingFaceEmbeddings(model_name="thenlper/gte-small")
import re
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
def process_verse(verse):
    """Split a pipe-delimited Quran verse line into its parts.

    Expects a string of the form ``"<chapter>|<verse>|<text>"`` and returns
    the tuple ``(chapter_no, verse_no, verse_text)`` — all strings, with the
    verse text stripped of surrounding whitespace.

    Raises:
        ValueError: if the line does not match the expected format.
    """
    parsed = re.match(r"(\d+)\|(\d+)\|(.+)", verse)
    if parsed is None:
        raise ValueError(f"Verse format is incorrect: {verse}")
    chapter, number, text = parsed.groups()
    return chapter, number, text.strip()
# Convert each "chapter|verse|text" line into a Document that carries its
# chapter/verse reference as metadata (used later to cite retrieved verses).
source_docs = [
    Document(
        page_content=text,
        metadata={"chapter": chapter, "verse": number},
    )
    for chapter, number, text in map(process_verse, quran_verses)
]
# Embed every document and index them in a FAISS vector store.
vectordb = FAISS.from_documents(documents=source_docs, embedding=embedding_model)
# Persist the index to disk so it can be reloaded without re-embedding (optional).
vectordb.save_local("quran_verses_faiss_index")
import numpy as np
def query_faiss_index(query, vectordb, embedding_model, num_results):
    """Return the ``num_results`` verses most similar to ``query``.

    Each hit is formatted as ``"Quran (chapter:verse) text"`` using the
    metadata attached when the index was built.

    Note: ``embedding_model`` is accepted for interface compatibility but is
    not used here — the vector store embeds the query itself.
    """
    hits = vectordb.similarity_search(query, k=num_results)
    return [
        f"Quran ({hit.metadata.get('chapter')}:{hit.metadata.get('verse')}) {hit.page_content}"
        for hit in hits
    ]
def prepare_context(documents):
    """Join formatted verse strings into one newline-separated context block.

    Args:
        documents: iterable of strings (the formatted retrieval results).

    Returns:
        A single string with the entries separated by newlines; empty string
        for empty input.
    """
    # str.join accepts any iterable of strings directly — the original's
    # `[doc for doc in documents]` built a pointless intermediate copy.
    return "\n".join(documents)
def _get_text_generator():
    # Build the gpt2-xl text-generation pipeline once and cache it on the
    # function object.  The original constructed a fresh pipeline (loading a
    # multi-GB model) on EVERY query, which dominates response latency.
    if not hasattr(_get_text_generator, "_pipe"):
        _get_text_generator._pipe = pipeline(
            "text-generation",
            model="openai-community/gpt2-xl",
            max_length=400,  # superseded per-call by max_new_tokens below
            temperature=0.8,
        )
    return _get_text_generator._pipe

def generate_response(query, context):
    """Generate a concise answer to ``query`` grounded in ``context``.

    Args:
        query: the user's question.
        context: newline-joined retrieved verses to answer from.

    Returns:
        The model's answer text (everything after the "Answer:" marker), or
        the full generated text if the marker is unexpectedly absent.
    """
    input_text = f"Context:\n{context}\n\nFrom the context given above, answer the following question concisely:\n{query}\nAnswer:"
    output = _get_text_generator()(input_text, max_new_tokens=100, truncation=True)
    generated = output[0]['generated_text']
    # gpt2 pipelines echo the prompt (which ends in "\nAnswer:") in the
    # output, so the text after the first marker is the model's answer.
    if '\nAnswer:' in generated:
        return generated.split('\nAnswer:')[1].strip()
    return generated.strip()
def rag_pipeline(query, vectordb, embedding_model, num_results):
    """Answer ``query`` via retrieval-augmented generation.

    Retrieves the ``num_results`` most similar verses from ``vectordb``,
    joins them into a context block, and asks the language model to answer
    from that context.
    """
    verses = query_faiss_index(query, vectordb, embedding_model, num_results)
    return generate_response(query, prepare_context(verses))
def rag_pipeline_wrapper(query):
    """Gradio entry point: answer ``query`` using the module-level index.

    Fixes the retrieval depth at 3 verses of context per question.
    """
    top_k = 3  # number of verses retrieved as context
    return rag_pipeline(query, vectordb, embedding_model, top_k)
# Minimal styling override for the Gradio UI: light background, sans-serif
# fonts, and a darker heading color.
custom_css = """
body {background-color: #f5f5f5; font-family: 'Arial', sans-serif;}
h1 {color: #2c3e50;}
textarea, input {font-family: 'Arial', sans-serif; font-size: 16px;}
"""
# Wire the RAG pipeline into a single-input/single-output Gradio app.
# NOTE(review): launch() runs at import time — standard for HF Spaces, where
# this module IS the entry point.
iface = gr.Interface(
    fn=rag_pipeline_wrapper,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Enter your question here...",
        label="Ask a Question"
    ),
    outputs=gr.Textbox(label="Answer"),
    title="Ask the Quran",
    description=(
        "This application allows you to ask questions related to the Quran. "
        "Simply enter your question, and the model will provide an answer based on the Quran's teachings."
    ),
    theme="default",
    css=custom_css,
)
iface.launch()