# Spaces: Sleeping Sleeping  (non-code header text; commented out so the file parses)
from joblib import load | |
from transformers import pipeline | |
from pydantic import BaseModel, Field | |
import gradio as gr | |
import gdown | |
import os | |
import re | |
from langchain.docstore.document import Document | |
from langchain.vectorstores import FAISS | |
from langchain_community.embeddings import HuggingFaceEmbeddings | |
# Google Drive file ID, extracted from the share link.
file_id = '1IPhL1XBJyw6WeAqv8hMYV9T25lqw2YMH'
# Direct-download URL built from the file ID.
drive_url = f"https://drive.google.com/uc?export=download&id={file_id}"
destination = 'quran_verses.joblib'

# Download the file using gdown. A failed download can be reported by a None
# return value; without this check the failure would only show up later as an
# unrelated error from the load below.
downloaded_path = gdown.download(drive_url, destination, quiet=False)
if downloaded_path is None and not os.path.exists(destination):
    raise RuntimeError(
        f"Could not download {drive_url} and no local copy exists at {destination!r}"
    )

# Load quran_verses using joblib.
# SECURITY NOTE: joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Only keep this if the Drive file above is fully trusted.
quran_verses = load(destination)

# Initialize the embedding model
embedding_model = HuggingFaceEmbeddings(model_name="thenlper/gte-small")
import re | |
from langchain.docstore.document import Document | |
from langchain.vectorstores import FAISS | |
def process_verse(verse):
    """Split a raw ``chapter|verse|text`` record into its three fields.

    Args:
        verse: Raw record such as ``"2|255|Some verse text"``.

    Returns:
        A ``(chapter_no, verse_no, verse_text)`` tuple. The two numbers are
        returned as digit strings (not ints) and the text has surrounding
        whitespace stripped.

    Raises:
        ValueError: If ``verse`` does not start with ``<digits>|<digits>|``
            followed by at least one character.
    """
    # DOTALL lets the text group span line breaks; without it, text that
    # contains a newline would be silently cut off at the end of its first line.
    match = re.match(r"(\d+)\|(\d+)\|(.+)", verse, re.DOTALL)
    if match is None:
        raise ValueError(f"Verse format is incorrect: {verse}")
    chapter_no, verse_no, verse_text = match.groups()
    return chapter_no, verse_no, verse_text.strip()
# Parse every raw verse and wrap it in a Document; the chapter and verse
# numbers travel along as metadata so search hits can be cited later.
source_docs = [
    Document(
        page_content=text,
        metadata={"chapter": chapter, "verse": number},
    )
    for chapter, number, text in map(process_verse, quran_verses)
]

# Embed the documents and index them in a FAISS vector store.
vectordb = FAISS.from_documents(documents=source_docs, embedding=embedding_model)

# Write the index to disk for later use (optional).
vectordb.save_local("quran_verses_faiss_index")
import numpy as np | |
def query_faiss_index(query, vectordb, embedding_model, num_results):
    """Search the vector store and format each hit as a citation string.

    Args:
        query: Search text handed to ``vectordb.similarity_search``.
        vectordb: Object exposing ``similarity_search(query, k=...)``.
        embedding_model: Accepted for signature compatibility; not used here.
        num_results: Number of hits to request (passed as ``k``).

    Returns:
        A list of strings of the form ``"Quran (chapter:verse) text"``, in the
        order the search returned them. Missing metadata keys appear as
        ``None`` in the citation.
    """
    hits = vectordb.similarity_search(query, k=num_results)
    return [
        f"Quran ({hit.metadata.get('chapter')}:{hit.metadata.get('verse')}) {hit.page_content}"
        for hit in hits
    ]
def prepare_context(documents):
    """Join the retrieved passages into a single newline-separated string.

    Args:
        documents: Iterable of strings (the formatted verses).

    Returns:
        The strings joined with ``"\\n"``; an empty string for empty input.
    """
    # str.join accepts any iterable directly; no intermediate copy is needed.
    return "\n".join(documents)
# Shared text-generation pipeline, created on first use. Building it inside
# generate_response would construct a new pipeline for every question.
_text_generator = None


def _get_text_generator():
    """Return the shared text-generation pipeline, creating it on first call."""
    global _text_generator
    if _text_generator is None:
        # NOTE(review): max_length is set here while max_new_tokens is passed at
        # call time, and temperature is set without an explicit sampling flag —
        # confirm against the transformers generation docs that this is intended.
        _text_generator = pipeline(
            "text-generation",
            model="openai-community/gpt2-xl",
            max_length=400,
            temperature=0.8,
        )
    return _text_generator


def generate_response(query, context):
    """Generate an answer to ``query`` from the supplied ``context``.

    Args:
        query: The user's question; inserted into the prompt verbatim.
        context: Text placed under the ``Context:`` heading of the prompt.

    Returns:
        If the generated text contains the ``"\\nAnswer:"`` marker, the stripped
        text between its first and second occurrence (or up to the end when
        there is only one). Otherwise the whole generated text, stripped.
    """
    input_text = f"Context:\n{context}\n\nFrom the context given above, answer the following question concisely:\n{query}\nAnswer:"
    output = _get_text_generator()(input_text, max_new_tokens=100, truncation=True)
    generated_text = output[0]['generated_text']

    # Splitting on the marker and taking index 1 keeps only the first answer
    # segment, dropping anything after a further "Answer:" line.
    segments = generated_text.split('\nAnswer:')
    if len(segments) > 1:
        return segments[1].strip()
    return generated_text.strip()
def rag_pipeline(query, vectordb, embedding_model, num_results):
    """Run retrieval then generation for one question.

    Retrieves ``num_results`` formatted verses for ``query``, joins them into a
    context string, and returns the text produced by ``generate_response``.
    """
    verses = query_faiss_index(query, vectordb, embedding_model, num_results)
    return generate_response(query, prepare_context(verses))
def rag_pipeline_wrapper(query):
    """Single-argument entry point for the UI.

    Calls ``rag_pipeline`` with the module-level ``vectordb`` and
    ``embedding_model`` and a fixed retrieval size of three verses.
    """
    return rag_pipeline(query, vectordb, embedding_model, num_results=3)
# Page-level styling passed to the Gradio interface.
custom_css = """
body {background-color: #f5f5f5; font-family: 'Arial', sans-serif;}
h1 {color: #2c3e50;}
textarea, input {font-family: 'Arial', sans-serif; font-size: 16px;}
"""

# Two-line text box in which the user types a question.
question_box = gr.Textbox(
    lines=2,
    placeholder="Enter your question here...",
    label="Ask a Question"
)

# Text box that shows the generated answer.
answer_box = gr.Textbox(label="Answer")

app_description = (
    "This application allows you to ask questions related to the Quran. "
    "Simply enter your question, and the model will provide an answer based on the Quran's teachings."
)

# Wire the question box through rag_pipeline_wrapper to the answer box.
iface = gr.Interface(
    fn=rag_pipeline_wrapper,
    inputs=question_box,
    outputs=answer_box,
    title="Ask the Quran",
    description=app_description,
    theme="default",
    css=custom_css,
)

# Start the web app.
iface.launch()