Spaces:

FredBML
/

rag_quiz_app.py

Running

App Files Files Community

FredBML committed on May 30

Commit

11342a6

•

1 Parent(s): 31d2008

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

.DS_Store +0 -0
images/.DS_Store +0 -0
images/basic RAG Pipeline.png +0 -0
images/quiz RAG Pipeline.png +0 -0
rag_quiz_app.py +78 -17

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

images/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

images/basic RAG Pipeline.png ADDED Viewed

images/quiz RAG Pipeline.png ADDED Viewed

rag_quiz_app.py CHANGED Viewed

@@ -7,8 +7,10 @@ from PyPDF2 import PdfReader
 from dotenv import load_dotenv, find_dotenv
 import os
 from langchain import LLMChain
 from langchain.prompts import PromptTemplate
 from langchain.llms import OpenAI
 from sklearn.feature_extraction.text import TfidfVectorizer
 _ = load_dotenv(find_dotenv())  # Load environment variables from .env file
@@ -20,7 +22,7 @@ llm = OpenAI(api_key=openai.api_key)  # Initialize LangChain with OpenAI API key
 def extract_text_and_create_chunks(pdf_path, chunk_size=512):
     """
     Extracts text from a PDF and splits it into manageable chunks.
     :param pdf_path: Path to the PDF file.
     :param chunk_size: Number of characters per chunk.
     :return: List of text chunks.
@@ -29,14 +31,14 @@ def extract_text_and_create_chunks(pdf_path, chunk_size=512):
     reader = PdfReader(pdf_path)
     for page in reader.pages:
         text += page.extract_text() if page.extract_text() else ''
     text_chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
     return text_chunks
 def index_text_chunks(text_chunks):
     """
     Indexes text chunks using FAISS to facilitate efficient similarity searches.
     :param text_chunks: List of text chunks to index.
     :return: FAISS index and the vectorizer used for feature extraction.
     """
@@ -46,25 +48,86 @@ def index_text_chunks(text_chunks):
     index.add(vectors)
     return index, vectorizer
-def generate_question_from_content(content):
     """
-    Generates a question based on the provided content using LangChain.
-    :param content: Text content to base the question on.
-    :return: Generated question as a string.
     """
     prompt = PromptTemplate(
         input_variables=["content"],
-        template="Generate a multiple-choice question based on the following content:\n\n{content}\n\nQuestion:"
-    )
     chain = LLMChain(prompt=prompt, llm=llm)
-    return chain.run(content)
 def generate_explanation(question, correct_answer):
     """
     Generates an explanation for the provided question and correct answer.
     :param question: The question for which to generate an explanation.
     :param correct_answer: The correct answer to the question.
     :return: Generated explanation as a string.
@@ -79,14 +142,11 @@ def generate_explanation(question, correct_answer):
 def generate_question(content):
     """
     Generates a multiple-choice question along with options and the correct answer based on the content.
     :param content: Text content to generate a question from.
     :return: Tuple containing the question, options, correct answer, and explanation.
     """
-    question = generate_question_from_content(content)
-    options = ["Option A", "Option B", "Option C", "Option D"]
-    correct_answer = random.choice(options)
-    explanation = generate_explanation(question, correct_answer)
     return question, options, correct_answer, explanation
 def check_answer(user_answer, correct_answer, explanation, score, count):
@@ -255,4 +315,5 @@ with gr.Blocks() as demo:
 )
     submit_btn.click(lambda q, o: (gr.update(value=q), gr.update(choices=o)), inputs=[question_label, options_state], outputs=[question_label, answer_radio])
     demo.launch(share=True)

 from dotenv import load_dotenv, find_dotenv
 import os
 from langchain import LLMChain
+from langchain.output_parsers import StructuredOutputParser, ResponseSchema
 from langchain.prompts import PromptTemplate
 from langchain.llms import OpenAI
+import json
 from sklearn.feature_extraction.text import TfidfVectorizer
 _ = load_dotenv(find_dotenv())  # Load environment variables from .env file
 def extract_text_and_create_chunks(pdf_path, chunk_size=512):
     """
     Extracts text from a PDF and splits it into manageable chunks.
     :param pdf_path: Path to the PDF file.
     :param chunk_size: Number of characters per chunk.
     :return: List of text chunks.
     reader = PdfReader(pdf_path)
     for page in reader.pages:
         text += page.extract_text() if page.extract_text() else ''
     text_chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
     return text_chunks
 def index_text_chunks(text_chunks):
     """
     Indexes text chunks using FAISS to facilitate efficient similarity searches.
     :param text_chunks: List of text chunks to index.
     :return: FAISS index and the vectorizer used for feature extraction.
     """
     index.add(vectors)
     return index, vectorizer
+def query_chunks(index, vectorizer, query, k=5):
+    """
+    Queries indexed text chunks to find the most relevant ones based on the query.
+
+    The query string is vectorized with the supplied vectorizer and the index is
+    searched for its k nearest neighbours.
+    :param index: FAISS index of text chunks.
+    :param vectorizer: TF-IDF vectorizer used for text chunks.
+    :param query: Query string to search for.
+    :param k: Maximum number of results to return.
+    :return: Array with the indices of the most relevant chunks (at most k entries).
     """
+    # FAISS operates on float32 matrices, while TF-IDF output is float64 by default.
+    query_vector = vectorizer.transform([query]).toarray().astype("float32")
+    _distances, indices = index.search(query_vector, k)
+    # When fewer than k vectors are indexed FAISS pads the result with -1.
+    # Drop the padding so callers never use -1 as a (valid but wrong) list index.
+    hits = indices[0]
+    return hits[hits >= 0]
+def generate_question_and_answer(content):
     """
+    Generates a multiple-choice question and the correct answer based on the provided content using LangChain.
+
+    The LLM is asked for a structured reply (question, four options and the
+    correct answer); the reply is parsed and converted into the returned tuple.
+    :param content: Text content to base the question and answer on.
+    :return: Tuple (question, options, correct_answer, explanation): the question
+        text with the four lettered answers appended, the shuffled option labels
+        ("Option A" .. "Option D"), the label of the correct option, and the
+        explanation returned by generate_explanation.
+    :raises ValueError: If the reply has no question, lacks an option, or names a
+        correct answer that matches none of the options.
+    """
+    # Define the response schema
+    response_schemas = [
+        ResponseSchema(name="question", description="The multiple-choice question"),
+        ResponseSchema(name="option_a", description="Option A for the question"),
+        ResponseSchema(name="option_b", description="Option B for the question"),
+        ResponseSchema(name="option_c", description="Option C for the question"),
+        ResponseSchema(name="option_d", description="Option D for the question"),
+        ResponseSchema(name="correct_answer", description="The correct answer for the question which should be one of the multiple-choice question"),
+    ]
+    # Create the output parser
+    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
+    # Define the prompt template
     prompt = PromptTemplate(
         input_variables=["content"],
+        template="""
+            Generate a multiple-choice question based on the following content.
+            Content: {content}
+            {format_instructions}
+        """,
+        partial_variables={"format_instructions": output_parser.get_format_instructions()},
+    )
+    # Create the LLMChain
     chain = LLMChain(prompt=prompt, llm=llm)
+    # Run the chain with the content
+    result = chain.run(content)
+    # Parse the model reply into a mapping keyed by the schema names
+    result = output_parser.parse(result)
+
+    option_keys = ["option_a", "option_b", "option_c", "option_d"]
+    letters = ["A", "B", "C", "D"]
+    options = ["Option A", "Option B", "Option C", "Option D"]
+
+    question = result.get("question")
+    answers = [result.get(key) for key in option_keys]
+    # Fail with a clear message instead of a TypeError during string building.
+    if not question or any(answer is None or not str(answer).strip() for answer in answers):
+        raise ValueError("LLM response is missing the question or one of the options")
+    question = str(question).strip()
+    answers = [str(answer).strip() for answer in answers]
+
+    # The model may return the correct answer as the option text, as a bare
+    # letter ("B", "b)"), as the schema key ("option_b") or as the label
+    # ("Option B"). Match the option text first, then fall back to the aliases.
+    raw_correct = result.get("correct_answer")
+    wanted = "" if raw_correct is None else str(raw_correct).strip().casefold()
+    by_text = {}
+    by_alias = {}
+    for label, letter, key, answer in zip(options, letters, option_keys, answers):
+        by_text.setdefault(answer.casefold(), label)
+        for alias in (letter, letter + ")", key, label):
+            by_alias[alias.casefold()] = label
+    correct_answer = by_text.get(wanted) or by_alias.get(wanted)
+    if correct_answer is None:
+        raise ValueError(f"Correct answer {raw_correct!r} does not match any of the options")
+
+    # Append the lettered answers before requesting the explanation, so the
+    # option label passed along refers to something in the question text.
+    question = question + '\n' + " ".join(f"{letter}) {answer}" for letter, answer in zip(letters, answers))
+    explanation = generate_explanation(question, correct_answer)
+    # NOTE(review): this only shuffles the order of the labels; the lettered
+    # answers inside the question text stay in A-D order - confirm this is intended.
+    random.shuffle(options)
+    return question, options, correct_answer, explanation
 def generate_explanation(question, correct_answer):
     """
     Generates an explanation for the provided question and correct answer.
     :param question: The question for which to generate an explanation.
     :param correct_answer: The correct answer to the question.
     :return: Generated explanation as a string.
 def generate_question(content):
     """
+    Builds one quiz item from the content by delegating to generate_question_and_answer.
     :param content: Text content to generate a question from.
+    :return: The four values produced by generate_question_and_answer, in order: question, options, correct answer, explanation.
     """
+    quiz_item = generate_question_and_answer(content)
+    question, options, correct_answer, explanation = quiz_item
     return question, options, correct_answer, explanation
 def check_answer(user_answer, correct_answer, explanation, score, count):
 )
     submit_btn.click(lambda q, o: (gr.update(value=q), gr.update(choices=o)), inputs=[question_label, options_state], outputs=[question_label, answer_radio])
     demo.launch(share=True)