Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- .DS_Store +0 -0
- images/.DS_Store +0 -0
- images/basic RAG Pipeline.png +0 -0
- images/quiz RAG Pipeline.png +0 -0
- rag_quiz_app.py +78 -17
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
|
|
images/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
images/basic RAG Pipeline.png
ADDED
images/quiz RAG Pipeline.png
ADDED
rag_quiz_app.py
CHANGED
@@ -7,8 +7,10 @@ from PyPDF2 import PdfReader
|
|
7 |
from dotenv import load_dotenv, find_dotenv
|
8 |
import os
|
9 |
from langchain import LLMChain
|
|
|
10 |
from langchain.prompts import PromptTemplate
|
11 |
from langchain.llms import OpenAI
|
|
|
12 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
13 |
|
14 |
_ = load_dotenv(find_dotenv()) # Load environment variables from .env file
|
@@ -20,7 +22,7 @@ llm = OpenAI(api_key=openai.api_key) # Initialize LangChain with OpenAI API key
|
|
20 |
def extract_text_and_create_chunks(pdf_path, chunk_size=512):
|
21 |
"""
|
22 |
Extracts text from a PDF and splits it into manageable chunks.
|
23 |
-
|
24 |
:param pdf_path: Path to the PDF file.
|
25 |
:param chunk_size: Number of characters per chunk.
|
26 |
:return: List of text chunks.
|
@@ -29,14 +31,14 @@ def extract_text_and_create_chunks(pdf_path, chunk_size=512):
|
|
29 |
reader = PdfReader(pdf_path)
|
30 |
for page in reader.pages:
|
31 |
text += page.extract_text() if page.extract_text() else ''
|
32 |
-
|
33 |
text_chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
|
34 |
return text_chunks
|
35 |
|
36 |
def index_text_chunks(text_chunks):
|
37 |
"""
|
38 |
Indexes text chunks using FAISS to facilitate efficient similarity searches.
|
39 |
-
|
40 |
:param text_chunks: List of text chunks to index.
|
41 |
:return: FAISS index and the vectorizer used for feature extraction.
|
42 |
"""
|
@@ -46,25 +48,86 @@ def index_text_chunks(text_chunks):
|
|
46 |
index.add(vectors)
|
47 |
return index, vectorizer
|
48 |
|
|
|
|
|
|
|
49 |
|
50 |
-
|
|
|
|
|
|
|
|
|
51 |
"""
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
56 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
prompt = PromptTemplate(
|
58 |
input_variables=["content"],
|
59 |
-
template="
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
chain = LLMChain(prompt=prompt, llm=llm)
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
def generate_explanation(question, correct_answer):
|
65 |
"""
|
66 |
Generates an explanation for the provided question and correct answer.
|
67 |
-
|
68 |
:param question: The question for which to generate an explanation.
|
69 |
:param correct_answer: The correct answer to the question.
|
70 |
:return: Generated explanation as a string.
|
@@ -79,14 +142,11 @@ def generate_explanation(question, correct_answer):
|
|
79 |
def generate_question(content):
|
80 |
"""
|
81 |
Generates a multiple-choice question along with options and the correct answer based on the content.
|
82 |
-
|
83 |
:param content: Text content to generate a question from.
|
84 |
:return: Tuple containing the question, options, correct answer, and explanation.
|
85 |
"""
|
86 |
-
question =
|
87 |
-
options = ["Option A", "Option B", "Option C", "Option D"]
|
88 |
-
correct_answer = random.choice(options)
|
89 |
-
explanation = generate_explanation(question, correct_answer)
|
90 |
return question, options, correct_answer, explanation
|
91 |
|
92 |
def check_answer(user_answer, correct_answer, explanation, score, count):
|
@@ -255,4 +315,5 @@ with gr.Blocks() as demo:
|
|
255 |
)
|
256 |
submit_btn.click(lambda q, o: (gr.update(value=q), gr.update(choices=o)), inputs=[question_label, options_state], outputs=[question_label, answer_radio])
|
257 |
|
|
|
258 |
demo.launch(share=True)
|
|
|
7 |
from dotenv import load_dotenv, find_dotenv
|
8 |
import os
|
9 |
from langchain import LLMChain
|
10 |
+
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
|
11 |
from langchain.prompts import PromptTemplate
|
12 |
from langchain.llms import OpenAI
|
13 |
+
import json
|
14 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
15 |
|
16 |
_ = load_dotenv(find_dotenv()) # Load environment variables from .env file
|
|
|
22 |
def extract_text_and_create_chunks(pdf_path, chunk_size=512):
|
23 |
"""
|
24 |
Extracts text from a PDF and splits it into manageable chunks.
|
25 |
+
|
26 |
:param pdf_path: Path to the PDF file.
|
27 |
:param chunk_size: Number of characters per chunk.
|
28 |
:return: List of text chunks.
|
|
|
31 |
reader = PdfReader(pdf_path)
|
32 |
for page in reader.pages:
|
33 |
text += page.extract_text() if page.extract_text() else ''
|
34 |
+
|
35 |
text_chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
|
36 |
return text_chunks
|
37 |
|
38 |
def index_text_chunks(text_chunks):
|
39 |
"""
|
40 |
Indexes text chunks using FAISS to facilitate efficient similarity searches.
|
41 |
+
|
42 |
:param text_chunks: List of text chunks to index.
|
43 |
:return: FAISS index and the vectorizer used for feature extraction.
|
44 |
"""
|
|
|
48 |
index.add(vectors)
|
49 |
return index, vectorizer
|
50 |
|
51 |
+
def query_chunks(index, vectorizer, query, k=5):
    """
    Queries indexed text chunks to find the most relevant ones based on the query.

    :param index: FAISS index of text chunks.
    :param vectorizer: TF-IDF vectorizer used for text chunks.
    :param query: Query string to search for.
    :param k: Number of results to return.
    :return: Indices of the top k relevant chunks, best match first. Fewer than
        k indices are returned when the index holds fewer than k vectors.
    """
    # FAISS operates on float32 matrices, while the dense TF-IDF output is
    # float64 by default, so convert explicitly before searching.
    query_vector = vectorizer.transform([query]).toarray().astype("float32")
    _distances, indices = index.search(query_vector, k)

    # FAISS pads missing neighbours with -1. Used as a list position, -1 would
    # silently select the last chunk, so the padding is dropped here.
    hits = indices[0]
    return hits[hits >= 0]
|
64 |
+
|
65 |
+
def _resolve_correct_label(raw_correct, answers, labels):
    """
    Maps the model's "correct_answer" value onto one of the option labels.

    :param raw_correct: Value of the "correct_answer" field from the model.
    :param answers: Answer texts, in the same order as labels.
    :param labels: Option labels such as "Option A"; the last character of each
        label is taken as its letter.
    :return: The label of the option the model marked as correct.
    :raises ValueError: If the value matches neither an answer text nor a label.
    """
    # Exact text match first: this is the only form the previous code accepted.
    for label, answer in zip(labels, answers):
        if answer == raw_correct:
            return label

    wanted = str(raw_correct).strip().casefold()

    # Same comparison, but tolerant of surrounding whitespace and letter case.
    for label, answer in zip(labels, answers):
        if answer.strip().casefold() == wanted:
            return label

    # The model may also reply with a reference to the option instead of its
    # text, e.g. "B", "B)", "Option B" or the schema key "option_b".
    for label in labels:
        letter = label[-1].casefold()
        if wanted in (letter, letter + ")", "option " + letter, "option_" + letter):
            return label

    raise ValueError(
        f"Model returned correct_answer={raw_correct!r}, "
        "which does not match any of the generated options"
    )


def generate_question_and_answer(content):
    """
    Generates a multiple-choice question and the correct answer based on the provided content using LangChain.

    :param content: Text content to base the question and answer on.
    :return: Tuple of (question text with the four options appended, list of
        option labels in shuffled order, label of the correct option,
        explanation returned by generate_explanation).
    :raises ValueError: If the parsed response lacks a required field or its
        correct answer cannot be matched to one of the four options.
    """
    option_keys = ["option_a", "option_b", "option_c", "option_d"]
    letters = ["A", "B", "C", "D"]
    options = ["Option A", "Option B", "Option C", "Option D"]

    # Define the response schema
    response_schemas = [
        ResponseSchema(name="question", description="The multiple-choice question"),
        ResponseSchema(name="option_a", description="Option A for the question"),
        ResponseSchema(name="option_b", description="Option B for the question"),
        ResponseSchema(name="option_c", description="Option C for the question"),
        ResponseSchema(name="option_d", description="Option D for the question"),
        ResponseSchema(
            name="correct_answer",
            description="The correct answer for the question, copied exactly from one of the four options",
        ),
    ]

    # Create the output parser
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

    # Define the prompt template
    prompt = PromptTemplate(
        input_variables=["content"],
        template="""
    Generate a multiple-choice question based on the following content.

    Content: {content}

    {format_instructions}
    """,
        partial_variables={"format_instructions": output_parser.get_format_instructions()},
    )

    # Create the LLMChain and run it with the content
    chain = LLMChain(prompt=prompt, llm=llm)
    raw_output = chain.run(content)

    # Parse the model output into a dict keyed by the schema names
    result = output_parser.parse(raw_output)

    # Fail with a clear message instead of a TypeError further down when the
    # model omitted one of the requested fields.
    required = ["question", "correct_answer"] + option_keys
    missing = [key for key in required if result.get(key) is None]
    if missing:
        raise ValueError(f"Model response is missing required fields: {missing}")

    question = str(result["question"])
    # str() guards against the model emitting a bare number as an option.
    answers = [str(result[key]) for key in option_keys]

    correct_answer = _resolve_correct_label(result["correct_answer"], answers, options)

    # The question text always lists the answers as A) .. D) in order.
    rendered_answers = " ".join(
        f"{letter}) {answer}" for letter, answer in zip(letters, answers)
    )
    question = question + '\n' + rendered_answers

    # Only the order of the labels offered for selection is randomised.
    random.shuffle(options)

    # The explanation is requested with the full question text so that the
    # label ("Option A", ...) can be related to an actual answer.
    # NOTE(review): generate_explanation's prompt is not visible here - confirm
    # it copes with the options being part of the question text.
    explanation = generate_explanation(question, correct_answer)
    return question, options, correct_answer, explanation
|
126 |
|
127 |
def generate_explanation(question, correct_answer):
|
128 |
"""
|
129 |
Generates an explanation for the provided question and correct answer.
|
130 |
+
|
131 |
:param question: The question for which to generate an explanation.
|
132 |
:param correct_answer: The correct answer to the question.
|
133 |
:return: Generated explanation as a string.
|
|
|
142 |
def generate_question(content):
    """
    Builds one multiple-choice quiz item from the given content.

    Delegates the work to generate_question_and_answer and hands its four
    results back unchanged.

    :param content: Text content to generate a question from.
    :return: Tuple containing the question, options, correct answer, and explanation.
    """
    quiz_item = generate_question_and_answer(content)
    # Unpacking keeps the four-field contract explicit for callers.
    prompt_text, choices, answer_key, rationale = quiz_item
    return prompt_text, choices, answer_key, rationale
|
151 |
|
152 |
def check_answer(user_answer, correct_answer, explanation, score, count):
|
|
|
315 |
)
|
316 |
submit_btn.click(lambda q, o: (gr.update(value=q), gr.update(choices=o)), inputs=[question_label, options_state], outputs=[question_label, answer_radio])
|
317 |
|
318 |
+
|
319 |
demo.launch(share=True)
|