FredBML committed
Commit 11342a6
1 Parent(s): 31d2008

Upload folder using huggingface_hub

.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
images/.DS_Store ADDED
Binary file (6.15 kB).
 
images/basic RAG Pipeline.png ADDED
images/quiz RAG Pipeline.png ADDED
rag_quiz_app.py CHANGED
@@ -7,8 +7,10 @@ from PyPDF2 import PdfReader
 from dotenv import load_dotenv, find_dotenv
 import os
 from langchain import LLMChain
+from langchain.output_parsers import StructuredOutputParser, ResponseSchema
 from langchain.prompts import PromptTemplate
 from langchain.llms import OpenAI
+import json
 from sklearn.feature_extraction.text import TfidfVectorizer

 _ = load_dotenv(find_dotenv()) # Load environment variables from .env file
@@ -20,7 +22,7 @@ llm = OpenAI(api_key=openai.api_key) # Initialize LangChain with OpenAI API key
 def extract_text_and_create_chunks(pdf_path, chunk_size=512):
     """
     Extracts text from a PDF and splits it into manageable chunks.
-
+
     :param pdf_path: Path to the PDF file.
     :param chunk_size: Number of characters per chunk.
     :return: List of text chunks.
@@ -29,14 +31,14 @@ def extract_text_and_create_chunks(pdf_path, chunk_size=512):
     reader = PdfReader(pdf_path)
     for page in reader.pages:
         text += page.extract_text() if page.extract_text() else ''
-
+
     text_chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
     return text_chunks

 def index_text_chunks(text_chunks):
     """
     Indexes text chunks using FAISS to facilitate efficient similarity searches.
-
+
     :param text_chunks: List of text chunks to index.
     :return: FAISS index and the vectorizer used for feature extraction.
     """
@@ -46,25 +48,86 @@ def index_text_chunks(text_chunks):
     index.add(vectors)
     return index, vectorizer

-def generate_question_from_content(content):
-    """
-    Generates a question based on the provided content using LangChain.
-
-    :param content: Text content to base the question on.
-    :return: Generated question as a string.
-    """
+def query_chunks(index, vectorizer, query, k=5):
+    """
+    Queries the indexed text chunks to find the ones most relevant to the query.
+
+    :param index: FAISS index of text chunks.
+    :param vectorizer: TF-IDF vectorizer used for the text chunks.
+    :param query: Query string to search for.
+    :param k: Number of results to return.
+    :return: Indices of the top k relevant chunks.
+    """
+    query_vector = vectorizer.transform([query]).toarray()
+    distances, indices = index.search(query_vector, k)
+    return indices[0]
+
+def generate_question_and_answer(content):
+    """
+    Generates a multiple-choice question and the correct answer from the provided content using LangChain.
+
+    :param content: Text content to base the question and answer on.
+    :return: Generated question, options, correct answer, and explanation.
+    """
+    # Define the response schema
+    response_schemas = [
+        ResponseSchema(name="question", description="The multiple-choice question"),
+        ResponseSchema(name="option_a", description="Option A for the question"),
+        ResponseSchema(name="option_b", description="Option B for the question"),
+        ResponseSchema(name="option_c", description="Option C for the question"),
+        ResponseSchema(name="option_d", description="Option D for the question"),
+        ResponseSchema(name="correct_answer", description="The correct answer, which should match the text of one of the four options"),
+    ]
+
+    # Create the output parser
+    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
+
+    # Define the prompt template
     prompt = PromptTemplate(
         input_variables=["content"],
-        template="Generate a multiple-choice question based on the following content:\n\n{content}\n\nQuestion:"
-    )
+        template="""
+        Generate a multiple-choice question based on the following content.
+
+        Content: {content}
+
+        {format_instructions}
+        """,
+        partial_variables={"format_instructions": output_parser.get_format_instructions()},
+    )
+
+    # Create the LLMChain
     chain = LLMChain(prompt=prompt, llm=llm)
-    return chain.run(content)
+
+    # Run the chain with the content
+    result = chain.run(content)
+
+    # Parse the output using the structured output parser
+    result = output_parser.parse(result)
+
+    question = result.get("question")
+    correct_answer = result.get("correct_answer")  # Expected to match the text of one of the options
+    options = ["Option A", "Option B", "Option C", "Option D"]
+
+    answers = [result.get("option_a"),
+               result.get("option_b"),
+               result.get("option_c"),
+               result.get("option_d")]
+
+    pre_answer = ['A)', 'B)', 'C)', 'D)']
+    options_answers = zip(options, answers)
+    correct_answer = [option for option, answer in options_answers if answer == correct_answer][0]
+
+    random.shuffle(options)
+    explanation = generate_explanation(question, correct_answer)
+    question = question + '\n' + " ".join([pre + " " + answer for pre, answer in zip(pre_answer, answers)])
+    return question, options, correct_answer, explanation

 def generate_explanation(question, correct_answer):
     """
     Generates an explanation for the provided question and correct answer.
-
+
     :param question: The question for which to generate an explanation.
     :param correct_answer: The correct answer to the question.
     :return: Generated explanation as a string.
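
Review note: output_parser.parse() raises OutputParserException when the model's reply is not the structured block the format instructions request, and generate_question_and_answer has no guard for that (the newly imported json module goes unused in the hunks shown, since the parser handles decoding). A minimal retry sketch, assuming the pre-0.1 LangChain API this file uses; chain and output_parser are locals in the committed function and are taken as parameters here purely for illustration:

# Sketch under the assumptions above -- not part of this commit.
from langchain.schema import OutputParserException

def run_chain_with_retries(chain, output_parser, content, retries=2):
    # Re-ask the model a few times if it emits malformed output.
    for _ in range(retries + 1):
        try:
            return output_parser.parse(chain.run(content))
        except OutputParserException:
            continue
    raise ValueError("model never returned parseable output")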
@@ -79,14 +142,11 @@ def generate_explanation(question, correct_answer):
 def generate_question(content):
     """
     Generates a multiple-choice question along with options and the correct answer based on the content.
-
+
     :param content: Text content to generate a question from.
     :return: Tuple containing the question, options, correct answer, and explanation.
     """
-    question = generate_question_from_content(content)
-    options = ["Option A", "Option B", "Option C", "Option D"]
-    correct_answer = random.choice(options)
-    explanation = generate_explanation(question, correct_answer)
+    question, options, correct_answer, explanation = generate_question_and_answer(content)
     return question, options, correct_answer, explanation

 def check_answer(user_answer, correct_answer, explanation, score, count):
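
Review note: this hunk is the substantive fix of the commit. The old generate_question drew correct_answer with random.choice(options), unrelated to the generated question; it now delegates to generate_question_and_answer, so the correct answer comes from the model's structured output. An end-to-end usage sketch; the PDF path and query string are hypothetical:

# Sketch under the assumptions above -- not part of this commit.
chunks = extract_text_and_create_chunks("lecture.pdf")         # hypothetical file
index, vectorizer = index_text_chunks(chunks)
top_ids = query_chunks(index, vectorizer, "gradient descent")  # hypothetical query
question, options, correct_answer, explanation = generate_question(chunks[top_ids[0]])
print(question)        # question stem plus the A)-D) answer strings
print(correct_answer)  # one of "Option A".."Option D"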
@@ -255,4 +315,5 @@ with gr.Blocks() as demo:
     )
     submit_btn.click(lambda q, o: (gr.update(value=q), gr.update(choices=o)), inputs=[question_label, options_state], outputs=[question_label, answer_radio])

+
 demo.launch(share=True)