Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,101 +1,127 @@
|
|
1 |
import streamlit as st
|
2 |
-
import requests
|
3 |
-
from openai import OpenAI
|
4 |
from youtube_transcript_api import YouTubeTranscriptApi
|
5 |
import re
|
6 |
import tempfile
|
7 |
import os
|
|
|
|
|
|
|
8 |
|
9 |
-
#
|
10 |
-
|
11 |
-
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large"
|
12 |
-
headers = {"Authorization": f"Bearer {api_key}"}
|
13 |
-
with open(file_path, "rb") as f:
|
14 |
-
data = f.read()
|
15 |
-
response = requests.post(API_URL, headers=headers, data=data)
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
# Function to get YouTube
|
29 |
def get_transcript(url):
    """Fetch the transcript of a YouTube video as one space-joined string.

    Args:
        url: A YouTube URL. The video id is the first run of 11 id characters
            that follows ``v=`` or ``/``.

    Returns:
        The transcript text, or a string starting with ``"Error"`` when the
        URL contains no video id or the transcript cannot be retrieved.
    """
    try:
        video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url)
        if not video_id_match:
            return "Error: Invalid YouTube URL"

        video_id = video_id_match.group(1)
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(entry['text'] for entry in transcript)
    except Exception as e:
        # The caller tells failures apart by looking for "Error" in the
        # returned string, so every failure message must carry that marker;
        # a bare str(e) could be mistaken for a transcript.
        return f"Error: {e}"
|
42 |
|
43 |
-
# Function to summarize text using
|
44 |
-
def summarize_text(
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
def parse_quiz_questions(quiz_text):
|
69 |
questions = []
|
70 |
question_blocks = quiz_text.split("\n\n")
|
|
|
|
|
|
|
|
|
71 |
for block in question_blocks:
|
72 |
lines = block.strip().split("\n")
|
73 |
-
if
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
return questions
|
83 |
|
84 |
-
# Function to generate explanation using
|
85 |
-
def generate_explanation(
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
96 |
|
97 |
# Function to check answers and provide feedback
|
98 |
-
def check_answers(
|
99 |
feedback = []
|
100 |
correct_count = 0
|
101 |
for i, question in enumerate(questions):
|
@@ -110,7 +136,7 @@ def check_answers(client, questions, user_answers):
|
|
110 |
})
|
111 |
correct_count += 1
|
112 |
else:
|
113 |
-
explanation = generate_explanation(
|
114 |
feedback.append({
|
115 |
"question": question['question'],
|
116 |
"user_answer": user_answer,
|
@@ -120,60 +146,63 @@ def check_answers(client, questions, user_answers):
|
|
120 |
})
|
121 |
return feedback
|
122 |
|
123 |
-
# Function to handle uploaded
|
124 |
def handle_uploaded_file(uploaded_file):
    """Copy the uploaded file's bytes into a persistent temporary file.

    The file is created with ``delete=False``, so it stays on disk after it
    is closed; the caller is responsible for removing it.

    Returns:
        The path of the temporary file.
    """
    spool = tempfile.NamedTemporaryFile(delete=False)
    try:
        spool.write(uploaded_file.read())
    finally:
        spool.close()
    return spool.name
|
129 |
|
130 |
-
# Streamlit
|
131 |
st.title("YouTube Transcript Quiz Generator")
|
|
|
132 |
|
133 |
-
st.markdown("**Instructions:** Enter your OpenAI and Hugging Face API keys, and paste a YouTube link or upload a media file to generate a quiz.")
|
134 |
-
|
135 |
-
openai_api_key = st.text_input("Enter your OpenAI API Key", type="password")
|
136 |
-
hf_api_key = st.text_input("Enter your Hugging Face API Key", type="password")
|
137 |
option = st.selectbox("Choose input type", ("YouTube URL", "Upload audio/video file"))
|
138 |
|
139 |
-
if openai_api_key:
|
140 |
-
client = OpenAI(api_key=openai_api_key)
|
141 |
-
|
142 |
if "generated_quiz" not in st.session_state:
|
143 |
st.session_state.generated_quiz = False
|
144 |
|
145 |
if option == "YouTube URL":
|
146 |
url = st.text_input("YouTube URL", value="")
|
147 |
-
if
|
148 |
if st.button("Generate Quiz"):
|
149 |
transcript_text = get_transcript(url)
|
150 |
if "Error" not in transcript_text:
|
151 |
-
summary = summarize_text(
|
152 |
-
quiz_text = generate_quiz_questions(
|
153 |
questions = parse_quiz_questions(quiz_text)
|
154 |
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
|
|
|
|
|
|
|
|
|
|
159 |
|
160 |
if option == "Upload audio/video file":
|
161 |
uploaded_file = st.file_uploader("Choose an audio or video file", type=["mp3", "wav", "mp4", "mov"])
|
162 |
-
if uploaded_file
|
163 |
if st.button("Generate Quiz"):
|
164 |
tmp_file_path = handle_uploaded_file(uploaded_file)
|
165 |
-
|
166 |
-
transcript_text = transcribe_audio(hf_api_key, tmp_file_path)
|
167 |
os.remove(tmp_file_path)
|
168 |
if "Error" not in transcript_text:
|
169 |
-
summary = summarize_text(
|
170 |
-
quiz_text = generate_quiz_questions(
|
171 |
questions = parse_quiz_questions(quiz_text)
|
172 |
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
|
|
|
|
|
|
|
|
|
|
177 |
|
178 |
if st.session_state.generated_quiz:
|
179 |
st.write("## Summary")
|
@@ -192,7 +221,7 @@ if st.session_state.generated_quiz:
|
|
192 |
if st.button("Submit Answers"):
|
193 |
if "questions" in st.session_state and st.session_state.questions:
|
194 |
with st.spinner('Processing your answers...'):
|
195 |
-
feedback = check_answers(
|
196 |
st.write("## Feedback")
|
197 |
for i, item in enumerate(feedback):
|
198 |
with st.expander(f"Question {i+1} Feedback"):
|
|
|
1 |
import streamlit as st
|
|
|
|
|
2 |
from youtube_transcript_api import YouTubeTranscriptApi
|
3 |
import re
|
4 |
import tempfile
|
5 |
import os
|
6 |
+
import whisper
|
7 |
+
import warnings
|
8 |
+
from groq import Groq
|
9 |
|
10 |
+
# Silence only the FP16-on-CPU notice; every other warning is still shown.
warnings.filterwarnings(
    "ignore",
    message="FP16 is not supported on CPU; using FP32 instead",
)

# Module-level Groq client used by every completion helper below. The API key
# is read from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
17 |
+
|
18 |
+
# Function to transcribe audio
def transcribe_audio(file_path):
    """Transcribe a media file to text with the Whisper ``"base"`` model.

    Args:
        file_path: Path of the audio/video file passed to ``model.transcribe``.

    Returns:
        The transcribed text on success. On any failure, a string starting
        with ``"Error transcribing audio:"`` is returned instead of raising,
        in line with the other helpers in this file and with the caller's
        ``"Error" not in transcript_text`` check.
    """
    try:
        model = whisper.load_model("base")
        result = model.transcribe(file_path)
        return result["text"]
    except Exception as e:
        # Report the failure as text so the caller still reaches its
        # temp-file cleanup and can display the message.
        return f"Error transcribing audio: {e}"
|
23 |
+
|
24 |
+
# Function to get transcript from YouTube
def get_transcript(url):
    """Fetch the transcript of a YouTube video as one space-joined string.

    Args:
        url: A YouTube URL. The video id is the first run of 11 id characters
            that follows ``v=`` or ``/``.

    Returns:
        The transcript text, or a string starting with ``"Error"`` when the
        URL contains no video id or the transcript cannot be retrieved.
    """
    try:
        video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url)
        if not video_id_match:
            return "Error: Invalid YouTube URL"

        video_id = video_id_match.group(1)
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(entry['text'] for entry in transcript)
    except Exception as e:
        # The caller tells failures apart by looking for "Error" in the
        # returned string, so every failure message must carry that marker;
        # a bare str(e) could be mistaken for a transcript.
        return f"Error: {e}"
|
37 |
|
38 |
+
# Function to summarize text using Groq API
def summarize_text(text):
    """Ask the Groq chat model for a summary of ``text``.

    Returns:
        The stripped content of the first completion choice, or an
        ``"Error summarizing text: ..."`` string if anything raises.
    """
    try:
        user_message = {
            "role": "user",
            "content": f"Summarize the following text:\n\n{text}",
        }
        completion = client.chat.completions.create(
            messages=[user_message],
            model="llama3-8b-8192",
        )
        return completion.choices[0].message.content.strip()
    except Exception as err:
        return f"Error summarizing text: {err}"
|
54 |
+
|
55 |
+
# Function to generate quiz questions using Groq API
def generate_quiz_questions(text):
    """Ask the Groq chat model to write quiz questions about ``text``.

    Returns:
        The stripped content of the first completion choice, or an
        ``"Error generating quiz questions: ..."`` string if anything raises.
    """
    try:
        request_messages = [
            {
                "role": "user",
                "content": f"Generate quiz questions for the following text:\n\n{text}",
            },
        ]
        reply = client.chat.completions.create(
            messages=request_messages,
            model="llama3-8b-8192",
        )
        first_choice = reply.choices[0]
        return first_choice.message.content.strip()
    except Exception as exc:
        return f"Error generating quiz questions: {exc}"
|
71 |
+
|
72 |
+
# Function to parse quiz questions from generated text
def parse_quiz_questions(quiz_text):
    """Parse model-generated quiz text into a list of question dicts.

    The text is read as blocks separated by blank lines. A block whose first
    line starts with ``<number>.`` opens a new question: that line is the
    question, the next four lines are its choices and, when the block has
    more than five lines, the text after the last ``": "`` on its final line
    is the answer. Any other non-empty block supplies the answer for the
    question currently being built, extracted the same way.

    Args:
        quiz_text: Raw quiz text; ``None`` or an empty string yields ``[]``.

    Returns:
        A list of dicts with the keys ``"question"``, ``"choices"`` and
        ``"correct_answer"``. Questions that end up without choices or
        without an answer are left out.
    """
    questions = []
    if not quiz_text:
        return questions

    current_question = None
    current_choices = []
    correct_answer = None

    def flush():
        # Keep the question under construction only when it is complete.
        if current_question and current_choices and correct_answer:
            questions.append({
                "question": current_question,
                "choices": current_choices,
                "correct_answer": correct_answer
            })

    # Split on any run of blank lines, not exactly "\n\n": extra newlines
    # would otherwise produce empty blocks.
    for block in re.split(r"\n\s*\n", quiz_text.replace("\r\n", "\n")):
        lines = [line.strip() for line in block.split("\n") if line.strip()]
        if not lines:
            # An empty block must never overwrite an answer already found.
            continue

        if re.match(r'^\d+\.', lines[0]):  # This block starts a new question
            flush()
            current_question = lines[0]
            current_choices = lines[1:5]
            correct_answer = lines[-1].split(": ")[-1].strip() if len(lines) > 5 else None
        else:  # This block carries the answer for the current question
            correct_answer = lines[-1].split(": ")[-1].strip()

    # Add the last question if it exists
    flush()
    return questions
|
105 |
|
106 |
+
# Function to generate explanation for quiz answers using Groq API
def generate_explanation(question, correct_answer, user_answer):
    """Ask the Groq chat model why ``correct_answer`` is right and ``user_answer`` is not.

    Returns:
        The stripped content of the first completion choice, or an
        ``"Error generating explanation: ..."`` string if anything raises.
    """
    try:
        prompt = f"Explain why the correct answer to the following question is '{correct_answer}' and not '{user_answer}':\n\n{question}"
        result = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-8b-8192",
        )
        answer_text = result.choices[0].message.content
        return answer_text.strip()
    except Exception as failure:
        return f"Error generating explanation: {failure}"
|
122 |
|
123 |
# Function to check answers and provide feedback
|
124 |
+
def check_answers(questions, user_answers):
|
125 |
feedback = []
|
126 |
correct_count = 0
|
127 |
for i, question in enumerate(questions):
|
|
|
136 |
})
|
137 |
correct_count += 1
|
138 |
else:
|
139 |
+
explanation = generate_explanation(question['question'], correct_answer, user_answer)
|
140 |
feedback.append({
|
141 |
"question": question['question'],
|
142 |
"user_answer": user_answer,
|
|
|
146 |
})
|
147 |
return feedback
|
148 |
|
149 |
+
# Function to handle uploaded files
def handle_uploaded_file(uploaded_file):
    """Copy the uploaded file's bytes into a persistent temporary file.

    The file is created with ``delete=False``, so it stays on disk after it
    is closed; the caller is responsible for removing it.

    Returns:
        The path of the temporary file.
    """
    spool = tempfile.NamedTemporaryFile(delete=False)
    try:
        spool.write(uploaded_file.read())
    finally:
        spool.close()
    return spool.name
|
155 |
|
156 |
+
# Streamlit app layout and functionality
def _store_quiz_from_transcript(transcript_text):
    """Build the summary and quiz for a transcript and save them in session state.

    Shows ``transcript_text`` as an error when it contains "Error", and shows
    an error when no question could be parsed from the generated quiz text.
    Shared by both input types so they cannot drift apart.
    """
    if "Error" in transcript_text:
        st.error(transcript_text)
        return

    summary = summarize_text(transcript_text)
    quiz_text = generate_quiz_questions(transcript_text)
    questions = parse_quiz_questions(quiz_text)

    if not questions:
        st.error("No valid quiz questions could be generated.")
        return

    st.session_state.summary = summary
    st.session_state.questions = questions
    st.session_state.user_answers = {}
    st.session_state.generated_quiz = True


st.title("YouTube Transcript Quiz Generator")
st.markdown("**Instructions:** Paste a YouTube link or upload a media file to generate a quiz.")

option = st.selectbox("Choose input type", ("YouTube URL", "Upload audio/video file"))

if "generated_quiz" not in st.session_state:
    st.session_state.generated_quiz = False

if option == "YouTube URL":
    url = st.text_input("YouTube URL", value="")
    # The button is only rendered once a URL has been entered.
    if url and st.button("Generate Quiz"):
        _store_quiz_from_transcript(get_transcript(url))

elif option == "Upload audio/video file":
    uploaded_file = st.file_uploader("Choose an audio or video file", type=["mp3", "wav", "mp4", "mov"])
    # The button is only rendered once a file has been chosen.
    if uploaded_file and st.button("Generate Quiz"):
        tmp_file_path = handle_uploaded_file(uploaded_file)
        try:
            transcript_text = transcribe_audio(tmp_file_path)
        finally:
            # Remove the temporary copy even if transcription raises, so
            # failed runs do not leave files behind.
            os.remove(tmp_file_path)
        _store_quiz_from_transcript(transcript_text)
|
206 |
|
207 |
if st.session_state.generated_quiz:
|
208 |
st.write("## Summary")
|
|
|
221 |
if st.button("Submit Answers"):
|
222 |
if "questions" in st.session_state and st.session_state.questions:
|
223 |
with st.spinner('Processing your answers...'):
|
224 |
+
feedback = check_answers(st.session_state.questions, st.session_state.user_answers)
|
225 |
st.write("## Feedback")
|
226 |
for i, item in enumerate(feedback):
|
227 |
with st.expander(f"Question {i+1} Feedback"):
|