Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -235,8 +235,8 @@ Follow this example structure:
|
|
235 |
|
236 |
return json.loads(response.choices[0].message.content)
|
237 |
|
238 |
-
async def tts_generate(self, text: str, speaker: int) -> str:
|
239 |
-
voice =
|
240 |
speech = edge_tts.Communicate(text, voice)
|
241 |
|
242 |
temp_filename = f"temp_{uuid.uuid4()}.wav"
|
@@ -258,10 +258,10 @@ Follow this example structure:
|
|
258 |
combined_audio.export(output_filename, format="wav")
|
259 |
return output_filename
|
260 |
|
261 |
-
async def generate_podcast(self, input_text: str, language: str) -> str:
|
262 |
podcast_json = await self.generate_script(input_text, language)
|
263 |
print(f"Generated podcast script:\n{podcast_json}")
|
264 |
-
audio_files = await asyncio.gather(*[self.tts_generate(item['line'], item['speaker']) for item in podcast_json['podcast']])
|
265 |
combined_audio = await self.combine_audio_files(audio_files)
|
266 |
return combined_audio
|
267 |
|
@@ -288,12 +288,16 @@ class TextExtractor:
|
|
288 |
else:
|
289 |
raise ValueError(f"Unsupported file type: {file_extension}")
|
290 |
|
291 |
-
async def process_input(input_text: str, input_file, language: str) -> str:
|
|
|
|
|
|
|
|
|
292 |
if input_file:
|
293 |
input_text = await TextExtractor.extract_text(input_file.name)
|
294 |
|
295 |
podcast_generator = PodcastGenerator(groq_api_key=os.environ["GROQ_API_KEY"])
|
296 |
-
return await podcast_generator.generate_podcast(input_text, language)
|
297 |
|
298 |
# Define Gradio interface
|
299 |
iface = gr.Interface(
|
@@ -316,7 +320,29 @@ iface = gr.Interface(
|
|
316 |
"Swedish", "Tamil", "Telugu", "Thai", "Turkish", "Ukrainian", "Urdu",
|
317 |
"Uzbek", "Vietnamese", "Welsh", "Zulu"
|
318 |
],
|
319 |
-
value="English")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
320 |
],
|
321 |
outputs=[
|
322 |
gr.Audio(label="Generated Podcast Audio")
|
|
|
235 |
|
236 |
return json.loads(response.choices[0].message.content)
|
237 |
|
238 |
+
async def tts_generate(self, text: str, speaker: int, speaker1: str, speaker2: str) -> str:
|
239 |
+
voice = speaker1 if speaker == 1 else speaker2
|
240 |
speech = edge_tts.Communicate(text, voice)
|
241 |
|
242 |
temp_filename = f"temp_{uuid.uuid4()}.wav"
|
|
|
258 |
combined_audio.export(output_filename, format="wav")
|
259 |
return output_filename
|
260 |
|
261 |
+
async def generate_podcast(self, input_text: str, language: str, speaker1: str, speaker2: str) -> str:
|
262 |
podcast_json = await self.generate_script(input_text, language)
|
263 |
print(f"Generated podcast script:\n{podcast_json}")
|
264 |
+
audio_files = await asyncio.gather(*[self.tts_generate(item['line'], item['speaker'], speaker1, speaker2) for item in podcast_json['podcast']])
|
265 |
combined_audio = await self.combine_audio_files(audio_files)
|
266 |
return combined_audio
|
267 |
|
|
|
288 |
else:
|
289 |
raise ValueError(f"Unsupported file type: {file_extension}")
|
290 |
|
291 |
+
async def process_input(input_text: str, input_file, language: str, speaker1: str, speaker2: str) -> str:
|
292 |
+
# Remove the gender from the speaker name
|
293 |
+
speaker1 = speaker1.split()[0]
|
294 |
+
speaker2 = speaker2.split()[0]
|
295 |
+
|
296 |
if input_file:
|
297 |
input_text = await TextExtractor.extract_text(input_file.name)
|
298 |
|
299 |
podcast_generator = PodcastGenerator(groq_api_key=os.environ["GROQ_API_KEY"])
|
300 |
+
return await podcast_generator.generate_podcast(input_text, language, speaker1, speaker2)
|
301 |
|
302 |
# Define Gradio interface
|
303 |
iface = gr.Interface(
|
|
|
320 |
"Swedish", "Tamil", "Telugu", "Thai", "Turkish", "Ukrainian", "Urdu",
|
321 |
"Uzbek", "Vietnamese", "Welsh", "Zulu"
|
322 |
],
|
323 |
+
value="English"),
|
324 |
+
gr.Dropdown(label="Speaker 1 Voice", choices=[
|
325 |
+
"en-US-AndrewMultilingualNeural (Male)",
|
326 |
+
"en-US-AvaMultilingualNeural (Female)",
|
327 |
+
"en-US-BrianMultilingualNeural (Male)",
|
328 |
+
"en-US-EmmaMultilingualNeural (Female)",
|
329 |
+
"de-DE-FlorianMultilingualNeural (Male)",
|
330 |
+
"de-DE-SeraphinaMultilingualNeural (Female)",
|
331 |
+
"fr-FR-RemyMultilingualNeural (Male)",
|
332 |
+
"fr-FR-VivienneMultilingualNeural (Female)",
|
333 |
+
],
|
334 |
+
value="en-US-AndrewMultilingualNeural (Male)"),
|
335 |
+
gr.Dropdown(label="Speaker 2 Voice", choices=[
|
336 |
+
"en-US-AndrewMultilingualNeural (Male)",
|
337 |
+
"en-US-AvaMultilingualNeural (Female)",
|
338 |
+
"en-US-BrianMultilingualNeural (Male)",
|
339 |
+
"en-US-EmmaMultilingualNeural (Female)",
|
340 |
+
"de-DE-FlorianMultilingualNeural (Male)",
|
341 |
+
"de-DE-SeraphinaMultilingualNeural (Female)",
|
342 |
+
"fr-FR-RemyMultilingualNeural (Male)",
|
343 |
+
"fr-FR-VivienneMultilingualNeural (Female)",
|
344 |
+
],
|
345 |
+
value="en-US-AvaMultilingualNeural (Female)")
|
346 |
],
|
347 |
outputs=[
|
348 |
gr.Audio(label="Generated Podcast Audio")
|