Spaces:
Running
on
T4
Running
on
T4
gabrielchua
committed on
Commit
•
02376ba
1
Parent(s):
ca95dce
add configurability for tone and length
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ import os
|
|
8 |
import time
|
9 |
from pathlib import Path
|
10 |
from tempfile import NamedTemporaryFile
|
11 |
-
from typing import List, Literal, Tuple
|
12 |
|
13 |
# Third-party imports
|
14 |
import gradio as gr
|
@@ -36,15 +36,37 @@ class Dialogue(BaseModel):
|
|
36 |
dialogue: List[DialogueItem]
|
37 |
|
38 |
|
39 |
-
def generate_podcast(file: str) -> Tuple[str, str]:
|
40 |
"""Generate the audio and transcript from the PDF."""
|
|
|
|
|
|
|
|
|
41 |
# Read the PDF file and extract text
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
# Call the LLM
|
47 |
-
llm_output = generate_script(
|
48 |
logger.info(f"Generated dialogue: {llm_output}")
|
49 |
|
50 |
# Process the dialogue
|
@@ -100,6 +122,16 @@ demo = gr.Interface(
|
|
100 |
label="PDF",
|
101 |
file_types=[".pdf", "file/*"],
|
102 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
],
|
104 |
outputs=[
|
105 |
gr.Audio(label="Audio", format="mp3"),
|
|
|
8 |
import time
|
9 |
from pathlib import Path
|
10 |
from tempfile import NamedTemporaryFile
|
11 |
+
from typing import List, Literal, Tuple, Optional
|
12 |
|
13 |
# Third-party imports
|
14 |
import gradio as gr
|
|
|
36 |
dialogue: List[DialogueItem]
|
37 |
|
38 |
|
39 |
+
def generate_podcast(file: str, tone: Optional[str] = None, length: Optional[str] = None) -> Tuple[str, str]:
|
40 |
"""Generate the audio and transcript from the PDF."""
|
41 |
+
# Check if the file is a PDF
|
42 |
+
if not file.lower().endswith('.pdf'):
|
43 |
+
raise gr.Error("Please upload a PDF file.")
|
44 |
+
|
45 |
# Read the PDF file and extract text
|
46 |
+
try:
|
47 |
+
with Path(file).open("rb") as f:
|
48 |
+
reader = PdfReader(f)
|
49 |
+
text = "\n\n".join([page.extract_text() for page in reader.pages])
|
50 |
+
except Exception as e:
|
51 |
+
raise gr.Error(f"Error reading the PDF file: {str(e)}")
|
52 |
+
|
53 |
+
# Check if the PDF has more than ~100,000 characters
|
54 |
+
if len(text) > 100000:
|
55 |
+
raise gr.Error("The PDF is too long. Please upload a PDF with fewer than ~100,000 characters.")
|
56 |
+
|
57 |
+
# Modify the system prompt based on the chosen tone and length
|
58 |
+
modified_system_prompt = SYSTEM_PROMPT
|
59 |
+
if tone:
|
60 |
+
modified_system_prompt += f"\n\nTONE: The tone of the podcast should be {tone}."
|
61 |
+
if length:
|
62 |
+
length_instructions = {
|
63 |
+
"Short (1-2 min)": "Keep the podcast brief, around 1-2 minutes long.",
|
64 |
+
"Medium (3-5 min)": "Aim for a moderate length, about 3-5 minutes.",
|
65 |
+
}
|
66 |
+
modified_system_prompt += f"\n\nLENGTH: {length_instructions[length]}"
|
67 |
|
68 |
# Call the LLM
|
69 |
+
llm_output = generate_script(modified_system_prompt, text, Dialogue)
|
70 |
logger.info(f"Generated dialogue: {llm_output}")
|
71 |
|
72 |
# Process the dialogue
|
|
|
122 |
label="PDF",
|
123 |
file_types=[".pdf", "file/*"],
|
124 |
),
|
125 |
+
gr.Radio(
|
126 |
+
choices=["Fun", "Formal"],
|
127 |
+
label="Tone of the podcast",
|
128 |
+
value="casual"
|
129 |
+
),
|
130 |
+
gr.Radio(
|
131 |
+
choices=["Short (1-2 min)", "Medium (3-5 min)"],
|
132 |
+
label="Length of the podcast",
|
133 |
+
value="Medium (3-5 min)"
|
134 |
+
),
|
135 |
],
|
136 |
outputs=[
|
137 |
gr.Audio(label="Audio", format="mp3"),
|
utils.py
CHANGED
@@ -23,19 +23,19 @@ client = OpenAI(
|
|
23 |
hf_client = Client("mrfakename/MeloTTS")
|
24 |
|
25 |
|
26 |
-
def generate_script(system_prompt: str,
|
27 |
"""Get the dialogue from the LLM."""
|
28 |
# Load as python object
|
29 |
try:
|
30 |
-
response = call_llm(system_prompt,
|
31 |
-
dialogue =
|
32 |
response.choices[0].message.content
|
33 |
)
|
34 |
except ValidationError as e:
|
35 |
error_message = f"Failed to parse dialogue JSON: {e}"
|
36 |
-
system_prompt_with_error = f"{system_prompt}\n\
|
37 |
-
response = call_llm(system_prompt_with_error,
|
38 |
-
dialogue =
|
39 |
response.choices[0].message.content
|
40 |
)
|
41 |
return dialogue
|
|
|
23 |
hf_client = Client("mrfakename/MeloTTS")
|
24 |
|
25 |
|
26 |
+
def generate_script(system_prompt: str, input_text: str, output_model):
    """Ask the LLM for a script and parse its reply into ``output_model``.

    The reply text is taken from ``response.choices[0].message.content`` and
    validated with ``output_model.model_validate_json``. If that first
    attempt raises ``ValidationError``, the LLM is called exactly once more
    with the validation error appended to the system prompt. A failure on
    the second attempt is not caught here and propagates to the caller.

    Args:
        system_prompt: Instructions passed to ``call_llm``.
        input_text: The source text passed to ``call_llm``.
        output_model: Class exposing ``model_validate_json``; it is also
            forwarded to ``call_llm``.

    Returns:
        The object produced by ``output_model.model_validate_json``.
    """
    try:
        first_reply = call_llm(system_prompt, input_text, output_model)
        raw_json = first_reply.choices[0].message.content
        dialogue = output_model.model_validate_json(raw_json)
    except ValidationError as e:
        # One retry only: feed the parse failure back to the model so it can
        # correct its output.
        error_message = f"Failed to parse dialogue JSON: {e}"
        system_prompt_with_error = f"{system_prompt}\n\nPlease return a VALID JSON object. This was the earlier error: {error_message}"
        retry_reply = call_llm(system_prompt_with_error, input_text, output_model)
        retry_json = retry_reply.choices[0].message.content
        dialogue = output_model.model_validate_json(retry_json)
    return dialogue
|