vitorcalvi committed on
Commit
d7529f8
1 Parent(s): 37b8131

pre-launch

Browse files
Files changed (2) hide show
  1. app.py +137 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import PyPDF2
3
+ import nltk
4
+ from nltk.tokenize import sent_tokenize
5
+ from sumy.parsers.plaintext import PlaintextParser
6
+ from sumy.nlp.tokenizers import Tokenizer
7
+ from sumy.summarizers.lsa import LsaSummarizer
8
+ import os
9
+ from pydub import AudioSegment
10
+ from concurrent.futures import ThreadPoolExecutor
11
+ from TTS.api import TTS
12
+
13
# Download necessary NLTK data
# (fetched at import time; quiet=True suppresses the downloader's console output).
nltk.download('punkt', quiet=True)

# Initialize TTS model using ONNX
# The model is created once at module import and shared by every request.
# NOTE(review): confirm that the installed TTS release accepts the `use_onnx`
# keyword; if it does not, this line fails at import time.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", use_onnx=True)

# Set default speaker and language manually based on valid IDs obtained
# NOTE(review): both values are placeholders per the inline notes below; verify
# them against the speaker/language lists exposed by the loaded model.
default_speaker = "en_speaker_1"  # Replace with a valid speaker ID from the printed list
default_language = "en"  # Replace with a valid language code from the printed list
22
+
23
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF as a single string.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The text of all pages concatenated in page order, or ``None`` when
        the file cannot be opened or parsed (the error is printed).
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # `or ''` keeps a page that yields no text (e.g. an image-only
            # page) from aborting extraction of the whole document; join
            # avoids rebuilding the string once per page.
            return ''.join(page.extract_text() or '' for page in reader.pages)
    except Exception as e:
        # Deliberate best-effort: callers treat None as "nothing to process".
        print(f"Error extracting text from PDF: {e}")
        return None
34
+
35
def summarize_text(text, summary_length):
    """Summarise *text* with sumy's LSA summarizer.

    Args:
        text: Plain text to summarise; it is parsed with the English tokenizer.
        summary_length: Forwarded to the summarizer as its second argument
            (the size of the summary to produce).

    Returns:
        The sentences chosen by the summarizer, joined with single spaces.
    """
    document = PlaintextParser.from_string(text, Tokenizer("english")).document
    lsa = LsaSummarizer()
    picked_sentences = lsa(document, summary_length)
    return ' '.join(map(str, picked_sentences))
40
+
41
def split_into_chapters(text, num_chapters):
    """Split *text* into at most *num_chapters* chunks of whole sentences.

    Sentences are distributed as evenly as possible: chapter sizes differ by
    at most one sentence, with the earlier chapters receiving the extras.

    Args:
        text: Text to split; sentence boundaries come from ``sent_tokenize``.
        num_chapters: Desired number of chapters. It is converted to an int
            and values below 1 are treated as 1.

    Returns:
        A list of chapter strings. When the text has no more sentences than
        requested chapters, each sentence is its own chapter (so the list may
        be shorter than *num_chapters*, and is empty for empty text).
    """
    # Callers may hand over a float (e.g. from a UI widget) or an
    # out-of-range value; the arithmetic below needs a positive int.
    num_chapters = max(1, int(num_chapters))

    sentences = sent_tokenize(text)
    if len(sentences) <= num_chapters:
        return sentences

    # Spread the remainder over the first chapters instead of piling every
    # leftover sentence onto the last one.
    base_size, remainder = divmod(len(sentences), num_chapters)
    chapters = []
    start = 0
    for index in range(num_chapters):
        size = base_size + (1 if index < remainder else 0)
        chapters.append(' '.join(sentences[start:start + size]))
        start += size
    return chapters
54
+
55
def text_to_speech(text, output_path, speaker, language):
    """Synthesise *text* to an audio file using the module-level ``tts`` model.

    Args:
        text: Text to speak.
        output_path: Destination file path handed to the TTS engine.
        speaker: Speaker identifier passed through to the engine.
        language: Language code passed through to the engine.

    Returns:
        *output_path*, so the call can be chained.
    """
    synthesis_args = {
        "text": text,
        "file_path": output_path,
        "speaker": speaker,
        "language": language,
    }
    tts.tts_to_file(**synthesis_args)
    return output_path
58
+
59
def adjust_audio_speed(input_path, output_path, target_duration):
    """Write an MP3 copy of an audio file that fits within *target_duration*.

    Audio longer than the target is sped up; audio that already fits is
    exported unchanged, because pydub's ``speedup`` effect can only shorten
    audio and is not usable for factors at or below 1.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path the MP3 result is written to.
        target_duration: Maximum desired length, in the same unit as
            ``len(AudioSegment)`` (the caller passes milliseconds).

    Returns:
        *output_path* when the file was written; *input_path* when the audio
        is empty or processing failed, so the caller always gets a path to
        the best audio available.

    Raises:
        ValueError: If *target_duration* is not positive.
    """
    if target_duration <= 0:
        raise ValueError(f"target_duration must be positive, got {target_duration}")

    # from_file lets the decoder detect the real container instead of forcing
    # MP3. NOTE(review): the TTS engine likely writes WAV data regardless of
    # the ".mp3" extension used by the caller -- confirm.
    audio = AudioSegment.from_file(input_path)
    current_duration = len(audio)

    if current_duration == 0:
        print(f"Warning: Audio file {input_path} has zero duration. Skipping speed adjustment.")
        return input_path

    speed_factor = current_duration / target_duration
    # NOTE(review): very small speed-ups are skipped as well; the chunk-based
    # effect appears unreliable that close to 1, and an overshoot of under 2%
    # is not worth reprocessing. Threshold chosen conservatively.
    min_useful_speedup = 1.02

    try:
        if speed_factor > min_useful_speedup:
            audio = audio.speedup(playback_speed=speed_factor)
        audio.export(output_path, format="mp3")
        return output_path
    except Exception as e:
        print(f"Error adjusting audio speed: {e}")
        return input_path
78
+
79
def process_chapter(chapter, i, speaker, language):
    """Turn one chapter of text into a ``chapter_<n>.mp3`` audio file.

    Args:
        chapter: Chapter text to synthesise.
        i: Zero-based chapter index; file names and messages use ``i + 1``.
        speaker: Speaker identifier forwarded to ``text_to_speech``.
        language: Language code forwarded to ``text_to_speech``.

    Returns:
        The path of the finished audio file, or ``None`` when the chapter is
        empty or any step fails (the error is printed).
    """
    temp_path = f"temp_chapter_{i+1}.mp3"
    output_path = f"chapter_{i+1}.mp3"

    try:
        if not chapter.strip():
            print(f"Warning: Chapter {i+1} is empty. Skipping.")
            return None

        text_to_speech(chapter, temp_path, speaker, language)

        # Adjust speed to fit into 3 minutes
        result_path = adjust_audio_speed(temp_path, output_path, 3 * 60 * 1000)

        # adjust_audio_speed falls back to returning its input when it could
        # not write output_path; keep that audio instead of deleting it and
        # returning a path that does not exist.
        if result_path != output_path:
            os.replace(temp_path, output_path)
        return output_path
    except Exception as e:
        print(f"Error processing chapter {i+1}: {e}")
        return None
    finally:
        # Clean up the temporary file on every path, including failures.
        try:
            if os.path.exists(temp_path):
                os.remove(temp_path)
        except OSError as cleanup_error:
            print(f"Warning: could not remove {temp_path}: {cleanup_error}")
98
+
99
def process_pdf(pdf_path, num_chapters, speaker, language):
    """Convert a PDF into a list of per-chapter audio files of its summary.

    Pipeline: extract text, summarise it, split the summary into chapters,
    then synthesise every chapter in a thread pool.

    Args:
        pdf_path: Path of the PDF to process.
        num_chapters: Number of chapters to split the summary into.
        speaker: Speaker identifier forwarded to ``process_chapter``.
        language: Language code forwarded to ``process_chapter``.

    Returns:
        Audio file paths in chapter order, omitting chapters that failed;
        an empty list when no text could be extracted.
    """
    full_text = extract_text_from_pdf(pdf_path)
    if full_text is None or not full_text.strip():
        print("Error: Extracted text is empty or None")
        return []

    # Clean text to remove unwanted characters
    full_text = full_text.replace('\t', ' ')

    # Size the summary to a word budget (presumably 15 minutes of speech at
    # ~150 words per minute). The summarizer is given a sentence count, so
    # scale the document's sentence count by budget / total words. The
    # previous formula shrank the summary to one sentence for any text longer
    # than the budget.
    target_words = 15 * 150
    word_count = len(full_text.split())  # >= 1: text is non-blank here
    sentence_count = len(sent_tokenize(full_text))
    summary_length = max(
        1, min(sentence_count, target_words * sentence_count // word_count)
    )
    summary = summarize_text(full_text, summary_length)

    chapters = split_into_chapters(summary, num_chapters)

    # NOTE(review): all workers share the module-level TTS model; confirm it
    # tolerates concurrent synthesis calls.
    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(process_chapter, chapter, index, speaker, language)
            for index, chapter in enumerate(chapters)
        ]
        chapter_audios = [future.result() for future in futures]

    return [audio for audio in chapter_audios if audio is not None]
117
+
118
def gradio_interface(pdf_file, num_chapters):
    """Gradio callback: build chapter audio for an uploaded PDF.

    Args:
        pdf_file: The uploaded file, either an object exposing the path as
            ``.name`` or the path string itself; ``None`` when nothing was
            uploaded.
        num_chapters: Requested number of chapters.

    Returns:
        A list of exactly 10 entries: audio file paths first, padded with
        ``None`` for unused slots.
    """
    # Must match the number of gr.Audio outputs declared on the interface.
    max_outputs = 10

    if pdf_file is None:
        return [None] * max_outputs

    # Accept both a file-like object (path in .name) and a plain path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    chapter_audios = process_pdf(pdf_path, num_chapters, default_speaker, default_language)
    # Never return more values than there are output components.
    chapter_audios = chapter_audios[:max_outputs]
    return chapter_audios + [None] * (max_outputs - len(chapter_audios))
124
+
125
# Web UI: a PDF upload plus a chapter-count slider in, ten audio players out.
# gradio_interface always returns ten values, one per player below.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.File(label="Upload PDF Book"),
        gr.Slider(label="Number of Chapters", minimum=1, maximum=10, step=1, value=5),
    ],
    outputs=[gr.Audio(label=f"Chapter {number}") for number in range(1, 11)],
    title="PDF Book to Audiobook Summary",
    description="Upload a PDF book to get a 15-minute audiobook summary split into chapters.",
)

if __name__ == "__main__":
    # Only start the server when run as a script, not when imported.
    iface.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ nltk
2
+ numpy
3
+ onnxruntime
4
+ PyPDF2
5
+ pydub
6
+ sumy
7
+ torch
8
+ TTS
9
+ gradio