Spaces:

vitorcalvi
/

PDFToAudioBookSummary

Runtime error

App Files Files Community

vitorcalvi committed on Aug 5

Commit

d7529f8

•

1 Parent(s): 37b8131

pre-launch

Browse files

Files changed (2) hide show

app.py +137 -0
requirements.txt +9 -0

app.py ADDED Viewed

	@@ -0,0 +1,137 @@

+import gradio as gr
+import PyPDF2
+import nltk
+from nltk.tokenize import sent_tokenize
+from sumy.parsers.plaintext import PlaintextParser
+from sumy.nlp.tokenizers import Tokenizer
+from sumy.summarizers.lsa import LsaSummarizer
+import os
+from pydub import AudioSegment
+from concurrent.futures import ThreadPoolExecutor
+from TTS.api import TTS
+# Download the NLTK 'punkt' data at import time; quiet=True suppresses the
+# download output. Presumably this is the data sent_tokenize needs -- verify
+# against the installed NLTK version.
+nltk.download('punkt', quiet=True)
+# Initialize the TTS model once, at import time, so every request shares the
+# single module-level `tts` instance.
+# NOTE(review): confirm that the installed TTS package accepts the `use_onnx`
+# keyword; an unsupported keyword would raise TypeError here and stop the app
+# from starting.
+tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", use_onnx=True)
+# Default speaker and language passed to every synthesis call.
+# NOTE(review): both values are hand-written placeholders (see the trailing
+# comments); confirm they are valid for the loaded model.
+default_speaker = "en_speaker_1"  # Replace with a valid speaker ID from the printed list
+default_language = "en"  # Replace with a valid language code from the printed list
+def extract_text_from_pdf(pdf_path):
+    try:
+        with open(pdf_path, 'rb') as file:
+            reader = PyPDF2.PdfReader(file)
+            text = ''
+            for page in reader.pages:
+                text += page.extract_text()
+        return text
+    except Exception as e:
+        print(f"Error extracting text from PDF: {e}")
+        return None
+def summarize_text(text, summary_length):
+    parser = PlaintextParser.from_string(text, Tokenizer("english"))
+    summarizer = LsaSummarizer()
+    summary = summarizer(parser.document, summary_length)
+    return ' '.join([str(sentence) for sentence in summary])
+def split_into_chapters(text, num_chapters):
+    sentences = sent_tokenize(text)
+    if len(sentences) <= num_chapters:
+        return sentences
+    sentences_per_chapter = max(1, len(sentences) // num_chapters)
+    chapters = []
+    for i in range(0, len(sentences), sentences_per_chapter):
+        chapter = ' '.join(sentences[i:i+sentences_per_chapter])
+        chapters.append(chapter)
+    while len(chapters) > num_chapters:
+        chapters[-2] += ' ' + chapters[-1]
+        chapters.pop()
+    return chapters
+def text_to_speech(text, output_path, speaker, language):
+    tts.tts_to_file(text=text, file_path=output_path, speaker=speaker, language=language)
+    return output_path
+def adjust_audio_speed(input_path, output_path, target_duration):
+    audio = AudioSegment.from_mp3(input_path)
+    current_duration = len(audio)
+    if current_duration == 0:
+        print(f"Warning: Audio file {input_path} has zero duration. Skipping speed adjustment.")
+        return input_path
+    speed_factor = current_duration / target_duration
+    if speed_factor < 0.1:
+        speed_factor = 0.1
+    try:
+        adjusted_audio = audio.speedup(playback_speed=speed_factor)
+        adjusted_audio.export(output_path, format="mp3")
+        return output_path
+    except Exception as e:
+        print(f"Error adjusting audio speed: {e}")
+        return input_path
+def process_chapter(chapter, i, speaker, language):
+    try:
+        if len(chapter.strip()) == 0:
+            print(f"Warning: Chapter {i+1} is empty. Skipping.")
+            return None
+        temp_path = f"temp_chapter_{i+1}.mp3"
+        output_path = f"chapter_{i+1}.mp3"
+        text_to_speech(chapter, temp_path, speaker, language)
+        # Adjust speed to fit into 3 minutes
+        adjust_audio_speed(temp_path, output_path, 3 * 60 * 1000)
+        os.remove(temp_path)  # Clean up temporary file
+        return output_path
+    except Exception as e:
+        print(f"Error processing chapter {i+1}: {e}")
+        return None
+def process_pdf(pdf_path, num_chapters, speaker, language):
+    full_text = extract_text_from_pdf(pdf_path)
+    if full_text is None or len(full_text.strip()) == 0:
+        print("Error: Extracted text is empty or None")
+        return []
+    # Clean text to remove unwanted characters
+    full_text = full_text.replace('\t', ' ')
+    summary_length = max(1, 15 * 150 // len(full_text.split()))
+    summary = summarize_text(full_text, summary_length)
+    chapters = split_into_chapters(summary, num_chapters)
+    with ThreadPoolExecutor() as executor:
+        chapter_audios = list(executor.map(lambda i: process_chapter(chapters[i], i, speaker, language), range(len(chapters))))
+    return [audio for audio in chapter_audios if audio is not None]
+def gradio_interface(pdf_file, num_chapters):
+    if pdf_file is None:
+        return [None] * 10
+    chapter_audios = process_pdf(pdf_file.name, num_chapters, default_speaker, default_language)
+    return chapter_audios + [None] * (10 - len(chapter_audios))
+iface = gr.Interface(
+    fn=gradio_interface,
+    inputs=[
+        gr.File(label="Upload PDF Book"),
+        gr.Slider(minimum=1, maximum=10, step=1, label="Number of Chapters", value=5)
+    ],
+    outputs=[gr.Audio(label=f"Chapter {i+1}") for i in range(10)],
+    title="PDF Book to Audiobook Summary",
+    description="Upload a PDF book to get a 15-minute audiobook summary split into chapters."
+)
+if __name__ == "__main__":
+    iface.launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+nltk
+numpy
+onnxruntime
+PyPDF2
+pydub
+sumy
+torch
+TTS
+gradio