Spaces:
Runtime error
Runtime error
import gradio as gr | |
import PyPDF2 | |
import nltk | |
from nltk.tokenize import sent_tokenize | |
from sumy.parsers.plaintext import PlaintextParser | |
from sumy.nlp.tokenizers import Tokenizer | |
from sumy.summarizers.lsa import LsaSummarizer | |
import os | |
from pydub import AudioSegment | |
from concurrent.futures import ThreadPoolExecutor | |
from TTS.api import TTS | |
# Fetch the Punkt sentence-tokenizer data that sent_tokenize() relies on.
nltk.download('punkt', quiet=True)

# Load the XTTS v2 multilingual model once, at import time; text_to_speech()
# uses this module-level instance. The documented Coqui `TTS(...)` constructor
# has no `use_onnx` parameter, so passing it raises TypeError before the app
# can start -- the keyword is therefore omitted.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")

# Voice and language passed to the model for every chapter.
default_speaker = "en_speaker_1"  # Replace with a valid speaker ID for the loaded model
default_language = "en"  # Replace with a valid language code for the loaded model
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*.

    Page texts are joined with a newline so the last word of one page does
    not fuse with the first word of the next.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The extracted text as one string, or ``None`` if the file could not
        be opened or parsed (the error is printed, not raised).
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # `or ''` keeps a page that yields None instead of a string from
            # aborting the extraction of the whole document.
            return '\n'.join(page.extract_text() or '' for page in reader.pages)
    except Exception as e:
        # Best-effort boundary: any open/parse failure is reported and
        # signalled to the caller as None.
        print(f"Error extracting text from PDF: {e}")
        return None
def summarize_text(text, summary_length):
    """Summarize *text* with sumy's LSA summarizer.

    The text is parsed using the English tokenizer, *summary_length* is
    handed to the summarizer as its sentence count, and the sentences it
    selects are returned joined by single spaces.
    """
    document = PlaintextParser.from_string(text, Tokenizer("english")).document
    selected_sentences = LsaSummarizer()(document, summary_length)
    return ' '.join(map(str, selected_sentences))
def split_into_chapters(text, num_chapters):
    """Split *text* into at most *num_chapters* chunks of whole sentences.

    Sentences keep their order and are spread as evenly as possible:
    chapter sizes differ by at most one sentence, with the longer chapters
    first. (Cutting fixed-size chunks and merging the overflow would instead
    pile the whole remainder into the final chapter.)

    Args:
        text: The text to split.
        num_chapters: Desired number of chapters. Coerced to ``int``; values
            below 1 are treated as 1.

    Returns:
        A list of chapter strings. When the text has no more sentences than
        *num_chapters*, each sentence is returned as its own chapter, so the
        list may be shorter than requested (or empty).
    """
    # UI widgets may deliver the count as a float; range/slicing need an int,
    # and a count below 1 would otherwise divide by zero.
    num_chapters = max(1, int(num_chapters))

    sentences = sent_tokenize(text)
    if len(sentences) <= num_chapters:
        return sentences

    base_size, extra = divmod(len(sentences), num_chapters)
    chapters = []
    start = 0
    for index in range(num_chapters):
        # The first `extra` chapters absorb one leftover sentence each.
        end = start + base_size + (1 if index < extra else 0)
        chapters.append(' '.join(sentences[start:end]))
        start = end
    return chapters
def text_to_speech(text, output_path, speaker, language):
    """Synthesize *text* with the module-level ``tts`` model into a file.

    The audio is written to *output_path* using the given *speaker* and
    *language*; *output_path* is returned for convenience.
    """
    synthesis_args = {
        "text": text,
        "file_path": output_path,
        "speaker": speaker,
        "language": language,
    }
    tts.tts_to_file(**synthesis_args)
    return output_path
def adjust_audio_speed(input_path, output_path, target_duration):
    """Write an MP3 copy of *input_path* that is no longer than *target_duration*.

    Audio longer than the target is sped up by ``current / target``. Audio
    that already fits is exported unchanged: pydub's ``speedup`` is meant
    for factors above 1, so it is never asked to slow a clip down.

    Args:
        input_path: Path of the source audio file.
        output_path: Path the MP3 result is exported to.
        target_duration: Maximum duration in milliseconds (the unit
            ``len(AudioSegment)`` reports). A non-positive value disables
            the speed-up and the audio is exported as-is.

    Returns:
        *output_path* when the export succeeded. *input_path* when the source
        has zero duration or the adjustment/export failed; in that case
        nothing usable was written to *output_path*.
    """
    # from_file lets ffmpeg detect the container instead of forcing MP3
    # decoding; the file may not actually be MP3-encoded despite its name.
    audio = AudioSegment.from_file(input_path)
    current_duration = len(audio)
    if current_duration == 0:
        print(f"Warning: Audio file {input_path} has zero duration. Skipping speed adjustment.")
        return input_path

    try:
        if target_duration > 0 and current_duration > target_duration:
            audio = audio.speedup(playback_speed=current_duration / target_duration)
        audio.export(output_path, format="mp3")
        return output_path
    except Exception as e:
        # Best effort: report the failure and hand back the untouched input.
        print(f"Error adjusting audio speed: {e}")
        return input_path
def process_chapter(chapter, i, speaker, language):
    """Render one chapter of text to ``chapter_<i+1>.mp3``.

    The chapter is synthesized to a temporary file, then passed through
    ``adjust_audio_speed`` with a three-minute target. If that step falls
    back to the unadjusted audio (it returns the input path instead of the
    output path), the temporary file is promoted to the output path so the
    returned path always refers to an existing file.

    Args:
        chapter: Chapter text to synthesize.
        i: Zero-based chapter index; file names and messages use ``i + 1``.
        speaker: Speaker ID forwarded to ``text_to_speech``.
        language: Language code forwarded to ``text_to_speech``.

    Returns:
        The output file path, or ``None`` if the chapter is empty or any
        step failed (the error is printed, not raised).
    """
    temp_path = f"temp_chapter_{i+1}.mp3"
    output_path = f"chapter_{i+1}.mp3"
    try:
        if not chapter.strip():
            print(f"Warning: Chapter {i+1} is empty. Skipping.")
            return None

        text_to_speech(chapter, temp_path, speaker, language)

        # Adjust speed to fit into 3 minutes
        adjusted_path = adjust_audio_speed(temp_path, output_path, 3 * 60 * 1000)
        if adjusted_path != output_path:
            # The adjustment was skipped or failed, so output_path was not
            # produced; keep the raw synthesis rather than deleting it.
            os.replace(temp_path, output_path)
        return output_path
    except Exception as e:
        print(f"Error processing chapter {i+1}: {e}")
        return None
    finally:
        # Clean up the temporary file on success and on failure alike.
        if os.path.exists(temp_path):
            os.remove(temp_path)
def process_pdf(pdf_path, num_chapters, speaker, language):
    """Turn a PDF into a list of per-chapter audio files of its summary.

    The PDF text is extracted, summarized to roughly 15 minutes of speech
    (15 minutes at an assumed 150 words per minute), split into chapters,
    and each chapter is rendered to audio on a thread pool.

    Args:
        pdf_path: Path of the PDF to process.
        num_chapters: Number of chapters to split the summary into.
        speaker: Speaker ID forwarded to ``process_chapter``.
        language: Language code forwarded to ``process_chapter``.

    Returns:
        Paths of the chapter audio files that were produced, in chapter
        order. Chapters that failed are omitted; an empty list is returned
        when no text could be extracted.
    """
    full_text = extract_text_from_pdf(pdf_path)
    if not full_text or not full_text.strip():
        print("Error: Extracted text is empty or None")
        return []

    # Clean text to remove unwanted characters
    full_text = full_text.replace('\t', ' ')

    # The summarizer takes a sentence count, so convert the word budget into
    # sentences using the document's own average sentence length. (Dividing
    # the budget by the total word count would shrink the summary to a
    # single sentence for any document longer than the budget.)
    target_words = 15 * 150
    word_count = len(full_text.split())
    sentence_count = max(1, len(sent_tokenize(full_text)))
    words_per_sentence = word_count / sentence_count
    summary_length = max(
        1, min(sentence_count, round(target_words / words_per_sentence))
    )

    summary = summarize_text(full_text, summary_length)
    chapters = split_into_chapters(summary, num_chapters)

    def render(indexed_chapter):
        index, chapter = indexed_chapter
        return process_chapter(chapter, index, speaker, language)

    with ThreadPoolExecutor() as executor:
        # executor.map preserves input order, so results stay in chapter order.
        chapter_audios = list(executor.map(render, enumerate(chapters)))

    return [audio for audio in chapter_audios if audio is not None]
def gradio_interface(pdf_file, num_chapters):
    """Gradio callback: convert the uploaded PDF into chapter audio files.

    Args:
        pdf_file: The uploaded file as delivered by ``gr.File`` -- either an
            object exposing the path as ``.name`` or the path string itself;
            ``None`` when nothing was uploaded.
        num_chapters: Value of the chapter-count slider.

    Returns:
        A list of exactly 10 entries, one per audio output component: the
        generated audio paths followed by ``None`` padding.
    """
    max_outputs = 10  # must match the number of gr.Audio outputs of the interface

    if pdf_file is None:
        return [None] * max_outputs

    # Accept both a file wrapper with `.name` and a plain path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    chapter_audios = process_pdf(
        pdf_path, int(num_chapters), default_speaker, default_language
    )
    # Never return more values than there are output components.
    chapter_audios = chapter_audios[:max_outputs]
    return chapter_audios + [None] * (max_outputs - len(chapter_audios))
# Input widgets: the PDF to convert and how many chapters to produce.
_pdf_upload = gr.File(label="Upload PDF Book")
_chapter_slider = gr.Slider(minimum=1, maximum=10, step=1, label="Number of Chapters", value=5)

# One audio player per possible chapter (the slider allows up to 10).
_chapter_players = [gr.Audio(label=f"Chapter {i+1}") for i in range(10)]

# Web UI wiring the widgets above to gradio_interface().
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[_pdf_upload, _chapter_slider],
    outputs=_chapter_players,
    title="PDF Book to Audiobook Summary",
    description="Upload a PDF book to get a 15-minute audiobook summary split into chapters.",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(share=True)