"""Turn a PDF book into a short, chaptered audiobook summary served via Gradio.

Pipeline: extract the PDF text, summarize it with LSA (sumy), split the
summary into chapters, synthesize each chapter with the TTS model, and expose
the resulting audio files through a Gradio interface.
"""

import os
from concurrent.futures import ThreadPoolExecutor

import gradio as gr
import nltk
import PyPDF2
from nltk.tokenize import sent_tokenize
from pydub import AudioSegment
from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.summarizers.lsa import LsaSummarizer
from TTS.api import TTS

# Download necessary NLTK data
nltk.download('punkt', quiet=True)
# Newer NLTK releases look the sentence tokenizer up under 'punkt_tab';
# requesting it on an older release is a harmless no-op.
nltk.download('punkt_tab', quiet=True)

# Initialize TTS model using ONNX
# NOTE(review): confirm that the installed TTS release accepts `use_onnx`.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", use_onnx=True)

# Set default speaker and language manually based on valid IDs obtained
default_speaker = "en_speaker_1"  # Replace with a valid speaker ID from the printed list
default_language = "en"  # Replace with a valid language code from the printed list

# Number of audio outputs exposed by the interface (and the slider maximum).
MAX_CHAPTERS = 10
# The summary is sized to roughly this much speech at this speaking rate.
TARGET_SUMMARY_MINUTES = 15
WORDS_PER_MINUTE = 150
# Upper bound for the duration of one chapter's audio, in milliseconds.
CHAPTER_TARGET_MS = 3 * 60 * 1000


def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*.

    Pages are joined with a newline so that words at page boundaries do not
    run together. A page without extractable text contributes an empty string.

    Returns:
        The extracted text, or ``None`` if the file could not be read/parsed
        (the error is printed).
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            return '\n'.join(page.extract_text() or '' for page in reader.pages)
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        return None


def summarize_text(text, summary_length):
    """Summarize *text* with LSA and return the selected sentences as one string.

    Args:
        text: The text to summarize.
        summary_length: Number of sentences the summarizer should select.
    """
    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    summarizer = LsaSummarizer()
    summary = summarizer(parser.document, summary_length)
    return ' '.join(str(sentence) for sentence in summary)


def split_into_chapters(text, num_chapters):
    """Split *text* into at most *num_chapters* chunks of whole sentences.

    If the text has no more sentences than requested chapters, every sentence
    becomes its own chapter. Otherwise the sentences are distributed as evenly
    as possible, with earlier chapters receiving the remainder.

    Args:
        text: The text to split.
        num_chapters: Desired number of chapters; coerced to an int and
            clamped to at least 1 (UI sliders may deliver floats).

    Returns:
        A list of chapter strings (possibly empty when *text* has no sentences).
    """
    num_chapters = max(1, int(num_chapters))
    sentences = sent_tokenize(text)
    if len(sentences) <= num_chapters:
        return sentences

    base_size, remainder = divmod(len(sentences), num_chapters)
    chapters = []
    start = 0
    for index in range(num_chapters):
        size = base_size + (1 if index < remainder else 0)
        chapters.append(' '.join(sentences[start:start + size]))
        start += size
    return chapters


def text_to_speech(text, output_path, speaker, language):
    """Synthesize *text* with the module-level TTS model into *output_path*.

    Returns:
        *output_path*, for convenience.
    """
    tts.tts_to_file(text=text, file_path=output_path, speaker=speaker, language=language)
    return output_path


def adjust_audio_speed(input_path, output_path, target_duration):
    """Write an MP3 copy of *input_path* that is no longer than *target_duration*.

    Audio longer than the target is sped up by the ratio of the two durations.
    Audio that already fits is exported unchanged, because the speed-up effect
    is only applied for factors greater than 1.

    Args:
        input_path: Source audio file; the container is auto-detected.
        output_path: Destination MP3 path.
        target_duration: Maximum duration in milliseconds; must be positive.

    Returns:
        *output_path* on success. *input_path* when nothing was written
        (invalid target, zero-length audio, or a failed speed-up/export), so
        callers must use the returned path rather than assume *output_path*.
    """
    if target_duration <= 0:
        print(f"Warning: Invalid target duration {target_duration}. Skipping speed adjustment.")
        return input_path

    audio = AudioSegment.from_file(input_path)
    current_duration = len(audio)
    if current_duration == 0:
        print(f"Warning: Audio file {input_path} has zero duration. Skipping speed adjustment.")
        return input_path

    speed_factor = current_duration / target_duration
    try:
        if speed_factor > 1:
            audio = audio.speedup(playback_speed=speed_factor)
        # export() hands back the open output file; close it right away.
        audio.export(output_path, format="mp3").close()
        return output_path
    except Exception as e:
        print(f"Error adjusting audio speed: {e}")
        return input_path


def process_chapter(chapter, i, speaker, language):
    """Synthesize one chapter and return the path of its audio file.

    Args:
        chapter: Chapter text.
        i: Zero-based chapter index, used to build the file names.
        speaker: Speaker ID passed to the TTS model.
        language: Language code passed to the TTS model.

    Returns:
        Path of the produced audio file, or ``None`` if the chapter is empty
        or processing failed (the error is printed).
    """
    # NOTE(review): the TTS backend appears to write WAV data regardless of
    # the file name, so the intermediate file carries a .wav extension.
    temp_path = f"temp_chapter_{i+1}.wav"
    output_path = f"chapter_{i+1}.mp3"
    result_path = None
    try:
        if not chapter.strip():
            print(f"Warning: Chapter {i+1} is empty. Skipping.")
            return None
        text_to_speech(chapter, temp_path, speaker, language)
        # Adjust speed to fit into 3 minutes
        result_path = adjust_audio_speed(temp_path, output_path, CHAPTER_TARGET_MS)
        return result_path
    except Exception as e:
        print(f"Error processing chapter {i+1}: {e}")
        return None
    finally:
        # Clean up the temporary file unless it is the file being returned
        # (adjust_audio_speed falls back to its input when it cannot export).
        if result_path != temp_path:
            try:
                os.remove(temp_path)
            except OSError:
                pass


def _target_sentence_count(text):
    """Return how many sentences of *text* add up to the target speech length."""
    word_count = len(text.split())
    sentence_count = len(sent_tokenize(text))
    if word_count == 0 or sentence_count == 0:
        return 1
    words_per_sentence = word_count / sentence_count
    target_words = TARGET_SUMMARY_MINUTES * WORDS_PER_MINUTE
    wanted = round(target_words / words_per_sentence)
    return max(1, min(sentence_count, wanted))


def process_pdf(pdf_path, num_chapters, speaker, language):
    """Convert the PDF at *pdf_path* into a list of chapter audio file paths.

    Args:
        pdf_path: Path of the PDF to read.
        num_chapters: Desired number of chapters.
        speaker: Speaker ID passed to the TTS model.
        language: Language code passed to the TTS model.

    Returns:
        Paths of the chapters that were synthesized successfully, in chapter
        order; an empty list when no text could be extracted.
    """
    full_text = extract_text_from_pdf(pdf_path)
    if full_text is None or not full_text.strip():
        print("Error: Extracted text is empty or None")
        return []

    # Clean text to remove unwanted characters
    full_text = full_text.replace('\t', ' ')

    # The summarizer expects a sentence count, so the word budget is
    # converted using the average sentence length of the document.
    summary_length = _target_sentence_count(full_text)
    summary = summarize_text(full_text, summary_length)
    chapters = split_into_chapters(summary, num_chapters)

    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(process_chapter, chapter, index, speaker, language)
            for index, chapter in enumerate(chapters)
        ]
        chapter_audios = [future.result() for future in futures]

    return [audio for audio in chapter_audios if audio is not None]


def gradio_interface(pdf_file, num_chapters):
    """Gradio callback: return exactly ``MAX_CHAPTERS`` audio paths (``None``-padded).

    Args:
        pdf_file: The uploaded file, either a path string or an object whose
            ``name`` attribute is the path; ``None`` when nothing was uploaded.
        num_chapters: Number of chapters chosen in the UI.
    """
    if pdf_file is None:
        return [None] * MAX_CHAPTERS

    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    chapter_audios = process_pdf(pdf_path, num_chapters, default_speaker, default_language)
    chapter_audios = chapter_audios[:MAX_CHAPTERS]
    return chapter_audios + [None] * (MAX_CHAPTERS - len(chapter_audios))


iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.File(label="Upload PDF Book"),
        gr.Slider(minimum=1, maximum=MAX_CHAPTERS, step=1, label="Number of Chapters", value=5),
    ],
    outputs=[gr.Audio(label=f"Chapter {i+1}") for i in range(MAX_CHAPTERS)],
    title="PDF Book to Audiobook Summary",
    description="Upload a PDF book to get a 15-minute audiobook summary split into chapters.",
)

if __name__ == "__main__":
    iface.launch(share=True)