# Hugging Face file-viewer residue (not part of the program), kept as comments:
# vitorcalvi's picture
# pre-launch
# d7529f8
# raw
# history blame contribute delete
# No virus
# 4.94 kB
import gradio as gr
import PyPDF2
import nltk
from nltk.tokenize import sent_tokenize
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
import os
from pydub import AudioSegment
from concurrent.futures import ThreadPoolExecutor
from TTS.api import TTS
# Fetch the NLTK "punkt" data at import time; quiet=True suppresses the
# download progress output. Presumably this is what sent_tokenize (used by
# split_into_chapters) relies on.
# NOTE(review): recent NLTK releases may look for "punkt_tab" instead - confirm
# against the installed version.
nltk.download('punkt', quiet=True)
# Load the multilingual XTTS v2 model once at module level; text_to_speech
# reuses this single `tts` instance for every synthesis call.
# NOTE(review): confirm that the installed TTS release accepts the `use_onnx`
# keyword - if it does not, this line raises TypeError at import time.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", use_onnx=True)
# Speaker and language that gradio_interface passes to process_pdf for every
# request. NOTE(review): both values look like placeholders (see the trailing
# comments) - verify them against the IDs the loaded model actually exposes.
default_speaker = "en_speaker_1" # Replace with a valid speaker ID from the printed list
default_language = "en" # Replace with a valid language code from the printed list
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The text of all pages joined in page order, or ``None`` when the file
        cannot be opened or parsed (the error is printed, not raised).
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # A page with no extractable text (e.g. a scanned image) may yield
            # an empty or missing result; `or ''` keeps such a page from
            # aborting the whole extraction. join() also avoids rebuilding the
            # string once per page.
            return ''.join(page.extract_text() or '' for page in reader.pages)
    except Exception as e:
        # Deliberate best-effort boundary: callers treat None as "no text".
        print(f"Error extracting text from PDF: {e}")
        return None
def summarize_text(text, summary_length):
    """Build an extractive summary of ``text`` with sumy's LSA summarizer.

    Args:
        text: Plain text to summarize; it is parsed with the "english"
            tokenizer.
        summary_length: Passed to the summarizer as the second argument
            (the number of sentences to keep).

    Returns:
        The selected sentences converted to strings and joined with single
        spaces.
    """
    english_parser = PlaintextParser.from_string(text, Tokenizer("english"))
    lsa = LsaSummarizer()
    selected = lsa(english_parser.document, summary_length)
    return ' '.join(map(str, selected))
def split_into_chapters(text, num_chapters):
    """Split ``text`` into at most ``num_chapters`` chunks of whole sentences.

    Sentences are kept in order and spread as evenly as possible: chapter
    sizes differ by at most one sentence, with the earlier chapters receiving
    the extra ones.

    Args:
        text: Text to split; sentence boundaries come from ``sent_tokenize``.
        num_chapters: Desired number of chapters. Non-integer values are
            truncated to an int and values below 1 are treated as 1.

    Returns:
        A list of chapter strings. When the text has no more sentences than
        ``num_chapters``, each sentence is returned as its own chapter (so the
        list may be shorter than requested, or empty for empty text).
    """
    # UI sliders can deliver floats, and a value below 1 would otherwise lead
    # to a division by zero or an out-of-range merge further down.
    num_chapters = max(1, int(num_chapters))

    sentences = sent_tokenize(text)
    if len(sentences) <= num_chapters:
        return sentences

    # Fixed-size chunking followed by merging the overflow into the last
    # chapter makes that chapter disproportionately long (19 sentences over
    # 10 chapters would give nine 1-sentence chapters and one of 10), so the
    # remainder is distributed one sentence at a time instead.
    base_size, remainder = divmod(len(sentences), num_chapters)
    chapters = []
    start = 0
    for index in range(num_chapters):
        end = start + base_size + (1 if index < remainder else 0)
        chapters.append(' '.join(sentences[start:end]))
        start = end
    return chapters
def text_to_speech(text, output_path, speaker, language):
    """Synthesize ``text`` to an audio file using the module-level ``tts`` model.

    Args:
        text: Text to speak.
        output_path: Destination file path handed to the model as ``file_path``.
        speaker: Speaker identifier forwarded to the model.
        language: Language code forwarded to the model.

    Returns:
        ``output_path``, unchanged, so the call can be chained.
    """
    synthesis_request = {
        "text": text,
        "file_path": output_path,
        "speaker": speaker,
        "language": language,
    }
    tts.tts_to_file(**synthesis_request)
    return output_path
def adjust_audio_speed(input_path, output_path, target_duration):
    """Shorten an audio file so that it lasts no longer than ``target_duration``.

    Audio that is longer than the target is sped up by the ratio of the two
    durations; audio that already fits is exported unchanged.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path the resulting MP3 is written to.
        target_duration: Maximum desired length, in the same unit as
            ``len(AudioSegment)`` (milliseconds).

    Returns:
        ``output_path`` when a file was written there; ``input_path`` when the
        adjustment was skipped (non-positive target, zero-length audio) or
        failed, in which case nothing is written to ``output_path``.

    Raises:
        Whatever ``AudioSegment.from_file`` raises when the input cannot be
        read or decoded.
    """
    # Validate before touching the file: a zero target would divide by zero
    # below and a negative one would yield a meaningless speed factor.
    if target_duration <= 0:
        print(f"Warning: Invalid target duration {target_duration}. Skipping speed adjustment.")
        return input_path

    # from_file lets ffmpeg detect the real container instead of forcing the
    # MP3 decoder; the synthesized input is not guaranteed to contain MP3 data
    # just because its name ends in ".mp3".
    audio = AudioSegment.from_file(input_path)
    current_duration = len(audio)
    if current_duration == 0:
        print(f"Warning: Audio file {input_path} has zero duration. Skipping speed adjustment.")
        return input_path

    speed_factor = current_duration / target_duration
    try:
        # speedup() works by dropping chunks, so it can only shorten audio.
        # For factors <= 1 the clip already fits and is exported as-is.
        if speed_factor > 1:
            audio = audio.speedup(playback_speed=speed_factor)
        audio.export(output_path, format="mp3")
        return output_path
    except Exception as e:
        print(f"Error adjusting audio speed: {e}")
        return input_path
def process_chapter(chapter, i, speaker, language):
    """Turn one chapter of text into an audio file named ``chapter_<i+1>.mp3``.

    The text is synthesized to a temporary file, which is then speed-adjusted
    to fit into 3 minutes and written to the final path.

    Args:
        chapter: Chapter text; blank text is skipped.
        i: Zero-based chapter index, used (plus one) in messages and file names.
        speaker: Speaker identifier forwarded to ``text_to_speech``.
        language: Language code forwarded to ``text_to_speech``.

    Returns:
        The path of the finished audio file, or ``None`` when the chapter is
        empty or any step fails (the error is printed, not raised).
    """
    temp_path = None
    try:
        if not chapter.strip():
            print(f"Warning: Chapter {i+1} is empty. Skipping.")
            return None
        temp_path = f"temp_chapter_{i+1}.mp3"
        output_path = f"chapter_{i+1}.mp3"
        text_to_speech(chapter, temp_path, speaker, language)
        # Adjust speed to fit into 3 minutes
        result_path = adjust_audio_speed(temp_path, output_path, 3 * 60 * 1000)
        if result_path != output_path:
            # The adjustment was skipped or failed and nothing was written to
            # output_path. Keep the unadjusted audio under the final name
            # rather than returning a path to a file that does not exist.
            os.replace(temp_path, output_path)
        return output_path
    except Exception as e:
        print(f"Error processing chapter {i+1}: {e}")
        return None
    finally:
        # Clean up the temporary file on every exit path, including failures.
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                print(f"Warning: Could not remove {temp_path}: {cleanup_error}")
def process_pdf(pdf_path, num_chapters, speaker, language):
    """Convert a PDF into a list of summarized, spoken chapter audio files.

    Pipeline: extract the text, summarize it down to roughly the word budget,
    split the summary into chapters, and synthesize each chapter in a thread
    pool.

    Args:
        pdf_path: Path of the PDF file to process.
        num_chapters: Number of chapters to split the summary into.
        speaker: Speaker identifier used for synthesis.
        language: Language code used for synthesis.

    Returns:
        Paths of the chapter audio files that were produced, in chapter order.
        Chapters that failed are omitted; an empty list is returned when no
        text could be extracted.
    """
    full_text = extract_text_from_pdf(pdf_path)
    if full_text is None or not full_text.strip():
        print("Error: Extracted text is empty or None")
        return []

    # Clean text to remove unwanted characters
    full_text = full_text.replace('\t', ' ')

    # Word budget for the summary: 15 * 150 (presumably 15 minutes at about
    # 150 spoken words per minute). The summarizer takes a sentence count, so
    # the budget is converted using the text's average sentence length.
    # Dividing the budget by the total word count instead yields 0 for any
    # text longer than the budget, which collapses every real book to a
    # one-sentence summary.
    target_words = 15 * 150
    word_count = len(full_text.split())
    sentence_count = max(1, len(sent_tokenize(full_text)))
    words_per_sentence = max(1.0, word_count / sentence_count)
    summary_length = max(1, min(sentence_count, round(target_words / words_per_sentence)))

    summary = summarize_text(full_text, summary_length)
    chapters = split_into_chapters(summary, num_chapters)

    def _render(indexed_chapter):
        # Unpack (index, text) so executor.map can feed enumerate() directly.
        index, chapter_text = indexed_chapter
        return process_chapter(chapter_text, index, speaker, language)

    with ThreadPoolExecutor() as executor:
        chapter_audios = list(executor.map(_render, enumerate(chapters)))
    return [audio for audio in chapter_audios if audio is not None]
def gradio_interface(pdf_file, num_chapters):
    """Gradio callback: turn the uploaded PDF into up to 10 chapter audio paths.

    Args:
        pdf_file: The uploaded file as delivered by ``gr.File``: either an
            object exposing the path as ``.name`` or the path string itself.
            ``None`` when nothing was uploaded.
        num_chapters: Number of chapters selected in the UI.

    Returns:
        A list of exactly 10 entries, one per audio output slot: the produced
        audio file paths followed by ``None`` padding.
    """
    # Must match the number of gr.Audio components wired as outputs.
    slot_count = 10
    if pdf_file is None:
        return [None] * slot_count

    # Depending on the Gradio version/configuration the upload arrives as a
    # temp-file wrapper (with .name) or as a plain path string; accept both
    # instead of failing with AttributeError on the latter.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    chapter_audios = process_pdf(pdf_path, num_chapters, default_speaker, default_language)
    # Never return more values than there are output components.
    chapter_audios = chapter_audios[:slot_count]
    return chapter_audios + [None] * (slot_count - len(chapter_audios))
# Gradio UI: one PDF upload plus a chapter-count slider as inputs, and ten
# audio players (labelled "Chapter 1" .. "Chapter 10") as outputs, all served
# by gradio_interface.
iface = gr.Interface(
    title="PDF Book to Audiobook Summary",
    description="Upload a PDF book to get a 15-minute audiobook summary split into chapters.",
    fn=gradio_interface,
    inputs=[
        gr.File(label="Upload PDF Book"),
        gr.Slider(label="Number of Chapters", minimum=1, maximum=10, step=1, value=5),
    ],
    outputs=[gr.Audio(label=f"Chapter {number}") for number in range(1, 11)],
)


# Start the web app only when the file is executed as a script; share=True is
# passed to launch() as in the original configuration.
if __name__ == "__main__":
    iface.launch(share=True)