Spaces:

vitorcalvi
/

PDFToAudioBookSummary

Runtime error

App Files Files Community

PDFToAudioBookSummary / app.py

vitorcalvi

pre-launch

d7529f8 about 2 months ago

raw

history blame contribute delete

No virus

4.94 kB

	import gradio as gr
	import PyPDF2
	import nltk
	from nltk.tokenize import sent_tokenize
	from sumy.parsers.plaintext import PlaintextParser
	from sumy.nlp.tokenizers import Tokenizer
	from sumy.summarizers.lsa import LsaSummarizer
	import os
	from pydub import AudioSegment
	from concurrent.futures import ThreadPoolExecutor
	from TTS.api import TTS

	# Download the NLTK 'punkt' data at import time; sent_tokenize (imported
	# above) is used later to split text into sentences.
	nltk.download('punkt', quiet=True)

	# Load the XTTS v2 multilingual text-to-speech model once at import time.
	# This single instance is shared by every text_to_speech() call.
	# NOTE(review): confirm that the installed TTS release accepts `use_onnx`;
	# an unsupported keyword argument here would fail at startup.
	tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", use_onnx=True)

	# Speaker and language handed to the TTS model for every request.
	# NOTE(review): "en_speaker_1" looks like a placeholder - verify it against
	# the speaker IDs the loaded model actually exposes before shipping.
	default_speaker = "en_speaker_1" # Replace with a valid speaker ID from the printed list
	default_language = "en" # Replace with a valid language code from the printed list

	def extract_text_from_pdf(pdf_path):
	    """Return the concatenated text of every page of the PDF at *pdf_path*.

	    Pages are joined with a newline so the last word of one page is not
	    fused with the first word of the next. A page for which the extractor
	    yields nothing contributes an empty string instead of aborting the
	    whole extraction.

	    Returns:
	        The extracted text, or None (after printing the error) when the
	        file cannot be opened or parsed.
	    """
	    try:
	        with open(pdf_path, 'rb') as file:
	            reader = PyPDF2.PdfReader(file)
	            # `or ''` guards against extract_text() yielding None for a
	            # page; concatenating None would raise and lose every page.
	            return '\n'.join(page.extract_text() or '' for page in reader.pages)
	    except Exception as e:
	        # Best-effort boundary: the caller treats None as "nothing to read".
	        print(f"Error extracting text from PDF: {e}")
	        return None

	def summarize_text(text, summary_length):
	    """Summarize *text* with sumy's LSA summarizer.

	    The text is parsed using the "english" tokenizer, *summary_length* is
	    passed to the summarizer as its second argument, and the sentences it
	    returns are joined with single spaces into one string.
	    """
	    document = PlaintextParser.from_string(text, Tokenizer("english")).document
	    lsa = LsaSummarizer()
	    selected = lsa(document, summary_length)
	    return ' '.join(map(str, selected))

	def split_into_chapters(text, num_chapters):
	    """Split *text* into at most *num_chapters* chapters of whole sentences.

	    Sentences keep their original order. When there are no more sentences
	    than requested chapters, each sentence becomes its own chapter.
	    Otherwise the sentences are spread as evenly as possible: chapter sizes
	    differ by at most one sentence, with the earlier chapters receiving the
	    extra ones. (Previously the whole remainder was appended to the final
	    chapter, which could make it many times longer than the others.)

	    Args:
	        text: The text to split; empty or whitespace-only text yields [].
	        num_chapters: Desired chapter count. It is coerced to int (UI
	            sliders may deliver floats) and clamped to a minimum of 1.

	    Returns:
	        A list of chapter strings, each made of sentences joined by spaces.
	    """
	    if not text or not text.strip():
	        return []
	    num_chapters = max(1, int(num_chapters))
	    sentences = sent_tokenize(text)
	    if len(sentences) <= num_chapters:
	        return sentences
	    # base sentences per chapter; the first `extra` chapters get one more.
	    base, extra = divmod(len(sentences), num_chapters)
	    chapters = []
	    start = 0
	    for index in range(num_chapters):
	        end = start + base + (1 if index < extra else 0)
	        chapters.append(' '.join(sentences[start:end]))
	        start = end
	    return chapters

	def text_to_speech(text, output_path, speaker, language):
	    """Synthesize *text* to the file *output_path* and return that path.

	    Delegates to the module-level `tts` model's tts_to_file(), forwarding
	    the speaker and language unchanged.
	    """
	    tts.tts_to_file(
	        text=text,
	        speaker=speaker,
	        language=language,
	        file_path=output_path,
	    )
	    return output_path

	def adjust_audio_speed(input_path, output_path, target_duration):
	    """Export *input_path* to *output_path* as MP3, sped up to fit the target.

	    Audio longer than *target_duration* is sped up by the ratio of the two
	    durations. Audio that already fits is exported unchanged: pydub's
	    speedup() works by dropping slices of audio, so it cannot slow a clip
	    down, and calling it with a factor at or below 1 (as was done before)
	    does not produce a valid result.

	    Args:
	        input_path: Path of the audio file to read.
	        output_path: Path of the MP3 file to write.
	        target_duration: Maximum desired length in milliseconds.

	    Returns:
	        *output_path* when a file was written there; otherwise *input_path*
	        (invalid target, zero-length audio, or a processing error), meaning
	        the caller should keep using the unmodified input.
	    """
	    if target_duration <= 0:
	        # Validate before any I/O; a zero target used to raise ZeroDivisionError.
	        print(f"Warning: Invalid target duration {target_duration}. Skipping speed adjustment.")
	        return input_path
	    # from_file lets ffmpeg detect the container instead of forcing MP3.
	    # NOTE(review): the synthesizer may write WAV data even to a ".mp3"
	    # path - confirm; auto-detection handles either case.
	    audio = AudioSegment.from_file(input_path)
	    current_duration = len(audio)
	    if current_duration == 0:
	        print(f"Warning: Audio file {input_path} has zero duration. Skipping speed adjustment.")
	        return input_path
	    speed_factor = current_duration / target_duration
	    try:
	        # Ignore overshoots under 1%: presumably speedup() needs a factor
	        # measurably above 1 to have anything to remove per chunk.
	        if speed_factor > 1.01:
	            adjusted_audio = audio.speedup(playback_speed=speed_factor)
	        else:
	            adjusted_audio = audio
	        adjusted_audio.export(output_path, format="mp3")
	        return output_path
	    except Exception as e:
	        print(f"Error adjusting audio speed: {e}")
	        return input_path

	def process_chapter(chapter, i, speaker, language):
	    """Synthesize one chapter to `chapter_{i+1}.mp3` and return that path.

	    The text is first rendered to `temp_chapter_{i+1}.mp3`, then passed to
	    adjust_audio_speed() with a 3-minute target. If that step reports it
	    fell back to the unmodified input, the temporary file is promoted to
	    the output path, so the returned path always refers to an existing
	    file. (Previously the fallback file was deleted and a path that was
	    never written was returned.)

	    Args:
	        chapter: Chapter text; empty or whitespace-only text is skipped.
	        i: Zero-based chapter index, used to build the file names.
	        speaker: Speaker ID forwarded to text_to_speech().
	        language: Language code forwarded to text_to_speech().

	    Returns:
	        The output file path, or None if the chapter is empty or any step
	        fails (the error is printed).
	    """
	    if not chapter or not chapter.strip():
	        print(f"Warning: Chapter {i+1} is empty. Skipping.")
	        return None
	    # NOTE(review): fixed names in the working directory - concurrent
	    # requests would overwrite each other's files; confirm this is acceptable.
	    temp_path = f"temp_chapter_{i+1}.mp3"
	    output_path = f"chapter_{i+1}.mp3"
	    try:
	        text_to_speech(chapter, temp_path, speaker, language)
	        # Adjust speed to fit into 3 minutes
	        result_path = adjust_audio_speed(temp_path, output_path, 3 * 60 * 1000)
	        if result_path == temp_path:
	            # No adjusted file was written; keep the raw synthesis instead.
	            os.replace(temp_path, output_path)
	        return output_path
	    except Exception as e:
	        print(f"Error processing chapter {i+1}: {e}")
	        return None
	    finally:
	        # Clean up the temporary file on success and on failure alike.
	        try:
	            if os.path.exists(temp_path):
	                os.remove(temp_path)
	        except OSError as cleanup_error:
	            print(f"Warning: Could not remove {temp_path}: {cleanup_error}")

	def process_pdf(pdf_path, num_chapters, speaker, language):
	    """Turn the PDF at *pdf_path* into per-chapter summary audio files.

	    Steps: extract the text, summarize it, split the summary into
	    *num_chapters* chapters, and synthesize every chapter in a thread pool.

	    The summary is sized for roughly 15 minutes of speech at about 150
	    words per minute. That word budget is converted into a sentence count
	    using the document's own average sentence length, because the
	    summarizer is given a number of sentences. (Previously the budget was
	    divided by the total word count, which collapses to 1 for any text
	    longer than the budget, giving a one-sentence summary.)

	    Args:
	        pdf_path: Path of the PDF file to read.
	        num_chapters: Number of chapters to split the summary into.
	        speaker: Speaker ID forwarded to the speech synthesis step.
	        language: Language code forwarded to the speech synthesis step.

	    Returns:
	        Paths of the chapter audio files that were produced, in chapter
	        order; an empty list when no text could be extracted.
	    """
	    full_text = extract_text_from_pdf(pdf_path)
	    if full_text is None or not full_text.strip():
	        print("Error: Extracted text is empty or None")
	        return []
	    # Clean text to remove unwanted characters
	    full_text = full_text.replace('\t', ' ')
	    target_words = 15 * 150  # 15 minutes at ~150 spoken words per minute
	    total_words = len(full_text.split())
	    total_sentences = max(1, len(sent_tokenize(full_text)))
	    words_per_sentence = max(1.0, total_words / total_sentences)
	    # Never request more sentences than exist, and always at least one.
	    summary_length = max(1, min(total_sentences, round(target_words / words_per_sentence)))
	    summary = summarize_text(full_text, summary_length)
	    chapters = split_into_chapters(summary, num_chapters)
	    # NOTE(review): every worker uses the same module-level TTS model;
	    # confirm it tolerates concurrent synthesis calls.
	    with ThreadPoolExecutor() as executor:
	        futures = [
	            executor.submit(process_chapter, chapter, index, speaker, language)
	            for index, chapter in enumerate(chapters)
	        ]
	        # Collect in submission order so results stay in chapter order.
	        chapter_audios = [future.result() for future in futures]
	    return [audio for audio in chapter_audios if audio is not None]

	def gradio_interface(pdf_file, num_chapters):
	    """Gradio callback: convert the uploaded PDF into up to 10 audio outputs.

	    Args:
	        pdf_file: The upload from the File input. Either an object exposing
	            the file path as `.name` or a plain path string is accepted;
	            None means nothing was uploaded.
	        num_chapters: Value of the chapter slider; coerced to int.

	    Returns:
	        A list of exactly 10 items: the chapter audio paths followed by
	        None for every unused output slot.
	    """
	    slots = 10  # must equal the number of gr.Audio outputs on the Interface
	    if pdf_file is None:
	        return [None] * slots
	    # Fall back to the value itself when the upload is already a path string.
	    pdf_path = getattr(pdf_file, "name", pdf_file)
	    chapter_audios = process_pdf(pdf_path, int(num_chapters), default_speaker, default_language)
	    # Cap at the slot count so the output list never exceeds what the UI expects.
	    chapter_audios = chapter_audios[:slots]
	    return chapter_audios + [None] * (slots - len(chapter_audios))

	# Gradio UI: one PDF upload plus a chapter-count slider as inputs, and ten
	# audio players as outputs (gradio_interface always returns ten values).
	iface = gr.Interface(
	    fn=gradio_interface,
	    title="PDF Book to Audiobook Summary",
	    description="Upload a PDF book to get a 15-minute audiobook summary split into chapters.",
	    inputs=[
	        gr.File(label="Upload PDF Book"),
	        gr.Slider(label="Number of Chapters", minimum=1, maximum=10, step=1, value=5),
	    ],
	    outputs=[gr.Audio(label=f"Chapter {number}") for number in range(1, 11)],
	)

	# Start the web app only when this file is run as a script.
	if __name__ == "__main__":
	    iface.launch(share=True)