File size: 4,936 Bytes
d7529f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import gradio as gr
import PyPDF2
import nltk
from nltk.tokenize import sent_tokenize
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
import os
from pydub import AudioSegment
from concurrent.futures import ThreadPoolExecutor
from TTS.api import TTS

# Download the NLTK 'punkt' data at import time (quiet=True is passed to the
# downloader). Presumably this is the data sent_tokenize relies on -- verify.
nltk.download('punkt', quiet=True)

# Create the single module-level TTS model; text_to_speech() uses this
# instance for every chapter.
# NOTE(review): confirm the installed TTS.api.TTS constructor accepts the
# `use_onnx` keyword -- TODO verify against the pinned TTS version.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", use_onnx=True)

# Default speaker and language that gradio_interface() forwards to process_pdf().
# NOTE(review): nothing in this file prints the "list" mentioned below; confirm
# that "en_speaker_1" is a speaker ID the loaded model actually provides.
default_speaker = "en_speaker_1"  # Replace with a valid speaker ID from the printed list
default_language = "en"  # Replace with a valid language code from the printed list

def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        The text of all pages joined in page order, or ``None`` when the file
        cannot be opened or parsed (the error is printed, not raised).
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # A page with no extractable text may yield an empty or None
            # result; fall back to '' so one such page does not abort the
            # whole extraction. Joining once avoids repeated string copies.
            return ''.join(page.extract_text() or '' for page in reader.pages)
    except Exception as e:
        # Top-level boundary for file and parser errors: report and signal
        # failure with None, which process_pdf() checks for.
        print(f"Error extracting text from PDF: {e}")
        return None

def summarize_text(text, summary_length):
    """Summarise *text* with sumy's ``LsaSummarizer``.

    Args:
        text: Plain text to summarise; parsed with the "english" tokenizer.
        summary_length: Value passed to the summarizer as the requested
            summary size.

    Returns:
        The sentences returned by the summarizer, each converted with
        ``str`` and joined by single spaces.
    """
    document = PlaintextParser.from_string(text, Tokenizer("english")).document
    selected = LsaSummarizer()(document, summary_length)
    return ' '.join(map(str, selected))

def split_into_chapters(text, num_chapters):
    """Split *text* into at most *num_chapters* chapters of whole sentences.

    Sentences are distributed as evenly as possible: chapter sizes differ by
    at most one sentence, with the earlier chapters receiving the extras.

    Args:
        text: Text to split; sentence boundaries come from ``sent_tokenize``.
        num_chapters: Desired number of chapters. Integral floats (as a UI
            slider may deliver) are accepted and converted with ``int``.

    Returns:
        A list of chapter strings. When the text has no more sentences than
        *num_chapters*, each sentence is returned as its own chapter.

    Raises:
        ValueError: If *num_chapters* is smaller than 1.
    """
    count = int(num_chapters)
    if count < 1:
        raise ValueError(f"num_chapters must be at least 1, got {num_chapters!r}")

    sentences = sent_tokenize(text)
    if len(sentences) <= count:
        return sentences

    # Every chapter gets `base` sentences; the first `extra` chapters get one
    # more, so no single chapter absorbs the whole remainder.
    base, extra = divmod(len(sentences), count)
    chapters = []
    start = 0
    for index in range(count):
        end = start + base + (1 if index < extra else 0)
        chapters.append(' '.join(sentences[start:end]))
        start = end
    return chapters

def text_to_speech(text, output_path, speaker, language):
    """Synthesise *text* to a file using the module-level ``tts`` model.

    Args:
        text: Text to speak.
        output_path: Destination path handed to ``tts.tts_to_file``.
        speaker: Speaker identifier forwarded to the model.
        language: Language code forwarded to the model.

    Returns:
        *output_path*, unchanged.
    """
    synthesis_args = {
        "text": text,
        "file_path": output_path,
        "speaker": speaker,
        "language": language,
    }
    tts.tts_to_file(**synthesis_args)
    return output_path

def adjust_audio_speed(input_path, output_path, target_duration):
    """Write a copy of the audio at *input_path* that fits *target_duration*.

    Audio longer than the target is sped up by ``current / target``. Audio
    that already fits is exported unchanged, because ``speedup`` works by
    dropping audio and is not meant for playback speeds of 1.0 or below.

    Args:
        input_path: Path of the source audio, loaded with
            ``AudioSegment.from_mp3``.
        output_path: Path the resulting audio is exported to as mp3.
        target_duration: Maximum duration, in the same unit as
            ``len(audio)`` (the caller in this file passes milliseconds).

    Returns:
        *output_path* on success. *input_path* when nothing was written: the
        source has zero duration, the target is not positive, or the speed-up
        or export failed (the reason is printed).
    """
    audio = AudioSegment.from_mp3(input_path)
    current_duration = len(audio)

    if current_duration == 0:
        print(f"Warning: Audio file {input_path} has zero duration. Skipping speed adjustment.")
        return input_path

    if target_duration <= 0:
        # Guard the division below instead of raising ZeroDivisionError.
        print(f"Warning: Invalid target duration {target_duration}. Skipping speed adjustment.")
        return input_path

    try:
        if current_duration <= target_duration:
            # Already within the limit: keep the natural speed, but still
            # produce output_path so callers always find the file there.
            audio.export(output_path, format="mp3")
        else:
            speed_factor = current_duration / target_duration
            adjusted_audio = audio.speedup(playback_speed=speed_factor)
            adjusted_audio.export(output_path, format="mp3")
        return output_path
    except Exception as e:
        print(f"Error adjusting audio speed: {e}")
        return input_path

def process_chapter(chapter, i, speaker, language):
    """Synthesise one chapter to ``chapter_{i+1}.mp3`` in the working directory.

    Args:
        chapter: Chapter text; blank text is skipped.
        i: Zero-based chapter index, used (plus one) in file names and messages.
        speaker: Speaker identifier forwarded to ``text_to_speech``.
        language: Language code forwarded to ``text_to_speech``.

    Returns:
        The path of the finished chapter audio, or ``None`` when the chapter
        is empty or any step fails (the error is printed, not raised).
    """
    temp_path = None
    try:
        if not chapter.strip():
            print(f"Warning: Chapter {i+1} is empty. Skipping.")
            return None

        temp_path = f"temp_chapter_{i+1}.mp3"
        output_path = f"chapter_{i+1}.mp3"

        text_to_speech(chapter, temp_path, speaker, language)

        # Adjust speed to fit into 3 minutes
        result_path = adjust_audio_speed(temp_path, output_path, 3 * 60 * 1000)
        if result_path != output_path:
            # The adjustment fell back to the raw synthesis and wrote nothing
            # to output_path; promote the temp file so the returned path
            # really exists.
            os.replace(temp_path, output_path)
        return output_path
    except Exception as e:
        print(f"Error processing chapter {i+1}: {e}")
        return None
    finally:
        # Clean up the temporary file on success and on failure; it is
        # already gone if it was promoted above.
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as e:
                print(f"Warning: could not remove {temp_path}: {e}")

def process_pdf(pdf_path, num_chapters, speaker, language):
    """Turn a PDF into a list of chapter audio files summarising it.

    The text is extracted, summarised to roughly 15 minutes of speech,
    split into chapters, and each chapter is synthesised in a thread pool.

    Args:
        pdf_path: Path of the PDF to process.
        num_chapters: Number of chapters to split the summary into.
        speaker: Speaker identifier used for synthesis.
        language: Language code used for synthesis.

    Returns:
        Paths of the chapter audio files that were produced, in chapter
        order; chapters that failed are omitted. An empty list when no text
        could be extracted.
    """
    full_text = extract_text_from_pdf(pdf_path)
    if full_text is None or not full_text.strip():
        print("Error: Extracted text is empty or None")
        return []

    # Clean text to remove unwanted characters
    full_text = full_text.replace('\t', ' ')

    # Word budget: 15 minutes at 150 words per minute. The summarizer takes a
    # sentence count, so convert the budget using the text's average sentence
    # length: budget / (words / sentences). Never ask for more sentences than
    # the text has, and always ask for at least one.
    target_words = 15 * 150
    total_words = len(full_text.split())
    total_sentences = len(sent_tokenize(full_text))
    summary_length = max(
        1, min(total_sentences, target_words * total_sentences // total_words)
    )
    summary = summarize_text(full_text, summary_length)

    chapters = split_into_chapters(summary, num_chapters)

    with ThreadPoolExecutor() as executor:
        chapter_audios = list(
            executor.map(
                lambda item: process_chapter(item[1], item[0], speaker, language),
                enumerate(chapters),
            )
        )

    return [audio for audio in chapter_audios if audio is not None]

def gradio_interface(pdf_file, num_chapters):
    """Gradio callback: build chapter audio for an uploaded PDF.

    Args:
        pdf_file: The uploaded file as delivered by the ``gr.File`` input:
            either a path string or an object exposing the path as ``.name``.
            ``None`` when nothing was uploaded.
        num_chapters: Slider value; converted to ``int`` before use.

    Returns:
        A list of exactly 10 items, one per ``gr.Audio`` output: audio file
        paths first, then ``None`` for every unused slot.
    """
    max_outputs = 10  # must equal the number of gr.Audio outputs of the UI

    if pdf_file is None:
        return [None] * max_outputs

    # Accept both a plain path and a file-like wrapper carrying the path.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    chapter_audios = process_pdf(
        pdf_path, int(num_chapters), default_speaker, default_language
    )
    # Never hand back more values than there are output components.
    chapter_audios = chapter_audios[:max_outputs]
    return chapter_audios + [None] * (max_outputs - len(chapter_audios))

# Gradio UI wiring: a PDF upload and a chapter-count slider (1-10, default 5)
# feed gradio_interface(); its results are shown in ten audio components
# labelled "Chapter 1" .. "Chapter 10".
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.File(label="Upload PDF Book"),
        gr.Slider(minimum=1, maximum=10, step=1, label="Number of Chapters", value=5)
    ],
    # Ten fixed outputs; gradio_interface() pads its result with None to match.
    outputs=[gr.Audio(label=f"Chapter {i+1}") for i in range(10)],
    title="PDF Book to Audiobook Summary",
    description="Upload a PDF book to get a 15-minute audiobook summary split into chapters."
)

# Start the app only when this file is executed as a script.
# NOTE(review): share=True presumably requests a publicly reachable link --
# confirm that exposing the app publicly is intended.
if __name__ == "__main__":
    iface.launch(share=True)