|
import os |
|
import gradio as gr |
|
import replicate |
|
from pydub import AudioSegment |
|
from fpdf import FPDF |
|
|
|
|
|
# Read the Replicate API token from the environment and refuse to start
# when it is missing or empty, since every transcription call needs it.
replicate_token = os.environ.get("REPLICATE_API_TOKEN")

if replicate_token is None or replicate_token == "":
    raise ValueError("No se ha encontrado el token de API de Replicate.")
|
|
|
|
|
def dividir_audio(audio_path, segment_duration_ms=5*60*1000, max_segment_bytes=10 * 1024 * 1024):
    """Split an audio file into consecutive WAV segments below a size limit.

    The audio is cut into slices of ``segment_duration_ms`` and each slice is
    exported as ``segment_<index>.wav`` in the current working directory.
    If any exported slice is larger than ``max_segment_bytes``, the files
    written so far are removed, the slice duration is halved, and the split
    is retried.

    Args:
        audio_path: Path of the audio file to split.
        segment_duration_ms: Initial slice length in milliseconds.
        max_segment_bytes: Largest allowed size of one exported segment.

    Returns:
        The list of segment file paths, in playback order.

    Raises:
        ValueError: If ``segment_duration_ms`` is not positive, or if the
            size limit cannot be met even with the shortest possible slice.
    """
    if segment_duration_ms <= 0:
        raise ValueError("segment_duration_ms must be a positive number of milliseconds.")

    # Decode once; every retry slices the same in-memory audio.
    audio = AudioSegment.from_file(audio_path)
    audio_length = len(audio)

    while True:
        segments = []
        oversized_path = None

        # Name files by running index rather than by start minute: once the
        # slice length drops below one minute, minute-based names collide and
        # segments overwrite each other.
        for index, start in enumerate(range(0, audio_length, segment_duration_ms)):
            segment_path = f"segment_{index}.wav"
            segment = audio[start:start + segment_duration_ms]
            # export() returns the open output file; close it so the handle
            # is not leaked.
            segment.export(segment_path, format="wav").close()
            segments.append(segment_path)

            if os.path.getsize(segment_path) > max_segment_bytes:
                oversized_path = segment_path
                break

        if oversized_path is None:
            return segments

        limit_mb = max_segment_bytes / (1024 * 1024)
        print(f"Warning: Segment {oversized_path} exceeds {limit_mb:g}MB, reducing segment duration.")

        # Discard this attempt's files so no stale segments are left behind.
        for stale_path in segments:
            os.remove(stale_path)

        segment_duration_ms //= 2
        if segment_duration_ms <= 0:
            raise ValueError(
                "Cannot split the audio into segments that fit within max_segment_bytes."
            )
|
|
|
|
|
def crear_pdf(transcripcion, pdf_path="/mnt/data/transcripcion_audio.pdf"):
    """Write a transcription to a PDF file and return the file's path.

    Each newline-separated line of ``transcripcion`` is rendered as its own
    wrapped paragraph in 12pt Arial.

    Args:
        transcripcion: Full transcription text.
        pdf_path: Destination of the generated PDF. Its parent directory is
            created when it does not exist yet.

    Returns:
        ``pdf_path``, after the PDF has been written.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for line in transcripcion.split("\n"):
        # The built-in (core) fonts are limited to Latin-1; replace anything
        # outside that range with "?" instead of failing on encoding.
        printable_line = line.encode("latin-1", errors="replace").decode("latin-1")
        # Start every paragraph at the left margin. Some FPDF versions leave
        # the cursor at the right edge after multi_cell, which would leave no
        # horizontal room for the next full-width cell.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, printable_line)

    # Make sure the target directory exists before writing the file.
    output_dir = os.path.dirname(pdf_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    pdf.output(pdf_path)

    return pdf_path
|
|
|
|
|
def transcribe_audio_y_pdf(audio_file, progress=gr.Progress()):
    """Transcribe an audio file with Replicate and export the text as a PDF.

    Audio longer than five minutes is first split into segments with
    ``dividir_audio``; shorter audio is sent as a single file. Each segment
    is transcribed in order and the results are joined into one text, which
    is then written to a PDF with ``crear_pdf``.

    Args:
        audio_file: Path of the uploaded audio file (``None`` when nothing
            was uploaded).
        progress: Gradio progress tracker, updated once per segment.

    Returns:
        A ``(text, pdf_path)`` tuple. On failure ``text`` holds the error
        message and ``pdf_path`` is ``None``, so the two declared UI outputs
        always receive a value.
    """
    if not audio_file:
        return "No se ha proporcionado ningún archivo de audio.", None

    decoded_audio = AudioSegment.from_file(audio_file)
    audio_duration_minutes = len(decoded_audio) / (1000 * 60)

    if audio_duration_minutes > 5:
        segments = dividir_audio(audio_file, segment_duration_ms=5 * 60 * 1000)
        # Only the files produced by the split are ours to delete afterwards.
        temporary_segments = list(segments)
    else:
        segments = [audio_file]
        temporary_segments = []

    all_transcriptions = []

    try:
        for index, segment_path in enumerate(segments):
            with open(segment_path, "rb") as segment_file:
                try:
                    progress(index / len(segments))
                    output = replicate.run(
                        "vaibhavs10/incredibly-fast-whisper:3ab86df6c8f54c11309d4d1f930ac292bad43ace52d10c80d87eb258b3c9f79c",
                        input={
                            "task": "transcribe",
                            "audio": segment_file,
                            "language": "None",
                            "timestamp": "chunk",
                            "batch_size": 64,
                            "diarise_audio": False
                        }
                    )
                    transcription = output['text']
                    all_transcriptions.append(f"Segment {index + 1}:\n{transcription}")
                except Exception as e:
                    # Return one value per declared output; a bare string
                    # would not match the (text, file) pair the UI expects.
                    return f"Error transcribiendo el segmento {index + 1}: {e}", None
    finally:
        # Best-effort removal of the intermediate segment files.
        for temporary_path in temporary_segments:
            try:
                os.remove(temporary_path)
            except FileNotFoundError:
                pass

    full_transcription = "\n".join(all_transcriptions)

    pdf_path = crear_pdf(full_transcription)
    return full_transcription, pdf_path
|
|
|
|
|
# Gradio UI: an audio upload, a textbox for the transcription, a download
# slot for the generated PDF, and a button that runs the whole pipeline.
with gr.Blocks() as demo:
    gr.Markdown("# Transcriptor de Audio a Texto (Genera PDF)")
    audio_input = gr.Audio(type="filepath", label="Sube tu archivo de audio")
    # Label restored from mis-decoded text ("Transcripci贸n").
    output_text = gr.Textbox(label="Transcripción")
    output_pdf = gr.File(label="Descarga el PDF")

    transcribe_button = gr.Button("Transcribir y Crear PDF")
    transcribe_button.click(
        fn=transcribe_audio_y_pdf,
        inputs=audio_input,
        outputs=[output_text, output_pdf],
    )

demo.launch()
|
|