Spaces:

capradeepgujaran
/

DocChat_n_Talk

Running

File size: 6,325 Bytes

# openai_tts_tool.py

from openai import OpenAI
import os
from langdetect import detect, DetectorFactory
import logging

# Set up logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')

# Ensure consistent results from langdetect
DetectorFactory.seed = 0

# Simple in-memory cache for translations
translation_cache = {}

def translate_text(api_key, text, target_language):
    """
    Translate text to the target language using OpenAI's API with gpt-4o-mini model.
    
    Args:
        api_key (str): OpenAI API key
        text (str): Text to translate
        target_language (str): Target language code (e.g., 'en' for English)
        
    Returns:
        str: Translated text or error message
    """
    cache_key = (text, target_language)
    if cache_key in translation_cache:
        logging.info("Fetching translation from cache.")
        return translation_cache[cache_key]
    
    try:
        logging.info("Starting translation process.")
        client = OpenAI(api_key=api_key)
        prompt = f"Translate the following text to {target_language}:\n\n{text}"
        response = client.completions.create(
            model="gpt-4o-mini",  # Updated model name
            prompt=prompt,
            max_tokens=1000,
            temperature=0.3
        )
        translated_text = response.choices[0].text.strip()
        logging.info("Translation successful.")
        
        # Cache the translation
        translation_cache[cache_key] = translated_text
        
        return translated_text
    except Exception as e:
        logging.error(f"Error in translation: {str(e)}")
        return f"Error in translation: {str(e)}"

def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_speed, language, output_option):
    """
    Generate audio and text files from input text using OpenAI's TTS API.
    
    Args:
        api_key (str): OpenAI API key
        input_text (str): Text to convert to speech
        model_name (str): OpenAI model name
        voice_type (str): Voice type for TTS
        voice_speed (float): Speed of speech
        language (str): Language code for synthesis
        output_option (str): Output type ('audio', 'script_text', or 'both')
    
    Returns:
        tuple: (audio_file_path, script_file_path, status_message)
    """
    if not input_text:
        logging.warning("No input text provided.")
        return None, None, "No input text provided"
    
    if not api_key:
        logging.warning("No API key provided.")
        return None, None, "No API key provided"
    
    try:
        logging.info("Initializing OpenAI client.")
        client = OpenAI(api_key=api_key)
        
        # Create temp directory if it doesn't exist
        temp_dir = os.path.join(os.getcwd(), 'temp')
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
            logging.info(f"Created temporary directory at {temp_dir}.")
        
        # Detect input language
        try:
            detected_language = detect(input_text)
            logging.info(f"Detected input language: {detected_language}")
        except Exception as e:
            logging.error(f"Error detecting language: {str(e)}")
            return None, None, f"Error detecting language: {str(e)}"
        
        # Map language codes if necessary (langdetect uses ISO 639-1 codes)
        target_language = language.lower()[:2]  # e.g., 'en' for English
        
        # If detected language is different from target, translate
        if detected_language != target_language:
            logging.info("Input language differs from target language. Proceeding to translate.")
            translated_text = translate_text(api_key, input_text, target_language)
            if translated_text.startswith("Error in translation:"):
                return None, None, translated_text
        else:
            logging.info("Input language matches target language. No translation needed.")
            translated_text = input_text
        
        # Generate audio file
        audio_file = None
        if output_option in ["audio", "both"]:
            try:
                logging.info("Starting audio generation.")
                speech_response = client.audio.speech.create(
                    model="tts-1",
                    voice=voice_type,
                    input=translated_text,
                    speed=float(voice_speed)
                )
                
                # Save the audio to a temporary file
                audio_filename = f"output_{hash(translated_text)}_{target_language}.mp3"
                audio_path = os.path.join(temp_dir, audio_filename)
                with open(audio_path, "wb") as f:
                    for chunk in speech_response.iter_bytes():
                        f.write(chunk)
                logging.info(f"Audio file saved at {audio_path}.")
                audio_file = audio_path
            except Exception as e:
                logging.error(f"Error during audio generation: {str(e)}")
                return None, None, f"Error during audio generation: {str(e)}"
        
        # Save the (translated) text as a script file
        script_file = None
        if output_option in ["script_text", "both"]:
            try:
                logging.info("Starting script text generation.")
                script_text = translated_text
                script_filename = f"script_{hash(script_text)}_{target_language}.txt"
                script_path = os.path.join(temp_dir, script_filename)
                with open(script_path, "w", encoding='utf-8') as f:
                    f.write(script_text)
                logging.info(f"Script file saved at {script_path}.")
                script_file = script_path
            except Exception as e:
                logging.error(f"Error during script text generation: {str(e)}")
                return None, None, f"Error during script text generation: {str(e)}"
        
        status_message = f"Generation completed successfully in {language}!"
        logging.info(status_message)
        return audio_file, script_file, status_message
                
    except Exception as e:
        logging.error(f"Unexpected error: {str(e)}")
        return None, None, f"Error: {str(e)}"