File size: 6,325 Bytes
251214c
 
f9ae432
 
251214c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78decde
5fad48b
9c78509
 
 
 
 
 
 
 
 
5fad48b
9c78509
 
 
 
 
f9ae432
251214c
9c78509
 
 
251214c
9c78509
f9ae432
 
251214c
f9ae432
 
9c78509
 
 
 
251214c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f9ae432
9c78509
f9ae432
 
251214c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f9ae432
251214c
9c78509
 
251214c
 
 
 
 
 
 
 
 
 
 
 
9c78509
5fad48b
251214c
9c78509
251214c
f9ae432
251214c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# openai_tts_tool.py

from openai import OpenAI
import os
from langdetect import detect, DetectorFactory
import logging

# Set up logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')

# Ensure consistent results from langdetect
DetectorFactory.seed = 0

# Simple in-memory cache for translations
translation_cache = {}

def translate_text(api_key, text, target_language):
    """
    Translate text to the target language using OpenAI's API with gpt-4o-mini model.
    
    Args:
        api_key (str): OpenAI API key
        text (str): Text to translate
        target_language (str): Target language code (e.g., 'en' for English)
        
    Returns:
        str: Translated text or error message
    """
    cache_key = (text, target_language)
    if cache_key in translation_cache:
        logging.info("Fetching translation from cache.")
        return translation_cache[cache_key]
    
    try:
        logging.info("Starting translation process.")
        client = OpenAI(api_key=api_key)
        prompt = f"Translate the following text to {target_language}:\n\n{text}"
        response = client.completions.create(
            model="gpt-4o-mini",  # Updated model name
            prompt=prompt,
            max_tokens=1000,
            temperature=0.3
        )
        translated_text = response.choices[0].text.strip()
        logging.info("Translation successful.")
        
        # Cache the translation
        translation_cache[cache_key] = translated_text
        
        return translated_text
    except Exception as e:
        logging.error(f"Error in translation: {str(e)}")
        return f"Error in translation: {str(e)}"

def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_speed, language, output_option):
    """
    Generate audio and text files from input text using OpenAI's TTS API.
    
    Args:
        api_key (str): OpenAI API key
        input_text (str): Text to convert to speech
        model_name (str): OpenAI model name
        voice_type (str): Voice type for TTS
        voice_speed (float): Speed of speech
        language (str): Language code for synthesis
        output_option (str): Output type ('audio', 'script_text', or 'both')
    
    Returns:
        tuple: (audio_file_path, script_file_path, status_message)
    """
    if not input_text:
        logging.warning("No input text provided.")
        return None, None, "No input text provided"
    
    if not api_key:
        logging.warning("No API key provided.")
        return None, None, "No API key provided"
    
    try:
        logging.info("Initializing OpenAI client.")
        client = OpenAI(api_key=api_key)
        
        # Create temp directory if it doesn't exist
        temp_dir = os.path.join(os.getcwd(), 'temp')
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
            logging.info(f"Created temporary directory at {temp_dir}.")
        
        # Detect input language
        try:
            detected_language = detect(input_text)
            logging.info(f"Detected input language: {detected_language}")
        except Exception as e:
            logging.error(f"Error detecting language: {str(e)}")
            return None, None, f"Error detecting language: {str(e)}"
        
        # Map language codes if necessary (langdetect uses ISO 639-1 codes)
        target_language = language.lower()[:2]  # e.g., 'en' for English
        
        # If detected language is different from target, translate
        if detected_language != target_language:
            logging.info("Input language differs from target language. Proceeding to translate.")
            translated_text = translate_text(api_key, input_text, target_language)
            if translated_text.startswith("Error in translation:"):
                return None, None, translated_text
        else:
            logging.info("Input language matches target language. No translation needed.")
            translated_text = input_text
        
        # Generate audio file
        audio_file = None
        if output_option in ["audio", "both"]:
            try:
                logging.info("Starting audio generation.")
                speech_response = client.audio.speech.create(
                    model="tts-1",
                    voice=voice_type,
                    input=translated_text,
                    speed=float(voice_speed)
                )
                
                # Save the audio to a temporary file
                audio_filename = f"output_{hash(translated_text)}_{target_language}.mp3"
                audio_path = os.path.join(temp_dir, audio_filename)
                with open(audio_path, "wb") as f:
                    for chunk in speech_response.iter_bytes():
                        f.write(chunk)
                logging.info(f"Audio file saved at {audio_path}.")
                audio_file = audio_path
            except Exception as e:
                logging.error(f"Error during audio generation: {str(e)}")
                return None, None, f"Error during audio generation: {str(e)}"
        
        # Save the (translated) text as a script file
        script_file = None
        if output_option in ["script_text", "both"]:
            try:
                logging.info("Starting script text generation.")
                script_text = translated_text
                script_filename = f"script_{hash(script_text)}_{target_language}.txt"
                script_path = os.path.join(temp_dir, script_filename)
                with open(script_path, "w", encoding='utf-8') as f:
                    f.write(script_text)
                logging.info(f"Script file saved at {script_path}.")
                script_file = script_path
            except Exception as e:
                logging.error(f"Error during script text generation: {str(e)}")
                return None, None, f"Error during script text generation: {str(e)}"
        
        status_message = f"Generation completed successfully in {language}!"
        logging.info(status_message)
        return audio_file, script_file, status_message
                
    except Exception as e:
        logging.error(f"Unexpected error: {str(e)}")
        return None, None, f"Error: {str(e)}"