capradeepgujaran commited on
Commit
251214c
1 Parent(s): 4e6b972

Update openai_tts_tool.py

Browse files
Files changed (1) hide show
  1. openai_tts_tool.py +113 -21
openai_tts_tool.py CHANGED
@@ -1,5 +1,56 @@
 
 
1
  from openai import OpenAI
2
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_speed, language, output_option):
5
  """
@@ -18,47 +69,88 @@ def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_s
18
  tuple: (audio_file_path, script_file_path, status_message)
19
  """
20
  if not input_text:
 
21
  return None, None, "No input text provided"
22
 
23
  if not api_key:
 
24
  return None, None, "No API key provided"
25
 
26
  try:
 
27
  client = OpenAI(api_key=api_key)
28
 
29
  # Create temp directory if it doesn't exist
30
  temp_dir = os.path.join(os.getcwd(), 'temp')
31
  if not os.path.exists(temp_dir):
32
  os.makedirs(temp_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  # Generate audio file
35
  audio_file = None
36
  if output_option in ["audio", "both"]:
37
- speech_response = client.audio.speech.create(
38
- model="tts-1",
39
- voice=voice_type,
40
- input=input_text,
41
- speed=float(voice_speed)
42
- )
43
-
44
- # Save the audio to a temporary file
45
- audio_path = os.path.join(temp_dir, f"output_{hash(input_text)}_{language}.mp3")
46
- with open(audio_path, "wb") as f:
47
- for chunk in speech_response.iter_bytes():
48
- f.write(chunk)
49
-
50
- audio_file = audio_path
 
 
 
 
 
 
51
 
52
- # Save the input text as a script file
53
  script_file = None
54
  if output_option in ["script_text", "both"]:
55
- script_path = os.path.join(temp_dir, f"script_{hash(input_text)}_{language}.txt")
56
- with open(script_path, "w", encoding='utf-8') as f:
57
- f.write(input_text)
58
- script_file = script_path
 
 
 
 
 
 
 
 
59
 
60
  status_message = f"Generation completed successfully in {language}!"
 
61
  return audio_file, script_file, status_message
62
-
63
  except Exception as e:
64
- return None, None, f"Error: {str(e)}"
 
 
1
+ # openai_tts_tool.py
2
+
3
  from openai import OpenAI
4
  import os
5
+ from langdetect import detect, DetectorFactory
6
+ import logging
7
+
8
+ # Set up logging configuration
9
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
10
+
11
+ # Ensure consistent results from langdetect
12
+ DetectorFactory.seed = 0
13
+
14
+ # Simple in-memory cache for translations
15
+ translation_cache = {}
16
+
17
+ def translate_text(api_key, text, target_language):
18
+ """
19
+ Translate text to the target language using OpenAI's API with gpt-4o-mini model.
20
+
21
+ Args:
22
+ api_key (str): OpenAI API key
23
+ text (str): Text to translate
24
+ target_language (str): Target language code (e.g., 'en' for English)
25
+
26
+ Returns:
27
+ str: Translated text or error message
28
+ """
29
+ cache_key = (text, target_language)
30
+ if cache_key in translation_cache:
31
+ logging.info("Fetching translation from cache.")
32
+ return translation_cache[cache_key]
33
+
34
+ try:
35
+ logging.info("Starting translation process.")
36
+ client = OpenAI(api_key=api_key)
37
+ prompt = f"Translate the following text to {target_language}:\n\n{text}"
38
+ response = client.completions.create(
39
+ model="gpt-4o-mini", # Updated model name
40
+ prompt=prompt,
41
+ max_tokens=1000,
42
+ temperature=0.3
43
+ )
44
+ translated_text = response.choices[0].text.strip()
45
+ logging.info("Translation successful.")
46
+
47
+ # Cache the translation
48
+ translation_cache[cache_key] = translated_text
49
+
50
+ return translated_text
51
+ except Exception as e:
52
+ logging.error(f"Error in translation: {str(e)}")
53
+ return f"Error in translation: {str(e)}"
54
 
55
  def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_speed, language, output_option):
56
  """
 
69
  tuple: (audio_file_path, script_file_path, status_message)
70
  """
71
  if not input_text:
72
+ logging.warning("No input text provided.")
73
  return None, None, "No input text provided"
74
 
75
  if not api_key:
76
+ logging.warning("No API key provided.")
77
  return None, None, "No API key provided"
78
 
79
  try:
80
+ logging.info("Initializing OpenAI client.")
81
  client = OpenAI(api_key=api_key)
82
 
83
  # Create temp directory if it doesn't exist
84
  temp_dir = os.path.join(os.getcwd(), 'temp')
85
  if not os.path.exists(temp_dir):
86
  os.makedirs(temp_dir)
87
+ logging.info(f"Created temporary directory at {temp_dir}.")
88
+
89
+ # Detect input language
90
+ try:
91
+ detected_language = detect(input_text)
92
+ logging.info(f"Detected input language: {detected_language}")
93
+ except Exception as e:
94
+ logging.error(f"Error detecting language: {str(e)}")
95
+ return None, None, f"Error detecting language: {str(e)}"
96
+
97
+ # Map language codes if necessary (langdetect uses ISO 639-1 codes)
98
+ target_language = language.lower()[:2] # e.g., 'en' for English
99
+
100
+ # If detected language is different from target, translate
101
+ if detected_language != target_language:
102
+ logging.info("Input language differs from target language. Proceeding to translate.")
103
+ translated_text = translate_text(api_key, input_text, target_language)
104
+ if translated_text.startswith("Error in translation:"):
105
+ return None, None, translated_text
106
+ else:
107
+ logging.info("Input language matches target language. No translation needed.")
108
+ translated_text = input_text
109
 
110
  # Generate audio file
111
  audio_file = None
112
  if output_option in ["audio", "both"]:
113
+ try:
114
+ logging.info("Starting audio generation.")
115
+ speech_response = client.audio.speech.create(
116
+ model="tts-1",
117
+ voice=voice_type,
118
+ input=translated_text,
119
+ speed=float(voice_speed)
120
+ )
121
+
122
+ # Save the audio to a temporary file
123
+ audio_filename = f"output_{hash(translated_text)}_{target_language}.mp3"
124
+ audio_path = os.path.join(temp_dir, audio_filename)
125
+ with open(audio_path, "wb") as f:
126
+ for chunk in speech_response.iter_bytes():
127
+ f.write(chunk)
128
+ logging.info(f"Audio file saved at {audio_path}.")
129
+ audio_file = audio_path
130
+ except Exception as e:
131
+ logging.error(f"Error during audio generation: {str(e)}")
132
+ return None, None, f"Error during audio generation: {str(e)}"
133
 
134
+ # Save the (translated) text as a script file
135
  script_file = None
136
  if output_option in ["script_text", "both"]:
137
+ try:
138
+ logging.info("Starting script text generation.")
139
+ script_text = translated_text
140
+ script_filename = f"script_{hash(script_text)}_{target_language}.txt"
141
+ script_path = os.path.join(temp_dir, script_filename)
142
+ with open(script_path, "w", encoding='utf-8') as f:
143
+ f.write(script_text)
144
+ logging.info(f"Script file saved at {script_path}.")
145
+ script_file = script_path
146
+ except Exception as e:
147
+ logging.error(f"Error during script text generation: {str(e)}")
148
+ return None, None, f"Error during script text generation: {str(e)}"
149
 
150
  status_message = f"Generation completed successfully in {language}!"
151
+ logging.info(status_message)
152
  return audio_file, script_file, status_message
153
+
154
  except Exception as e:
155
+ logging.error(f"Unexpected error: {str(e)}")
156
+ return None, None, f"Error: {str(e)}"