Spaces:
Running
Running
File size: 6,325 Bytes
251214c f9ae432 251214c 78decde 5fad48b 9c78509 5fad48b 9c78509 f9ae432 251214c 9c78509 251214c 9c78509 f9ae432 251214c f9ae432 9c78509 251214c f9ae432 9c78509 f9ae432 251214c f9ae432 251214c 9c78509 251214c 9c78509 5fad48b 251214c 9c78509 251214c f9ae432 251214c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# openai_tts_tool.py
from openai import OpenAI
import os
from langdetect import detect, DetectorFactory
import logging
# Set up logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
# Ensure consistent results from langdetect
DetectorFactory.seed = 0
# Simple in-memory cache for translations
translation_cache = {}
def translate_text(api_key, text, target_language):
"""
Translate text to the target language using OpenAI's API with gpt-4o-mini model.
Args:
api_key (str): OpenAI API key
text (str): Text to translate
target_language (str): Target language code (e.g., 'en' for English)
Returns:
str: Translated text or error message
"""
cache_key = (text, target_language)
if cache_key in translation_cache:
logging.info("Fetching translation from cache.")
return translation_cache[cache_key]
try:
logging.info("Starting translation process.")
client = OpenAI(api_key=api_key)
prompt = f"Translate the following text to {target_language}:\n\n{text}"
response = client.completions.create(
model="gpt-4o-mini", # Updated model name
prompt=prompt,
max_tokens=1000,
temperature=0.3
)
translated_text = response.choices[0].text.strip()
logging.info("Translation successful.")
# Cache the translation
translation_cache[cache_key] = translated_text
return translated_text
except Exception as e:
logging.error(f"Error in translation: {str(e)}")
return f"Error in translation: {str(e)}"
def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_speed, language, output_option):
"""
Generate audio and text files from input text using OpenAI's TTS API.
Args:
api_key (str): OpenAI API key
input_text (str): Text to convert to speech
model_name (str): OpenAI model name
voice_type (str): Voice type for TTS
voice_speed (float): Speed of speech
language (str): Language code for synthesis
output_option (str): Output type ('audio', 'script_text', or 'both')
Returns:
tuple: (audio_file_path, script_file_path, status_message)
"""
if not input_text:
logging.warning("No input text provided.")
return None, None, "No input text provided"
if not api_key:
logging.warning("No API key provided.")
return None, None, "No API key provided"
try:
logging.info("Initializing OpenAI client.")
client = OpenAI(api_key=api_key)
# Create temp directory if it doesn't exist
temp_dir = os.path.join(os.getcwd(), 'temp')
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
logging.info(f"Created temporary directory at {temp_dir}.")
# Detect input language
try:
detected_language = detect(input_text)
logging.info(f"Detected input language: {detected_language}")
except Exception as e:
logging.error(f"Error detecting language: {str(e)}")
return None, None, f"Error detecting language: {str(e)}"
# Map language codes if necessary (langdetect uses ISO 639-1 codes)
target_language = language.lower()[:2] # e.g., 'en' for English
# If detected language is different from target, translate
if detected_language != target_language:
logging.info("Input language differs from target language. Proceeding to translate.")
translated_text = translate_text(api_key, input_text, target_language)
if translated_text.startswith("Error in translation:"):
return None, None, translated_text
else:
logging.info("Input language matches target language. No translation needed.")
translated_text = input_text
# Generate audio file
audio_file = None
if output_option in ["audio", "both"]:
try:
logging.info("Starting audio generation.")
speech_response = client.audio.speech.create(
model="tts-1",
voice=voice_type,
input=translated_text,
speed=float(voice_speed)
)
# Save the audio to a temporary file
audio_filename = f"output_{hash(translated_text)}_{target_language}.mp3"
audio_path = os.path.join(temp_dir, audio_filename)
with open(audio_path, "wb") as f:
for chunk in speech_response.iter_bytes():
f.write(chunk)
logging.info(f"Audio file saved at {audio_path}.")
audio_file = audio_path
except Exception as e:
logging.error(f"Error during audio generation: {str(e)}")
return None, None, f"Error during audio generation: {str(e)}"
# Save the (translated) text as a script file
script_file = None
if output_option in ["script_text", "both"]:
try:
logging.info("Starting script text generation.")
script_text = translated_text
script_filename = f"script_{hash(script_text)}_{target_language}.txt"
script_path = os.path.join(temp_dir, script_filename)
with open(script_path, "w", encoding='utf-8') as f:
f.write(script_text)
logging.info(f"Script file saved at {script_path}.")
script_file = script_path
except Exception as e:
logging.error(f"Error during script text generation: {str(e)}")
return None, None, f"Error during script text generation: {str(e)}"
status_message = f"Generation completed successfully in {language}!"
logging.info(status_message)
return audio_file, script_file, status_message
except Exception as e:
logging.error(f"Unexpected error: {str(e)}")
return None, None, f"Error: {str(e)}"
|