Spaces:
Runtime error
Runtime error
Create tts.py
Browse files
tts.py
ADDED
@@ -0,0 +1,316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import csv
|
3 |
+
import random
|
4 |
+
import urllib.request
|
5 |
+
from PIL import Image
|
6 |
+
import os
|
7 |
+
from openai import OpenAI
|
8 |
+
import time
|
9 |
+
import pandas as pd
|
10 |
+
from google.cloud import texttospeech
|
11 |
+
|
12 |
+
# Point the Google Cloud client libraries at the service-account key file
# that ships next to this script.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'shaikespeare_ck.json'

# SECURITY: the OpenAI API key used to be hard-coded here (and repeated in a
# comment). A key that has been committed must be treated as leaked and
# revoked. The key is now taken from the OPENAI_API_KEY environment variable;
# the OpenAI client raises at construction time when no key is available.
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
|
16 |
+
|
17 |
+
def adjust_parameters_based_on_emotion(emotion):
    """Return the ``(pitch, rate)`` pair used when voicing a line.

    ``'Positive'`` yields a higher pitch and faster rate, ``'Negative'`` a
    lower pitch and slower rate; any other value yields the neutral pair.
    """
    # Start from the neutral settings and override them for the two
    # emotions that change the delivery.
    pitch, rate = 0, 1.0
    if emotion == 'Positive':
        pitch, rate = 20, 1.2
    elif emotion == 'Negative':
        pitch, rate = -20, 0.8
    return pitch, rate
|
24 |
+
|
25 |
+
def select_voice(gender, natural_voices, standard_voices, used_voices):
    """Pick a voice for a character, preferring ones not handed out yet.

    Unused natural voices are tried first, then unused standard voices.
    When every voice in both pools is already in ``used_voices``, a voice is
    reused instead of looping forever looking for a free one.

    Args:
        gender: Not used by the selection itself; kept so existing callers
            keep working.
        natural_voices: Preferred voice names.
        standard_voices: Fallback voice names.
        used_voices: Set of voice names already assigned. The chosen voice
            is added to it.

    Returns:
        The chosen voice name.

    Raises:
        IndexError: If both pools are empty (raised by ``random.choice``).
    """
    unused_natural = [voice for voice in natural_voices if voice not in used_voices]
    unused_standard = [voice for voice in standard_voices if voice not in used_voices]

    # Prioritize natural voices first, then standard voices.
    candidates = unused_natural or unused_standard
    if not candidates:
        # Everything has been assigned once already: share a voice rather
        # than spin in a rejection loop that can never succeed.
        candidates = list(natural_voices) + list(standard_voices)

    voice = random.choice(candidates)
    used_voices.add(voice)
    return voice
|
33 |
+
|
34 |
+
# Voice names offered to select_voice(). The "natural" lists are tried
# before the "standard" lists for each gender.
natural_male_voices = [
    'en-US-Neural2-A', 'en-US-Neural2-D', 'en-US-Neural2-I', 'en-US-Neural2-J',
    'en-US-Wavenet-A', 'en-US-Wavenet-B', 'en-US-Wavenet-D', 'en-US-Wavenet-I', 'en-US-Wavenet-J'
]
# A missing comma used to merge 'en-US-News-N' and 'en-US-Polyglot-1' into a
# single, non-existent voice name through implicit string concatenation.
standard_male_voices = [
    'en-US-News-M', 'en-US-News-N', 'en-US-Polyglot-1', 'en-US-Standard-A', 'en-US-Standard-B',
    'en-US-Standard-D', 'en-US-Standard-I', 'en-US-Standard-J', 'en-US-Studio-M', 'en-US-Studio-Q'
]

natural_female_voices = [
    'en-US-Neural2-C', 'en-US-Neural2-E', 'en-US-Neural2-F', 'en-US-Neural2-G', 'en-US-Neural2-H',
    'en-US-Wavenet-C', 'en-US-Wavenet-E', 'en-US-Wavenet-F', 'en-US-Wavenet-H', 'en-US-Wavenet-G'
]
standard_female_voices = [
    'en-US-News-K', 'en-US-News-L',
    'en-US-Standard-C', 'en-US-Standard-E', 'en-US-Standard-F', 'en-US-Standard-G', 'en-US-Standard-H', 'en-US-Studio-O'
]
|
49 |
+
|
50 |
+
def txtToMp3(txt_file_path):
    """Turn a play script text file into per-line MP3s and scene images.

    The first line of the file is used as the title and the second as the
    author. After blank lines are removed and the first two remaining lines
    are dropped, the rest is read as alternating speaker / speech lines.
    A line wrapped in parentheses is attributed to a ``NARRATOR`` speaker.

    Everything is written beneath a directory named after the title:
    ``title.txt``, ``author.txt``, a CSV with ``Character``, ``Line``,
    ``Gender`` and ``Emotion`` columns, ``audio_files/<row>.mp3`` for every
    CSV row and ``images/<row>.png`` for every group of up to three rows.

    Args:
        txt_file_path: Path of the script text file.

    Raises:
        ValueError: If the file has fewer than two lines.
        RuntimeError: If the ``HF_TOKEN`` environment variable is not set.
    """
    # Local imports keep this block self-contained, as the original did for
    # ``requests``.
    import requests
    from html import escape

    with open(txt_file_path, 'r') as file:
        contents = file.readlines()
    if len(contents) < 2:
        raise ValueError(
            f"{txt_file_path!r} must start with a title line and an author line"
        )
    first_line = contents[0].strip()
    second_line = contents[1].strip()

    # exist_ok=True so that re-running on the same script does not crash.
    os.makedirs(first_line, exist_ok=True)
    with open(os.path.join(first_line, 'title.txt'), 'w') as title_file:
        title_file.write(first_line)
    with open(os.path.join(first_line, 'author.txt'), 'w') as author_file:
        author_file.write(second_line)

    # --- Normalise the script in memory -----------------------------------
    # The original round-tripped through a temporary copy of the text file
    # inside the output directory, which broke when txt_file_path had a
    # directory component.
    script_lines = []
    for raw_line in contents:
        line = raw_line.strip()
        if line.startswith('(') and line.endswith(')'):
            # Parenthesised lines are narration: emit a speaker line first.
            script_lines.append('NARRATOR')
            line = line[1:-1]
        script_lines.append(line)
    # Drop blank lines, then the first two remaining lines (title, author).
    script_lines = [line for line in script_lines if line][2:]

    csv_name = os.path.splitext(os.path.basename(txt_file_path))[0] + '.csv'
    new_new_file_path = os.path.join(first_line, csv_name)
    with open(new_new_file_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Character', 'Line'])
        # Even positions are speakers, odd positions are what they say.
        for speaker, speech in zip(script_lines[::2], script_lines[1::2]):
            writer.writerow([speaker.strip(), speech.strip()])

    # --- Hugging Face inference API ---------------------------------------
    # SECURITY: the access token used to be hard-coded here. A committed
    # token must be treated as leaked and revoked; it is now read from the
    # environment.
    hf_token = os.environ.get('HF_TOKEN')
    if not hf_token:
        raise RuntimeError("Set the HF_TOKEN environment variable to a Hugging Face access token")
    headers = {"Authorization": f"Bearer {hf_token}"}
    gender_api_url = "https://api-inference.huggingface.co/models/Startup-Exchange/tps_gender_prediction"
    emotion_api_url = "https://api-inference.huggingface.co/models/Startup-Exchange/tps_sentimental_analysis"

    def query(api_url, payload):
        """POST *payload* to *api_url* and return the decoded JSON body."""
        response = requests.post(api_url, headers=headers, json=payload, timeout=60)
        return response.json()

    def predict_gender_aggregated(character, lines):
        """Return the gender label for *character* given all of its lines."""
        character_gender_mapping = {
            "NARRATOR": "Neutral",
            "EGEON": "Male",
            "DUKE": "Male",
            "JAILER": "Male"
        }

        # Check if the character is in the mapping
        known_gender = character_gender_mapping.get(character.upper())
        if known_gender is not None:
            return known_gender

        # For other characters, use the inference API
        aggregated_text = " ".join(str(line) for line in lines)
        input_text = f"Character: {character}. Dialogue: {aggregated_text}. Gender:"
        api_response = query(gender_api_url, {"inputs": input_text})
        # NOTE(review): the response schema of this model is not visible
        # here; the original assumed a dict with a 'gender' key. Anything
        # else is reported as 'unknown' instead of raising.
        if isinstance(api_response, dict):
            return api_response.get('gender', 'unknown')
        return 'unknown'

    # Read CSV data into a DataFrame and aggregate lines for each character
    data = pd.read_csv(new_new_file_path)
    character_lines = data.groupby('Character')['Line'].apply(list)
    character_genders = {
        character: predict_gender_aggregated(character, lines)
        for character, lines in character_lines.items()
    }

    # Map the predicted gender back onto every row and persist it
    data['Gender'] = data['Character'].map(character_genders)
    data.to_csv(new_new_file_path, index=False)
    print(new_new_file_path)

    # Mapping for converting labels to more readable forms
    emotion_mapping = {'LABEL_0': 'Negative', 'LABEL_1': 'Neutral', 'LABEL_2': 'Positive'}

    def get_emotion_from_api(line):
        """Return the readable emotion with the highest score for *line*."""
        api_response = query(emotion_api_url, {"inputs": line})
        try:
            # Extracting the label with the highest score
            label = max(api_response[0], key=lambda candidate: candidate['score'])['label']
        except (KeyError, IndexError, TypeError):
            # The response was not the expected list of scored labels, so
            # fall back to the neutral voice settings for this line.
            print(f"Unexpected emotion API response: {api_response!r}")
            return 'Unknown'
        # Map the label to a more readable form
        return emotion_mapping.get(label, 'Unknown')

    # Query the API for each line and append the emotion as a new column
    with open(new_new_file_path, 'r') as file:
        rows = list(csv.reader(file))
    rows[0].append('Emotion')
    for row in rows[1:]:
        row.append(get_emotion_from_api(row[1]))
    with open(new_new_file_path, 'w', newline='') as new_file:
        csv.writer(new_file).writerows(rows)

    # --- Text to speech ----------------------------------------------------
    # The original also built an unused character -> voice table from two
    # short name lists; it raised IndexError as soon as a script had more
    # characters than names, so it has been removed.
    output_folder = os.path.join(first_line, 'audio_files')
    os.makedirs(output_folder, exist_ok=True)

    df = pd.read_csv(new_new_file_path)
    tts_client = texttospeech.TextToSpeechClient()
    used_male_voices, used_female_voices = set(), set()
    character_voices = {}
    for index, row in df.iterrows():
        character = row['Character']
        gender = row['Gender']
        emotion = row['Emotion']

        # Each character keeps the first voice it is given. Anything that
        # is not 'Male' is voiced from the female pools.
        if character not in character_voices:
            if gender == 'Male':
                character_voices[character] = select_voice(
                    gender, natural_male_voices, standard_male_voices, used_male_voices)
            else:
                character_voices[character] = select_voice(
                    gender, natural_female_voices, standard_female_voices, used_female_voices)

        voice_name = character_voices[character]
        pitch, rate = adjust_parameters_based_on_emotion(emotion)
        # Escape the dialogue so that '&', '<' or '>' cannot break the SSML.
        spoken_text = escape(str(row['Line']))
        text = f"<speak><prosody pitch='{pitch}%' rate='{rate}'>{spoken_text}</prosody></speak>"

        synthesis_input = texttospeech.SynthesisInput(ssml=text)
        voice_params = texttospeech.VoiceSelectionParams(language_code='en-US', name=voice_name)
        audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3)

        response = tts_client.synthesize_speech(
            input=synthesis_input, voice=voice_params, audio_config=audio_config)
        file_name = os.path.join(output_folder, f'{index}.mp3')
        with open(file_name, 'wb') as out:
            out.write(response.audio_content)

    # --- Scene images ------------------------------------------------------
    image_folder = os.path.join(first_line, 'images')
    os.makedirs(image_folder, exist_ok=True)

    def generate_scene_image(prompt, image_index):
        """Describe the scene in *prompt* and save an image of it."""
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Generate a description of what the scene looks like. It should be in quotes and it should be around 7 words."},
                {"role": "user", "content": prompt}
            ]
        )
        description = response.choices[0].message.content
        print(description)
        responsePic = client.images.generate(
            model="dall-e-3",
            prompt=description,
            size="1792x1024",
            quality="standard",
            n=1,
        )
        image_url = responsePic.data[0].url
        image_path = os.path.join(image_folder, str(image_index) + '.png')
        urllib.request.urlretrieve(image_url, image_path)

    with open(new_new_file_path, 'r') as file:
        reader = csv.reader(file)
        groupOfThreeLines = []
        for i, row in enumerate(reader):
            if i == 0:
                continue  # header row
            groupOfThreeLines.append(row[0] + ':' + row[1] + '\n')
            if len(groupOfThreeLines) == 3:
                prompt = ''.join(groupOfThreeLines)
                groupOfThreeLines = []
                print(prompt)
                generate_scene_image(prompt, i)
                # Pause between image requests, as the original did.
                time.sleep(60)

    # Illustrate the leftover one or two rows, if any. The original ran this
    # unconditionally, which overwrote the last image with one generated
    # from an empty prompt whenever the row count was a multiple of three.
    if groupOfThreeLines:
        generate_scene_image(' '.join(groupOfThreeLines), i)
|
313 |
+
|
314 |
+
# Usage example: runs the whole pipeline on the script below every time this
# module is executed or imported. The relative path is resolved against the
# current working directory.
txt_file_path = 'TheComedyOfErrors.txt'
txtToMp3(txt_file_path)
|