ddiddu committed on
Commit
2416ba0
1 Parent(s): 58b312e

Create tts.py

Browse files
Files changed (1) hide show
  1. tts.py +316 -0
tts.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import csv
3
+ import random
4
+ import urllib.request
5
+ from PIL import Image
6
+ import os
7
+ from openai import OpenAI
8
+ import time
9
+ import pandas as pd
10
+ from google.cloud import texttospeech
11
+
12
# Point the Google Cloud SDK at the service-account key file used by the
# Text-to-Speech client.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'shaikespeare_ck.json'

# SECURITY: the OpenAI API key must never be committed to source control.
# It is read from the OPENAI_API_KEY environment variable instead; any key
# that was previously hard-coded in this file should be revoked and rotated.
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
16
+
17
def adjust_parameters_based_on_emotion(emotion):
    """Map an emotion label to SSML prosody settings.

    Args:
        emotion: Emotion label such as 'Positive', 'Negative' or 'Neutral'.
            Matching ignores case and surrounding whitespace. Any other
            value (including non-string values such as NaN) is treated as
            neutral.

    Returns:
        A ``(pitch, rate)`` tuple. The caller formats ``pitch`` as a
        percentage and ``rate`` as a speaking-rate multiplier inside an SSML
        ``<prosody>`` element.
    """
    # Non-string input (e.g. a missing CSV cell read back as NaN) falls
    # through to the neutral settings instead of raising.
    label = emotion.strip().lower() if isinstance(emotion, str) else ''
    if label == 'positive':
        return 20, 1.2  # Higher pitch and faster rate for positive emotion
    if label == 'negative':
        return -20, 0.8  # Lower pitch and slower rate for negative emotion
    return 0, 1.0  # Neutral pitch and rate for neutral or unspecified emotion
24
+
25
def select_voice(gender, natural_voices, standard_voices, used_voices):
    """Pick a voice for a character, preferring voices not yet handed out.

    Natural voices are tried first, then standard voices. The chosen voice
    is recorded in ``used_voices`` so later calls avoid it.

    Args:
        gender: Gender label of the character. Not used for the selection
            itself (the caller already passes gender-specific pools); kept
            for interface compatibility.
        natural_voices: Preferred voice names.
        standard_voices: Fallback voice names.
        used_voices: Mutable set of voice names already assigned; updated
            in place.

    Returns:
        The selected voice name.

    Raises:
        IndexError: If both pools are empty.
    """
    # Prioritize natural voices first, then standard voices.
    for pool in (natural_voices, standard_voices):
        available = [voice for voice in pool if voice not in used_voices]
        if available:
            voice = random.choice(available)
            used_voices.add(voice)
            return voice

    # Every voice has already been assigned (more characters than voices).
    # Reuse one rather than spinning forever looking for an unused voice.
    every_voice = list(natural_voices) + list(standard_voices)
    if not every_voice:
        raise IndexError("select_voice: no voices to choose from")
    return random.choice(every_voice)
33
+
34
# Google Cloud Text-to-Speech voice names, grouped by gender and by quality
# tier. "Natural" voices are preferred by select_voice(); "standard" voices
# are the fallback once the natural pool has been used up.
natural_male_voices = [
    'en-US-Neural2-A', 'en-US-Neural2-D', 'en-US-Neural2-I', 'en-US-Neural2-J',
    'en-US-Wavenet-A', 'en-US-Wavenet-B', 'en-US-Wavenet-D', 'en-US-Wavenet-I', 'en-US-Wavenet-J',
]
standard_male_voices = [
    # A comma is required between every entry: adjacent string literals are
    # silently concatenated into a single (invalid) voice name.
    'en-US-News-M', 'en-US-News-N', 'en-US-Polyglot-1',
    'en-US-Standard-A', 'en-US-Standard-B', 'en-US-Standard-D', 'en-US-Standard-I', 'en-US-Standard-J',
    'en-US-Studio-M', 'en-US-Studio-Q',
]

natural_female_voices = [
    'en-US-Neural2-C', 'en-US-Neural2-E', 'en-US-Neural2-F', 'en-US-Neural2-G', 'en-US-Neural2-H',
    'en-US-Wavenet-C', 'en-US-Wavenet-E', 'en-US-Wavenet-F', 'en-US-Wavenet-H', 'en-US-Wavenet-G',
]
standard_female_voices = [
    'en-US-News-K', 'en-US-News-L',
    'en-US-Standard-C', 'en-US-Standard-E', 'en-US-Standard-F', 'en-US-Standard-G', 'en-US-Standard-H',
    'en-US-Studio-O',
]
49
+
50
# Hugging Face Inference API endpoints used to annotate the dialogue.
_GENDER_API_URL = "https://api-inference.huggingface.co/models/Startup-Exchange/tps_gender_prediction"
_EMOTION_API_URL = "https://api-inference.huggingface.co/models/Startup-Exchange/tps_sentimental_analysis"
# SECURITY: the bearer token is read from this environment variable rather
# than being committed to the repository. Any token that was previously
# hard-coded here should be revoked.
_HF_TOKEN_ENV_VAR = "HF_API_TOKEN"
_HF_REQUEST_TIMEOUT_SECONDS = 60

# Mapping for converting classifier labels to more readable forms.
_EMOTION_LABELS = {'LABEL_0': 'Negative', 'LABEL_1': 'Neutral', 'LABEL_2': 'Positive'}

# Characters whose gender is fixed up front, so no API call is made for them.
_KNOWN_CHARACTER_GENDERS = {
    "NARRATOR": "Neutral",
    "EGEON": "Male",
    "DUKE": "Male",
    "JAILER": "Male",
}

_SCENE_SYSTEM_PROMPT = (
    "Generate a description of what the scene looks like. "
    "It should be in quotes and it should be around 7 words."
)
# Pause after each full-group image request (kept from the original pacing).
_IMAGE_PAUSE_SECONDS = 60


def _hf_headers():
    """Build the Authorization header for the Hugging Face Inference API."""
    token = os.environ.get(_HF_TOKEN_ENV_VAR)
    if not token:
        raise RuntimeError(
            f"Set the {_HF_TOKEN_ENV_VAR} environment variable to a Hugging Face API token"
        )
    return {"Authorization": f"Bearer {token}"}


def _query_inference_api(api_url, payload, headers):
    """POST ``payload`` as JSON to ``api_url`` and return the decoded JSON reply."""
    import requests  # imported lazily, as the original function did

    response = requests.post(
        api_url, headers=headers, json=payload, timeout=_HF_REQUEST_TIMEOUT_SECONDS
    )
    return response.json()


def _script_to_rows(raw_lines):
    """Convert the raw script lines into ``(character, line)`` pairs.

    Parenthesised lines (stage directions) are attributed to ``NARRATOR``,
    blank lines are dropped, the first two remaining lines (title and author)
    are skipped, and the rest is read as alternating speaker / speech lines.
    """
    expanded = []
    for raw in raw_lines:
        text = raw.strip()
        if text.startswith('(') and text.endswith(')'):
            expanded.append("NARRATOR")
            text = text[1:-1]
        expanded.append(text)
    body = [text for text in expanded if text != ''][2:]
    # zip() drops a trailing speaker name that has no speech after it.
    return [(speaker.strip(), speech.strip()) for speaker, speech in zip(body[::2], body[1::2])]


def _predict_gender(character, lines, headers):
    """Return the gender label for ``character`` (fixed table first, then the API)."""
    known = _KNOWN_CHARACTER_GENDERS.get(character.upper())
    if known is not None:
        return known

    # For other characters, use the inference API on all of their dialogue.
    aggregated_text = " ".join(str(line) for line in lines)
    input_text = f"Character: {character}. Dialogue: {aggregated_text}. Gender:"
    api_response = _query_inference_api(_GENDER_API_URL, {"inputs": input_text}, headers)
    # NOTE(review): this expects a JSON object with a 'gender' key. If the
    # model actually returns a list of label/score entries, this always
    # yields 'unknown' -- confirm the response schema against the model.
    if isinstance(api_response, dict):
        return api_response.get('gender', 'unknown')
    return 'unknown'


def _predict_emotion(line, headers):
    """Return 'Negative', 'Neutral', 'Positive' or 'Unknown' for one line of dialogue."""
    api_response = _query_inference_api(_EMOTION_API_URL, {"inputs": line}, headers)
    try:
        # Extract the label with the highest score.
        best = max(api_response[0], key=lambda candidate: candidate['score'])
        label = best['label']
    except (KeyError, IndexError, TypeError, ValueError):
        # E.g. an error object returned instead of predictions. Fall back to
        # 'Unknown' (rendered with neutral prosody) rather than aborting.
        print(f"Unexpected emotion API response for {line!r}: {api_response!r}")
        return 'Unknown'
    return _EMOTION_LABELS.get(label, 'Unknown')


def _synthesize_lines(df, output_folder):
    """Synthesize one MP3 per DataFrame row into ``output_folder`` as ``<index>.mp3``."""
    from xml.sax.saxutils import escape

    tts_client = texttospeech.TextToSpeechClient()
    audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3)
    used_male_voices, used_female_voices = set(), set()
    character_voices = {}
    for index, row in df.iterrows():
        character = row['Character']
        if character not in character_voices:
            # Any gender other than 'Male' (including 'Neutral' and
            # 'unknown') draws from the female voice pools.
            if row['Gender'] == 'Male':
                character_voices[character] = select_voice(
                    row['Gender'], natural_male_voices, standard_male_voices, used_male_voices)
            else:
                character_voices[character] = select_voice(
                    row['Gender'], natural_female_voices, standard_female_voices, used_female_voices)

        pitch, rate = adjust_parameters_based_on_emotion(row['Emotion'])
        # Escape &, < and > so dialogue text cannot break the SSML document.
        spoken = escape(str(row['Line']))
        ssml = f"<speak><prosody pitch='{pitch}%' rate='{rate}'>{spoken}</prosody></speak>"

        response = tts_client.synthesize_speech(
            input=texttospeech.SynthesisInput(ssml=ssml),
            voice=texttospeech.VoiceSelectionParams(
                language_code='en-US', name=character_voices[character]),
            audio_config=audio_config,
        )
        with open(os.path.join(output_folder, f'{index}.mp3'), 'wb') as out:
            out.write(response.audio_content)


def _generate_scene_image(dialogue, image_path):
    """Ask the chat model for a scene description, render it, and save it to ``image_path``."""
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": _SCENE_SYSTEM_PROMPT},
            {"role": "user", "content": dialogue},
        ],
    )
    description = response.choices[0].message.content
    print(description)
    picture = client.images.generate(
        model="dall-e-3",
        prompt=description,
        size="1792x1024",
        quality="standard",
        n=1,
    )
    urllib.request.urlretrieve(picture.data[0].url, image_path)


def txtToMp3(txt_file_path):
    """Turn a play script into per-line audio files and scene images.

    The script's first line is the title and its second line is the author.
    A directory named after the title is created (reused if it already
    exists) and filled with:

    * ``title.txt`` and ``author.txt``;
    * a CSV named after the script with the columns Character, Line,
      Gender and Emotion;
    * ``audio_files/<row index>.mp3`` -- one synthesized clip per CSV row;
    * ``images/<row number>.png`` -- one generated image per group of three
      rows, plus one for any final group of one or two rows.

    Requires the ``HF_API_TOKEN`` environment variable for the Hugging Face
    Inference API, and the module-level OpenAI client and Google Cloud
    credentials.

    Args:
        txt_file_path: Path to the script text file.

    Raises:
        RuntimeError: If ``HF_API_TOKEN`` is not set.
        IndexError: If the script has fewer than two lines.
    """
    headers = _hf_headers()  # fail fast, before any files are written

    with open(txt_file_path, 'r') as script_file:
        contents = script_file.readlines()
    first_line = contents[0].strip()
    second_line = contents[1].strip()

    # exist_ok lets the pipeline be re-run for the same play.
    os.makedirs(first_line, exist_ok=True)
    with open(os.path.join(first_line, 'title.txt'), 'w', encoding='utf-8') as title_file:
        title_file.write(first_line)
    with open(os.path.join(first_line, 'author.txt'), 'w', encoding='utf-8') as author_file:
        author_file.write(second_line)

    # Derive the CSV name from the script's base name, so a script path with
    # directories, or without a '.txt' suffix, still lands inside the title
    # folder.
    csv_name = os.path.splitext(os.path.basename(txt_file_path))[0] + '.csv'
    new_new_file_path = os.path.join(first_line, csv_name)
    # Written as UTF-8 because pandas reads it back as UTF-8 by default.
    with open(new_new_file_path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Character', 'Line'])
        writer.writerows(_script_to_rows(contents))

    # Read CSV data into a DataFrame and predict one gender per character
    # from all of that character's lines.
    data = pd.read_csv(new_new_file_path)
    character_lines = data.groupby('Character')['Line'].apply(list)
    character_genders = {
        character: _predict_gender(character, lines, headers)
        for character, lines in character_lines.items()
    }
    data['Gender'] = data['Character'].map(character_genders)

    # Query the emotion model once per line; empty cells are sent as ''.
    data['Emotion'] = [
        _predict_emotion(line, headers) for line in data['Line'].fillna('').astype(str)
    ]

    # Write the annotated DataFrame back to the CSV file.
    data.to_csv(new_new_file_path, index=False)
    print(new_new_file_path)

    # Go through each row in the CSV and convert the text to MP3.
    output_folder = os.path.join(first_line, 'audio_files')
    os.makedirs(output_folder, exist_ok=True)
    _synthesize_lines(pd.read_csv(new_new_file_path), output_folder)

    # Generate one illustration per group of three consecutive lines.
    image_folder = os.path.join(first_line, 'images')
    os.makedirs(image_folder, exist_ok=True)
    groupOfThreeLines = []
    row_number = 0
    with open(new_new_file_path, 'r', newline='', encoding='utf-8') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip the header row
        for row_number, row in enumerate(reader, start=1):
            groupOfThreeLines.append(row[0] + ':' + row[1] + '\n')
            if len(groupOfThreeLines) == 3:
                prompt = ''.join(groupOfThreeLines)
                groupOfThreeLines = []
                print(prompt)
                _generate_scene_image(
                    prompt, os.path.join(image_folder, str(row_number) + '.png'))
                time.sleep(_IMAGE_PAUSE_SECONDS)

    # Illustrate a trailing group of one or two lines. When the row count is
    # a multiple of three there is nothing left, and the last image must not
    # be regenerated from an empty prompt and overwritten.
    if groupOfThreeLines:
        _generate_scene_image(
            ' '.join(groupOfThreeLines),
            os.path.join(image_folder, str(row_number) + '.png'))
313
+
314
# Usage example. Guarded so that importing this module does not start the
# full pipeline (which calls paid external APIs and writes files).
if __name__ == "__main__":
    txt_file_path = 'TheComedyOfErrors.txt'
    txtToMp3(txt_file_path)