# Hosting-page residue: uploaded by "ddiddu"; commit "Create tts.py" (2416ba0).
import os
import csv
import random
import urllib.request
from PIL import Image
import os
from openai import OpenAI
import time
import pandas as pd
from google.cloud import texttospeech
# Google Cloud Text-to-Speech locates its service-account key through this
# environment variable.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'shaikespeare_ck.json'

# The OpenAI key is read from the OPENAI_API_KEY environment variable so that no
# secret lives in the source tree.
# NOTE: a literal key used to be committed on this line; treat that key as
# compromised and rotate it.
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
def adjust_parameters_based_on_emotion(emotion):
    """Return the ``(pitch, rate)`` pair to use for a line with this emotion.

    'Positive' gives a higher pitch and faster rate, 'Negative' a lower pitch
    and slower rate; any other value gives the neutral pair ``(0, 1.0)``.
    """
    prosody_table = (
        ('Positive', (20, 1.2)),
        ('Negative', (-20, 0.8)),
    )
    for label, prosody in prosody_table:
        if emotion == label:
            return prosody
    # Neutral or unspecified emotion.
    return 0, 1.0
def select_voice(gender, natural_voices, standard_voices, used_voices):
    """Pick a voice name at random, preferring ones not handed out yet.

    Natural voices are tried first, then standard voices. The chosen voice is
    added to ``used_voices`` (mutated in place) before being returned.

    Args:
        gender: Unused; kept so existing callers keep working.
        natural_voices: Preferred voice names.
        standard_voices: Fallback voice names.
        used_voices: Set of voice names already assigned.

    Returns:
        A voice name. When every voice in both pools is already used, a voice
        is reused instead of searching forever for a free one.

    Raises:
        IndexError: If both pools are empty.
    """
    for pool in (natural_voices, standard_voices):
        available = [voice for voice in pool if voice not in used_voices]
        if available:
            voice = random.choice(available)
            used_voices.add(voice)
            return voice

    # Both pools are exhausted: share a voice rather than looping forever.
    voice = random.choice(list(natural_voices) + list(standard_voices))
    used_voices.add(voice)
    return voice
# Voice-name pools passed to select_voice(): the "natural" lists are tried
# first, the "standard" lists are the fallback.
natural_male_voices = [
    'en-US-Neural2-A', 'en-US-Neural2-D', 'en-US-Neural2-I', 'en-US-Neural2-J',
    'en-US-Wavenet-A', 'en-US-Wavenet-B', 'en-US-Wavenet-D', 'en-US-Wavenet-I',
    'en-US-Wavenet-J',
]
# A missing comma used to fuse 'en-US-News-N' and 'en-US-Polyglot-1' into a
# single, invalid voice name.
standard_male_voices = [
    'en-US-News-M', 'en-US-News-N', 'en-US-Polyglot-1',
    'en-US-Standard-A', 'en-US-Standard-B', 'en-US-Standard-D',
    'en-US-Standard-I', 'en-US-Standard-J',
    'en-US-Studio-M', 'en-US-Studio-Q',
]
natural_female_voices = [
    'en-US-Neural2-C', 'en-US-Neural2-E', 'en-US-Neural2-F', 'en-US-Neural2-G',
    'en-US-Neural2-H',
    'en-US-Wavenet-C', 'en-US-Wavenet-E', 'en-US-Wavenet-F', 'en-US-Wavenet-H',
    'en-US-Wavenet-G',
]
standard_female_voices = [
    'en-US-News-K', 'en-US-News-L',
    'en-US-Standard-C', 'en-US-Standard-E', 'en-US-Standard-F',
    'en-US-Standard-G', 'en-US-Standard-H',
    'en-US-Studio-O',
]
# Hugging Face Inference API endpoints used to annotate the script.
_HF_GENDER_URL = "https://api-inference.huggingface.co/models/Startup-Exchange/tps_gender_prediction"
_HF_SENTIMENT_URL = "https://api-inference.huggingface.co/models/Startup-Exchange/tps_sentimental_analysis"

# Characters whose gender is fixed up front instead of being predicted.
_KNOWN_CHARACTER_GENDERS = {
    "NARRATOR": "Neutral",
    "EGEON": "Male",
    "DUKE": "Male",
    "JAILER": "Male",
}

# Mapping for converting classifier labels to more readable forms.
_EMOTION_BY_LABEL = {'LABEL_0': 'Negative', 'LABEL_1': 'Neutral', 'LABEL_2': 'Positive'}

_SCENE_SYSTEM_PROMPT = (
    "Generate a description of what the scene looks like. "
    "It should be in quotes and it should be around 7 words."
)


def _parse_script_rows(raw_lines):
    """Turn the raw script lines into ``(character, line)`` pairs.

    Each line is stripped. A line wrapped in parentheses loses them and is
    preceded by a synthetic ``NARRATOR`` speaker line. Empty lines are dropped,
    the first two remaining lines (title and author) are skipped, and the rest
    is read as alternating speaker / spoken-text lines.
    """
    cleaned = []
    for raw in raw_lines:
        text = raw.strip()
        if text.startswith('(') and text.endswith(')'):
            cleaned.append('NARRATOR')
            text = text[1:-1]
        if text:
            cleaned.append(text)
    body = cleaned[2:]
    return [
        (speaker.strip(), spoken.strip())
        for speaker, spoken in zip(body[::2], body[1::2])
    ]


def _query_hf(api_url, payload):
    """POST ``payload`` as JSON to a Hugging Face endpoint and return the decoded reply.

    The bearer token is taken from the ``HF_TOKEN`` environment variable; no
    Authorization header is sent when it is unset.
    """
    import requests

    token = os.environ.get('HF_TOKEN')
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    # A timeout keeps one stalled request from hanging the whole pipeline.
    response = requests.post(api_url, headers=headers, json=payload, timeout=60)
    return response.json()


def _predict_gender(character, lines):
    """Return the gender label for ``character`` given all of its lines."""
    known = _KNOWN_CHARACTER_GENDERS.get(character.upper())
    if known is not None:
        return known

    # For other characters, use the inference API.
    aggregated_text = " ".join(str(line) for line in lines)
    input_text = f"Character: {character}. Dialogue: {aggregated_text}. Gender:"
    api_response = _query_hf(_HF_GENDER_URL, {"inputs": input_text})
    # NOTE(review): the reply is read as a mapping with a 'gender' key; confirm
    # the model's real response shape. Any other shape is reported as 'unknown'.
    if isinstance(api_response, dict):
        return api_response.get('gender', 'unknown')
    return 'unknown'


def _detect_emotion(line):
    """Return 'Negative', 'Neutral', 'Positive' or 'Unknown' for one spoken line."""
    api_response = _query_hf(_HF_SENTIMENT_URL, {"inputs": line})
    try:
        # The first element is expected to hold {'label', 'score'} candidates;
        # keep the label with the highest score.
        best = max(api_response[0], key=lambda candidate: candidate['score'])
        label = best['label']
    except (KeyError, IndexError, TypeError, ValueError):
        print(f"Unexpected sentiment response, using 'Unknown': {api_response!r}")
        return 'Unknown'
    return _EMOTION_BY_LABEL.get(label, 'Unknown')


def _synthesize_lines(rows, output_folder):
    """Write ``<index>.mp3`` into ``output_folder`` for every annotated row.

    ``rows`` holds ``(character, line, gender, emotion)`` tuples. A character
    keeps the voice chosen on its first line.
    """
    from xml.sax.saxutils import escape

    tts_client = texttospeech.TextToSpeechClient()
    audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3)
    used_male_voices, used_female_voices = set(), set()
    character_voices = {}

    for index, (character, line, gender, emotion) in enumerate(rows):
        if character not in character_voices:
            # Anything that is not 'Male' (including 'Neutral') uses the female pools.
            if gender == 'Male':
                character_voices[character] = select_voice(
                    gender, natural_male_voices, standard_male_voices, used_male_voices)
            else:
                character_voices[character] = select_voice(
                    gender, natural_female_voices, standard_female_voices, used_female_voices)
        voice_name = character_voices[character]

        pitch, rate = adjust_parameters_based_on_emotion(emotion)
        # Escape &, < and > so the dialogue cannot break the SSML document.
        # NOTE(review): pitch is sent without an explicit sign (e.g. '20%');
        # confirm the service accepts that form.
        text = (
            f"<speak><prosody pitch='{pitch}%' rate='{rate}'>"
            f"{escape(str(line))}</prosody></speak>"
        )
        synthesis_input = texttospeech.SynthesisInput(ssml=text)
        voice_params = texttospeech.VoiceSelectionParams(language_code='en-US', name=voice_name)
        response = tts_client.synthesize_speech(
            input=synthesis_input, voice=voice_params, audio_config=audio_config)

        file_name = os.path.join(output_folder, f'{index}.mp3')
        with open(file_name, 'wb') as out:
            out.write(response.audio_content)


def _generate_scene_image(prompt, image_path):
    """Ask the chat model for a short scene description and save its picture."""
    print(prompt)
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": _SCENE_SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
    )
    description = response.choices[0].message.content
    print(description)
    response_pic = client.images.generate(
        model="dall-e-3",
        prompt=description,
        size="1792x1024",
        quality="standard",
        n=1,
    )
    urllib.request.urlretrieve(response_pic.data[0].url, image_path)


def txtToMp3(txt_file_path):
    """Convert a play script into annotated CSV, per-line MP3s and scene images.

    The first line of the file is the title and the second the author. A
    directory named after the title is created (reused if it exists) and receives:

    * ``title.txt`` and ``author.txt``;
    * ``<input name>.csv`` with columns Character, Line, Gender, Emotion;
    * ``audio_files/<row index>.mp3``, one per CSV row, 0-based;
    * ``images/<n>.png``, one per group of three rows, named after the 1-based
      number of the group's last row. A final partial group gets an image too.

    Credentials come from the environment: ``HF_TOKEN`` for Hugging Face,
    ``GOOGLE_APPLICATION_CREDENTIALS`` for Text-to-Speech, and whatever the
    module-level ``client`` was built with for OpenAI.

    Args:
        txt_file_path: Path of the script; read as UTF-8.

    Raises:
        ValueError: If the file has fewer than two lines or a blank title.
    """
    with open(txt_file_path, 'r', encoding='utf-8') as file:
        contents = file.readlines()
    if len(contents) < 2:
        raise ValueError(f"{txt_file_path!r} must start with a title line and an author line")

    first_line = contents[0].strip()
    second_line = contents[1].strip()
    if not first_line:
        raise ValueError(f"{txt_file_path!r} has an empty title line")

    # exist_ok lets the pipeline be re-run for the same play.
    os.makedirs(first_line, exist_ok=True)
    with open(os.path.join(first_line, 'title.txt'), 'w', encoding='utf-8') as title_file:
        title_file.write(first_line)
    with open(os.path.join(first_line, 'author.txt'), 'w', encoding='utf-8') as author_file:
        author_file.write(second_line)

    dialogue = _parse_script_rows(contents)

    # Gender is predicted once per character from all of that character's lines.
    lines_by_character = {}
    for character, line in dialogue:
        lines_by_character.setdefault(character, []).append(line)
    genders = {
        character: _predict_gender(character, lines)
        for character, lines in lines_by_character.items()
    }
    rows = [
        (character, line, genders[character], _detect_emotion(line))
        for character, line in dialogue
    ]

    # Use only the input's base name, and swap just the extension, so inputs in
    # other directories or without a '.txt' suffix still get a CSV in the title folder.
    stem = os.path.splitext(os.path.basename(txt_file_path))[0]
    new_new_file_path = os.path.join(first_line, stem + '.csv')
    with open(new_new_file_path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Character', 'Line', 'Gender', 'Emotion'])
        writer.writerows(rows)
    print(new_new_file_path)

    output_folder = os.path.join(first_line, 'audio_files')
    os.makedirs(output_folder, exist_ok=True)
    _synthesize_lines(rows, output_folder)

    image_folder = os.path.join(first_line, 'images')
    os.makedirs(image_folder, exist_ok=True)
    group_of_three_lines = []
    row_number = 0
    for row_number, (character, line, _gender, _emotion) in enumerate(rows, start=1):
        group_of_three_lines.append(character + ':' + line + '\n')
        if len(group_of_three_lines) == 3:
            prompt = ''.join(group_of_three_lines)
            group_of_three_lines = []
            _generate_scene_image(prompt, os.path.join(image_folder, str(row_number) + '.png'))
            # Pause between image requests.
            time.sleep(60)

    # Illustrate a trailing partial group only when there is one; otherwise an
    # empty prompt would overwrite the image of the last complete group.
    if group_of_three_lines:
        prompt = ''.join(group_of_three_lines)
        _generate_scene_image(prompt, os.path.join(image_folder, str(row_number) + '.png'))
# Usage example: run only when this file is executed as a script, so that
# importing the module does not start the pipeline and its external API calls.
if __name__ == '__main__':
    txt_file_path = 'TheComedyOfErrors.txt'
    txtToMp3(txt_file_path)