Spaces:

Startup-Exchange
/

tech_poet_society

Runtime error

App Files Files Community

tech_poet_society / tts.py

ddiddu

Create tts.py

2416ba0 12 months ago

raw

history blame contribute delete

13.4 kB

	import os
	import csv
	import random
	import urllib.request
	from PIL import Image
	import os
	from openai import OpenAI
	import time
	import pandas as pd
	from google.cloud import texttospeech

	# Point the Google Cloud client library at the service-account key file
	# shipped next to this script.
	os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'shaikespeare_ck.json'

	# The OpenAI API key must never be committed to source control; it is read
	# from the OPENAI_API_KEY environment variable instead. Any key that was
	# previously committed in this file should be treated as leaked and revoked.
	client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

	def adjust_parameters_based_on_emotion(emotion):
	    """Return the ``(pitch, rate)`` pair used to voice a line with *emotion*.

	    'Positive' yields a higher pitch and faster rate, 'Negative' a lower
	    pitch and slower rate; any other value yields the neutral pair.
	    """
	    # Start from the neutral setting and override it for the two emotions
	    # that have a dedicated preset.
	    pitch, rate = 0, 1.0
	    if emotion == 'Positive':
	        pitch, rate = 20, 1.2
	    elif emotion == 'Negative':
	        pitch, rate = -20, 0.8
	    return pitch, rate

	def select_voice(gender, natural_voices, standard_voices, used_voices):
	    """Pick a voice name, preferring ones that have not been handed out yet.

	    Args:
	        gender: Unused; kept so existing callers keep working.
	        natural_voices: Preferred voice names.
	        standard_voices: Fallback voice names, used once every natural
	            voice is already in *used_voices*.
	        used_voices: Set of voice names already assigned. The chosen voice
	            is added to it in place.

	    Returns:
	        A voice name. While unused voices remain the result is unique; once
	        both pools are exhausted an already-used voice is returned rather
	        than looping forever looking for a free one.

	    Raises:
	        IndexError: If both pools are empty.
	    """
	    # Prioritize natural voices first, then standard voices.
	    for pool in (natural_voices, standard_voices):
	        unused = [name for name in pool if name not in used_voices]
	        if unused:
	            voice = random.choice(unused)
	            used_voices.add(voice)
	            return voice

	    # Every voice is taken: share one instead of spinning in a retry loop.
	    return random.choice(list(natural_voices) + list(standard_voices))

	# Voice-name catalogues handed to select_voice(): for each gender the
	# "natural" list is tried first and the "standard" list is the fallback.
	natural_male_voices = [
	    'en-US-Neural2-A', 'en-US-Neural2-D', 'en-US-Neural2-I', 'en-US-Neural2-J',
	    'en-US-Wavenet-A', 'en-US-Wavenet-B', 'en-US-Wavenet-D', 'en-US-Wavenet-I', 'en-US-Wavenet-J',
	]
	# A missing comma previously fused 'en-US-News-N' and 'en-US-Polyglot-1'
	# into a single, invalid voice name; they are two separate entries.
	standard_male_voices = [
	    'en-US-News-M', 'en-US-News-N', 'en-US-Polyglot-1',
	    'en-US-Standard-A', 'en-US-Standard-B', 'en-US-Standard-D', 'en-US-Standard-I', 'en-US-Standard-J',
	    'en-US-Studio-M', 'en-US-Studio-Q',
	]

	natural_female_voices = [
	    'en-US-Neural2-C', 'en-US-Neural2-E', 'en-US-Neural2-F', 'en-US-Neural2-G', 'en-US-Neural2-H',
	    'en-US-Wavenet-C', 'en-US-Wavenet-E', 'en-US-Wavenet-F', 'en-US-Wavenet-H', 'en-US-Wavenet-G',
	]
	standard_female_voices = [
	    'en-US-News-K', 'en-US-News-L',
	    'en-US-Standard-C', 'en-US-Standard-E', 'en-US-Standard-F', 'en-US-Standard-G', 'en-US-Standard-H',
	    'en-US-Studio-O',
	]

	# Hugging Face Inference API endpoints used by the pipeline.
	_HF_GENDER_API_URL = "https://api-inference.huggingface.co/models/Startup-Exchange/tps_gender_prediction"
	_HF_SENTIMENT_API_URL = "https://api-inference.huggingface.co/models/Startup-Exchange/tps_sentimental_analysis"

	# Characters whose gender is fixed up front instead of being asked of the model.
	_KNOWN_CHARACTER_GENDERS = {
	    "NARRATOR": "Neutral",
	    "EGEON": "Male",
	    "DUKE": "Male",
	    "JAILER": "Male",
	}

	# Mapping for converting sentiment labels to more readable forms.
	_EMOTION_LABELS = {'LABEL_0': 'Negative', 'LABEL_1': 'Neutral', 'LABEL_2': 'Positive'}

	_SCENE_SYSTEM_PROMPT = "Generate a description of what the scene looks like. It should be in quotes and it should be around 7 words."


	def _query_hf(api_url, payload):
	    """POST *payload* as JSON to a Hugging Face endpoint and return the decoded reply."""
	    import requests  # kept function-local, as in the original script

	    # The bearer token is read from the environment; it must not be
	    # committed to source control. Previously committed tokens should be
	    # treated as leaked and revoked.
	    token = os.environ.get("HF_TOKEN")
	    if not token:
	        raise RuntimeError("Set the HF_TOKEN environment variable to call the Hugging Face Inference API.")
	    response = requests.post(
	        api_url,
	        headers={"Authorization": f"Bearer {token}"},
	        json=payload,
	        timeout=60,  # do not hang forever on a stalled connection
	    )
	    return response.json()


	def _script_to_rows(raw_lines):
	    """Turn the raw script lines into ``(character, line)`` pairs.

	    A line fully wrapped in parentheses is a stage direction: the
	    parentheses are dropped and a "NARRATOR" speaker line is inserted before
	    it. Empty lines are discarded, the first two remaining lines (title and
	    author) are skipped, and what is left is read as alternating
	    speaker / spoken-text lines. An unpaired trailing line is ignored.
	    """
	    cleaned = []
	    for raw in raw_lines:
	        text = raw.strip()
	        if text.startswith('(') and text.endswith(')'):
	            text = text[1:-1]
	            cleaned.append("NARRATOR")
	        cleaned.append(text)
	    body = [text for text in cleaned if text][2:]
	    return [(speaker.strip(), spoken.strip()) for speaker, spoken in zip(body[::2], body[1::2])]


	def _predict_gender(character, lines):
	    """Return the gender label for *character*, given all of its lines."""
	    known = _KNOWN_CHARACTER_GENDERS.get(character.upper())
	    if known is not None:
	        return known

	    # For other characters, use the inference API.
	    aggregated_text = " ".join(lines)
	    input_text = f"Character: {character}. Dialogue: {aggregated_text}. Gender:"
	    api_response = _query_hf(_HF_GENDER_API_URL, {"inputs": input_text})
	    # NOTE(review): the reply is assumed to be a JSON object with a 'gender'
	    # key -- confirm against the deployed model. Any other shape is
	    # reported as 'unknown' instead of crashing on a missing .get().
	    if isinstance(api_response, dict):
	        return api_response.get('gender', 'unknown')
	    return 'unknown'


	def _predict_emotion(line):
	    """Return 'Negative', 'Neutral', 'Positive' or 'Unknown' for one spoken line."""
	    api_response = _query_hf(_HF_SENTIMENT_API_URL, {"inputs": line})
	    if not isinstance(api_response, list) or not api_response or not api_response[0]:
	        # e.g. an {"error": ...} object returned by the endpoint
	        raise RuntimeError(f"Unexpected sentiment API response: {api_response!r}")
	    # Extract the label with the highest score.
	    best = max(api_response[0], key=lambda candidate: candidate['score'])
	    return _EMOTION_LABELS.get(best['label'], 'Unknown')


	def _generate_scene_image(prompt, image_path):
	    """Ask the chat model to describe the scene in *prompt*, render it, and save it to *image_path*."""
	    print(prompt)
	    response = client.chat.completions.create(
	        model="gpt-3.5-turbo",
	        messages=[
	            {"role": "system", "content": _SCENE_SYSTEM_PROMPT},
	            {"role": "user", "content": prompt},
	        ],
	    )
	    description = response.choices[0].message.content
	    print(description)
	    picture = client.images.generate(
	        model="dall-e-3",
	        prompt=description,
	        size="1792x1024",
	        quality="standard",
	        n=1,
	    )
	    urllib.request.urlretrieve(picture.data[0].url, image_path)


	def txtToMp3(txt_file_path):
	    """Convert a play script into a CSV, per-line MP3 files and scene images.

	    The first line of the file is the title and the second the author. A
	    folder named after the title is created (re-used if it already exists)
	    and receives:

	    * ``title.txt`` and ``author.txt``;
	    * ``<script name>.csv`` with the columns Character, Line, Gender, Emotion;
	    * ``audio_files/<row index>.mp3``, one per CSV row, numbered from 0;
	    * ``images/<row number>.png``, one per group of three rows (named after
	      the 1-based number of the group's last row), plus one for a trailing
	      partial group when there is one.

	    Gender and emotion come from the Hugging Face Inference API (needs the
	    HF_TOKEN environment variable), speech from Google Cloud Text-to-Speech,
	    and images from the module-level OpenAI ``client``.

	    Args:
	        txt_file_path: Path of the script text file.

	    Raises:
	        IndexError: If the file has fewer than two lines.
	        RuntimeError: If HF_TOKEN is unset or the sentiment endpoint returns
	            an unexpected payload.
	    """
	    from xml.sax.saxutils import escape

	    with open(txt_file_path, 'r') as file:
	        contents = file.readlines()
	    title = contents[0].strip()
	    author = contents[1].strip()

	    # exist_ok lets the pipeline be re-run for the same play.
	    os.makedirs(title, exist_ok=True)
	    with open(os.path.join(title, 'title.txt'), 'w') as title_file:
	        title_file.write(title)
	    with open(os.path.join(title, 'author.txt'), 'w') as author_file:
	        author_file.write(author)

	    rows = _script_to_rows(contents)

	    # Derive the CSV name from the file name only, so an input path with
	    # directories or without a ".txt" suffix still lands inside the title
	    # folder and can never collide with a temporary file.
	    script_name = os.path.splitext(os.path.basename(txt_file_path))[0]
	    csv_path = os.path.join(title, script_name + '.csv')

	    # Aggregate lines for each character and predict one gender per character.
	    lines_by_character = {}
	    for character, line in rows:
	        lines_by_character.setdefault(character, []).append(line)
	    gender_by_character = {
	        character: _predict_gender(character, spoken)
	        for character, spoken in lines_by_character.items()
	    }

	    # One emotion per spoken line.
	    records = [
	        (character, line, gender_by_character[character], _predict_emotion(line))
	        for character, line in rows
	    ]

	    with open(csv_path, 'w', newline='') as csv_file:
	        writer = csv.writer(csv_file)
	        writer.writerow(['Character', 'Line', 'Gender', 'Emotion'])
	        writer.writerows(records)
	    print(csv_path)

	    # Go through each row and convert the text to mp3.
	    output_folder = os.path.join(title, 'audio_files')
	    os.makedirs(output_folder, exist_ok=True)

	    tts_client = texttospeech.TextToSpeechClient()
	    audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3)
	    used_male_voices, used_female_voices = set(), set()
	    character_voices = {}
	    for index, (character, line, gender, emotion) in enumerate(records):
	        # Each character keeps the first voice it was given.
	        if character not in character_voices:
	            if gender == 'Male':
	                character_voices[character] = select_voice(gender, natural_male_voices, standard_male_voices, used_male_voices)
	            else:
	                character_voices[character] = select_voice(gender, natural_female_voices, standard_female_voices, used_female_voices)

	        voice_name = character_voices[character]
	        pitch, rate = adjust_parameters_based_on_emotion(emotion)
	        # Escape the dialogue so '&', '<' and '>' cannot break the SSML document.
	        text = f"<speak><prosody pitch='{pitch}%' rate='{rate}'>{escape(line)}</prosody></speak>"

	        synthesis_input = texttospeech.SynthesisInput(ssml=text)
	        voice_params = texttospeech.VoiceSelectionParams(language_code='en-US', name=voice_name)
	        response = tts_client.synthesize_speech(input=synthesis_input, voice=voice_params, audio_config=audio_config)
	        with open(os.path.join(output_folder, f'{index}.mp3'), 'wb') as out:
	            out.write(response.audio_content)

	    # One illustration per three lines of dialogue.
	    image_folder = os.path.join(title, 'images')
	    os.makedirs(image_folder, exist_ok=True)
	    group = []
	    for row_number, (character, line, _gender, _emotion) in enumerate(records, start=1):
	        group.append(character + ':' + line + '\n')
	        if len(group) == 3:
	            _generate_scene_image(''.join(group), os.path.join(image_folder, str(row_number) + '.png'))
	            group = []
	            time.sleep(60)  # pause between image requests, as the original did

	    # Illustrate the trailing partial group only when there is one; an
	    # unconditional request would send an empty prompt and overwrite the
	    # last image.
	    if group:
	        _generate_scene_image(''.join(group), os.path.join(image_folder, str(len(records)) + '.png'))

	# Usage example: run the whole pipeline on the bundled script file.
	# NOTE: this executes at module level, so it also runs when the module is imported.
	txt_file_path = 'TheComedyOfErrors.txt'
	txtToMp3(txt_file_path=txt_file_path)