Spaces:

Startup-Exchange
/

tech_poet_society

Runtime error

App Files Files Community

ddiddu committed on Nov 19, 2023

Commit

2416ba0

•

1 Parent(s): 58b312e

Create tts.py

Browse files

Files changed (1) hide show

tts.py +316 -0

tts.py ADDED Viewed

	@@ -0,0 +1,316 @@

import csv
import html
import os
import random
import time
import urllib.request

import pandas as pd
import requests
from google.cloud import texttospeech
from openai import OpenAI
from PIL import Image
# Path of the Google Cloud service-account key used by the Text-to-Speech client.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'shaikespeare_ck.json'
# The OpenAI secret must never be committed to source control: it is read from
# the OPENAI_API_KEY environment variable instead of being embedded here.
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
def adjust_parameters_based_on_emotion(emotion):
    """Return the ``(pitch, rate)`` pair used to voice a line with *emotion*.

    'Positive' yields a higher pitch and faster rate, 'Negative' a lower pitch
    and slower rate; any other value gets the neutral pair ``(0, 1.0)``.
    """
    prosody_by_emotion = (
        ('Positive', (20, 1.2)),
        ('Negative', (-20, 0.8)),
    )
    for label, prosody in prosody_by_emotion:
        if emotion == label:
            return prosody
    # Neutral or unrecognised emotion: leave pitch and rate untouched.
    return 0, 1.0
def select_voice(gender, natural_voices, standard_voices, used_voices):
    """Pick a voice name for a character, avoiding voices that are already taken.

    Args:
        gender: Gender label of the character. Not consulted here; the caller
            already passes the pools that match the gender.
        natural_voices: Preferred voice names, tried first.
        standard_voices: Fallback voice names, used once every natural voice
            is taken.
        used_voices: Set of voice names already assigned. The chosen voice is
            added to it.

    Returns:
        A voice name. It is unused whenever an unused voice exists in either
        pool; once both pools are exhausted a random voice is reused.

    Raises:
        ValueError: If both pools are empty.
    """
    # Sample only from voices that are still free. Re-drawing until an unused
    # voice turns up never terminates once a pool is fully used.
    for pool in (natural_voices, standard_voices):
        unused = [voice for voice in pool if voice not in used_voices]
        if unused:
            voice = random.choice(unused)
            used_voices.add(voice)
            return voice

    every_voice = list(natural_voices) + list(standard_voices)
    if not every_voice:
        raise ValueError('no voices available to choose from')
    # More characters than voices: sharing a voice beats hanging or crashing.
    return random.choice(every_voice)
# Voice-name pools handed to select_voice(): the "natural" pool for a gender is
# tried first and the "standard" pool is the fallback.
natural_male_voices = [
    'en-US-Neural2-A', 'en-US-Neural2-D', 'en-US-Neural2-I', 'en-US-Neural2-J',
    'en-US-Wavenet-A', 'en-US-Wavenet-B', 'en-US-Wavenet-D', 'en-US-Wavenet-I', 'en-US-Wavenet-J',
]
# A comma was missing between 'en-US-News-N' and 'en-US-Polyglot-1', which made
# Python concatenate them into a single, invalid voice name.
standard_male_voices = [
    'en-US-News-M', 'en-US-News-N', 'en-US-Polyglot-1',
    'en-US-Standard-A', 'en-US-Standard-B', 'en-US-Standard-D', 'en-US-Standard-I', 'en-US-Standard-J',
    'en-US-Studio-M', 'en-US-Studio-Q',
]
natural_female_voices = [
    'en-US-Neural2-C', 'en-US-Neural2-E', 'en-US-Neural2-F', 'en-US-Neural2-G', 'en-US-Neural2-H',
    'en-US-Wavenet-C', 'en-US-Wavenet-E', 'en-US-Wavenet-F', 'en-US-Wavenet-H', 'en-US-Wavenet-G',
]
standard_female_voices = [
    'en-US-News-K', 'en-US-News-L',
    'en-US-Standard-C', 'en-US-Standard-E', 'en-US-Standard-F', 'en-US-Standard-G', 'en-US-Standard-H',
    'en-US-Studio-O',
]
# Hugging Face Inference API endpoints of the two project models.
_GENDER_API_URL = "https://api-inference.huggingface.co/models/Startup-Exchange/tps_gender_prediction"
_SENTIMENT_API_URL = "https://api-inference.huggingface.co/models/Startup-Exchange/tps_sentimental_analysis"
# Upper bound on a single inference request so a stalled call cannot hang the run.
_HF_TIMEOUT_SECONDS = 120

# Characters whose gender is fixed up front instead of being asked of the model.
_KNOWN_CHARACTER_GENDERS = {
    "NARRATOR": "Neutral",
    "EGEON": "Male",
    "DUKE": "Male",
    "JAILER": "Male",
}
# Readable names for the raw labels returned by the sentiment model.
_EMOTION_LABELS = {'LABEL_0': 'Negative', 'LABEL_1': 'Neutral', 'LABEL_2': 'Positive'}

_CSV_COLUMNS = ['Character', 'Line', 'Gender', 'Emotion']
_SCENE_SYSTEM_PROMPT = (
    "Generate a description of what the scene looks like. "
    "It should be in quotes and it should be around 7 words."
)
# One illustration is generated for every group of this many dialogue rows.
_LINES_PER_IMAGE = 3
# Pause after each full-group image; presumably to respect the image API's
# rate limit -- TODO confirm the required delay.
_IMAGE_DELAY_SECONDS = 60


def _read_script(txt_file_path):
    """Return ``(title, author, [(character, line), ...])`` parsed from the script."""
    with open(txt_file_path, 'r', encoding='utf-8') as script_file:
        raw_lines = script_file.readlines()
    if len(raw_lines) < 2:
        raise ValueError(f"{txt_file_path!r} must start with a title line and an author line")
    title = raw_lines[0].strip()
    author = raw_lines[1].strip()
    if not title:
        raise ValueError(f"{txt_file_path!r} has an empty title line")

    entries = []
    for raw in raw_lines:
        text = raw.strip()
        if text.startswith('(') and text.endswith(')'):
            # A fully parenthesised line is narration: emit it as a NARRATOR
            # speaker/line pair. Empty parentheses are skipped entirely so the
            # speaker/line alternation below is not shifted by one.
            narration = text[1:-1].strip()
            if narration:
                entries.extend(['NARRATOR', narration])
        elif text:
            entries.append(text)

    # The first two non-blank entries are the title and author; everything
    # after them alternates speaker name / spoken line. A trailing unpaired
    # entry is dropped by zip().
    body = entries[2:]
    return title, author, list(zip(body[::2], body[1::2]))


def _query_inference_api(api_url, payload):
    """POST *payload* to a Hugging Face inference endpoint and return the decoded JSON."""
    token = os.environ.get('HF_TOKEN')
    if not token:
        raise RuntimeError('Set the HF_TOKEN environment variable to a Hugging Face API token')
    response = requests.post(
        api_url,
        headers={"Authorization": f"Bearer {token}"},
        json=payload,
        timeout=_HF_TIMEOUT_SECONDS,
    )
    return response.json()


def _predict_gender(character, lines):
    """Return the gender label for *character*, asking the model only when it is not hard-coded."""
    known = _KNOWN_CHARACTER_GENDERS.get(character.upper())
    if known is not None:
        return known
    aggregated_text = " ".join(lines)
    input_text = f"Character: {character}. Dialogue: {aggregated_text}. Gender:"
    api_response = _query_inference_api(_GENDER_API_URL, {"inputs": input_text})
    # Best effort: any response without a 'gender' field maps to 'unknown'.
    if isinstance(api_response, dict):
        return api_response.get('gender', 'unknown')
    return 'unknown'


def _predict_emotion(line):
    """Return 'Negative', 'Neutral' or 'Positive' for *line*, or 'Unknown' when undetermined."""
    api_response = _query_inference_api(_SENTIMENT_API_URL, {"inputs": line})
    try:
        best = max(api_response[0], key=lambda candidate: candidate['score'])
        label = best['label']
    except (KeyError, IndexError, TypeError, ValueError):
        # The response did not have the [[{'label', 'score'}, ...]] shape
        # (for example an error object); treat the emotion as undetermined.
        return 'Unknown'
    return _EMOTION_LABELS.get(label, 'Unknown')


def _synthesize_audio(records, output_folder):
    """Write one ``<row index>.mp3`` per record into *output_folder*."""
    tts_client = texttospeech.TextToSpeechClient()
    audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3)
    used_male_voices, used_female_voices = set(), set()
    character_voices = {}
    for index, record in enumerate(records):
        character = record['Character']
        gender = record['Gender']
        if character not in character_voices:
            # Anything that is not exactly 'Male' is voiced from the female pools.
            if gender == 'Male':
                character_voices[character] = select_voice(
                    gender, natural_male_voices, standard_male_voices, used_male_voices)
            else:
                character_voices[character] = select_voice(
                    gender, natural_female_voices, standard_female_voices, used_female_voices)
        pitch, rate = adjust_parameters_based_on_emotion(record['Emotion'])
        # Escape &, < and > so dialogue text cannot break the SSML markup.
        spoken = html.escape(record['Line'], quote=False)
        ssml = f"<speak><prosody pitch='{pitch}%' rate='{rate}'>{spoken}</prosody></speak>"
        response = tts_client.synthesize_speech(
            input=texttospeech.SynthesisInput(ssml=ssml),
            voice=texttospeech.VoiceSelectionParams(
                language_code='en-US', name=character_voices[character]),
            audio_config=audio_config,
        )
        with open(os.path.join(output_folder, f'{index}.mp3'), 'wb') as out:
            out.write(response.audio_content)


def _generate_scene_image(dialogue_excerpt, image_path):
    """Describe *dialogue_excerpt* with the chat model, render it, and save it to *image_path*."""
    chat = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": _SCENE_SYSTEM_PROMPT},
            {"role": "user", "content": dialogue_excerpt},
        ],
    )
    description = chat.choices[0].message.content
    print(description)
    picture = client.images.generate(
        model="dall-e-3",
        prompt=description,
        size="1792x1024",
        quality="standard",
        n=1,
    )
    urllib.request.urlretrieve(picture.data[0].url, image_path)


def _generate_scene_images(records, image_folder):
    """Create one image per group of dialogue rows, named after the group's last row number."""
    os.makedirs(image_folder, exist_ok=True)
    excerpt = []
    # Row numbers start at 1 so file names match the data-row position in the CSV.
    for row_number, record in enumerate(records, start=1):
        excerpt.append(record['Character'] + ':' + record['Line'] + '\n')
        if len(excerpt) == _LINES_PER_IMAGE:
            prompt = ''.join(excerpt)
            excerpt = []
            print(prompt)
            _generate_scene_image(prompt, os.path.join(image_folder, f'{row_number}.png'))
            time.sleep(_IMAGE_DELAY_SECONDS)
    # Illustrate leftover rows only when there are any; an empty prompt would
    # otherwise overwrite the image of the last full group.
    if excerpt:
        _generate_scene_image(''.join(excerpt), os.path.join(image_folder, f'{len(records)}.png'))


def txtToMp3(txt_file_path):
    """Convert a play script into an annotated CSV, per-line MP3s and scene images.

    The script's first line is the title and its second the author. The rest
    alternates speaker names and spoken lines; a fully parenthesised line is
    attributed to ``NARRATOR``. Everything is written under a directory named
    after the title:

    * ``title.txt`` and ``author.txt``
    * ``<script name>.csv`` with columns Character, Line, Gender, Emotion
    * ``audio_files/<row index>.mp3`` for every dialogue row
    * ``images/<row number>.png`` for every three rows, plus one for leftovers

    Requires the ``HF_TOKEN`` environment variable (Hugging Face Inference
    API) in addition to the module-level OpenAI and Google Cloud credentials.

    Args:
        txt_file_path: Path of the UTF-8 script file to convert.

    Raises:
        ValueError: If the script has fewer than two lines or an empty title.
        RuntimeError: If ``HF_TOKEN`` is not set when a model must be queried.
    """
    title, author, dialogue = _read_script(txt_file_path)

    # exist_ok lets the conversion be re-run for the same play.
    os.makedirs(title, exist_ok=True)
    with open(os.path.join(title, 'title.txt'), 'w', encoding='utf-8') as title_file:
        title_file.write(title)
    with open(os.path.join(title, 'author.txt'), 'w', encoding='utf-8') as author_file:
        author_file.write(author)

    # One gender lookup per character, based on everything that character says.
    lines_by_character = {}
    for character, line in dialogue:
        lines_by_character.setdefault(character, []).append(line)
    genders = {
        character: _predict_gender(character, lines)
        for character, lines in lines_by_character.items()
    }

    records = [
        {
            'Character': character,
            'Line': line,
            'Gender': genders[character],
            'Emotion': _predict_emotion(line),
        }
        for character, line in dialogue
    ]

    # Derive the CSV name from the script's base name so a directory component
    # or a missing '.txt' suffix in txt_file_path cannot produce a bad path.
    script_stem = os.path.splitext(os.path.basename(txt_file_path))[0]
    csv_path = os.path.join(title, script_stem + '.csv')
    with open(csv_path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=_CSV_COLUMNS)
        writer.writeheader()
        writer.writerows(records)
    print(csv_path)

    audio_folder = os.path.join(title, 'audio_files')
    os.makedirs(audio_folder, exist_ok=True)
    _synthesize_audio(records, audio_folder)

    _generate_scene_images(records, os.path.join(title, 'images'))
# Usage example
txt_file_path = 'TheComedyOfErrors.txt'
if __name__ == '__main__':
    # Run the conversion only when executed as a script, so that importing
    # this module does not trigger the text-to-speech and image API calls.
    txtToMp3(txt_file_path)