File size: 4,747 Bytes
a8467fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import gradio as gr
import os
import json
import uuid
from pprint import pprint
import dotenv
dotenv.load_dotenv()

from PIL import Image

from llm import answer
from tts import get_audio
from speechtovid import get_video
from gender import get_gender

# Secret word that gates access to the app, read from the environment
# (dotenv.load_dotenv() above runs first). None when SECRET_WORD is not set.
SECRET = os.getenv("SECRET_WORD")

# Languages offered in the UI dropdown; the chosen one is interpolated into
# the LLM system prompt.
supported_languages = [
    "English", "Chinese", "Spanish", "Hindi", "Portuguese", "French",
    "German", "Japanese", "Arabic", "Korean", "Indonesian", "Italian",
    "Dutch", "Turkish", "Polish", "Swedish", "Filipino", "Malay", "Russian",
    "Romanian", "Ukrainian", "Greek", "Czech", "Danish", "Finnish",
    "Bulgarian", "Croatian", "Slovak", "Tamil",
]

# Create dirs for images, audio and video files if they don't exist.
# exist_ok=True avoids the check-then-create race when two processes start
# at the same time.
for _output_dir in ("./files/audio", "./files/images", "./files/video"):
    os.makedirs(_output_dir, exist_ok=True)

# image resizer
def resize_image(input_image_path, file_name, desired_width=300):
    """Scale an image to a fixed width and save it as a PNG.

    The height is derived from the source aspect ratio. The result is
    written to ``./files/images/<file_name>.png``.

    Args:
        input_image_path: Path of the image to open with PIL.
        file_name: Base name (without extension) of the output file.
        desired_width: Width of the output in pixels. Defaults to 300.

    Returns:
        The path of the saved PNG file.
    """
    output_path = f"./files/images/{file_name}.png"

    with Image.open(input_image_path) as im:
        original_width, original_height = im.size
        aspect_ratio = original_width / original_height
        # Clamp to 1px: an extremely wide image would otherwise truncate to a
        # zero height, which resize() rejects.
        new_height = max(1, int(desired_width / aspect_ratio))

        # The PNG writer cannot store these colour modes (e.g. CMYK JPEGs),
        # so bring them to RGB before saving.
        source = im.convert("RGB") if im.mode in ("CMYK", "YCbCr", "HSV") else im

        resized_image = source.resize((desired_width, new_height))
        resized_image.save(output_path)

    return output_path

# main func
def holiday_card(secret, brief, lang, photo):
    """Create a holiday greeting as text, audio and video from a brief and a photo.

    Steps: validate the inputs, resize the photo, pick a voice from the
    detected gender, ask the LLM for a short greeting in ``lang``, then
    generate audio and video from it.

    Args:
        secret: Secret word typed by the user; compared case-insensitively
            (after stripping whitespace) against the SECRET_WORD setting.
        brief: Free-text description of the greeting the user wants.
        lang: Language for the greeting; must be one of
            ``supported_languages``.
        photo: File path of the uploaded photo.

    Returns:
        A ``(text, audio_file, video_url)`` tuple: the greeting text plus
        whatever ``get_audio`` and ``get_video`` return.

    Raises:
        gr.Error: On invalid input, when the model rejects the brief, or
            when any generation step fails.
    """
    # SECRET is None when SECRET_WORD is unset; in that case (or when it is
    # blank) no input can match, instead of crashing or accepting whitespace.
    expected_secret = (SECRET or "").strip().lower()
    if not secret or not expected_secret or secret.strip().lower() != expected_secret:
        raise gr.Error("Please use the correct secret word!")

    if not brief:
        raise gr.Error("Please enter the kind of greeting you want to create!")

    if not photo:
        raise gr.Error("Please upload a photo!")

    # lang is interpolated into the system prompt below, so only accept the
    # values the dropdown offers.
    if lang not in supported_languages:
        raise gr.Error("Please choose one of the supported languages!")

    # generate a unique id for this greeting
    uid = str(uuid.uuid4())

    # resize the image, otherwise it will be too big
    resized_photo = resize_image(photo, uid)

    # get the gender of the person in the photo - so that we can choose the voice
    gender = get_gender(resized_photo)
    voice = 'Bella' if gender == 'female' else 'Antoni'

    # generate the greeting
    system_prompt = f'''
You are a native {lang} copywriter with an excellent sense of humour. You help people write text for their holiday voice messages that they will send to their friends and colleagues. You take the user brief and then write a short, joyful, funny and beautiful short speech in {lang} that people say when wishing their colleagues and friends a happy 2024. It shouldn't be more than 2-3 sentences long. Please respond with valid JSON.

If the client brief was good - please return valid JSON like this:
{{
    "status": "OK",
    "text": "your greeting text here"
}}

If the client brief is inappropriate - please return valid JSON like this:
{{
    "status": "ERROR",
    "reason": "your reason for what is wrong with the brief"
}}

Please alsways return valid JSON and nothing else
'''

    # get the answer from the model
    answer_text = answer(
        system_message=system_prompt,
        user_message=brief
    )

    pprint(answer_text)

    # JSONDecodeError is a ValueError; TypeError covers a non-string reply.
    try:
        answer_data = json.loads(answer_text)
    except (TypeError, ValueError) as e:
        pprint(e)
        raise gr.Error("Sorry, something went wrong with the AI. Please try again later") from e

    # Valid JSON that is not an object (e.g. a list or a bare string) has no
    # .get(), so treat it like any other malformed reply.
    if not isinstance(answer_data, dict):
        raise gr.Error("Sorry, something went wrong with the AI. Please try again later")

    if answer_data.get('status') == 'ERROR':
        reason = answer_data.get('reason')
        # Fall back to a generic message when the model gives no reason.
        raise gr.Error(str(reason) if reason else "Sorry, this brief can't be used. Please try a different one")

    text = answer_data.get('text')
    if not text:
        # Without greeting text there is nothing to turn into audio.
        raise gr.Error("Sorry, something went wrong with the AI. Please try again later")

    # now get audio
    try:
        audio_file = get_audio(uid, text, voice)
    except Exception as e:
        pprint(e)
        raise gr.Error("Sorry, something went wrong with the audio generation AI. Please try again later") from e
    pprint(audio_file)

    # now get video
    try:
        video_url = get_video(uid, resized_photo, audio_file)
    except Exception as e:
        pprint(e)
        raise gr.Error("Sorry, something went wrong with the video generation AI. Please try again later") from e

    return text, audio_file, video_url


# Gradio UI: wire holiday_card to its widgets and launch when run as a script.

# Input widgets, in the same order as holiday_card's parameters
# (secret, brief, lang, photo).
inputs = [
    gr.Textbox(
        lines=1,
        label="What is the secret word?",
    ),
    gr.Textbox(
        lines=5,
        label="What do you want your holiday greeting to be all about?",
    ),
    gr.Dropdown(
        supported_languages,
        label="What language would you like your holiday greeting to be in?",
        value="French",
    ),
    gr.Image(
        type="filepath",
        label="Upload Image",
    ),
]

# Output widgets, in the same order as holiday_card's return tuple
# (text, audio_file, video_url).
outputs = [
    gr.Textbox(
        lines=5,
        label="Your Holiday Greeting Text",
    ),
    gr.Audio(
        type="filepath",
        label="Your Holiday Greeting Audio",
    ),
    gr.Video(
        label="Your Holiday Greeting Video",
    ),
]

demo = gr.Interface(
    fn=holiday_card,
    inputs=inputs,
    outputs=outputs,
    allow_flagging="never",
)

# Turn on Gradio's request queue before the app is launched.
demo.queue()

if __name__ == "__main__":
    demo.launch()