Spaces:
PlayHT
/
Running on CPU Upgrade

File size: 5,976 Bytes
69617e8
 
1101b16
0ed5bd6
d4daed1
eeaae00
 
1101b16
69617e8
 
 
 
b326bed
 
1101b16
346474a
 
 
 
 
44b7d9c
0ed5bd6
 
 
 
0b47c5d
69617e8
 
 
 
 
44b7d9c
 
 
 
 
 
 
 
 
 
 
 
cb02384
710b0e6
cb02384
01034c6
 
44b7d9c
 
 
 
 
b7ee106
44b7d9c
 
b7ee106
69617e8
0ed5bd6
99f3aa9
44b7d9c
53296c8
44b7d9c
53296c8
44b7d9c
53296c8
 
 
710b0e6
 
44b7d9c
422b172
44b7d9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b47c5d
d4daed1
 
44b7d9c
d4daed1
 
44b7d9c
d4daed1
 
 
 
b6a6b23
 
3d2d856
d4daed1
 
 
eeaae00
 
d4daed1
 
 
eeaae00
 
 
 
 
 
 
 
d4daed1
 
44b7d9c
 
d652179
404f122
 
 
 
d4daed1
404f122
 
0ed5bd6
d652179
b7ee106
 
404f122
69617e8
44b7d9c
d92281e
404f122
 
 
 
44b7d9c
404f122
b7ee106
404f122
1101b16
1f67cec
 
 
 
df76205
 
69617e8
d652179
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import os
import google.generativeai as genai
import gradio as gr
import requests
from moviepy.editor import AudioFileClip, ImageClip, CompositeVideoClip
from PIL import Image


# Google Gemini client setup; the API key is read from the environment
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

# Text-to-speech credentials, also read from the environment
# (either value is None when its variable is unset)
API_KEY = os.environ.get('PLAY_API_KEY')
USER_ID = os.environ.get('PLAY_USER_ID')

# Compatibility shim for newer PIL releases that no longer expose
# Image.ANTIALIAS: alias the old name to LANCZOS, its replacement, so code
# that still refers to ANTIALIAS keeps working
# (presumably moviepy's resize -- TODO confirm).
try:
    Image.ANTIALIAS
except AttributeError:
    Image.ANTIALIAS = Image.LANCZOS


# UI theme shared by the Gradio interface
theme = gr.themes.Base(primary_hue="emerald")

# Function to upload an image file to Gemini and return the uploaded file handle
def upload_to_gemini(path, mime_type="image/jpeg"):
    """Upload the file at ``path`` to Gemini and return the uploaded file object.

    Args:
        path: Location of the file to upload.
        mime_type: MIME type reported to Gemini; defaults to ``"image/jpeg"``.

    Returns:
        Whatever ``genai.upload_file`` returns for the uploaded file.
    """
    return genai.upload_file(path, mime_type=mime_type)

def generate_roast(image_path):
    """Ask Gemini for a short two-host dialogue complimenting an image.

    The image is uploaded to Gemini, placed in the chat history, and the model
    is prompted to produce a conversation whose turns start with ``Host 1:``
    and ``Host 2:`` -- the same prefixes ``text_to_speech`` sends to the speech
    API as ``turnPrefix`` / ``turnPrefix2``.

    Args:
        image_path: Path of the image file to describe.

    Returns:
        The generated dialogue text, or a string starting with
        ``"Error generating rizz:"`` when no image was given or any step
        failed. This function never raises.
    """
    import mimetypes  # stdlib; local import keeps this block self-contained

    if not image_path:
        return "Error generating rizz: no image was provided"

    try:
        # Report the real MIME type (e.g. image/png) instead of always
        # claiming JPEG; fall back to JPEG when the extension is unknown.
        guessed_type, _ = mimetypes.guess_type(image_path)
        uploaded_file = upload_to_gemini(
            image_path, mime_type=guessed_type or "image/jpeg"
        )
        generation_config = {
            "temperature": 1,
            "top_p": 0.95,
            "top_k": 40,
            "max_output_tokens": 8192,
            "response_mime_type": "text/plain",
        }
        # The turn prefixes must match the speech API's turnPrefix values
        # exactly ("Host 1:" / "Host 2:"), and the example must only use the
        # two hosts that exist.
        system_instruction = (
            "Generate a conversation between two women flirtatiously "
            "complimenting the uploaded image in less than 100 words. "
            "Please abide by these guidelines. "
            "1. Begin conversation turns with the prefix 'Host 1:' and "
            "'Host 2:'. For example, Host 1: Hello how are you? "
            "Host 2: I'm good and yourself? Host 1: Thanks for asking! "
            "2. Use humor, irony, and sarcasm to entertain and compliment "
            "3. Your output should be a well-written text suitable for "
            "reading aloud. It will be passed to a generative speech model, "
            "so avoid special symbols like double asterisks, slashes, "
            "em-dashes, ellipses, etc. Also avoid output that isn't dialogue. "
            "4. Conversation turns should be short and snappy"
        )
        model = genai.GenerativeModel(
            model_name="gemini-1.5-flash-002",
            generation_config=generation_config,
            system_instruction=system_instruction,
        )

        # The uploaded image is supplied as the first user turn of the chat.
        chat_session = model.start_chat(
            history=[{"role": "user", "parts": [uploaded_file]}]
        )
        response = chat_session.send_message("Rizz this image!")
        return response.text
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return f"Error generating rizz: {e}"

# Function to convert text to speech with Play.ht
def text_to_speech(text):
    """Synthesize a two-voice dialogue from ``text`` and save it as an MP3.

    The text is posted to the ``https://api.play.ai/api/v1/tts/stream``
    endpoint with the ``PlayDialog`` model. Turns are attributed to the two
    voices through the ``"Host 1:"`` / ``"Host 2:"`` turn prefixes.

    Args:
        text: Dialogue to speak.

    Returns:
        ``"output_audio.mp3"`` (relative to the working directory) on success,
        otherwise a string starting with ``"Error generating audio:"``.
        This function never raises.
    """
    # Do not spend an API call on nothing.
    if not isinstance(text, str) or not text.strip():
        return "Error generating audio: no text was provided"

    try:
        url = "https://api.play.ai/api/v1/tts/stream"
        payload = {
            "model": "PlayDialog",
            "voice": "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
            "voice2": "s3://voice-cloning-zero-shot/fdb74aec-ede9-45f8-ad87-71cb45f01816/original/manifest.json",
            "turnPrefix": "Host 1:",
            "turnPrefix2": "Host 2:",
            'prompt': None,
            'prompt2': None,
            "output_format": "mp3",
            "text": text,
        }
        headers = {
            "content-type": "application/json",
            "Authorization": API_KEY,
            "X-User-ID": USER_ID
        }

        # requests waits forever by default; bound both the connect and the
        # read phase so a stalled API call cannot hang the UI handler.
        response = requests.post(
            url, json=payload, headers=headers, timeout=(10, 120)
        )
        if response.status_code != 200:
            return f"Error generating audio: {response.status_code} - {response.text}"

        audio_path = "output_audio.mp3"
        with open(audio_path, "wb") as audio_file:
            audio_file.write(response.content)
        return audio_path
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return f"Error generating audio: {e}"

# Function to create video from image, audio, and add logo overlay
def create_video(image, audio):
    """Render a still-image video with the given audio and a logo overlay.

    The image is shown for the full duration of the audio. The logo file
    ``PlayAI-Logo-RIZZ-URL.png`` is resized to 75 px high and placed at the
    bottom centre with a 10 px transparent bottom margin.

    Args:
        image: The main image, as accepted by ``ImageClip`` (a file path here).
        audio: Path of the audio file to use as the soundtrack.

    Returns:
        ``"/tmp/output_video_with_logo.mp4"`` on success, otherwise a string
        starting with ``"Error generating video:"``. This function never raises.
    """
    if image is None or not audio:
        return "Error generating video: image and audio paths are required"

    # Every clip created below is tracked so it can be released afterwards;
    # leaving them open leaks the audio reader on each call.
    opened_clips = []
    try:
        # Load the audio file
        audio_clip = AudioFileClip(audio)
        opened_clips.append(audio_clip)

        # Load the main image and set its duration to match the audio
        image_clip = ImageClip(image).set_duration(audio_clip.duration)
        opened_clips.append(image_clip)

        # Load the logo image, resize it, and position it at the bottom centre
        logo = ImageClip("PlayAI-Logo-RIZZ-URL.png").resize(height=75)  # Adjust the height as needed
        logo = logo.margin(bottom=10, opacity=0).set_position(("center", "bottom")).set_duration(audio_clip.duration)
        opened_clips.append(logo)

        # Create a composite video with the main image and the logo overlay
        video_clip = CompositeVideoClip([image_clip, logo]).set_audio(audio_clip)
        opened_clips.append(video_clip)

        # Save the video to a temporary file
        output_path = "/tmp/output_video_with_logo.mp4"
        video_clip.write_videofile(
            output_path,
            fps=30,
            codec="libx264",
            audio_codec="aac",
            preset="slow",
            ffmpeg_params=["-b:v", "2000k"],  # Adjust bitrate if needed
        )

        return output_path
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return f"Error generating video: {e}"
    finally:
        # Best-effort cleanup: a failure while closing one clip must not mask
        # the result or prevent the remaining clips from being closed.
        for clip in reversed(opened_clips):
            try:
                clip.close()
            except Exception:
                pass

# Function to process all steps at once
def process_roast(image_path):
    """Run the full pipeline: image -> dialogue text -> audio -> video.

    Each stage reports failure by returning an error string instead of
    raising. The pipeline stops at the first failed stage so that an error
    message is never synthesized as speech or handed to the audio/video
    outputs as if it were a file path.

    Args:
        image_path: Path of the uploaded image.

    Returns:
        A ``(text, audio_path, video_path)`` tuple. On failure the text
        carries the error message and the outputs that could not be produced
        are ``None``.
    """
    roast_text = generate_roast(image_path)
    # generate_roast signals failure with this fixed prefix.
    if roast_text.startswith("Error generating rizz:"):
        return roast_text, None, None

    audio_path = text_to_speech(roast_text)
    # On failure text_to_speech returns an error message, not a file path.
    if not os.path.isfile(audio_path):
        return f"{roast_text}\n\n{audio_path}", None, None

    video_path = create_video(image_path, audio_path)
    # Same convention for create_video; the audio is still usable.
    if not os.path.isfile(video_path):
        return f"{roast_text}\n\n{video_path}", audio_path, None

    return roast_text, audio_path, video_path

# Gradio Interface
# Builds the page when this module is executed: a heading, a row holding the
# image upload next to a column of three outputs, one button that runs the
# whole pipeline, and two clickable example images.
with gr.Blocks(theme=theme) as demo:
    gr.Markdown("# Get Rizzed, Ready?")
    gr.Markdown("Upload an image, click 'Rizz Image', and the AI will roast it")

    with gr.Row():
        # type="filepath": the click handler below (process_roast) is written
        # to receive the upload as a path.
        image_input = gr.Image(type="filepath", label="Upload Image")
        
        # Output components, in the same order as process_roast's return tuple
        with gr.Column():
            output_text = gr.Textbox(label="Roast Text")
            audio_output = gr.Audio(label="Roast Audio")
            video_output = gr.Video(label="Roast Video")

    # Single button to handle all actions
    roast_button = gr.Button("Rizz Image")
    roast_button.click(process_roast, inputs=image_input, outputs=[output_text, audio_output, video_output])

    # Example images that fill image_input; the files are referenced by
    # relative path (presumably shipped next to this script -- verify).
    gr.Examples(
        examples=[["elon_musk.png"], ["jensen_huang.png"]],
        inputs=image_input
    )


# Launch the app
# NOTE: this runs at module level (no __main__ guard), so importing this file
# also starts the app.
demo.launch(debug=True)