import os
import google.generativeai as genai
import gradio as gr
import requests
from moviepy.editor import AudioFileClip, ImageClip, CompositeVideoClip
from PIL import Image

# Configure Google Gemini API
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# Play.ht API keys
API_KEY = os.getenv('PLAY_API_KEY')
USER_ID = os.getenv('PLAY_USER_ID')

# Ensure compatibility with updated PIL library
if not hasattr(Image, 'ANTIALIAS'):  # Image.ANTIALIAS is deprecated; LANCZOS is the replacement
    Image.ANTIALIAS = Image.LANCZOS
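# (This shim matters because moviepy 1.x's resize effect still references
# Image.ANTIALIAS, which newer Pillow releases removed; without it, the logo
# resize further down would raise an AttributeError on recent Pillow versions.)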
# Theme selection
theme = gr.themes.Base(
    primary_hue="emerald",
)
# Function to upload an image to Gemini and return the uploaded file handle
def upload_to_gemini(path, mime_type="image/jpeg"):
    file = genai.upload_file(path, mime_type=mime_type)
    return file
# Function to generate the flirty "rizz" dialogue for an image with Gemini
def generate_roast(image_path):
    try:
        uploaded_file = upload_to_gemini(image_path)
        generation_config = {
            "temperature": 1,
            "top_p": 0.95,
            "top_k": 40,
            "max_output_tokens": 8192,
            "response_mime_type": "text/plain",
        }
        model = genai.GenerativeModel(
            model_name="gemini-1.5-flash-002",
            generation_config=generation_config,
            system_instruction="Generate a conversation between two women flirtatiously complimenting the uploaded image in less than 100 words. Please abide by these guidelines. \
                1. Begin conversation turns with the prefixes 'Host 1:' and 'Host 2:'. For example, Host 1: Hello, how are you? Host 2: I'm good, and yourself? \
                2. Use humor, irony, and sarcasm to entertain and compliment. \
                3. Your output should be well-written text suitable for reading aloud. It will be passed to a generative speech model, so avoid special symbols like double asterisks, slashes, em-dashes, and ellipses. Also avoid any output that isn't dialogue. \
                4. Conversation turns should be short and snappy.",
        )
        chat_session = model.start_chat(
            history=[{"role": "user", "parts": [uploaded_file]}]
        )
        response = chat_session.send_message("Rizz this image!")
        return response.text
    except Exception as e:
        return f"Error generating rizz: {e}"
# Function to convert text to speech with Play.ht
def text_to_speech(text):
    try:
        url = "https://api.play.ai/api/v1/tts/stream"
        payload = {
            "model": "PlayDialog",
            "voice": "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
            "voice2": "s3://voice-cloning-zero-shot/fdb74aec-ede9-45f8-ad87-71cb45f01816/original/manifest.json",
            "turnPrefix": "Host 1:",
            "turnPrefix2": "Host 2:",
            "prompt": None,
            "prompt2": None,
            "output_format": "mp3",
            "text": text,
        }
        headers = {
            "content-type": "application/json",
            "Authorization": API_KEY,
            "X-User-ID": USER_ID
        }
        response = requests.post(url, json=payload, headers=headers)
        if response.status_code == 200:
            audio_path = "output_audio.mp3"
            with open(audio_path, "wb") as audio_file:
                audio_file.write(response.content)
            return audio_path
        else:
            return f"Error generating audio: {response.status_code} - {response.text}"
    except Exception as e:
        return f"Error generating audio: {e}"
# Function to create a video from the image and audio, with a logo overlay
def create_video(image, audio):
    try:
        # Load the audio file
        audio_clip = AudioFileClip(audio)

        # Load the main image and set its duration to match the audio
        image_clip = ImageClip(image).set_duration(audio_clip.duration)

        # Load the logo image, resize it, and position it at the bottom center
        # logo = ImageClip("Logo.png").resize(height=75)  # Adjust the height as needed
        logo = ImageClip("PlayAI-Logo-RIZZ-URL.png").resize(height=75)  # Adjust the height as needed
        logo = logo.margin(bottom=10, opacity=0).set_position(("center", "bottom")).set_duration(audio_clip.duration)

        # Create a composite video with the main image and the logo overlay
        video_clip = CompositeVideoClip([image_clip, logo]).set_audio(audio_clip)

        # Save the video to a temporary file
        output_path = "/tmp/output_video_with_logo.mp4"
        video_clip.write_videofile(
            output_path,
            fps=30,
            codec="libx264",
            audio_codec="aac",
            preset="slow",
            ffmpeg_params=["-b:v", "2000k"]  # Adjust bitrate if needed
        )
        return output_path
    except Exception as e:
        return f"Error generating video: {e}"
# Function to process all steps at once
def process_roast(image_path):
    roast_text = generate_roast(image_path)
    audio_path = text_to_speech(roast_text)
    video_path = create_video(image_path, audio_path)
    return roast_text, audio_path, video_path
# Gradio Interface
with gr.Blocks(theme=theme) as demo:
    gr.Markdown("# Get Rizzed, Ready?")
    gr.Markdown("Upload an image, click 'Rizz Image', and the AI will roast it.")
    with gr.Row():
        image_input = gr.Image(type="filepath", label="Upload Image")
        with gr.Column():
            output_text = gr.Textbox(label="Roast Text")
            audio_output = gr.Audio(label="Roast Audio")
            video_output = gr.Video(label="Roast Video")

    # Single button to handle all actions
    roast_button = gr.Button("Rizz Image")
    roast_button.click(process_roast, inputs=image_input, outputs=[output_text, audio_output, video_output])

    gr.Examples(
        examples=[["elon_musk.png"], ["jensen_huang.png"]],
        inputs=image_input
    )

# Launch the app
demo.launch(debug=True)
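# Minimal local smoke test, a sketch assuming GEMINI_API_KEY, PLAY_API_KEY, and
# PLAY_USER_ID are set in the environment, an example image such as
# "elon_musk.png" sits next to this file, and the demo.launch call above is
# temporarily commented out so importing the module does not start the UI:
#
#   from app import process_roast
#   text, audio, video = process_roast("elon_musk.png")
#   print(text, audio, video)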