Spaces:

PlayHT
/

roast_your_pic

Running on CPU Upgrade

App Files Files Community

legofan94 committed 16 days ago

Commit

abd10df

•

1 Parent(s): 3d2d856

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -5

app.py CHANGED Viewed

@@ -41,7 +41,11 @@ def generate_roast(image_path):
         model = genai.GenerativeModel(
             model_name="gemini-1.5-flash-002",
             generation_config=generation_config,
-            system_instruction="You are a professional satirist and fashion expert. Roast the provided profile picture in less than 50 words.",
         )
         chat_session = model.start_chat(
@@ -55,14 +59,19 @@ def generate_roast(image_path):
 # Function to convert text to speech with Play.ht
 def text_to_speech(text):
     try:
-        url = "https://api.play.ht/api/v2/tts/stream"
         payload = {
-            "voice": "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
             "output_format": "mp3",
-            "text": text,
         }
         headers = {
-            "accept": "audio/mpeg",
             "content-type": "application/json",
             "Authorization": API_KEY,
             "X-User-ID": USER_ID

         model = genai.GenerativeModel(
             model_name="gemini-1.5-flash-002",
             generation_config=generation_config,
+            system_instruction="Generate a conversation between two women gently roasting the uploaded image in less than 100 words. Please abide by these guidelines. \
+            1. Begin conversation turns with the prefix 'Host: 1' and 'Host: 2'. For example, Host 1: Hello how are you? Host 2: I'm good and yourself? Host 3: Thanks for asking! \
+            2. Use humor, irony, and sarcasm to gently roast the picture \
+            3. Your output should be a well-written text suitable for reading aloud. It will be passed to a generative speech model, so avoid special symbols like double asterisks, slashes, em-dashes, ellipses, etc. Also avoid output that isn't dialogue. \
+            4. Conversation turns should be short and snappy",
         )
         chat_session = model.start_chat(
 # Function to convert text to speech with Play.ht
 def text_to_speech(text):
     try:
+        url = "https://api.play.ai/api/v1/tts/stream"
         payload = {
+            "model": "PlayDialog",
+            "voice": "s3://voice-cloning-zero-shot/adb83b67-8d75-48ff-ad4d-a0840d231ef1/original/manifest.json",
+            "voice2": "s3://voice-cloning-zero-shot/50381567-ff7b-46d2-bfdc-a9584a85e08d/original/manifest.json",
+            "turnPrefix": "Host 1:",
+            "turnPrefix2": "Host 2:",
+            'prompt': None,
+            'prompt2': None,
             "output_format": "mp3",
+            "text": text,
         }
         headers = {
             "content-type": "application/json",
             "Authorization": API_KEY,
             "X-User-ID": USER_ID