kivilaid committed on
Commit
a5f1a61
1 Parent(s): 912e7a2

Upload 2 files

Browse files
Files changed (2) hide show
  1. gpt4v.py +47 -0
  2. gpttts.py +47 -0
gpt4v.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from openai import OpenAI
3
+
4
+
5
class GPT4Vision:
    """Small wrapper that sends an image plus a text prompt to an OpenAI chat model."""

    # Media type used when the image type cannot be inferred from the file
    # name. It matches what this class sent for every image before the type
    # was detected, so unknown extensions keep their previous behavior.
    DEFAULT_MIME_TYPE = "image/png"

    def __init__(self, client=None):
        """
        Create the wrapper.

        :param client: Optional, already constructed OpenAI client. When it is
            omitted a client is created with ``OpenAI()``.
        """
        self.client = client if client is not None else OpenAI()

    @staticmethod
    def _guess_mime_type(image_path):
        """Return the image MIME type implied by the file name, or the PNG fallback."""
        # Function-scope import keeps this block self-contained.
        import mimetypes

        guessed, _ = mimetypes.guess_type(str(image_path))
        # Only trust the guess when it is an image type; anything else
        # (unknown extension, non-image extension) falls back to the default.
        if guessed and guessed.startswith("image/"):
            return guessed
        return GPT4Vision.DEFAULT_MIME_TYPE

    def encode_image(self, image_path):
        """
        Encode the image to base64 format.

        :param image_path: Path to the image file.
        :return: Base64 encoded string of the image.
        """
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    def describe(
        self,
        image_path,
        user_message,
        model="gpt-4-vision-preview",
        max_tokens=1000,
    ):
        """
        Ask the chat model about an image.

        The image is read from disk, base64-encoded and embedded in the request
        as a ``data:`` URL whose media type is inferred from the file name.

        :param image_path: Path to the image file.
        :param user_message: Custom text message to send as user input.
        :param model: Name of the chat model to call. Defaults to the value
            that used to be hard-coded here.
            NOTE(review): preview model names may be retired upstream - confirm.
        :param max_tokens: Upper bound on generated tokens passed to the API.
        :return: The first element of ``response.choices`` (not the whole
            response object).
        """
        base64_image = self.encode_image(image_path)
        mime_type = self._guess_mime_type(image_path)

        response = self.client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_message},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=max_tokens,
        )
        return response.choices[0]
gpttts.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from openai import OpenAI
3
+
4
class GPTTTS:
    """Text-to-speech helper that calls ``audio.speech.create`` on a supplied client."""

    # Fallbacks applied when the caller passes an empty model or voice.
    DEFAULT_MODEL = "tts-1-hd"
    DEFAULT_VOICE = "alloy"
    # File name used (next to this module) when no output path is given.
    DEFAULT_FILENAME = "speech.mp3"

    def __init__(self, client):
        """
        Store the client used for all speech requests.

        :param client: Object exposing ``audio.speech.create(...)``, e.g. an
            ``OpenAI`` client instance.
        """
        self.client = client

    @staticmethod
    def _resolve_output_path(output_path):
        """Return *output_path* as a ``Path``, or the default file beside this module."""
        if output_path is None:
            return Path(__file__).parent / GPTTTS.DEFAULT_FILENAME
        return Path(output_path)

    def generate_speech(
        self,
        text="No text",
        model="tts-1-hd",
        voice="alloy",
        output_path=None,
    ):
        """
        Generate speech from text using OpenAI's text-to-speech API.

        :param text: The text to convert to speech. Defaults to "No text".
        :param model: The TTS model to use. Empty values fall back to "tts-1-hd".
        :param voice: The voice to use. Empty values fall back to "alloy".
        :param output_path: Where to write the audio. When omitted, the file
            ``speech.mp3`` in this module's directory is used, which means
            successive calls overwrite each other. The parent directory is
            not created here.
        :return: Path to the generated speech file.
        """
        # Treat empty/None model and voice as "use the default".
        model = model or self.DEFAULT_MODEL
        voice = voice or self.DEFAULT_VOICE

        speech_file_path = self._resolve_output_path(output_path)

        response = self.client.audio.speech.create(
            model=model,
            voice=voice,
            input=text,
        )

        # Persist the returned audio to disk.
        response.stream_to_file(speech_file_path)

        return speech_file_path
35
+
36
# Example usage
if __name__ == "__main__":
    # Hand a freshly constructed OpenAI client to the TTS wrapper.
    speaker = GPTTTS(OpenAI())

    # Request settings for the demo sentence.
    request = {
        "text": "Today is a wonderful day to build something people love!",
        "model": "tts-1-hd",
        "voice": "alloy",
    }

    # Synthesize the audio and report where it was written.
    output = speaker.generate_speech(**request)
    print(f"Speech generated at: {output}")