# Importing necessary packages and classes

from transformers import AutoImageProcessor, AutoModelForImageClassification, BarkModel, BarkProcessor
from IPython.display import display, Javascript, Image, Audio
from google.colab.output import eval_js
from base64 import b64decode
import cv2
import openai
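
# Hedged setup note: recent Colab runtimes usually ship with transformers and
# OpenCV preinstalled, but the openai package often is not. If any import
# above fails, install the missing package first, e.g.:
#   !pip install openai transformers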

# Defining a helper that captures a webcam photo through Colab's JS bridge

def take_photo(filename='photo.jpg', quality=0.8):
  js = Javascript('''
    async function takePhoto(quality) {
      const div = document.createElement('div');
      const capture = document.createElement('button');
      capture.textContent = 'Capture';
      div.appendChild(capture);

      const video = document.createElement('video');
      video.style.display = 'block';
      const stream = await navigator.mediaDevices.getUserMedia({video: true});

      document.body.appendChild(div);
      div.appendChild(video);
      video.srcObject = stream;
      await video.play();

      // Resize the output to fit the video element.
      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

      // Wait for Capture to be clicked.
      await new Promise((resolve) => capture.onclick = resolve);

      const canvas = document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      canvas.getContext('2d').drawImage(video, 0, 0);
      stream.getVideoTracks()[0].stop();
      div.remove();
      return canvas.toDataURL('image/jpeg', quality);
    }
    ''')
  display(js)
  data = eval_js('takePhoto({})'.format(quality))
  binary = b64decode(data.split(',')[1])
  with open(filename, 'wb') as f:
    f.write(binary)
  return filename

# Capturing a snapshot with the Capture button and saving it to disk

try:
  filename = take_photo()
  print('Saved to {}'.format(filename))

  # Show the image which was just taken.
  display(Image(filename))
except Exception as err:
  # Errors will be thrown if the user does not have a webcam or if they do not
  # grant the page permission to access it.
  print(str(err))
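
# Note: take_photo depends on google.colab's JS bridge and only works inside
# Colab. A rough local equivalent using OpenCV (hedged sketch, not part of
# the original script) would be:
#   cap = cv2.VideoCapture(0)
#   ok, frame = cap.read()
#   if ok:
#     cv2.imwrite('photo.jpg', frame)
#   cap.release()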

# Loading a pre-trained ViT dog breed classification model from Hugging Face

image_processor = AutoImageProcessor.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")
dog_breed_model = AutoModelForImageClassification.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")
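
# Optional hedged note: inference here runs fine on CPU; to use a GPU,
# something like the following would be needed (sketch, not original code):
#   import torch
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   dog_breed_model.to(device)
#   # ...and after preprocessing: inputs = {k: v.to(device) for k, v in inputs.items()}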

# Loading the image captured above (take_photo saves it as photo.jpg in the
# working directory, which is /content on Colab)

img_path = 'photo.jpg'

image = cv2.imread(img_path)

# cv2.imread returns BGR, but the ViT preprocessor expects RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Preprocessing the image with the model's paired processor (resizing and normalization)

inputs = image_processor(images=image, return_tensors="pt")

# Running inference with the Hugging Face model

outputs = dog_breed_model(**inputs)
logits = outputs.logits

# Mapping the predicted class index to its human-readable breed label

predicted_class_idx = logits.argmax(-1).item()

# Labels in this model use underscores between words, so swap them for spaces
predicted_breed = dog_breed_model.config.id2label[predicted_class_idx].replace("_", " ")

print("Predicted class:", predicted_breed)

# Specifying the OpenAI API key (substitute your own key; never hardcode or
# commit a real key)

openai.api_key = 'YOUR_OPENAI_API_KEY'

# Defining a helper that queries the ChatGPT API
# (openai.ChatCompletion is the legacy interface from openai<1.0)

def get_completion(prompt, model="gpt-3.5-turbo"):
  messages = [{"role": "user", "content": prompt}]
  response = openai.ChatCompletion.create(
      model=model,
      messages=messages,
      temperature=0,  # deterministic output
  )
  return response.choices[0].message["content"]
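
# Hedged note: with openai>=1.0 the equivalent call looks roughly like this
# (sketch, not part of the original script):
#   from openai import OpenAI
#   client = OpenAI()
#   response = client.chat.completions.create(model=model, messages=messages, temperature=0)
#   return response.choices[0].message.content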

# Asking ChatGPT to describe the predicted breed

prompt = "characteristics and behaviour of " + predicted_breed + " in a paragraph"

response = get_completion(prompt)

print(response)

# Loading an English text-to-speech model (Bark) from Hugging Face

tts_model = BarkModel.from_pretrained("suno/bark-small")
tts_processor = BarkProcessor.from_pretrained("suno/bark-small")

# Preprocessing the text with the Bark processor and generating speech

inputs = tts_processor(response, voice_preset="v2/en_speaker_3")

speech_output = tts_model.generate(**inputs).cpu().numpy()

# Playing back the generated speech at the model's sampling rate

sampling_rate = tts_model.generation_config.sample_rate
Audio(speech_output[0], rate=sampling_rate)
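
# Hedged extra (not in the original script): persist the clip to disk as well.
# scipy is preinstalled in Colab; the output filename is an assumption.
from scipy.io import wavfile
wavfile.write("breed_description.wav", sampling_rate, speech_output[0])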