# Importing the necessary packages and classes
from transformers import AutoImageProcessor, AutoModelForImageClassification
from transformers import BarkModel, BarkProcessor
from IPython.display import display, Javascript, Image, Audio
from google.colab.output import eval_js
from base64 import b64decode
import cv2
import openai
import pandas as pd
import time
# Defining a helper that captures a photo from the webcam inside Colab
def take_photo(filename='photo.jpg', quality=0.8):
    js = Javascript('''
        async function takePhoto(quality) {
            const div = document.createElement('div');
            const capture = document.createElement('button');
            capture.textContent = 'Capture';
            div.appendChild(capture);

            const video = document.createElement('video');
            video.style.display = 'block';
            const stream = await navigator.mediaDevices.getUserMedia({video: true});

            document.body.appendChild(div);
            div.appendChild(video);
            video.srcObject = stream;
            await video.play();

            // Resize the output to fit the video element.
            google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

            // Wait for Capture to be clicked.
            await new Promise((resolve) => capture.onclick = resolve);

            const canvas = document.createElement('canvas');
            canvas.width = video.videoWidth;
            canvas.height = video.videoHeight;
            canvas.getContext('2d').drawImage(video, 0, 0);
            stream.getVideoTracks()[0].stop();
            div.remove();
            return canvas.toDataURL('image/jpeg', quality);
        }
    ''')
    display(js)
    data = eval_js('takePhoto({})'.format(quality))
    binary = b64decode(data.split(',')[1])
    with open(filename, 'wb') as f:
        f.write(binary)
    return filename
# Capturing a snapshot with the Capture button and saving it
try:
    filename = take_photo()
    print('Saved to {}'.format(filename))
    # Show the image which was just taken.
    display(Image(filename))
except Exception as err:
    # Errors will be thrown if the user does not have a webcam or if they do not
    # grant the page permission to access it.
    print(str(err))
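# Untested sketch, not part of the original flow: outside Colab, the same capture
# step could be done directly with OpenCV instead of the Javascript helper above:
#   cap = cv2.VideoCapture(0)      # open the default webcam
#   ok, frame = cap.read()         # grab a single frame
#   if ok:
#       cv2.imwrite('photo.jpg', frame)
#   cap.release()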
# Loading the pre-trained dog breed identification model (ViT) and its image processor
image_processor = AutoImageProcessor.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")
dog_breed_model = AutoModelForImageClassification.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")
# Loading the image to classify (here a sample dog photo; the captured photo.jpg could be used instead)
img_path = '/content/n02088094_60.jpg'
image = cv2.imread(img_path)
# OpenCV loads images in BGR order; converting to RGB before preprocessing
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Preprocessing the image using the model's pre-trained image processor
inputs = image_processor(images=image, return_tensors="pt")
# Running the model from Hugging Face and reading out the classification logits
outputs = dog_breed_model(**inputs)
logits = outputs.logits
# Finding the predicted class index and its human-readable label
predicted_class_idx = logits.argmax(-1).item()
predicted_class_actual = dog_breed_model.config.id2label[predicted_class_idx]
breed_name = predicted_class_actual.replace("_", " ")
print("Predicted class:", breed_name)
# Specifying the OpenAI API key
openai.api_key = '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'
# Defining a helper that queries the ChatGPT API with a given prompt
def get_completion(prompt, model="gpt-3.5-turbo"):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,
    )
    return response.choices[0].message["content"]
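# Note (assumption about library versions): openai.ChatCompletion is the pre-1.0
# interface of the openai package; with openai>=1.0 the equivalent call would be
# roughly:
#   from openai import OpenAI
#   client = OpenAI(api_key=openai.api_key)
#   resp = client.chat.completions.create(model=model, messages=messages, temperature=0)
#   resp.choices[0].message.content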
# Getting a short breed description from the ChatGPT API
prompt = "characteristics and behaviour of " + breed_name + " in a paragraph"
response = get_completion(prompt)
print(response)
# Importing an English text-to-speech model (Bark) from Hugging Face
tts_model = BarkModel.from_pretrained("suno/bark-small")
tts_processor = BarkProcessor.from_pretrained("suno/bark-small")
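# Optional note (assumption about the runtime): Bark generation is much faster on a
# GPU when one is available; the model and the processed inputs created below would
# both need to live on the same device, e.g.:
#   import torch
#   if torch.cuda.is_available():
#       tts_model = tts_model.to("cuda")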
# Preprocessing the text with the Bark processor and generating speech from the model
inputs = tts_processor(response, voice_preset="v2/en_speaker_3")
speech_output = tts_model.generate(**inputs).cpu().numpy()
# Playing back the generated speech at the model's sampling rate
sampling_rate = tts_model.generation_config.sample_rate
Audio(speech_output[0], rate=sampling_rate)
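# Optional sketch (assumes scipy is installed in the runtime): the generated audio
# could also be written to disk as a WAV file:
#   from scipy.io import wavfile
#   wavfile.write("breed_description.wav", rate=sampling_rate, data=speech_output[0])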