# Importing the necessary packages and classes
from transformers import AutoImageProcessor, AutoModelForImageClassification, BarkModel, BarkProcessor
from IPython.display import display, Javascript, Image, Audio
from google.colab.output import eval_js
from base64 import b64decode
import cv2
import openai
import pandas as pd
import time
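# Note: in a fresh Colab runtime some of these packages may be missing; a
# typical install cell (the pin to openai<1.0 is an assumption that matches
# the legacy ChatCompletion API used below) would be:
# !pip install transformers "openai<1.0"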
# Defining a helper that captures a photo from the webcam inside Colab
def take_photo(filename='photo.jpg', quality=0.8):
    js = Javascript('''
        async function takePhoto(quality) {
            const div = document.createElement('div');
            const capture = document.createElement('button');
            capture.textContent = 'Capture';
            div.appendChild(capture);

            const video = document.createElement('video');
            video.style.display = 'block';
            const stream = await navigator.mediaDevices.getUserMedia({video: true});

            document.body.appendChild(div);
            div.appendChild(video);
            video.srcObject = stream;
            await video.play();

            // Resize the output to fit the video element.
            google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

            // Wait for Capture to be clicked.
            await new Promise((resolve) => capture.onclick = resolve);

            const canvas = document.createElement('canvas');
            canvas.width = video.videoWidth;
            canvas.height = video.videoHeight;
            canvas.getContext('2d').drawImage(video, 0, 0);
            stream.getVideoTracks()[0].stop();
            div.remove();
            return canvas.toDataURL('image/jpeg', quality);
        }
    ''')
    display(js)
    data = eval_js('takePhoto({})'.format(quality))
    binary = b64decode(data.split(',')[1])
    with open(filename, 'wb') as f:
        f.write(binary)
    return filename
# Capturing a snapshot with the Capture button and saving it
try:
    filename = take_photo()
    print('Saved to {}'.format(filename))
    # Show the image that was just taken.
    display(Image(filename))
except Exception as err:
    # An error is thrown if the user has no webcam or does not grant the
    # page permission to access it.
    print(str(err))
# Loading the pre-trained dog breed identification model from Hugging Face
image_processor = AutoImageProcessor.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")
dog_breed_model = AutoModelForImageClassification.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")
# Loading the saved image
img_path = 'photo.jpg'  # the file written by take_photo() above
image = cv2.imread(img_path)
# OpenCV loads images as BGR, while the ViT processor expects RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Preprocessing the captured image with the model's own preprocessor
inputs = image_processor(images=image, return_tensors="pt")
# Predicting the output using the model from Hugging Face
outputs = dog_breed_model(**inputs)
logits = outputs.logits
# Finding the predicted class index and its human-readable label
predicted_class_idx = logits.argmax(-1).item()
predicted_label = dog_breed_model.config.id2label[predicted_class_idx]
# Labels use underscores (e.g. "golden_retriever"), so replace them with spaces
breed_name = " ".join(predicted_label.split("_"))
print("Predicted class:", breed_name)
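# Optional sketch: inspecting the model's confidence via its top-5 classes,
# using softmax over the logits (assumes torch is available, which
# transformers already depends on).
import torch
probs = torch.nn.functional.softmax(logits, dim=-1)[0]
top5 = torch.topk(probs, k=5)
for score, idx in zip(top5.values, top5.indices):
    print(dog_breed_model.config.id2label[idx.item()], round(score.item(), 3))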
# Specifying the OpenAI API key (use your own key; never commit a real key)
openai.api_key = 'YOUR_OPENAI_API_KEY'
# Defining a helper that queries the ChatGPT API
# (openai.ChatCompletion exists only in the legacy openai<1.0 SDK)
def get_completion(prompt, model="gpt-3.5-turbo"):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,  # deterministic output
    )
    return response.choices[0].message["content"]
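# Optional sketch: a simple retry wrapper for transient rate limits, using the
# `time` import above (openai.error.RateLimitError exists in the openai<1.0
# SDK; the retry counts and delay are arbitrary).
def get_completion_with_retry(prompt, retries=3, delay=5):
    for _ in range(retries):
        try:
            return get_completion(prompt)
        except openai.error.RateLimitError:
            time.sleep(delay)
    raise RuntimeError("OpenAI API kept rate-limiting after retries")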
# Getting a short breed description from the ChatGPT API
prompt = "characteristics and behaviour of " + breed_name + " in a paragraph"
response = get_completion(prompt)
print(response)
# Loading an English text-to-speech model (Bark) from Hugging Face
tts_model = BarkModel.from_pretrained("suno/bark-small")
tts_processor = BarkProcessor.from_pretrained("suno/bark-small")
# Preprocessing the text with the Bark processor and generating speech
inputs = tts_processor(response, voice_preset="v2/en_speaker_3")
speech_output = tts_model.generate(**inputs).cpu().numpy()
# Playing the generated speech at the model's sampling rate
sampling_rate = tts_model.generation_config.sample_rate
Audio(speech_output[0], rate=sampling_rate)
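# Optional sketch: Audio() only plays inline, so to keep the narration we can
# write it to a WAV file (scipy ships with Colab; the filename is arbitrary).
import scipy.io.wavfile
scipy.io.wavfile.write("breed_description.wav", rate=sampling_rate, data=speech_output[0])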