# Importing the necessary packages and classes
from transformers import AutoImageProcessor, AutoModelForImageClassification, BarkModel, BarkProcessor
from IPython.display import display, Javascript, Image, Audio
from google.colab.output import eval_js
from base64 import b64decode
import cv2
import openai
import os
import pandas as pd
import time
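# Note: this script targets a Google Colab runtime; the webcam capture below
# relies on google.colab.output and the notebook's JavaScript bridge, so it
# will not run under a plain Python interpreter.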
# Capturing a photo from the system webcam via the Colab JavaScript bridge
def take_photo(filename='photo.jpg', quality=0.8):
    js = Javascript('''
        async function takePhoto(quality) {
          const div = document.createElement('div');
          const capture = document.createElement('button');
          capture.textContent = 'Capture';
          div.appendChild(capture);

          const video = document.createElement('video');
          video.style.display = 'block';
          const stream = await navigator.mediaDevices.getUserMedia({video: true});

          document.body.appendChild(div);
          div.appendChild(video);
          video.srcObject = stream;
          await video.play();

          // Resize the output to fit the video element.
          google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

          // Wait for Capture to be clicked.
          await new Promise((resolve) => capture.onclick = resolve);

          const canvas = document.createElement('canvas');
          canvas.width = video.videoWidth;
          canvas.height = video.videoHeight;
          canvas.getContext('2d').drawImage(video, 0, 0);
          stream.getVideoTracks()[0].stop();
          div.remove();
          return canvas.toDataURL('image/jpeg', quality);
        }
        ''')
    display(js)
    data = eval_js('takePhoto({})'.format(quality))
    binary = b64decode(data.split(',')[1])
    with open(filename, 'wb') as f:
        f.write(binary)
    return filename
# Capturing a snapshot with the button above and saving it to disk
try:
    filename = take_photo()
    print('Saved to {}'.format(filename))
    # Show the image which was just taken.
    display(Image(filename))
except Exception as err:
    # Errors are thrown if the user does not have a webcam or does not
    # grant the page permission to access it.
    print(str(err))
# Loading the pre-trained dog breed identification model (ViT) from Hugging Face
image_processor = AutoImageProcessor.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")
dog_breed_model = AutoModelForImageClassification.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")
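# Optional sanity check (not part of the original flow): the checkpoint's
# config carries the index-to-breed-name mapping used during decoding below.
print("Model distinguishes", len(dog_breed_model.config.id2label), "dog breeds")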
# Importing the saved image
img_path='/content/n02088094_60.jpg'
image=cv2.imread(img_path)
# Preprocessing the captured image with the model's paired image processor
inputs = image_processor(images=image, return_tensors="pt")
# Running inference with the Hugging Face model
outputs = dog_breed_model(**inputs)
logits = outputs.logits
# Finding the predicted class index and mapping it to a human-readable label
predicted_class_idx = logits.argmax(-1).item()
predicted_label = dog_breed_model.config.id2label[predicted_class_idx]
# Labels use underscores (e.g. "golden_retriever"); replace them with spaces
predicted_label = " ".join(predicted_label.split("_"))
print("Predicted class:", predicted_label)
# Specifying the OpenAI API key (read from the environment rather than
# hard-coding a secret in the source)
openai.api_key = os.environ["OPENAI_API_KEY"]
# Helper that queries the ChatGPT API for a completion
def get_completion(prompt, model="gpt-3.5-turbo"):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,  # deterministic output
    )
    return response.choices[0].message["content"]
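# Note: openai.ChatCompletion.create is the legacy (openai<1.0) interface;
# newer releases of the openai package expose the same functionality as
# client.chat.completions.create on an openai.OpenAI() client.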
# Getting a short description of the predicted breed from the ChatGPT API
prompt = "characteristics and behaviour of " + predicted_label + " in a paragraph"
response = get_completion(prompt)
print(response)
# Loading an English text-to-speech model (Bark) from Hugging Face
tts_model = BarkModel.from_pretrained("suno/bark-small")
tts_processor = BarkProcessor.from_pretrained("suno/bark-small")
# Converting the breed description to speech with the Bark processor and model
inputs = tts_processor(response, voice_preset="v2/en_speaker_3")
speech_output = tts_model.generate(**inputs).cpu().numpy()
# Playing back the generated speech at the model's native sampling rate
sampling_rate = tts_model.generation_config.sample_rate
Audio(speech_output[0], rate=sampling_rate)
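# Optional: persist the generated speech to disk as well; a minimal sketch
# using scipy (the output filename is an assumption, not from the original).
from scipy.io import wavfile
wavfile.write("dog_description.wav", sampling_rate, speech_output[0])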