# Page-header residue (not part of the program): "Spaces: Sleeping"
# Importing necessary packages and classes
import os
import time
from base64 import b64decode

import cv2
import openai
import pandas as pd
import playsound
from IPython.display import Audio
from IPython.display import Image
from IPython.display import display, Javascript
from transformers import AutoImageProcessor, AutoModelForImageClassification
from transformers import BarkModel, BarkProcessor
# --- Optional input path: grab a single frame from the webcam ---------------
# Disabled by default; uncomment to classify a live capture instead of the
# sample file loaded below.
#
# cap = cv2.VideoCapture(0)  # 0 is usually the default webcam
# if not cap.isOpened():
#     print("Error: Could not open the webcam.")
#     exit()
# while True:
#     ret, frame = cap.read()        # read a frame from the webcam
#     cv2.imshow('Webcam', frame)    # display the captured frame
#     break
# cap.release()                      # release the webcam
# cv2.destroyAllWindows()            # close the OpenCV windows
# image = frame

# --- Default input path: read the sample image from disk --------------------
IMAGE_PATH = 'n02106662_320.jpg'
image = cv2.imread(IMAGE_PATH)
# cv2.imread reports a missing or unreadable file by returning None instead of
# raising, so fail here with a clear message rather than later in the pipeline.
if image is None:
    raise FileNotFoundError(f"Could not read image file: {IMAGE_PATH}")
# Load the pre-trained dog-breed classifier together with its matching image
# preprocessor. Both come from the same Hugging Face checkpoint, so the
# identifier is written once and shared.
_DOG_BREED_CHECKPOINT = "wesleyacheng/dog-breeds-multiclass-image-classification-with-vit"
image_processor = AutoImageProcessor.from_pretrained(_DOG_BREED_CHECKPOINT)
dog_breed_model = AutoModelForImageClassification.from_pretrained(
    _DOG_BREED_CHECKPOINT
)
# Alternative input: load a different saved image from disk.
# img_path = '/content/n02088094_60.jpg'
# image = cv2.imread(img_path)

# `image` comes from OpenCV, which stores channels in BGR order, while the
# Hugging Face image processor treats array input as RGB. Convert first so the
# model does not see swapped red/blue channels.
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Preprocess the image with the processor that matches the model.
inputs = image_processor(images=rgb_image, return_tensors="pt")

# Run the classifier and take the raw per-class scores.
outputs = dog_breed_model(**inputs)
logits = outputs.logits

# Pick the highest-scoring class and map its index to the label string.
predicted_class_idx = logits.argmax(-1).item()
predicted_class_actual = dog_breed_model.config.id2label[predicted_class_idx]

# Labels use underscores between words; turn them into spaces for display and
# for the prompt built later. join() avoids the trailing space a manual
# concatenation loop would leave behind.
predicted_class_actual = predicted_class_actual.split("_")
str1 = " ".join(predicted_class_actual)
print("Predicted class:", str1)
# Specifying the OpenAI API key.
# SECURITY: the key is read from the environment instead of being embedded in
# the source. A secret committed to a file is exposed to everyone who can read
# it; the key that used to be written here should be treated as leaked and
# revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError(
        "OPENAI_API_KEY environment variable is not set; "
        "export it before running this script."
    )
# Thin wrapper around the chat-completion endpoint
def get_completion(prompt, model="gpt-3.5-turbo"):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text used as the content of the one user message.
        model: Chat model identifier forwarded to the API call.

    Returns:
        The ``"content"`` entry of the first choice's message.
    """
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message["content"]
# Ask ChatGPT for a one-paragraph description of the predicted breed.
# `str1` may carry surrounding whitespace from the label formatting step;
# strip it so the prompt does not contain doubled spaces.
breed_name = str1.strip()
prompt = f"characteristics and behaviour of {breed_name} in a paragraph"
response = get_completion(prompt)
print(response)
# Import the gTTS module for text-to-speech conversion
from gtts import gTTS
# Import the os module (kept for compatibility; not used in the lines below)
import os

# Language we want to use
language = 'en'

# Synthesize the description and write it to an MP3 file.
output = gTTS(text=response, lang=language, slow=False)
output.save("output.mp3")

# Show an audio player for the saved file. display() is called explicitly so
# the player renders even when this is not the last expression of a notebook
# cell. The path is passed as `filename` because the clip is an encoded file,
# not raw samples, so no sample rate applies.
display(Audio(filename="output.mp3"))