# Importing necessary packages and classes
from transformers import AutoImageProcessor, AutoModelForImageClassification
from transformers import BarkModel, BarkProcessor
from IPython.display import display, Javascript, Image, Audio
from google.colab.output import eval_js
from base64 import b64decode
import cv2
import openai
import pandas as pd
import time

# Defining the camera capture helper (runs JavaScript in the Colab output frame)
def take_photo(filename='photo.jpg', quality=0.8):
  js = Javascript('''
    async function takePhoto(quality) {
      const div = document.createElement('div');
      const capture = document.createElement('button');
      capture.textContent = 'Capture';
      div.appendChild(capture);

      const video = document.createElement('video');
      video.style.display = 'block';
      const stream = await navigator.mediaDevices.getUserMedia({video: true});

      document.body.appendChild(div);
      div.appendChild(video);
      video.srcObject = stream;
      await video.play();

      // Resize the output to fit the video element.
      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

      // Wait for Capture to be clicked.
      await new Promise((resolve) => capture.onclick = resolve);

      const canvas = document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      canvas.getContext('2d').drawImage(video, 0, 0);
      stream.getVideoTracks()[0].stop();
      div.remove();
      return canvas.toDataURL('image/jpeg', quality);
    }
    ''')
  display(js)
  data = eval_js('takePhoto({})'.format(quality))
  binary = b64decode(data.split(',')[1])
  with open(filename, 'wb') as f:
    f.write(binary)
  return filename

# Capturing a snapshot with the Capture button and saving it
try:
  filename = take_photo()
  print('Saved to {}'.format(filename))
  # Show the image which was just taken.
  display(Image(filename))
except Exception as err:
  # Errors will be thrown if the user does not have a webcam or if they do not
  # grant the page permission to access it.
  print(str(err))
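# Optional: a minimal sketch of the same capture step for running this script
# outside Colab (an assumption -- the code above depends on google.colab and the
# browser webcam). It uses OpenCV's VideoCapture; the device index 0 and the
# name take_photo_local are hypothetical.
def take_photo_local(filename='photo.jpg'):
  cap = cv2.VideoCapture(0)        # open the default webcam
  ok, frame = cap.read()           # grab a single frame
  cap.release()
  if not ok:
    raise RuntimeError('Could not read a frame from the webcam')
  cv2.imwrite(filename, frame)     # frame is BGR, which imwrite expects
  return filename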
# Using the pre-trained dog-breed identification model from Hugging Face
image_processor = AutoImageProcessor.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")
dog_breed_model = AutoModelForImageClassification.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")

# Loading the saved image (the snapshot captured above)
img_path = filename
image = cv2.imread(img_path)
# cv2.imread returns BGR; convert to RGB, which the ViT image processor expects
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Preprocessing the captured image with the model's own preprocessor
inputs = image_processor(images=image, return_tensors="pt")

# Predicting the output with the Hugging Face model
outputs = dog_breed_model(**inputs)
logits = outputs.logits

# Finding the predicted class index and its human-readable label
predicted_class_idx = logits.argmax(-1).item()
predicted_breed = dog_breed_model.config.id2label[predicted_class_idx].replace("_", " ")
print("Predicted class:", predicted_breed)

# Specifying the OpenAI API key (never publish a real key; load it from an
# environment variable or a secrets manager instead of hard-coding it)
openai.api_key = 'YOUR_OPENAI_API_KEY'

# Helper that queries the ChatGPT API (uses the pre-1.0 openai SDK interface)
def get_completion(prompt, model="gpt-3.5-turbo"):
  messages = [{"role": "user", "content": prompt}]
  response = openai.ChatCompletion.create(
    model=model,
    messages=messages,
    temperature=0,  # deterministic output
  )
  return response.choices[0].message["content"]

# Asking the ChatGPT API for a short description of the predicted breed
prompt = "characteristics and behaviour of " + predicted_breed + " in a paragraph"
response = get_completion(prompt)
print(response)

# Importing an English text-to-speech model from Hugging Face
tts_model = BarkModel.from_pretrained("suno/bark-small")
tts_processor = BarkProcessor.from_pretrained("suno/bark-small")

# Preprocessing the text with the imported preprocessor and generating speech
inputs = tts_processor(response, voice_preset="v2/en_speaker_3")
speech_output = tts_model.generate(**inputs).cpu().numpy()

# Playing the generated speech
sampling_rate = tts_model.generation_config.sample_rate
Audio(speech_output[0], rate=sampling_rate)
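# Optional: a minimal sketch for saving the generated speech to a WAV file so it
# can be played outside the notebook (an assumption -- the code above only plays
# it inline). scipy ships with Colab; the output filename is hypothetical.
import scipy.io.wavfile
scipy.io.wavfile.write("dog_breed_description.wav", rate=sampling_rate, data=speech_output[0])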