# Importing necessary packages and classes

from transformers import AutoImageProcessor, AutoModelForImageClassification, BarkModel, BarkProcessor
from IPython.display import display, Javascript, Image, Audio
from google.colab.output import eval_js
from base64 import b64decode
import cv2
import openai
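
# Hedged setup note: recent Colab runtimes usually ship with transformers and
# OpenCV preinstalled, but the openai package often is not. If any import
# above fails, install the missing package first, e.g.:
#   !pip install openai transformers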

# Defining a helper that captures a webcam photo through Colab's JS bridge

def take_photo(filename='photo.jpg', quality=0.8):
  js = Javascript('''
    async function takePhoto(quality) {
      const div = document.createElement('div');
      const capture = document.createElement('button');
      capture.textContent = 'Capture';
      div.appendChild(capture);

      const video = document.createElement('video');
      video.style.display = 'block';
      const stream = await navigator.mediaDevices.getUserMedia({video: true});

      document.body.appendChild(div);
      div.appendChild(video);
      video.srcObject = stream;
      await video.play();

      // Resize the output to fit the video element.
      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

      // Wait for Capture to be clicked.
      await new Promise((resolve) => capture.onclick = resolve);

      const canvas = document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      canvas.getContext('2d').drawImage(video, 0, 0);
      stream.getVideoTracks()[0].stop();
      div.remove();
      return canvas.toDataURL('image/jpeg', quality);
    }
    ''')
  display(js)
  data = eval_js('takePhoto({})'.format(quality))
  binary = b64decode(data.split(',')[1])
  with open(filename, 'wb') as f:
    f.write(binary)
  return filename

# Capturing a snapshot with the Capture button and saving it to disk

try:
  filename = take_photo()
  print('Saved to {}'.format(filename))

  # Show the image which was just taken.
  display(Image(filename))
except Exception as err:
  # Errors will be thrown if the user does not have a webcam or if they do not
  # grant the page permission to access it.
  print(str(err))
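
# Note: take_photo depends on google.colab's JS bridge and only works inside
# Colab. A rough local equivalent using OpenCV (hedged sketch, not part of
# the original script) would be:
#   cap = cv2.VideoCapture(0)
#   ok, frame = cap.read()
#   if ok:
#     cv2.imwrite('photo.jpg', frame)
#   cap.release()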

# Loading a pre-trained ViT dog breed classification model from Hugging Face

image_processor = AutoImageProcessor.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")
dog_breed_model = AutoModelForImageClassification.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")
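
# Optional hedged note: inference here runs fine on CPU; to use a GPU,
# something like the following would be needed (sketch, not original code):
#   import torch
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   dog_breed_model.to(device)
#   # ...and after preprocessing: inputs = {k: v.to(device) for k, v in inputs.items()}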

# Loading the image captured above (take_photo saves it as photo.jpg in the
# working directory, which is /content on Colab)

img_path = 'photo.jpg'

image = cv2.imread(img_path)

# cv2.imread returns BGR, but the ViT preprocessor expects RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Preprocessing the image with the model's paired processor (resizing and normalization)

inputs = image_processor(images=image, return_tensors="pt")

# Running inference with the Hugging Face model

outputs = dog_breed_model(**inputs)
logits = outputs.logits

# Mapping the predicted class index to its human-readable breed label

predicted_class_idx = logits.argmax(-1).item()

# Labels in this model use underscores between words, so swap them for spaces
predicted_breed = dog_breed_model.config.id2label[predicted_class_idx].replace("_", " ")

print("Predicted class:", predicted_breed)

# Specifying the OpenAI API key (substitute your own key; never hardcode or
# commit a real key)

openai.api_key = 'YOUR_OPENAI_API_KEY'

# Defining a helper that queries the ChatGPT API
# (openai.ChatCompletion is the legacy interface from openai<1.0)

def get_completion(prompt, model="gpt-3.5-turbo"):
  messages = [{"role": "user", "content": prompt}]
  response = openai.ChatCompletion.create(
      model=model,
      messages=messages,
      temperature=0,  # deterministic output
  )
  return response.choices[0].message["content"]
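
# Hedged note: with openai>=1.0 the equivalent call looks roughly like this
# (sketch, not part of the original script):
#   from openai import OpenAI
#   client = OpenAI()
#   response = client.chat.completions.create(model=model, messages=messages, temperature=0)
#   return response.choices[0].message.content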

# Asking ChatGPT to describe the predicted breed

prompt = "characteristics and behaviour of " + predicted_breed + " in a paragraph"

response = get_completion(prompt)

print(response)

# Loading an English text-to-speech model (Bark) from Hugging Face

tts_model = BarkModel.from_pretrained("suno/bark-small")
tts_processor = BarkProcessor.from_pretrained("suno/bark-small")

# Preprocessing the text with the Bark processor and generating speech

inputs = tts_processor(response, voice_preset="v2/en_speaker_3")

speech_output = tts_model.generate(**inputs).cpu().numpy()

# Playing back the generated speech at the model's sampling rate

sampling_rate = tts_model.generation_config.sample_rate
Audio(speech_output[0], rate=sampling_rate)
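
# Hedged extra (not in the original script): persist the clip to disk as well.
# scipy is preinstalled in Colab; the output filename is an assumption.
from scipy.io import wavfile
wavfile.write("breed_description.wav", sampling_rate, speech_output[0])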