# Importing the necessary packages and classes
from transformers import AutoImageProcessor, AutoModelForImageClassification, BarkModel, BarkProcessor
from IPython.display import display, Javascript, Image, Audio
from google.colab.output import eval_js
from base64 import b64decode
import cv2
import openai
import os
import pandas as pd
import time
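# Note: this script targets a Google Colab runtime; the webcam capture below
# relies on google.colab.output and the notebook's JavaScript bridge, so it
# will not run under a plain Python interpreter.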
# Capturing a photo from the system webcam via the Colab JavaScript bridge
def take_photo(filename='photo.jpg', quality=0.8):
    js = Javascript('''
        async function takePhoto(quality) {
          const div = document.createElement('div');
          const capture = document.createElement('button');
          capture.textContent = 'Capture';
          div.appendChild(capture);

          const video = document.createElement('video');
          video.style.display = 'block';
          const stream = await navigator.mediaDevices.getUserMedia({video: true});

          document.body.appendChild(div);
          div.appendChild(video);
          video.srcObject = stream;
          await video.play();

          // Resize the output to fit the video element.
          google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

          // Wait for Capture to be clicked.
          await new Promise((resolve) => capture.onclick = resolve);

          const canvas = document.createElement('canvas');
          canvas.width = video.videoWidth;
          canvas.height = video.videoHeight;
          canvas.getContext('2d').drawImage(video, 0, 0);
          stream.getVideoTracks()[0].stop();
          div.remove();
          return canvas.toDataURL('image/jpeg', quality);
        }
        ''')
    display(js)
    data = eval_js('takePhoto({})'.format(quality))
    binary = b64decode(data.split(',')[1])
    with open(filename, 'wb') as f:
        f.write(binary)
    return filename
# Capturing a snapshot with the button above and saving it to disk
try:
    filename = take_photo()
    print('Saved to {}'.format(filename))
    # Show the image which was just taken.
    display(Image(filename))
except Exception as err:
    # Errors are thrown if the user does not have a webcam or does not
    # grant the page permission to access it.
    print(str(err))
# Loading the pre-trained dog breed identification model (ViT) from Hugging Face
image_processor = AutoImageProcessor.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")
dog_breed_model = AutoModelForImageClassification.from_pretrained("wesleyacheng/dog-breeds-multiclass-image-classification-with-vit")
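# Optional sanity check (not part of the original flow): the checkpoint's
# config carries the index-to-breed-name mapping used during decoding below.
print("Model distinguishes", len(dog_breed_model.config.id2label), "dog breeds")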
# Importing the saved image
img_path='/content/n02088094_60.jpg'
image=cv2.imread(img_path)
# Preprocessing the captured image with the model's paired image processor
inputs = image_processor(images=image, return_tensors="pt")
# Running inference with the Hugging Face model
outputs = dog_breed_model(**inputs)
logits = outputs.logits
# Finding the predicted class index and mapping it to a human-readable label
predicted_class_idx = logits.argmax(-1).item()
predicted_label = dog_breed_model.config.id2label[predicted_class_idx]
# Labels use underscores (e.g. "golden_retriever"); replace them with spaces
predicted_label = " ".join(predicted_label.split("_"))
print("Predicted class:", predicted_label)
# Specifying the OpenAI API key (read from the environment rather than
# hard-coding a secret in the source)
openai.api_key = os.environ["OPENAI_API_KEY"]
# Helper that queries the ChatGPT API for a completion
def get_completion(prompt, model="gpt-3.5-turbo"):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,  # deterministic output
    )
    return response.choices[0].message["content"]
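# Note: openai.ChatCompletion.create is the legacy (openai<1.0) interface;
# newer releases of the openai package expose the same functionality as
# client.chat.completions.create on an openai.OpenAI() client.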
# Getting a short description of the predicted breed from the ChatGPT API
prompt = "characteristics and behaviour of " + predicted_label + " in a paragraph"
response = get_completion(prompt)
print(response)
# Loading an English text-to-speech model (Bark) from Hugging Face
tts_model = BarkModel.from_pretrained("suno/bark-small")
tts_processor = BarkProcessor.from_pretrained("suno/bark-small")
# Converting the breed description to speech with the Bark processor and model
inputs = tts_processor(response, voice_preset="v2/en_speaker_3")
speech_output = tts_model.generate(**inputs).cpu().numpy()
# Playing back the generated speech at the model's native sampling rate
sampling_rate = tts_model.generation_config.sample_rate
Audio(speech_output[0], rate=sampling_rate)
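# Optional: persist the generated speech to disk as well; a minimal sketch
# using scipy (the output filename is an assumption, not from the original).
from scipy.io import wavfile
wavfile.write("dog_description.wav", sampling_rate, speech_output[0])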