# Source: tallwhitestck's Hugging Face Space "asl-fingerspelling-recognition"
# (commit f9f6205, "changed image input shape"). The three lines above were
# web-page residue from the HF diff view and have been converted to comments.
from cProfile import label
import cv2
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import gradio as gr
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'space', 'space', 'space']
def recog(img):
    """Classify an ASL fingerspelling sign in a webcam snapshot.

    Parameters
    ----------
    img : PIL.Image
        Snapshot of a hand forming a sign (any size; resized to 64x64,
        the input shape the network was trained on).

    Returns
    -------
    dict
        Mapping of class label -> predicted probability, the format
        expected by the gr.outputs.Label component.
    """
    # Load the Keras model once and cache it on the function object; the
    # original reloaded the .h5 file from disk on every single prediction.
    model = getattr(recog, "_model", None)
    if model is None:
        model = load_model('hack36_2.h5')
        recog._model = model
    # Resize to 64x64 and scale pixel values from [0, 255] to [0, 1].
    frame = cv2.resize(np.asarray(img).copy(), (64, 64))
    scaled = frame / 255
    # Add a leading batch dimension: (1, 64, 64, channels) — TODO confirm
    # the model expects RGB channel order (PIL) rather than BGR (OpenCV).
    batch = np.expand_dims(scaled, axis=0)
    prediction = model.predict(batch).tolist()[0]
    # zip keeps labels and probabilities in lockstep with no hard-coded 29.
    return dict(zip(labels, prediction))
# --- Gradio UI wiring -------------------------------------------------------
title = "ASL Fingerspelling Recognition"
desc = "<p style='text-align:center'>A Gradio demonstration for ASL fingerspelling recognition. Use your webcam to take a snapshot of your hand forming any of the ASL alphabet signs.</p>"

# Renamed from `input`/`output`: the original names shadowed the `input`
# builtin (and `output` mirrored that style).
demo_inputs = [
    gr.inputs.Image(type="pil", source="webcam", label="Image")
]
demo_outputs = [
    gr.outputs.Label(num_top_classes=5, label="")
]

# Bundled example images from the dataset's test set, one per letter.
sample_letters = ['A', 'B', 'E', 'L', 'Y']
examples = [["images/{}_test.jpg".format(letter)] for letter in sample_letters]

# Article sections rendered below the interface.
a1 = "<h2>How to Use</h2><p style='text-align: justify'>For better results, please make sure your hand covers majority of the frame of the image. It is also preferred to have a neutral light background as seen in the examples. You can refer to the images <a href='https://huggingface.co/spaces/tallwhitestck/asl-fingerspelling-recognition/tree/main/images' target='_blank'>here</a> to get an idea of how each letter is signed.</p>"
a2 = "<h2>More information</h2><p style='text-align: justify'>This demo was based on a project implemented for a hackathon. The GitHub repository can be found on <a href='https://github.com/namanmanchanda09/American-Sign-Language-Detection-using-Computer-Vision' target='_blank'>namanmanchanda09/American-Sign-Language-Detection-using-Computer-Vision</a>. The model was trained on <a href='https://www.kaggle.com/datasets/grassknoted/asl-alphabet' target='_blank'>this ASL Alphabet dataset on Kaggle</a> containing 87,000 200x200 images. The examples used in the demo are from the test set of the aforementioned dataset.</p>"
a3 = "<h2>Limitations</h2><p style='text-align: justify'>The model was originally designed to process a continuous feed of images in order to spell out full English words, however, this demo implementation only processes images. Furthermore, since the model only processes images, the letters that require motion such as <b>J</b> and <b>Z</b> may be inaccurate.</p>"

iface = gr.Interface(
    fn=recog,
    title=title,
    description=desc,
    examples=examples,
    inputs=demo_inputs,
    outputs=demo_outputs,
    article=a1 + a2 + a3,
)
iface.launch()