|
from cProfile import label |
|
import cv2 |
|
from tensorflow.keras.models import load_model |
|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
from PIL import Image |
|
import gradio as gr |
|
|
|
# Class names for the 29-class Kaggle "ASL Alphabet" dataset, in the
# alphabetical order Keras assigns class indices (uppercase A-Z sort before
# the lowercase 'del'/'nothing'/'space' folders).
# BUGFIX: the last three entries were all 'space', which produced duplicate
# dict keys in recog() and collapsed/mislabeled the probabilities for the
# 'del' and 'nothing' classes.
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']
|
|
|
def recog(img):
    """Classify an ASL fingerspelling snapshot.

    Parameters
    ----------
    img : PIL.Image.Image
        Webcam snapshot supplied by the Gradio Image input (type="pil").

    Returns
    -------
    dict[str, float]
        Mapping of class label -> predicted probability, one entry per
        model output class (consumed by the Gradio Label output).
    """
    # Load the Keras model once and cache it on the function object:
    # the original reloaded the .h5 file from disk on every prediction,
    # which is pure overhead per webcam snapshot.
    if not hasattr(recog, "_model"):
        recog._model = load_model('hack36_2.h5')
    model = recog._model

    # PIL image -> ndarray; copy() guarantees a writable, contiguous buffer
    # before handing it to OpenCV.
    frame = np.asarray(img).copy()

    # Model expects 64x64 inputs scaled to [0, 1], with a leading batch axis.
    resized = cv2.resize(frame, (64, 64))
    normalized = resized / 255.0
    batch = np.expand_dims(normalized, axis=0)

    prediction = model.predict(batch).tolist()[0]

    # Pair each class probability with its label (replaces a hard-coded
    # range(29) index loop).
    return dict(zip(labels, prediction))
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI definition and launch.
# ---------------------------------------------------------------------------

title = "ASL Fingerspelling Recognition"

desc = "<p style='text-align:center'>A Gradio demonstration for ASL fingerspelling recognition. Use your webcam to take a snapshot of your hand forming any of the ASL alphabet signs.</p>"

# Renamed from `input`/`output` to avoid shadowing the Python builtins.
inputs = [
    gr.inputs.Image(type="pil", source="webcam", label="Image")
]

outputs = [
    gr.outputs.Label(num_top_classes=5, label="")
]

# Pre-bundled test images from the dataset's test split, shown as
# clickable examples under the interface.
sample_letters = ['A', 'B', 'E', 'L', 'Y']
examples = [["images/{}_test.jpg".format(letter)] for letter in sample_letters]

# Article sections rendered below the demo: usage tips, project background,
# and known limitations.
a1 = "<h2>How to Use</h2><p style='text-align: justify'>For better results, please make sure your hand covers majority of the frame of the image. It is also preferred to have a neutral light background as seen in the examples. You can refer to the images <a href='https://huggingface.co/spaces/tallwhitestck/asl-fingerspelling-recognition/tree/main/images' target='_blank'>here</a> to get an idea of how each letter is signed.</p>"

a2 = "<h2>More information</h2><p style='text-align: justify'>This demo was based on a project implemented for a hackathon. The GitHub repository can be found on <a href='https://github.com/namanmanchanda09/American-Sign-Language-Detection-using-Computer-Vision' target='_blank'>namanmanchanda09/American-Sign-Language-Detection-using-Computer-Vision</a>. The model was trained on <a href='https://www.kaggle.com/datasets/grassknoted/asl-alphabet' target='_blank'>this ASL Alphabet dataset on Kaggle</a> containing 87,000 200x200 images. The examples used in the demo are from the test set of the aforementioned dataset.</p>"

a3 = "<h2>Limitations</h2><p style='text-align: justify'>The model was originally designed to process a continuous feed of images in order to spell out full English words, however, this demo implementation only processes images. Furthermore, since the model only processes images, the letters that require motion such as <b>J</b> and <b>Z</b> may be inaccurate.</p>"

iface = gr.Interface(
    fn=recog,
    title=title,
    description=desc,
    examples=examples,
    inputs=inputs,
    outputs=outputs,
    article=a1 + a2 + a3,
)

iface.launch()