Spaces:
Runtime error
Runtime error
BilalSardar
committed on
Commit
•
56c7749
1
Parent(s):
bfac2f7
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import tensorflow as tf
|
3 |
+
from tensorflow.keras.models import Sequential
|
4 |
+
from tensorflow.keras.optimizers import legacy
|
5 |
+
from tensorflow.keras.layers import Conv3D, LSTM, Dense, Dropout, Bidirectional, MaxPool3D, Activation, Reshape, SpatialDropout3D, BatchNormalization, TimeDistributed, Flatten
|
6 |
+
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
|
7 |
+
|
8 |
+
def load_alignments(path: str):
    """Read an alignment file and encode its tokens as character ids.

    Each line is split on whitespace and its third field is taken as the
    token. Tokens equal to ``'sil'`` are dropped. The remaining tokens are
    each preceded by a single space, split into characters and passed
    through the module-level ``char_to_num`` lookup.

    Args:
        path: Path of the alignment file to read.

    Returns:
        The output of ``char_to_num`` for the character sequence, with the
        first element (the leading space) sliced off.
    """
    # The original return annotation referenced `List`, which is never
    # imported in this file and raised NameError when the module was loaded.
    tokens = []
    with open(path, 'r') as f:
        for line in f:
            fields = line.split()
            # Blank or truncated lines carry no token in the third column;
            # skip them instead of failing with IndexError.
            if len(fields) < 3 or fields[2] == 'sil':
                continue
            # Extend in place rather than rebuilding the list on every line.
            tokens.extend((' ', fields[2]))
    return char_to_num(tf.reshape(tf.strings.unicode_split(tokens, input_encoding='UTF-8'), (-1)))[1:]
|
17 |
+
def load_data(path: str):
    """Load the video frames and encoded alignments for one sample.

    The base name of ``path`` (text after the last path separator and before
    the first ``.``) selects ``data/s1/<name>.mpg`` and
    ``data/alignments/s1/<name>.align``.

    Args:
        path: Despite the ``str`` annotation, ``.numpy()`` is called on this
            value and the result is decoded from bytes, so callers pass a
            tensor-like object wrapping the path.

    Returns:
        A ``(frames, alignments)`` tuple: the result of ``load_video`` and
        the result of ``load_alignments``.
    """
    # Imported here because the file's import block does not import `os`,
    # although this function relies on `os.path.join`.
    import os

    path = bytes.decode(path.numpy())
    # Accept both Windows ('\\') and POSIX ('/') separators so the base name
    # is extracted correctly on either platform.
    file_name = path.replace('\\', '/').split('/')[-1].split('.')[0]
    video_path = os.path.join('data', 's1', f'{file_name}.mpg')
    alignment_path = os.path.join('data', 'alignments', 's1', f'{file_name}.align')
    # NOTE(review): `load_video` is not defined or imported in the visible
    # file; it must be provided for this call to succeed.
    frames = load_video(video_path)
    alignments = load_alignments(alignment_path)

    return frames, alignments
|
28 |
+
|
29 |
+
|
30 |
+
# One vocabulary entry per character of the literal below.
vocab = list("abcdefghijklmnopqrstuvwxyz'?!123456789 ")

# Forward lookup: character -> integer id (unknown characters use the "" token).
char_to_num = tf.keras.layers.StringLookup(vocabulary=vocab, oov_token="")

# Inverse lookup built from the forward layer's own vocabulary: id -> character.
num_to_char = tf.keras.layers.StringLookup(
    invert=True,
    oov_token="",
    vocabulary=char_to_num.get_vocabulary(),
)
|
35 |
+
|
36 |
+
# Network definition: three Conv3D/ReLU/MaxPool3D stages, a per-timestep
# flatten, two bidirectional LSTM layers with dropout, and a softmax layer
# sized to the lookup vocabulary plus one extra class.
model = Sequential([
    # Stage 1
    Conv3D(128, 3, input_shape=(75, 46, 140, 1), padding='same'),
    Activation('relu'),
    MaxPool3D((1, 2, 2)),
    # Stage 2
    Conv3D(256, 3, padding='same'),
    Activation('relu'),
    MaxPool3D((1, 2, 2)),
    # Stage 3
    Conv3D(75, 3, padding='same'),
    Activation('relu'),
    MaxPool3D((1, 2, 2)),
    # Flatten each timestep independently so the sequence axis is preserved.
    TimeDistributed(Flatten()),
    # Recurrent stack
    Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)),
    Dropout(.5),
    Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)),
    Dropout(.5),
    # Per-timestep class distribution.
    Dense(
        char_to_num.vocabulary_size() + 1,
        kernel_initializer='he_normal',
        activation='softmax',
    ),
])
model.summary()
|
59 |
+
|
60 |
+
# Legacy Adam optimizer; swap in whichever legacy optimizer was used when the
# weights were trained.
optimizer = legacy.Adam(learning_rate=0.001)

model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=optimizer,
)

# Restore the trained weights.
# NOTE(review): this is an absolute path; confirm it exists where the app runs.
model.load_weights('/content/models/checkpoint')
|
66 |
+
|
67 |
+
def Predict(Video):
    """Run the model on one video and return the decoded text.

    ``Video`` is converted to a tensor and handed to ``load_data``; the first
    element of its result is batched, passed through ``model.predict`` and
    greedily CTC-decoded with a fixed input length of 75. The decoded ids of
    the first sequence are mapped back to characters with ``num_to_char``.

    Returns:
        The decoded text as a UTF-8 ``str``.
    """
    frames = load_data(tf.convert_to_tensor(Video))[0]
    batch = tf.expand_dims(frames, axis=0)
    yhat = model.predict(batch)

    # ctc_decode returns (decoded sequences, log-probabilities); keep the
    # first decoded tensor as a NumPy array.
    decode_output = tf.keras.backend.ctc_decode(yhat, input_length=[75], greedy=True)
    decoded = decode_output[0][0].numpy()

    sentences = []
    for token_ids in decoded:
        chars = [num_to_char(token_id) for token_id in token_ids]
        sentences.append(tf.strings.reduce_join(chars))

    first_sentence = sentences[0]
    return first_sentence.numpy().decode('utf-8')
|
73 |
+
|
74 |
+
|
75 |
+
# Gradio UI: a video input wired to Predict, with the decoded text shown in a
# textbox. The output component uses the top-level `gr.Textbox`; the original
# `gr.inputs.Textbox` belongs to the deprecated input-side namespace, which
# newer Gradio releases no longer provide.
interface = gr.Interface(
    fn=Predict,
    inputs="video",
    outputs=[gr.Textbox(label='Generated Output')],
    title='Video Lip Reading',
)

# Start the app when the script is run.
interface.launch(debug=True)
|