deanna-emery committed on
Commit
79a2238
1 Parent(s): 4470668

initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.DS_Store
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import gradio as gr
4
+
5
+ import os
6
+ os.chdir('models')
7
+
8
+ import tensorflow as tf, tf_keras
9
+ import tensorflow_hub as hub
10
+ from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM
11
+
12
+ from official.projects.movinet.modeling import movinet
13
+ from official.projects.movinet.modeling import movinet_model_a2_modified as movinet_model_modified
14
+
15
+
16
+ movinet_path = 'movinet_checkpoints_a2_epoch9'
17
+ movinet_model = tf_keras.models.load_model(movinet_path)
18
+ movinet_model.trainable = False
19
+
20
+ tokenizer = AutoTokenizer.from_pretrained("t5-base")
21
+ t5_model = TFAutoModelForSeq2SeqLM.from_pretrained("deanna-emery/t5_word_epoch12_1203")
22
+ t5_model.trainable = False
23
+
24
+ def crop_center_square(frame):
25
+ y, x = frame.shape[0:2]
26
+ if x > y:
27
+ start_x = (x-y)/2
28
+ end_x = start_x + y
29
+ start_x = int(start_x)
30
+ end_x = int(end_x)
31
+ return frame[:, int(start_x):int(end_x)]
32
+ else:
33
+ return frame
34
+
35
+
36
+ def preprocess(filename, max_frames=0, resize=(224,224)):
37
+ video_capture = cv2.VideoCapture(filename)
38
+ frames = []
39
+ try:
40
+ while video_capture.isOpened():
41
+ ret, frame = video_capture.read()
42
+ if not ret:
43
+ break
44
+ frame = crop_center_square(frame)
45
+ frame = cv2.resize(frame, resize)
46
+ frame = frame[:, :, [2, 1, 0]]
47
+ frames.append(frame)
48
+
49
+ if len(frames) == max_frames:
50
+ break
51
+ finally:
52
+ video_capture.release()
53
+
54
+ video = np.array(frames) / 255.0
55
+ video = np.expand_dims(video, axis=0)
56
+ return video
57
+
58
+ def translate(video_file):
59
+
60
+ video = preprocess(video_file, max_frames=0, resize=(224,224))
61
+
62
+ embeddings = movinet_model(video)['vid_embedding']
63
+ tokens = t5_model.generate(inputs_embeds = embeddings,
64
+ max_new_tokens=128,
65
+ temperature=0.1,
66
+ no_repeat_ngram_size=2,
67
+ do_sample=True,
68
+ top_k=80,
69
+ top_p=0.90,
70
+ )
71
+
72
+ translation = tokenizer.batch_decode(tokens, skip_special_tokens=True)
73
+
74
+ # Return dict {label:pred}
75
+ return {"translation":translation}
76
+
77
+ # Gradio App config
78
+ title = "ASL Translation (MoViNet + T5)"
79
+ examples = [
80
+ ['videos/no.mp4'],
81
+ ['videos/all.mp4'],
82
+ ['videos/before.mp4'],
83
+ ['videos/blue.mp4'],
84
+ ['videos/white.mp4'],
85
+ ['videos/accident2.mp4']
86
+ ]
87
+
88
+ # Gradio App interface
89
+ gr.Interface( fn=translate,
90
+ inputs=[gr.inputs.Video(label="Video (*.mp4)")],
91
+ outputs=[gr.outputs.Label(label='Translation')],
92
+ allow_flagging="never",
93
+ title=title,
94
+ examples=examples).launch()
models ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 882b879da2dfd2dc75795feb681575ed4320cf33
videos/videos_accident2.mp4 ADDED
Binary file (42.9 kB). View file
 
videos/videos_all.mp4 ADDED
Binary file (91 kB). View file
 
videos/videos_before.mp4 ADDED
Binary file (25.4 kB). View file
 
videos/videos_blue.mp4 ADDED
Binary file (718 kB). View file
 
videos/videos_no.mp4 ADDED
Binary file (235 kB). View file
 
videos/videos_white.mp4 ADDED
Binary file (302 kB). View file