deanna-emery committed on
Commit
09c1a2c
·
1 Parent(s): 4b278b1
Files changed (1) hide show
  1. app.py +21 -6
app.py CHANGED
@@ -75,11 +75,19 @@ def translate(video_file):
75
  return {"translation":translation}
76
 
77
  # Gradio App config
78
- title = "ASL Translation (MoViNet + T5)"
79
- description = "Gradio demo of word-level sign language classification using I3D model pretrained on the WLASL video dataset. " \
80
- "WLASL is a large-scale dataset containing more than 2000 words in American Sign Language. " \
81
- "Examples used in the demo are videos from the the test subset. " \
82
- "Note that WLASL100 contains 100 words while WLASL2000 contains 2000."
 
 
 
 
 
 
 
 
83
 
84
 
85
  examples = [
@@ -92,9 +100,16 @@ examples = [
92
 
93
  article = "More information about the models can be found <a href=https://github.com/deanna-emery/ASL-Translator>here</a>."
94
 
 
 
 
 
 
 
 
95
  # Gradio App interface
96
  gr.Interface(fn=translate,
97
- inputs="video",
98
  outputs="text",
99
  allow_flagging="never",
100
  title=title,
 
75
  return {"translation":translation}
76
 
77
  # Gradio App config
78
+ title = "American Sign Language Translation: An Approach Combining MoViNets and T5"
79
+
80
+ description = """
81
+ This application surfaces a model for translation of American Sign Language (ASL),
82
+ which comprises a fine-tuned MoViNets CNN model and a T5 encoder-decoder model
83
+ to generate translations from the video embeddings. This model architecture achieves a BLEU score of 1.98
84
+ and an average cosine similarity score of 0.21 when trained and evaluated on the YouTube-ASL dataset.
85
+ More information about the models can be found in our GitHub repository <a href=https://github.com/deanna-emery/ASL-Translator>here</a>.
86
+
87
+ A limitation of this architecture is the size of the MoViNets model, which makes inference especially slow on a CPU.
88
+ We do not recommend uploading videos longer than 4 seconds as the video embedding generation may take some time.
89
+ We have provided some pre-cached videos with their original captions and translations as examples.
90
+ """
91
 
92
 
93
  examples = [
 
100
 
101
  article = "More information about the models can be found <a href=https://github.com/deanna-emery/ASL-Translator>here</a>."
102
 
103
+ article = """The captions for the example videos are as follows in order: \n
104
+ 1. 'My second ASL professor's name was Will White'
105
+ 2. 'You are my sunshine'
106
+ 3. 'scrub your hands for at least 20 seconds'
107
+ 4. 'no'
108
+ """
109
+
110
  # Gradio App interface
111
  gr.Interface(fn=translate,
112
+ inputs=["video", 'text'],
113
  outputs="text",
114
  allow_flagging="never",
115
  title=title,