deanna-emery committed on
Commit
31d4007
·
1 Parent(s): d2377c0
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -55,7 +55,7 @@ def preprocess(filename, max_frames=0, resize=(224,224)):
55
  video = np.expand_dims(video, axis=0)
56
  return video
57
 
58
- def translate(video_file, text):
59
 
60
  video = preprocess(video_file, max_frames=0, resize=(224,224))
61
 
@@ -82,7 +82,7 @@ This application surfaces a model for translation of American Sign Language (ASL
82
  which comprises of a fine-tuned MoViNets CNN model and a T5 encoder-decoder model
83
  to generate translations from the video embeddings. This model architecture achieves a BLEU score of 1.98
84
  and an average cosine similarity score of 0.21 when trained and evaluated on the YouTube-ASL dataset.
85
- More information about the models can be found in our GitHub repository <a href=https://github.com/deanna-emery/ASL-Translator>here</a>.
86
 
87
  A limitation of this architecture is the size of the MoViNets model, making it especially slow during inference on a CPU.
88
  We do not recommend uploading videos longer than 4 seconds as the video embedding generation may take some time.
@@ -108,7 +108,7 @@ article = """The captions for the example videos are as follows in order: \n
108
 
109
  # Gradio App interface
110
  gr.Interface(fn=translate,
111
- inputs=gr.Video(label='Video', show_label=True, max_length=10),
112
  outputs="text",
113
  allow_flagging="never",
114
  title=title,
 
55
  video = np.expand_dims(video, axis=0)
56
  return video
57
 
58
+ def translate(video_file, text=None):
59
 
60
  video = preprocess(video_file, max_frames=0, resize=(224,224))
61
 
 
82
  which comprises of a fine-tuned MoViNets CNN model and a T5 encoder-decoder model
83
  to generate translations from the video embeddings. This model architecture achieves a BLEU score of 1.98
84
  and an average cosine similarity score of 0.21 when trained and evaluated on the YouTube-ASL dataset.
85
+ More information about the model training and instructions to download the models can be found in our GitHub repository <a href=https://github.com/deanna-emery/ASL-Translator>here</a>.
86
 
87
  A limitation of this architecture is the size of the MoViNets model, making it especially slow during inference on a CPU.
88
  We do not recommend uploading videos longer than 4 seconds as the video embedding generation may take some time.
 
108
 
109
  # Gradio App interface
110
  gr.Interface(fn=translate,
111
+ inputs=[gr.Video(label='Video', show_label=True, max_length=10, sources='upload'), 'text'],
112
  outputs="text",
113
  allow_flagging="never",
114
  title=title,