Kaushik066 committed
Commit c95030a · 1 Parent(s): a4998f1

adding ISL gesture reference tab

Files changed (1): app.py (+37 -8)
app.py CHANGED
@@ -45,6 +45,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 MODEL_TRANSFORMER = 'google/vivit-b-16x2'
 # Set Paths
 model_path = 'vivit_pytorch_loss051.pt'
+data_path = 'signs'
 
 # Create Mediapipe Objects
 mp_drawing = mp.solutions.drawing_utils
@@ -86,6 +87,7 @@ holistic = mp_holistic.Holistic(
     min_tracking_confidence=0.5
 )
 
+# Creating Dataset
 class CreateDatasetProd():
     def __init__(self
                  , clip_len
@@ -232,19 +234,35 @@ def prod_function(model_pretrained, prod_ds):
     prod_logits = outputs.squeeze(1)
     prod_pred = prod_logits.argmax(-1)
     return prod_pred
-
+
+# Function to get landmarked video
 def tensor_to_video(video_tensor, fps=30):
-    video_tensor = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
+    video_numpy = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
     # Normalize values to [0, 255] if necessary
-    if video_tensor.max() <= 1.0:
-        video_tensor = (video_tensor * 255).astype(np.uint8)
-    # Define video writer
-    return video_tensor
-
+    if video_numpy.max() <= 1.0:
+        video_numpy = (video_numpy * 255).astype(np.uint8)
+    return video_numpy
+
+# Function to list available videos dynamically
+def list_videos():
+    if os.path.exists(data_path):
+        video_lst = [f for f in os.listdir(data_path) if f.endswith((".mp4", ".mov", ".MOV", ".webm", ".avi"))]
+        return video_lst
+
+# Function to return the selected video path
+def play_video(selected_video):
+    return os.path.join(data_path, selected_video) if selected_video else None
+
+## Function to refresh dropdown options
+#def refresh_dropdown():
+#    return gr.Dropdown.update(choices=list_videos())
+
+# Main Function for tab - Gesture recognition
 def translate_sign_language(gesture):
     # Create Dataset
     prod_ds = dataset_prod_obj.create_dataset(gesture)
-    prod_video = tensor_to_video(prod_ds)
+    #prod_video = tensor_to_video(prod_ds)
+    prod_video = np.random.randint(0, 255, (32, 225, 225, 3), dtype=np.uint8)
 
     # Run ML Model
     predicted_prod_label = prod_function(model_pretrained, prod_ds)
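Note: this commit drops the unfinished "# Define video writer" step, so tensor_to_video() now just returns the uint8 frame array. If writing the landmarked video to disk is wanted later, a minimal sketch along these lines should work; it is not part of this commit, it assumes OpenCV is available, and the output.mp4 path is illustrative:

    import cv2
    import numpy as np

    def write_video(frames: np.ndarray, path: str = "output.mp4", fps: int = 30) -> str:
        # frames: (T, H, W, 3) uint8 RGB array, e.g. the return value of tensor_to_video()
        height, width = frames.shape[1], frames.shape[2]
        writer = cv2.VideoWriter(path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
        for frame in frames:
            writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))  # OpenCV expects BGR order
        writer.release()
        return path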
@@ -259,6 +277,8 @@ def translate_sign_language(gesture):
 
 with gr.Blocks() as demo:
     gr.Markdown("# Indian Sign Language Translation App")
+
+    # Gesture recognition Tab
     with gr.Tab("Gesture recognition"):
         with gr.Row():
             with gr.Column(scale=2, min_width=300):
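Note: the refresh_dropdown() helper commented out in the tensor_to_video hunk above uses the Gradio 3.x API; gr.Dropdown.update() was removed in Gradio 4.x. If the helper is revived, a hedged Gradio 4.x equivalent would be the following (not part of this commit; the refresh_button wiring is illustrative):

    def refresh_dropdown():
        # In Gradio 4.x, gr.update() replaces the per-component .update() classmethods
        return gr.update(choices=list_videos())

    # Hypothetical wiring inside the Blocks context:
    # refresh_button = gr.Button("Refresh")
    # refresh_button.click(refresh_dropdown, outputs=video_dropdown)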
@@ -273,6 +293,15 @@ with gr.Blocks() as demo:
             test_output = gr.Textbox(label="Translation in English")
         # Set up the interface
         video_button.click(translate_sign_language, inputs=video_input, outputs=[test_output, video_output])
+
+    # Indian Sign Language gesture reference tab
+    with gr.Tab("Indian Sign Language gesture reference"):
+        with gr.Row():
+            video_dropdown = gr.Dropdown(choices=list_videos(), label="ISL gestures", info="More gestures coming soon!")
+            search_button = gr.Button("Search Gesture")
+            search_output = gr.Video(streaming=True, label="ISL gestures Video")
+        # Set up the interface
+        search_button.click(play_video, inputs=video_dropdown, outputs=search_output)
 
 if __name__ == "__main__":
     demo.launch()
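For trying the new tab in isolation, here is a minimal standalone sketch built from the helpers this commit adds. It assumes a local signs/ directory with a few clips; the empty-list fallback is an addition (the committed list_videos() implicitly returns None when the directory is missing, which would break the dropdown):

    import os
    import gradio as gr

    data_path = 'signs'

    # Same filter as the committed list_videos(), plus an explicit empty-list fallback
    def list_videos():
        if os.path.exists(data_path):
            return [f for f in os.listdir(data_path)
                    if f.endswith((".mp4", ".mov", ".MOV", ".webm", ".avi"))]
        return []

    # Return the full path of the selected clip, or None when nothing is selected
    def play_video(selected_video):
        return os.path.join(data_path, selected_video) if selected_video else None

    with gr.Blocks() as demo:
        with gr.Tab("Indian Sign Language gesture reference"):
            video_dropdown = gr.Dropdown(choices=list_videos(), label="ISL gestures")
            search_button = gr.Button("Search Gesture")
            search_output = gr.Video(label="ISL gestures Video")
            search_button.click(play_video, inputs=video_dropdown, outputs=search_output)

    if __name__ == "__main__":
        demo.launch()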
 