Commit c95030a
Parent(s): a4998f1
adding ISL gesture reference tab
app.py CHANGED
@@ -45,6 +45,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 MODEL_TRANSFORMER = 'google/vivit-b-16x2'
 # Set Paths
 model_path = 'vivit_pytorch_loss051.pt'
+data_path = 'signs'
 
 # Create Mediapipe Objects
 mp_drawing = mp.solutions.drawing_utils
@@ -86,6 +87,7 @@ holistic = mp_holistic.Holistic(
     min_tracking_confidence=0.5
 )
 
+# Creating Dataset
 class CreateDatasetProd():
     def __init__(self
                  , clip_len
@@ -232,19 +234,35 @@ def prod_function(model_pretrained, prod_ds):
     prod_logits = outputs.squeeze(1)
     prod_pred = prod_logits.argmax(-1)
     return prod_pred
-
+
+# Function to get landmarked video
 def tensor_to_video(video_tensor, fps=30):
-
+    video_numpy = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
     # Normalize values to [0, 255] if necessary
-    if
-
-
-
-
+    if video_numpy.max() <= 1.0:
+        video_numpy = (video_numpy * 255).astype(np.uint8)
+    return video_numpy
+
+# Function to list available videos dynamically
+def list_videos():
+    if os.path.exists(data_path):
+        video_lst = [f for f in os.listdir(data_path) if f.endswith((".mp4", ".mov", ".MOV", ".webm", ".avi"))]
+        return video_lst
+
+# Function to return the selected video path
+def play_video(selected_video):
+    return os.path.join(data_path, selected_video) if selected_video else None
+
+## Function to refresh dropdown options
+#def refresh_dropdown():
+#    return gr.Dropdown.update(choices=list_videos())
+
+# Main Function for tab - Gesture recognition
 def translate_sign_language(gesture):
     # Create Dataset
     prod_ds = dataset_prod_obj.create_dataset(gesture)
-    prod_video = tensor_to_video(prod_ds)
+    #prod_video = tensor_to_video(prod_ds)
+    prod_video = np.random.randint(0, 255, (32, 225, 225, 3), dtype=np.uint8)
 
     # Run ML Model
     predicted_prod_label = prod_function(model_pretrained, prod_ds)
@@ -259,6 +277,8 @@ def translate_sign_language(gesture):
 
 with gr.Blocks() as demo:
     gr.Markdown("# Indian Sign Language Translation App")
+
+    # Gesture recognition Tab
    with gr.Tab("Gesture recognition"):
        with gr.Row():
            with gr.Column(scale=2, min_width=300):
@@ -273,6 +293,15 @@ with gr.Blocks() as demo:
                test_output = gr.Textbox(label="Translation in English")
        # Set up the interface
        video_button.click(translate_sign_language, inputs=video_input, outputs=[test_output, video_output])
+
+    # Indian Sign Language gesture reference tab
+    with gr.Tab("Indian Sign Language gesture reference"):
+        with gr.Row():
+            video_dropdown = gr.Dropdown(choices=list_videos(), label="ISL gestures", info="More gestures comming soon!")
+            search_button = gr.Button("Search Gesture")
+            search_output = gr.Video(streaming=True, label="ISL gestures Video")
+        # Set up the interface
+        search_button.click(play_video, inputs=video_dropdown, outputs=search_output)
 
 if __name__ == "__main__":
     demo.launch()
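
The new reference tab can be exercised on its own, without the ViViT model or the Mediapipe pipeline. The sketch below is a minimal, standalone version of the helpers and Gradio wiring this commit adds: it assumes only that gradio is installed and that a local 'signs' folder holds a few gesture clips. Names such as data_path, list_videos, and play_video come from the diff; the VIDEO_EXTS constant and the empty-list fallback in list_videos are additions for illustration so the dropdown still renders when the folder is missing.

# Minimal sketch of the gesture-reference tab from this commit (model-free).
import os
import gradio as gr

data_path = 'signs'
# Extensions accepted by the commit's list_videos helper
VIDEO_EXTS = (".mp4", ".mov", ".MOV", ".webm", ".avi")

def list_videos():
    # List gesture clips in data_path; fall back to an empty list if the folder is absent
    if os.path.exists(data_path):
        return [f for f in os.listdir(data_path) if f.endswith(VIDEO_EXTS)]
    return []

def play_video(selected_video):
    # Resolve the dropdown selection to a playable file path
    return os.path.join(data_path, selected_video) if selected_video else None

with gr.Blocks() as demo:
    with gr.Tab("Indian Sign Language gesture reference"):
        with gr.Row():
            video_dropdown = gr.Dropdown(choices=list_videos(), label="ISL gestures")
            search_button = gr.Button("Search Gesture")
            search_output = gr.Video(label="ISL gestures Video")
        # Clicking the button hands the chosen filename to play_video,
        # whose returned path is rendered by the gr.Video component
        search_button.click(play_video, inputs=video_dropdown, outputs=search_output)

if __name__ == "__main__":
    demo.launch()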