Kaushik066 committed
Commit bf8bd09 · 1 Parent(s): a859a6a

Update app.py

Files changed (1)
  1. app.py +14 -23
app.py CHANGED
@@ -24,6 +24,7 @@ import pandas as pd
 import numpy as np
 from torch.nn import Linear, Softmax
 import gradio as gr
+import cv2
 # Mediapipe Library
 import mediapipe as mp
 from mediapipe.tasks import python
@@ -85,20 +86,6 @@ holistic = mp_holistic.Holistic(
     min_tracking_confidence=0.5
 )
 
-## Creating Dataloader
-#class CustomDatasetProd(Dataset):
-#    def __init__(self, pixel_values):
-#        self.pixel_values = pixel_values.to('cpu')
-#
-#    def __len__(self):
-#        return len(self.pixel_values)
-#
-#    def __getitem__(self, idx):
-#        item = {
-#            'pixel_values': self.pixel_values[idx]
-#        }
-#        return item
-
 class CreateDatasetProd():
     def __init__(self
                  , clip_len
@@ -245,11 +232,19 @@ def prod_function(model_pretrained, prod_ds):
     prod_logits = outputs.squeeze(1)
     prod_pred = prod_logits.argmax(-1)
     return prod_pred
-
+
+def tensor_to_video(video_tensor, fps=30):
+    video_tensor = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
+    # Normalize values to [0, 255] if necessary
+    if video_tensor.max() <= 1.0:
+        video_tensor = (video_tensor * 255).astype(np.uint8)
+    # Define video writer
+    return video_tensor
+
 def translate_sign_language(gesture):
     # Create Dataset
     prod_ds = dataset_prod_obj.create_dataset(gesture)
-    #prod_dl = DataLoader(prod_ds, batch_size=BATCH_SIZE)
+    prod_video = tensor_to_video(prod_ds)
 
     # Run ML Model
     predicted_prod_label = prod_function(model_pretrained, prod_ds)
@@ -258,13 +253,9 @@ def translate_sign_language(gesture):
     predicted_prod_label = predicted_prod_label.squeeze(0)
 
     idx_to_label = model_pretrained.config.id2label
-    #gesture_translation = predicted_prod_label.cpu().numpy()
     gesture_translation = idx_to_label[predicted_prod_label.cpu().numpy().item()]  # Convert to a scalar
 
-    #for val in np.array(predicted_prod_label.cpu().numpy()):
-    #    gesture_translation = idx_to_label[val]
-
-    return gesture_translation  #, prod_ds
+    return gesture_translation, prod_video
 
 with gr.Blocks() as demo:
     gr.Markdown("# Indian Sign Language Translation App")
@@ -276,12 +267,12 @@ with gr.Blocks() as demo:
     # Submit the Video
     video_button = gr.Button("Submit")
     # Display the landmarked video
-    #video_output = gr.Video(streaming=True, label="Landmarked Gesture")
+    video_output = gr.Video(streaming=True, label="Landmarked Gesture")
     with gr.Row():
         # Add a button or functionality to process the video
         test_output = gr.Textbox(label="Translation in English")
     # Set up the interface
-    video_button.click(translate_sign_language, inputs=video_input, outputs=test_output)
+    video_button.click(translate_sign_language, inputs=video_input, outputs=[test_output, video_output])
 
 if __name__ == "__main__":
     demo.launch()
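
Note on the new tensor_to_video: it stops at the "# Define video writer" comment and returns the raw frame array, so the cv2 import added in this commit is never actually used, and gr.Video components generally render a video file path rather than an in-memory array of frames. Below is a minimal sketch of how the writer step could be completed, assuming the input tensor is (frames, channels, height, width) RGB, as the permute(0, 2, 3, 1) implies; tensor_to_video_file and the temporary-file path are illustrative, not part of the commit.

import tempfile
import cv2
import numpy as np

def tensor_to_video_file(video_tensor, fps=30):
    # (frames, C, H, W) -> (frames, H, W, C), moved to CPU as a NumPy array
    frames = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
    # Scale [0, 1] floats to [0, 255] before casting to uint8
    if frames.max() <= 1.0:
        frames = frames * 255
    frames = frames.astype(np.uint8)
    _, height, width, _ = frames.shape
    # Write the frames to a temporary .mp4 that gr.Video can play back
    path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    writer = cv2.VideoWriter(path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
    for frame in frames:
        # OpenCV expects BGR channel order
        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    writer.release()
    return path

Returning this path from translate_sign_language in place of the raw array would give video_output a playable file; with a finished file, streaming=True on gr.Video is likely unnecessary.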