Kaushik066 committed
Commit bf8bd09 · 1 Parent(s): a859a6a

Update app.py

Files changed (1)
  1. app.py +14 -23
app.py CHANGED
@@ -24,6 +24,7 @@ import pandas as pd
 import numpy as np
 from torch.nn import Linear, Softmax
 import gradio as gr
+import cv2
 # Mediapipe Library
 import mediapipe as mp
 from mediapipe.tasks import python
@@ -85,20 +86,6 @@ holistic = mp_holistic.Holistic(
     min_tracking_confidence=0.5
 )
 
-## Creating Dataloader
-#class CustomDatasetProd(Dataset):
-#    def __init__(self, pixel_values):
-#        self.pixel_values = pixel_values.to('cpu')
-#
-#    def __len__(self):
-#        return len(self.pixel_values)
-#
-#    def __getitem__(self, idx):
-#        item = {
-#            'pixel_values': self.pixel_values[idx]
-#        }
-#        return item
-
 class CreateDatasetProd():
     def __init__(self
                  , clip_len
@@ -245,11 +232,19 @@ def prod_function(model_pretrained, prod_ds):
     prod_logits = outputs.squeeze(1)
     prod_pred = prod_logits.argmax(-1)
     return prod_pred
-
+
+def tensor_to_video(video_tensor, fps=30):
+    video_tensor = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
+    # Normalize values to [0, 255] if necessary
+    if video_tensor.max() <= 1.0:
+        video_tensor = (video_tensor * 255).astype(np.uint8)
+    # Define video writer
+    return video_tensor
+
 def translate_sign_language(gesture):
     # Create Dataset
     prod_ds = dataset_prod_obj.create_dataset(gesture)
-    #prod_dl = DataLoader(prod_ds, batch_size=BATCH_SIZE)
+    prod_video = tensor_to_video(prod_ds)
 
     # Run ML Model
     predicted_prod_label = prod_function(model_pretrained, prod_ds)
@@ -258,13 +253,9 @@ def translate_sign_language(gesture):
     predicted_prod_label = predicted_prod_label.squeeze(0)
 
     idx_to_label = model_pretrained.config.id2label
-    #gesture_translation = predicted_prod_label.cpu().numpy()
     gesture_translation = idx_to_label[predicted_prod_label.cpu().numpy().item()]  # Convert to a scalar
 
-    #for val in np.array(predicted_prod_label.cpu().numpy()):
-    #    gesture_translation = idx_to_label[val]
-
-    return gesture_translation  #, prod_ds
+    return gesture_translation, prod_video
 
 with gr.Blocks() as demo:
     gr.Markdown("# Indian Sign Language Translation App")
@@ -276,12 +267,12 @@ with gr.Blocks() as demo:
     # Submit the Video
     video_button = gr.Button("Submit")
     # Display the landmarked video
-    #video_output = gr.Video(streaming=True, label="Landmarked Gesture")
+    video_output = gr.Video(streaming=True, label="Landmarked Gesture")
     with gr.Row():
         # Add a button or functionality to process the video
         test_output = gr.Textbox(label="Translation in English")
     # Set up the interface
-    video_button.click(translate_sign_language, inputs=video_input, outputs=test_output)
+    video_button.click(translate_sign_language, inputs=video_input, outputs=[test_output, video_output])
 
 if __name__ == "__main__":
     demo.launch()
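
Note on the new tensor_to_video: it stops at the "# Define video writer" comment and returns the raw frame array, so the cv2 import added in this commit is never actually used, and gr.Video components generally render a video file path rather than an in-memory array of frames. Below is a minimal sketch of how the writer step could be completed, assuming the input tensor is (frames, channels, height, width) RGB, as the permute(0, 2, 3, 1) implies; tensor_to_video_file and the temporary-file path are illustrative, not part of the commit.

import tempfile
import cv2
import numpy as np

def tensor_to_video_file(video_tensor, fps=30):
    # (frames, C, H, W) -> (frames, H, W, C), moved to CPU as a NumPy array
    frames = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
    # Scale [0, 1] floats to [0, 255] before casting to uint8
    if frames.max() <= 1.0:
        frames = frames * 255
    frames = frames.astype(np.uint8)
    _, height, width, _ = frames.shape
    # Write the frames to a temporary .mp4 that gr.Video can play back
    path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    writer = cv2.VideoWriter(path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
    for frame in frames:
        # OpenCV expects BGR channel order
        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    writer.release()
    return path

Returning this path from translate_sign_language in place of the raw array would give video_output a playable file; with a finished file, streaming=True on gr.Video is likely unnecessary.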