Commit bf8bd09
1 Parent(s): a859a6a
Update app.py

app.py CHANGED
@@ -24,6 +24,7 @@ import pandas as pd
 import numpy as np
 from torch.nn import Linear, Softmax
 import gradio as gr
+import cv2
 # Mediapipe Library
 import mediapipe as mp
 from mediapipe.tasks import python
@@ -85,20 +86,6 @@ holistic = mp_holistic.Holistic(
     min_tracking_confidence=0.5
 )
 
-## Creating Dataloader
-#class CustomDatasetProd(Dataset):
-#    def __init__(self, pixel_values):
-#        self.pixel_values = pixel_values.to('cpu')
-#
-#    def __len__(self):
-#        return len(self.pixel_values)
-#
-#    def __getitem__(self, idx):
-#        item = {
-#            'pixel_values': self.pixel_values[idx]
-#        }
-#        return item
-
 class CreateDatasetProd():
     def __init__(self
                  , clip_len
@@ -245,11 +232,19 @@ def prod_function(model_pretrained, prod_ds):
     prod_logits = outputs.squeeze(1)
     prod_pred = prod_logits.argmax(-1)
     return prod_pred
-
+
+def tensor_to_video(video_tensor, fps=30):
+    video_tensor = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
+    # Normalize values to [0, 255] if necessary
+    if video_tensor.max() <= 1.0:
+        video_tensor = (video_tensor * 255).astype(np.uint8)
+    # Define video writer
+    return video_tensor
+
 def translate_sign_language(gesture):
     # Create Dataset
     prod_ds = dataset_prod_obj.create_dataset(gesture)
-
+    prod_video = tensor_to_video(prod_ds)
 
     # Run ML Model
     predicted_prod_label = prod_function(model_pretrained, prod_ds)
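
The new tensor_to_video stops at the "# Define video writer" comment and returns the raw frame array, even though the commit also adds import cv2. A minimal sketch of how that writer might be completed with OpenCV, assuming the input is a (num_frames, channels, height, width) RGB tensor as the permute above implies; the function name tensor_to_video_file, the output path, and the mp4v codec are illustrative, not part of the commit:

import cv2
import numpy as np

def tensor_to_video_file(video_tensor, path="landmarked.mp4", fps=30):
    # (T, C, H, W) -> (T, H, W, C), mirroring the committed tensor_to_video
    frames = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
    if frames.max() <= 1.0:
        frames = (frames * 255).astype(np.uint8)
    height, width = frames.shape[1:3]
    writer = cv2.VideoWriter(path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
    for frame in frames:
        # OpenCV writes BGR frames, so convert from RGB first
        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    writer.release()
    return path
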
@@ -258,13 +253,9 @@ def translate_sign_language(gesture):
     predicted_prod_label = predicted_prod_label.squeeze(0)
 
     idx_to_label = model_pretrained.config.id2label
-    #gesture_translation = predicted_prod_label.cpu().numpy()
     gesture_translation = idx_to_label[predicted_prod_label.cpu().numpy().item()] # Convert to a scalar
 
-
-    # gesture_translation = idx_to_label[val]
-
-    return gesture_translation #, prod_ds
+    return gesture_translation , prod_video
 
 with gr.Blocks() as demo:
     gr.Markdown("# Indian Sign Language Translation App")
@@ -276,12 +267,12 @@ with gr.Blocks() as demo:
     # Submit the Video
     video_button = gr.Button("Submit")
     # Display the landmarked video
-
+    video_output = gr.Video(streaming=True, label="Landmarked Gesture")
     with gr.Row():
         # Add a button or functionality to process the video
         test_output = gr.Textbox(label="Translation in English")
     # Set up the interface
-    video_button.click(translate_sign_language, inputs=video_input, outputs=test_output)
+    video_button.click(translate_sign_language, inputs=video_input, outputs=[test_output, video_output])
 
 if __name__ == "__main__":
     demo.launch()
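
As committed, translate_sign_language hands the raw numpy array from tensor_to_video to the new gr.Video output. Gradio Video output components generally expect a file path or URL rather than an in-memory frame array, so one plausible wiring writes the frames to disk first, reusing the hypothetical tensor_to_video_file sketched above (an assumption, not the commit's code):

def translate_sign_language(gesture):
    # Create Dataset
    prod_ds = dataset_prod_obj.create_dataset(gesture)
    # Assumed helper from the sketch above; writes an .mp4 and returns its path
    video_path = tensor_to_video_file(prod_ds)

    # Run ML Model
    predicted_prod_label = prod_function(model_pretrained, prod_ds).squeeze(0)
    idx_to_label = model_pretrained.config.id2label
    gesture_translation = idx_to_label[predicted_prod_label.cpu().numpy().item()]
    return gesture_translation, video_path  # gr.Video can play the returned path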