Commit c95030a
Parent(s): a4998f1
adding ISL gesture reference tab
app.py CHANGED
@@ -45,6 +45,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 MODEL_TRANSFORMER = 'google/vivit-b-16x2'
 # Set Paths
 model_path = 'vivit_pytorch_loss051.pt'
+data_path = 'signs'
 
 # Create Mediapipe Objects
 mp_drawing = mp.solutions.drawing_utils
@@ -86,6 +87,7 @@ holistic = mp_holistic.Holistic(
     min_tracking_confidence=0.5
 )
 
+# Creating Dataset
 class CreateDatasetProd():
     def __init__(self
                  , clip_len
@@ -232,19 +234,35 @@ def prod_function(model_pretrained, prod_ds):
     prod_logits = outputs.squeeze(1)
     prod_pred = prod_logits.argmax(-1)
     return prod_pred
-
+
+# Function to get landmarked video
 def tensor_to_video(video_tensor, fps=30):
-
+    video_numpy = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
     # Normalize values to [0, 255] if necessary
-    if
-
-
-
-
+    if video_numpy.max() <= 1.0:
+        video_numpy = (video_numpy * 255).astype(np.uint8)
+    return video_numpy
+
+# Function to list available videos dynamically
+def list_videos():
+    if os.path.exists(data_path):
+        video_lst = [f for f in os.listdir(data_path) if f.endswith((".mp4", ".mov", ".MOV", ".webm", ".avi"))]
+        return video_lst
+
+# Function to return the selected video path
+def play_video(selected_video):
+    return os.path.join(data_path, selected_video) if selected_video else None
+
+## Function to refresh dropdown options
+#def refresh_dropdown():
+#    return gr.Dropdown.update(choices=list_videos())
+
+# Main Function for tab - Gesture recognition
 def translate_sign_language(gesture):
     # Create Dataset
     prod_ds = dataset_prod_obj.create_dataset(gesture)
-    prod_video = tensor_to_video(prod_ds)
+    #prod_video = tensor_to_video(prod_ds)
+    prod_video = np.random.randint(0, 255, (32, 225, 225, 3), dtype=np.uint8)
 
     # Run ML Model
     predicted_prod_label = prod_function(model_pretrained, prod_ds)
@@ -259,6 +277,8 @@ def translate_sign_language(gesture):
 
 with gr.Blocks() as demo:
     gr.Markdown("# Indian Sign Language Translation App")
+
+    # Gesture recognition Tab
    with gr.Tab("Gesture recognition"):
        with gr.Row():
            with gr.Column(scale=2, min_width=300):
@@ -273,6 +293,15 @@ with gr.Blocks() as demo:
                test_output = gr.Textbox(label="Translation in English")
        # Set up the interface
        video_button.click(translate_sign_language, inputs=video_input, outputs=[test_output, video_output])
+
+    # Indian Sign Language gesture reference tab
+    with gr.Tab("Indian Sign Language gesture reference"):
+        with gr.Row():
+            video_dropdown = gr.Dropdown(choices=list_videos(), label="ISL gestures", info="More gestures comming soon!")
+            search_button = gr.Button("Search Gesture")
+            search_output = gr.Video(streaming=True, label="ISL gestures Video")
+        # Set up the interface
+        search_button.click(play_video, inputs=video_dropdown, outputs=search_output)
 
 if __name__ == "__main__":
     demo.launch()
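
The new reference tab can be exercised on its own, without the ViViT model or the Mediapipe pipeline. The sketch below is a minimal, standalone version of the helpers and Gradio wiring this commit adds: it assumes only that gradio is installed and that a local 'signs' folder holds a few gesture clips. Names such as data_path, list_videos, and play_video come from the diff; the VIDEO_EXTS constant and the empty-list fallback in list_videos are additions for illustration so the dropdown still renders when the folder is missing.

# Minimal sketch of the gesture-reference tab from this commit (model-free).
import os
import gradio as gr

data_path = 'signs'
# Extensions accepted by the commit's list_videos helper
VIDEO_EXTS = (".mp4", ".mov", ".MOV", ".webm", ".avi")

def list_videos():
    # List gesture clips in data_path; fall back to an empty list if the folder is absent
    if os.path.exists(data_path):
        return [f for f in os.listdir(data_path) if f.endswith(VIDEO_EXTS)]
    return []

def play_video(selected_video):
    # Resolve the dropdown selection to a playable file path
    return os.path.join(data_path, selected_video) if selected_video else None

with gr.Blocks() as demo:
    with gr.Tab("Indian Sign Language gesture reference"):
        with gr.Row():
            video_dropdown = gr.Dropdown(choices=list_videos(), label="ISL gestures")
            search_button = gr.Button("Search Gesture")
            search_output = gr.Video(label="ISL gestures Video")
        # Clicking the button hands the chosen filename to play_video,
        # whose returned path is rendered by the gr.Video component
        search_button.click(play_video, inputs=video_dropdown, outputs=search_output)

if __name__ == "__main__":
    demo.launch()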