Spaces:

sltAI
/

ConcatenativeSynthesis

Running

App Files Community

mdsr committed on Sep 25, 2024

Commit

50f0067

1 Parent(s): a57329a

english examples, move hands to pose wrist, Note

Browse files

Files changed (2) hide show

README.md +1 -1
app.py +17 -8

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🏆
 colorFrom: green
 colorTo: purple
 sdk: gradio
-sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: cc-by-4.0

 colorFrom: green
 colorTo: purple
 sdk: gradio
+sdk_version: 4.*
 app_file: app.py
 pinned: false
 license: cc-by-4.0

app.py CHANGED Viewed

@@ -10,9 +10,11 @@ DESCRIPTION = """Enter your text and select languages from the dropdowns, then c
 The text is preprocessed, tokenized and rearranged and then each token is mapped to a prerecorded video which are concatenated and returned. [`Model Code`](https://github.com/sign-language-translator/sign-language-translator/blob/main/sign_language_translator/models/text_to_sign/concatenative_synthesis.py)
-> *NOTE*: This model only supports a fixed vocabulary. See the [`*-dictionary-mapping.json`](https://github.com/sign-language-translator/sign-language-datasets/tree/main/parallel_texts) files for supported words.
-> This version needs to re-encode the generated video so that will take some extra time after translation.
-> Since this is a rule-based model, you will have to add **context** to ambiguous words (e.g. glass(material) vs glass(container)).
 """.strip()
 TITLE = "Concatenative Synthesis: Rule Based Text to Sign Language Translator"
@@ -275,9 +277,14 @@ def text_to_video(
     sign = translation_model.translate(text)
     if isinstance(sign, slt.Landmarks):
-        sign.data[:, 33:] *= 2
-        sign.data[:, 33:54, 0] += 0.25
-        sign.data[:, 54:, 0] -= 0.25
         sign.save_animation(output_path, overwrite=True)
     else:
@@ -407,10 +414,12 @@ with gradio.Blocks(title=TITLE, head=CUSTOM_JS, css=CUSTOM_CSS) as gradio_app:
     gradio.Examples(
         [
             ["یہ بہت اچھا ہے۔", "urdu", "pakistan-sign-language", "video"],
             ["وہ کام آسان تھا۔", "urdu", "pakistan-sign-language", "landmarks"],
-            ["पाँच घंटे।", "hindi", "pakistan-sign-language", "video"],
-            ["कैसे हैं आप?", "hindi", "pakistan-sign-language", "landmarks"],
         ],
         inputs=[
             source_textbox,

 The text is preprocessed, tokenized and rearranged and then each token is mapped to a prerecorded video which are concatenated and returned. [`Model Code`](https://github.com/sign-language-translator/sign-language-translator/blob/main/sign_language_translator/models/text_to_sign/concatenative_synthesis.py)
+> **NOTE**
+> - This model only supports a fixed vocabulary. See the [`*-dictionary-mapping.json`](https://github.com/sign-language-translator/sign-language-datasets/tree/main/parallel_texts) files for supported words.
+> - This version needs to re-encode the generated video so that will take some extra time after translation.
+> - Since this is a rule-based model, you will have to add **context** to ambiguous words (e.g. glass(material) vs glass(container)).
+> - Some signs correspond to words very specific in a particular language so their mapping in other languages will not make sense (e.g. in pakistan-sign-language, signs were recorded in reference to common Urdu words, hence English words "for" & "to" etc do not map well to their original Urdu words "کے لئے" and "کو" etc).
 """.strip()
 TITLE = "Concatenative Synthesis: Rule Based Text to Sign Language Translator"
     sign = translation_model.translate(text)
     if isinstance(sign, slt.Landmarks):
+        # large hands on sides
+        # sign.data[:, 33:] *= 2
+        # sign.data[:, 33:54, 0] += 0.25
+        # sign.data[:, 54:, 0] -= 0.25
+        # hands moved to pose wrists
+        sign.data[:, 33:54, :3] += -sign.data[:, 33:34, :3] + sign.data[:, 15:16, :3]
+        sign.data[:, 54:, :3] += - sign.data[:, 54:55, :3] + sign.data[:, 16:17, :3]
         sign.save_animation(output_path, overwrite=True)
     else:
     gradio.Examples(
         [
+            ["We are here to use this.", "english", "pakistan-sign-language", "video"],
+            ["i(me) admire art.", "english", "pakistan-sign-language", "landmarks"],
             ["یہ بہت اچھا ہے۔", "urdu", "pakistan-sign-language", "video"],
             ["وہ کام آسان تھا۔", "urdu", "pakistan-sign-language", "landmarks"],
+            ["कैसे हैं आप?", "hindi", "pakistan-sign-language", "video"],
+            ["पाँच घंटे।", "hindi", "pakistan-sign-language", "landmarks"],
         ],
         inputs=[
             source_textbox,