Spaces:

walaa2022
/

signlanguage

Sleeping

App Files Files Community

walaa2022 committed on Feb 24

Commit

30e8ad8

verified ·

1 Parent(s): 9aa59b1

Update app.py

Browse files

Files changed (1) hide show

app.py +130 -197

app.py CHANGED Viewed

@@ -1,214 +1,147 @@
 import os
-import re
-from datetime import datetime
-import gradio
-import sign_language_translator as slt
-DESCRIPTION = """Enter your English text and click Submit to generate a sign language video. [`Library Repository`](https://github.com/sign-language-translator/sign-language-translator)
-The text is preprocessed, tokenized and rearranged and then each token is mapped to a prerecorded video which are concatenated and returned. [`Model Code`](https://github.com/sign-language-translator/sign-language-translator/blob/main/sign_language_translator/models/text_to_sign/concatenative_synthesis.py)
-> **NOTE**
-> - This model only supports a fixed vocabulary. See the [`*-dictionary-mapping.json`](https://github.com/sign-language-translator/sign-language-datasets/tree/main/parallel_texts) files for supported words.
-> - This version needs to re-encode the generated video so that will take some extra time after translation.
-> - Since this is a rule-based model, you will have to add **context** to ambiguous words (e.g. glass(material) vs glass(container)).
-""".strip()
 TITLE = "English to Sign Language Translator"
-CUSTOM_CSS = """
-#auto-complete-button {
-    border-color: var(--button-primary-border-color-hover);
-}
 """
-HF_TOKEN = os.getenv("HF_TOKEN")
-request_logger = (
-    gradio.HuggingFaceDatasetSaver(
-        HF_TOKEN,
-        "sltAI/crowdsourced-text-to-sign-language-rule-based-translation-corpus",
-    )
-    if HF_TOKEN
-    else gradio.CSVLogger()
-)
-translation_model = slt.models.ConcatenativeSynthesis("en", "pk-sl", "video")
-language_models = {}
-def auto_complete_text(model_code: str, text: str):
-    if model_code not in language_models:
-        lm = slt.get_model(model_code)
-        language_models[model_code] = slt.models.BeamSampling(
-            lm,  # type: ignore
-            start_of_sequence_token=getattr(lm, "start_of_sequence_token", "<"),  # type: ignore
-            end_of_sequence_token=getattr(lm, "end_of_sequence_token", ">"),  # type: ignore
-        )
-    tokens = [w for w in re.split(r"\b", text) if w]
-    lm = language_models[model_code]
-    lm.max_length = len(tokens) + 10
-    completion, _ = lm.complete(tokens or None)
-    if completion[0] == lm.start_of_sequence_token:  # type: ignore
-        completion = completion[1:]  # type: ignore
-    if completion[-1] == lm.end_of_sequence_token:  # type: ignore
-        completion = completion[:-1]  # type: ignore
-    new_text = "".join(completion)
-    return new_text
-def text_to_video(
-    text: str,
-    sign_language: str = "pakistan-sign-language",
-    sign_format: str = "video",
-    output_path: str = "output.mp4",
-    codec="h264",  # ToDo: install h264 codec for opencv
-):
-    translation_model.text_language = "en"  # Hardcoded to English
-    translation_model.sign_language = sign_language
-    translation_model.sign_format = sign_format
-    if sign_format == "landmarks":
-        translation_model.sign_embedding_model = "mediapipe-world"
-    # Convert first letter to lowercase as per original code
-    text = text[:1].lower() + text[1:]
-    sign = translation_model.translate(text)
-    if isinstance(sign, slt.Landmarks):
-        # hands moved to pose wrists
-        sign.data[:, 33:54, :3] += -sign.data[:, 33:34, :3] + sign.data[:, 15:16, :3]
-        sign.data[:, 54:  , :3] += -sign.data[:, 54:55, :3] + sign.data[:, 16:17, :3]
-        sign.save_animation(output_path, overwrite=True)
-    else:
-        sign.save(output_path, overwrite=True, codec=codec)
-def translate(text: str, sign_lang: str, sign_format: str):
-    log = [
-        text,
-        "en",
-        sign_lang,
-        None,
-        datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"),
-    ]
     try:
-        path = "output.mp4"
-        text_to_video(
-            text,
-            sign_language=sign_lang,
-            sign_format=sign_format,
-            output_path=path,
-            codec="mp4v",
-        )
-        request_logger.flag(log)
-        return path
-    except Exception as exc:
-        log[3] = str(exc)
-        request_logger.flag(log)
-        raise gradio.Error(f"Error during translation: {exc}")
-with gradio.Blocks(title=TITLE, css=CUSTOM_CSS) as gradio_app:
-    gradio.Markdown(f"# {TITLE}")
-    gradio.Markdown(DESCRIPTION)
-    with gradio.Row():
-        with gradio.Column():  # Inputs
-            gradio.Markdown("## Select Output Options")
-            with gradio.Row():
-                sign_lang_dropdown = gradio.Dropdown(
-                    choices=[code.value for code in slt.SignLanguageCodes],
-                    value=slt.SignLanguageCodes.PAKISTAN_SIGN_LANGUAGE.value,
-                    label="Sign Language",
-                )
-                output_format_dropdown = gradio.Dropdown(
-                    choices=[
-                        slt.SignFormatCodes.VIDEO.value,
-                        slt.SignFormatCodes.LANDMARKS.value,
-                    ],
-                    value=slt.SignFormatCodes.VIDEO.value,
-                    label="Output Format",
-                )
-            gradio.Markdown("## Input English Text")
-            with gradio.Row():  # Source TextArea
-                source_textbox = gradio.Textbox(
-                    lines=4,
-                    placeholder="Enter English Text Here...",
-                    label="English Sentence",
-                    show_copy_button=True,
-                    elem_id="source-textbox",
-                )
-            with gradio.Row():  # clear/auto-complete/Language Model
-                # We'll keep the language model for English text completion
-                language_model_dropdown = gradio.Dropdown(
-                    choices=[
-                        slt.ModelCodes.TRANSFORMER_LM_EN_SUPPORTED.value,
-                    ],
-                    value=slt.ModelCodes.TRANSFORMER_LM_EN_SUPPORTED.value,
-                    label="Select language model to Generate sample text",
-                )
-                auto_complete_button = gradio.Button(
-                    "Auto-Complete", elem_id="auto-complete-button"
-                )
-                auto_complete_button.click(
-                    auto_complete_text,
-                    inputs=[language_model_dropdown, source_textbox],
-                    outputs=[source_textbox],
-                    api_name=False,
-                )
-                clear_button = gradio.ClearButton(source_textbox, api_name=False)
-            with gradio.Row():  # Translate Button
-                translate_button = gradio.Button("Translate", variant="primary")
-                translate_button.click(
-                    translate,
-                    inputs=[
-                        source_textbox,
-                        sign_lang_dropdown,
-                        output_format_dropdown,
-                    ],
-                    outputs=[output_video],
-                    api_name="translate",
-                )
-        with gradio.Column():  # Outputs
-            gradio.Markdown("## Output Sign Language")
-            output_video = gradio.Video(
                 format="mp4",
-                label="Synthesized Sign Language Video",
                 autoplay=True,
-                show_download_button=True,
-                include_audio=False,
             )
-    gradio.Examples(
-        [
-            ["We are here to use this.", "pakistan-sign-language", "video"],
-            ["I admire art.", "pakistan-sign-language", "landmarks"],
-            ["This is very good.", "pakistan-sign-language", "video"],
-            ["That work was easy.", "pakistan-sign-language", "landmarks"],
-            ["How are you?", "pakistan-sign-language", "video"],
-            ["Five hours.", "pakistan-sign-language", "landmarks"],
-        ],
-        inputs=[
-            source_textbox,
-            sign_lang_dropdown,
-            output_format_dropdown,
         ],
-        outputs=output_video,
     )
-    request_logger.setup(
-        [
-            source_textbox,
-            gradio.Markdown(label="Language", value="en"),
-            sign_lang_dropdown,
-            gradio.Markdown(label="Exception"),
-            gradio.Markdown(label="Timestamp"),
-        ],
-        "flagged",
     )
 if __name__ == "__main__":
-    gradio_app.launch()

 import os
+import sys
+import gradio as gr
+# Install required packages if not already installed
+try:
+    import sign_language_translator as slt
+except ImportError:
+    print("Installing sign-language-translator...")
+    os.system("pip install sign-language-translator --quiet")
+    import sign_language_translator as slt
 TITLE = "English to Sign Language Translator"
+DESCRIPTION = """Enter your English text and click Translate to generate a sign language video.
+The text is preprocessed, tokenized and each token is mapped to a prerecorded video which are concatenated and returned.
+**NOTE:**
+- This model only supports a fixed vocabulary of common words
+- First-time loading may take a moment as the model downloads
+- For best results, use simple sentences with common words
 """
+# Initialize the translation model (this may take some time on first run)
+def get_model():
+    try:
+        return slt.models.ConcatenativeSynthesis("en", "pk-sl", "video")
+    except Exception as e:
+        print(f"Error initializing model: {str(e)}")
+        return None
+# Global model variable
+model = None
+def initialize_model():
+    global model
+    if model is None:
+        model = get_model()
+    return model is not None
+def translate_text(text, format_type):
+    """Translate English text to sign language video"""
+    if not text:
+        return None, "Please enter some text to translate."
+    # Initialize model if not already done
+    if not initialize_model():
+        return None, "Failed to initialize the translation model. Please try again."
     try:
+        # Format the text (lowercase first letter as required by model)
+        text = text[:1].lower() + text[1:] if text else ""
+        # Configure model
+        model.text_language = "en"
+        model.sign_language = "pk-sl"
+        model.sign_format = format_type
+        if format_type == "landmarks":
+            model.sign_embedding_model = "mediapipe-world"
+        # Translate
+        output_path = "output.mp4"
+        sign = model.translate(text)
+        # Save output
+        if isinstance(sign, slt.Landmarks):
+            # Position hands correctly
+            sign.data[:, 33:54, :3] += -sign.data[:, 33:34, :3] + sign.data[:, 15:16, :3]
+            sign.data[:, 54:, :3] += -sign.data[:, 54:55, :3] + sign.data[:, 16:17, :3]
+            sign.save_animation(output_path, overwrite=True)
+        else:
+            sign.save(output_path, overwrite=True, codec="mp4v")
+        return output_path, f"Successfully translated: '{text}'"
+    except Exception as e:
+        error_msg = str(e)
+        print(f"Translation error: {error_msg}")
+        return None, f"Error during translation: {error_msg}"
+# Create the Gradio interface
+with gr.Blocks(title=TITLE) as demo:
+    gr.Markdown(f"# {TITLE}")
+    gr.Markdown(DESCRIPTION)
+    with gr.Row():
+        with gr.Column():
+            # Input area
+            text_input = gr.Textbox(
+                lines=4,
+                placeholder="Enter English text here...",
+                label="English Text"
+            )
+            format_dropdown = gr.Dropdown(
+                choices=["video", "landmarks"],
+                value="video",
+                label="Output Format"
+            )
+            with gr.Row():
+                clear_btn = gr.Button("Clear")
+                translate_btn = gr.Button("Translate", variant="primary")
+            status_output = gr.Textbox(label="Status", interactive=False)
+        with gr.Column():
+            # Output video
+            video_output = gr.Video(
+                label="Sign Language Output",
                 format="mp4",
                 autoplay=True,
+                show_download_button=True
             )
+    # Examples
+    gr.Examples(
+        examples=[
+            ["Hello, how are you?", "video"],
+            ["My name is John.", "video"],
+            ["Nice to meet you.", "video"],
+            ["I want to learn sign language.", "video"]
         ],
+        inputs=[text_input, format_dropdown],
+        outputs=[video_output, status_output],
+        fn=translate_text
     )
+    # Event handlers
+    translate_btn.click(
+        fn=translate_text,
+        inputs=[text_input, format_dropdown],
+        outputs=[video_output, status_output]
+    )
+    clear_btn.click(
+        fn=lambda: ("", "Input cleared"),
+        inputs=None,
+        outputs=[text_input, status_output]
     )
+    # No-op load hook: nothing is initialized here; the model is created lazily on the first translate_text call
+    demo.load(lambda: None, None, None)
+# Launch the app
 if __name__ == "__main__":
+    demo.launch()