import os
import re
from datetime import datetime

import gradio
import sign_language_translator as slt
DESCRIPTION = """Enter your English text and click Submit to generate a sign language video. [`Library Repository`](https://github.com/sign-language-translator/sign-language-translator) | |
The text is preprocessed, tokenized and rearranged and then each token is mapped to a prerecorded video which are concatenated and returned. [`Model Code`](https://github.com/sign-language-translator/sign-language-translator/blob/main/sign_language_translator/models/text_to_sign/concatenative_synthesis.py) | |
> **NOTE** | |
> - This model only supports a fixed vocabulary. See the [`*-dictionary-mapping.json`](https://github.com/sign-language-translator/sign-language-datasets/tree/main/parallel_texts) files for supported words. | |
> - This version needs to re-encode the generated video so that will take some extra time after translation. | |
> - Since this is a rule-based model, you will have to add **context** to ambiguous words (e.g. glass(material) vs glass(container)). | |
""".strip() | |
TITLE = "English to Sign Language Translator" | |
CUSTOM_CSS = """ | |
#auto-complete-button { | |
border-color: var(--button-primary-border-color-hover); | |
} | |
""" | |

HF_TOKEN = os.getenv("HF_TOKEN")
# Log translation requests to a Hugging Face dataset when a token is available,
# otherwise fall back to a local CSV file.
request_logger = (
    gradio.HuggingFaceDatasetSaver(
        HF_TOKEN,
        "sltAI/crowdsourced-text-to-sign-language-rule-based-translation-corpus",
    )
    if HF_TOKEN
    else gradio.CSVLogger()
)

translation_model = slt.models.ConcatenativeSynthesis("en", "pk-sl", "video")
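# The model above implements the pipeline described in DESCRIPTION: the text is
# tokenized and rearranged, each supported token is mapped to a prerecorded clip,
# and the clips are concatenated into one sign. A minimal sketch of using it
# directly, outside the Gradio UI (the attributes and calls mirror text_to_video()
# below; the sample sentence and output file name are illustrative only):
#
#     translation_model.text_language = "en"
#     translation_model.sign_language = "pakistan-sign-language"
#     translation_model.sign_format = "video"
#     sign = translation_model.translate("this is very good.")
#     sign.save("example.mp4", overwrite=True)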

language_models = {}


def auto_complete_text(model_code: str, text: str):
    # Lazily load and cache the selected language model, wrapped in beam sampling.
    if model_code not in language_models:
        lm = slt.get_model(model_code)
        language_models[model_code] = slt.models.BeamSampling(
            lm,  # type: ignore
            start_of_sequence_token=getattr(lm, "start_of_sequence_token", "<"),  # type: ignore
            end_of_sequence_token=getattr(lm, "end_of_sequence_token", ">"),  # type: ignore
        )

    # Split the prompt on word boundaries and let the model continue it.
    tokens = [w for w in re.split(r"\b", text) if w]
    lm = language_models[model_code]
    lm.max_length = len(tokens) + 10
    completion, _ = lm.complete(tokens or None)

    # Drop the start/end-of-sequence tokens before joining the completion.
    if completion[0] == lm.start_of_sequence_token:  # type: ignore
        completion = completion[1:]  # type: ignore
    if completion[-1] == lm.end_of_sequence_token:  # type: ignore
        completion = completion[:-1]  # type: ignore

    new_text = "".join(completion)
    return new_text
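
# Example call (illustrative prompt; the model code matches the dropdown choice in
# the UI below, and the completion depends on the downloaded language model):
#
#     auto_complete_text(slt.ModelCodes.TRANSFORMER_LM_EN_SUPPORTED.value, "how are")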


def text_to_video(
    text: str,
    sign_language: str = "pakistan-sign-language",
    sign_format: str = "video",
    output_path: str = "output.mp4",
    codec="h264",  # TODO: install the h264 codec for OpenCV
):
    translation_model.text_language = "en"  # the UI only accepts English input
    translation_model.sign_language = sign_language
    translation_model.sign_format = sign_format
    if sign_format == "landmarks":
        translation_model.sign_embedding_model = "mediapipe-world"

    # Lowercase the first letter so the sentence-initial word matches its
    # lowercase vocabulary entry.
    text = text[:1].lower() + text[1:]
    sign = translation_model.translate(text)

    if isinstance(sign, slt.Landmarks):
        # Shift each hand's landmark block so its first landmark (the hand wrist)
        # coincides with the corresponding pose wrist.
        sign.data[:, 33:54, :3] += -sign.data[:, 33:34, :3] + sign.data[:, 15:16, :3]
        sign.data[:, 54:, :3] += -sign.data[:, 54:55, :3] + sign.data[:, 16:17, :3]
        sign.save_animation(output_path, overwrite=True)
    else:
        sign.save(output_path, overwrite=True, codec=codec)
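
# Example call (illustrative arguments; writes the synthesized sign to sample.mp4
# and is subject to the vocabulary limits noted in DESCRIPTION):
#
#     text_to_video(
#         "how are you?",
#         sign_language="pakistan-sign-language",
#         sign_format="landmarks",
#         output_path="sample.mp4",
#     )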


def translate(text: str, sign_lang: str, sign_format: str):
    # Columns match request_logger.setup() below: text, text language, sign
    # language, exception (filled in on failure) and timestamp.
    log = [
        text,
        "en",
        sign_lang,
        None,
        datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"),
    ]
    try:
        path = "output.mp4"
        text_to_video(
            text,
            sign_language=sign_lang,
            sign_format=sign_format,
            output_path=path,
            codec="mp4v",
        )
        request_logger.flag(log)
        return path
    except Exception as exc:
        log[3] = str(exc)
        request_logger.flag(log)
        raise gradio.Error(f"Error during translation: {exc}")


with gradio.Blocks(title=TITLE, css=CUSTOM_CSS) as gradio_app:
    gradio.Markdown(f"# {TITLE}")
    gradio.Markdown(DESCRIPTION)

    with gradio.Row():
        with gradio.Column():  # Inputs
            gradio.Markdown("## Select Output Options")
            with gradio.Row():
                sign_lang_dropdown = gradio.Dropdown(
                    choices=[code.value for code in slt.SignLanguageCodes],
                    value=slt.SignLanguageCodes.PAKISTAN_SIGN_LANGUAGE.value,
                    label="Sign Language",
                )
                output_format_dropdown = gradio.Dropdown(
                    choices=[
                        slt.SignFormatCodes.VIDEO.value,
                        slt.SignFormatCodes.LANDMARKS.value,
                    ],
                    value=slt.SignFormatCodes.VIDEO.value,
                    label="Output Format",
                )

            gradio.Markdown("## Input English Text")
            with gradio.Row():  # Source TextArea
                source_textbox = gradio.Textbox(
                    lines=4,
                    placeholder="Enter English Text Here...",
                    label="English Sentence",
                    show_copy_button=True,
                    elem_id="source-textbox",
                )
            with gradio.Row():  # Clear / Auto-complete / Language Model
                # Language model used to suggest/auto-complete English text.
                language_model_dropdown = gradio.Dropdown(
                    choices=[
                        slt.ModelCodes.TRANSFORMER_LM_EN_SUPPORTED.value,
                    ],
                    value=slt.ModelCodes.TRANSFORMER_LM_EN_SUPPORTED.value,
                    label="Select language model to generate sample text",
                )
                auto_complete_button = gradio.Button(
                    "Auto-Complete", elem_id="auto-complete-button"
                )
                auto_complete_button.click(
                    auto_complete_text,
                    inputs=[language_model_dropdown, source_textbox],
                    outputs=[source_textbox],
                    api_name=False,
                )
                clear_button = gradio.ClearButton(source_textbox, api_name=False)

            with gradio.Row():  # Translate Button
                # The click handler is wired up below, after output_video is created.
                translate_button = gradio.Button("Translate", variant="primary")

        with gradio.Column():  # Outputs
            gradio.Markdown("## Output Sign Language")
            output_video = gradio.Video(
                format="mp4",
                label="Synthesized Sign Language Video",
                autoplay=True,
                show_download_button=True,
                include_audio=False,
            )

    # Wire the Translate button only here, after output_video exists; referencing
    # the component before it is created would raise a NameError.
    translate_button.click(
        translate,
        inputs=[
            source_textbox,
            sign_lang_dropdown,
            output_format_dropdown,
        ],
        outputs=[output_video],
        api_name="translate",
    )

    gradio.Examples(
        [
            ["We are here to use this.", "pakistan-sign-language", "video"],
            ["I admire art.", "pakistan-sign-language", "landmarks"],
            ["This is very good.", "pakistan-sign-language", "video"],
            ["That work was easy.", "pakistan-sign-language", "landmarks"],
            ["How are you?", "pakistan-sign-language", "video"],
            ["Five hours.", "pakistan-sign-language", "landmarks"],
        ],
        inputs=[
            source_textbox,
            sign_lang_dropdown,
            output_format_dropdown,
        ],
        outputs=output_video,
    )

    request_logger.setup(
        [
            source_textbox,
            gradio.Markdown(label="Language", value="en"),
            sign_lang_dropdown,
            gradio.Markdown(label="Exception"),
            gradio.Markdown(label="Timestamp"),
        ],
        "flagged",
    )


if __name__ == "__main__":
    gradio_app.launch()