# Hugging Face Spaces status at capture time: Running
import os
import re
from datetime import datetime
from typing import Dict

import gradio
import sign_language_translator as slt
# Markdown shown under the page title. Blank lines separate the paragraphs so
# that the renderer does not merge them into a single block of text.
# The action button in the UI is labelled "Translate", so the instructions
# refer to it by that name.
DESCRIPTION = """Enter your text and select languages from the dropdowns, then click Translate to generate a video. [`Library Repository`](https://github.com/sign-language-translator/sign-language-translator)

The text is preprocessed, tokenized and rearranged and then each token is mapped to a prerecorded video which are concatenated and returned. [`Model Code`](https://github.com/sign-language-translator/sign-language-translator/blob/main/sign_language_translator/models/text_to_sign/concatenative_synthesis.py)

> *NOTE*: This model only supports a fixed vocabulary. See the [`*-dictionary-mapping.json`](https://github.com/sign-language-translator/sign-language-datasets/tree/main/parallel_texts) files for supported words.
> This version needs to re-encode the generated video so that will take some extra time after translation.
> Since this is a rule-based model, you will have to add *context* to ambiguous words (e.g. glass(material) vs glass(container)).
""".strip()

# Used both as the browser tab title and as the top-level page heading.
TITLE = "Concatenative Synthesis: Rule Based Text to Sign Language Translator"
# Script injected into the page <head>. It defines `updateTextareaDir`, which
# the UI references by name to switch the source textbox between
# right-to-left and left-to-right depending on the selected text language.
CUSTOM_JS = """<script>
const rtlLanguages = ["ur", "ar"];

function updateTextareaDir(language) {
    const sourceTextarea = document.getElementById("source-textbox").querySelector("textarea");
    if (rtlLanguages.includes(language)) {
        sourceTextarea.setAttribute("dir", "rtl");
    } else {
        sourceTextarea.setAttribute("dir", "ltr");
    }
}
</script>"""
# todo: add dropdown keyboard custom component with key mapping

# Styles the button whose elem_id is "auto-complete-button".
CUSTOM_CSS = """
#auto-complete-button {
    border-color: var(--button-primary-border-color-hover);
}
"""
# Requests are logged to a Hugging Face dataset when a token is configured in
# the environment; otherwise they fall back to gradio's CSV logger.
HF_TOKEN = os.getenv("HF_TOKEN")
request_logger = (
    gradio.HuggingFaceDatasetSaver(
        HF_TOKEN,
        "sltAI/crowdsourced-text-to-sign-language-rule-based-translation-corpus",
    )
    if HF_TOKEN
    else gradio.CSVLogger()
)

# Single shared translator instance; `text_to_video` reassigns its
# `text_language` / `sign_language` attributes before each translation.
translation_model = slt.models.ConcatenativeSynthesis("ur", "pk-sl", "video")

# Cache of auto-complete samplers keyed by model code, filled on first use by
# `auto_complete_text`.
language_models: Dict[str, slt.models.BeamSampling] = {}
def auto_complete_text(model_code: str, text: str):
    """Continue `text` using the language model identified by `model_code`.

    The model is loaded with `slt.get_model` and wrapped in
    `slt.models.BeamSampling` on first use, then cached in `language_models`
    so later calls with the same code reuse it.

    Args:
        model_code: Key of the language model to use.
        text: Text typed so far. Empty or ``None`` starts a fresh sample.

    Returns:
        The completed token sequence joined into one string, without the
        sampler's start/end-of-sequence markers.
    """
    if model_code not in language_models:
        lm = slt.get_model(model_code)
        language_models[model_code] = slt.models.BeamSampling(
            lm,  # type: ignore
            start_of_sequence_token=getattr(lm, "start_of_sequence_token", "<"),  # type: ignore
            end_of_sequence_token=getattr(lm, "end_of_sequence_token", ">"),  # type: ignore
        )

    # todo: better tokenize/detokenize
    # `text or ""` keeps re.split from raising TypeError on None
    # (NOTE(review): the textbox may yield None after being cleared - confirm).
    tokens = [w for w in re.split(r"\b", text or "") if w]

    sampler = language_models[model_code]
    # Presumably lets the sampler add up to 10 tokens beyond the given prefix.
    sampler.max_length = len(tokens) + 10
    completion, _ = sampler.complete(tokens or None)

    # Guard each index access: the completion can be empty, or become empty
    # once the start marker has been removed.
    if completion and completion[0] == sampler.start_of_sequence_token:  # type: ignore
        completion = completion[1:]  # type: ignore
    if completion and completion[-1] == sampler.end_of_sequence_token:  # type: ignore
        completion = completion[:-1]  # type: ignore

    return "".join(completion)
def text_to_video(
    text: str,
    text_language: str,
    sign_language: str,
    output_path: str = "output.mp4",
    codec: str = "h264",  # ToDo: install h264 codec for opencv
):
    """Translate `text` into a sign language video and save it to disk.

    The shared `translation_model` is switched to the requested languages
    before translating, so the setting persists for later calls.

    Args:
        text: Sentence to translate.
        text_language: Code of the language `text` is written in.
        sign_language: Code of the target sign language.
        output_path: File the video is written to; an existing file is
            overwritten.
        codec: Codec name forwarded to the video's `save` method.
    """
    translation_model.text_language = text_language
    translation_model.sign_language = sign_language

    video = translation_model.translate(text)
    video.save(output_path, overwrite=True, codec=codec)
    # ToDo: video.watermark("Sign Language Translator\nAI Generated Video")
def translate(text: str, text_lang: str, sign_lang: str):
    """Translate `text` to a sign language video and log the request.

    Args:
        text: Sentence to translate.
        text_lang: Code of the language `text` is written in.
        sign_lang: Code of the target sign language.

    Returns:
        Path of the generated video file.

    Raises:
        gradio.Error: If the translation itself fails; the original exception
            is attached as the cause.
    """
    # One logged row: text, text language, sign language, exception, timestamp.
    log = [
        text,
        text_lang,
        sign_lang,
        None,  # replaced by the error message when translation fails
        datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"),
    ]
    path = "output.mp4"

    # Only the translation call is guarded, so a failure while logging a
    # successful request is not misreported as a translation error and the
    # row is never flagged twice.
    try:
        text_to_video(text, text_lang, sign_lang, output_path=path, codec="mp4v")
    except Exception as exc:
        log[3] = str(exc)
        request_logger.flag(log)
        raise gradio.Error(f"Error during translation: {exc}") from exc

    request_logger.flag(log)
    return path
# UI definition.
# NOTE(review): the container nesting below was reconstructed from the order
# of the components - confirm the intended layout.
with gradio.Blocks(title=TITLE, head=CUSTOM_JS, css=CUSTOM_CSS) as gradio_app:
    gradio.Markdown(f"# {TITLE}")
    gradio.Markdown(DESCRIPTION)

    with gradio.Row():
        # Input side: source text, auto-complete controls, language selection.
        with gradio.Column():
            gradio.Markdown("## Input Text")
            with gradio.Row():
                with gradio.Column():
                    gradio.Markdown("Write here (in selected language):")
                    source_textbox = gradio.Textbox(
                        lines=1,
                        placeholder="Enter Text Here...",
                        label="Spoken Language Sentence",
                        show_copy_button=True,
                        # CUSTOM_JS looks this element up by id.
                        elem_id="source-textbox",
                    )
                with gradio.Column():
                    gradio.Markdown("Generate sample text instead:")
                    with gradio.Row():
                        language_model_dropdown = gradio.Dropdown(
                            choices=[
                                slt.ModelCodes.MIXER_LM_NGRAM_URDU.value,
                                slt.ModelCodes.TRANSFORMER_LM_UR_SUPPORTED.value,
                            ],
                            value=slt.ModelCodes.MIXER_LM_NGRAM_URDU.value,
                            label="Language Model for auto-complete",
                        )
                    with gradio.Row():
                        clear_button = gradio.ClearButton(
                            source_textbox, api_name=False
                        )
                        auto_complete_button = gradio.Button(
                            "Auto-Complete", elem_id="auto-complete-button"
                        )
                    # The completion is written back into the same textbox.
                    auto_complete_button.click(
                        auto_complete_text,
                        inputs=[language_model_dropdown, source_textbox],
                        outputs=[source_textbox],
                        api_name=False,
                    )

            gradio.Markdown("## Select Languages")
            with gradio.Row():
                text_lang_dropdown = gradio.Dropdown(
                    choices=[code.value for code in slt.TextLanguageCodes],
                    value=slt.TextLanguageCodes.URDU.value,
                    label="Text Language",
                    elem_id="text-lang-dropdown",
                )
                # Browser-side only (no Python fn): the function named here is
                # the one defined in CUSTOM_JS.
                text_lang_dropdown.change(
                    None, inputs=text_lang_dropdown, js="updateTextareaDir"
                )
                sign_lang_dropdown = gradio.Dropdown(
                    choices=[code.value for code in slt.SignLanguageCodes],
                    value=slt.SignLanguageCodes.PAKISTAN_SIGN_LANGUAGE.value,
                    label="Sign Language",
                )
            # todo: sign format: video/landmarks (tabs?)

        # Output side: the synthesized video.
        with gradio.Column():
            gradio.Markdown("## Output Sign Language")
            output_video = gradio.Video(
                format="mp4",
                label="Synthesized Sign Language Video",
                autoplay=True,
                show_download_button=True,
                include_audio=False,
            )

    with gradio.Row():
        translate_button = gradio.Button("Translate", variant="primary")
        translate_button.click(
            translate,
            inputs=[source_textbox, text_lang_dropdown, sign_lang_dropdown],
            outputs=[output_video],
            api_name="translate",
        )

    gradio.Examples(
        [
            ["یہ بہت اچھا ہے۔", "ur", "pakistan-sign-language"],
            ["وہ کام آسان تھا۔", "ur", "pakistan-sign-language"],
            ["पाँच घंटे।", "hi", "pakistan-sign-language"],
            # ["आप कैसे हैं?", "hi", "pakistan-sign-language"],
        ],
        inputs=[source_textbox, text_lang_dropdown, sign_lang_dropdown],
        outputs=output_video,
    )

    # Component order must match the row built in `translate`:
    # text, text language, sign language, exception, timestamp.
    request_logger.setup(
        [
            source_textbox,
            text_lang_dropdown,
            sign_lang_dropdown,
            gradio.Markdown(label="Exception"),
            gradio.Markdown(label="Timestamp"),
        ],
        "flagged",
    )

    # Apply the text direction for the initially selected language on load.
    gradio_app.load(None, inputs=[text_lang_dropdown], js="updateTextareaDir")
# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    gradio_app.launch()