Spaces:
Running
Running
Gundeep Singh
committed on
Commit
·
860d7e4
1
Parent(s):
ebee301
Update auto detect label on language detection
Browse files- .gitignore +1 -1
- app.py +52 -14
- examples.py +14 -0
- iso639_wrapper.py +22 -0
- language_directions.py +19 -18
- project-notes.md +3 -1
- utils.py +17 -1
.gitignore
CHANGED
@@ -1 +1 @@
|
|
1 |
-
*pycache*
|
|
|
1 |
+
*pycache*
|
app.py
CHANGED
@@ -1,15 +1,27 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
from language_directions import *
|
3 |
from transformers import pipeline
|
|
|
4 |
|
5 |
source_lang_dict = get_all_source_languages()
|
6 |
target_lang_dict = {}
|
7 |
source_languages = source_lang_dict.keys()
|
8 |
|
9 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
global target_lang_dict
|
11 |
-
target_lang_dict = get_target_languages(source_lang_dict[
|
12 |
-
target_languages = target_lang_dict.keys()
|
13 |
default_target_value = None
|
14 |
if "English" in target_languages or "english" in target_languages:
|
15 |
default_target_value = "English"
|
@@ -19,16 +31,41 @@ def source_dropdown_changed(source_dropdown, input_text=""):
|
|
19 |
value=default_target_value,
|
20 |
label="Target Language")
|
21 |
return target_dropdown
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
def translate(input_text, source, target):
|
24 |
-
|
|
|
25 |
source, _ = auto_detect_language_code(input_text)
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
|
34 |
with gr.Blocks() as demo:
|
@@ -55,14 +92,15 @@ with gr.Blocks() as demo:
|
|
55 |
value="English",
|
56 |
label="Target Language")
|
57 |
translated_textbox = gr.Textbox(lines=5, placeholder="", label="Translated Text")
|
|
|
58 |
btn = gr.Button("Translate")
|
59 |
-
source_language_dropdown.change(
|
|
|
60 |
btn.click(translate, inputs=[input_textbox,
|
61 |
source_language_dropdown,
|
62 |
target_language_dropdown],
|
63 |
-
outputs=translated_textbox)
|
64 |
-
gr.Examples(
|
65 |
-
inputs=[input_textbox])
|
66 |
|
67 |
if __name__ == "__main__":
|
68 |
demo.launch()
|
|
|
1 |
+
# from responses import start
|
2 |
import gradio as gr
|
3 |
from language_directions import *
|
4 |
from transformers import pipeline
|
5 |
+
from examples import example_sentences
|
6 |
|
7 |
source_lang_dict = get_all_source_languages()
|
8 |
target_lang_dict = {}
|
9 |
source_languages = source_lang_dict.keys()
|
10 |
|
11 |
+
def get_auto_detect_source_dropdown(input_text):
    """Build a source-language dropdown labelled with the detected language.

    Detects the language of ``input_text``, registers a
    ``"Detected - <name>"`` entry in the module-level ``source_lang_dict``
    and returns a dropdown with that entry selected.

    Args:
        input_text: Text whose language should be detected.

    Returns:
        A ``(dropdown, language_name)`` tuple: the new ``gr.Dropdown`` and
        the human-readable name of the detected language.
    """
    source, _ = auto_detect_language_code(input_text)
    language_name = get_name_from_iso_code(source)
    source_dropdown_text = "Detected - " + language_name
    update_source_languages_dict(source_lang_dict, source_dropdown_text)
    # ``source_languages`` is a live ``dict.keys()`` view of a dict that keeps
    # growing; hand the component a list snapshot, the same way the target
    # dropdown receives ``list(target_lang_dict.keys())``.
    source_language_dropdown = gr.Dropdown(choices=list(source_languages),
                                           value=source_dropdown_text,
                                           label="Source Language")
    return source_language_dropdown, language_name
|
20 |
+
|
21 |
+
def get_target_dropdown(source_language_name, input_text):
|
22 |
global target_lang_dict
|
23 |
+
target_lang_dict, source_language = get_target_languages(source_lang_dict[source_language_name], input_text)
|
24 |
+
target_languages = list(target_lang_dict.keys())
|
25 |
default_target_value = None
|
26 |
if "English" in target_languages or "english" in target_languages:
|
27 |
default_target_value = "English"
|
|
|
31 |
value=default_target_value,
|
32 |
label="Target Language")
|
33 |
return target_dropdown
|
34 |
+
|
35 |
+
def get_dropdown_value(dropdown):
    """Return the selected value of a dropdown given as component or string.

    Args:
        dropdown: Either the plain selected value (a ``str``), a
            ``gr.Dropdown`` component, or ``None`` when nothing is selected.

    Returns:
        The string itself for ``str`` input, the ``'value'`` entry of
        ``constructor_args`` for a ``gr.Dropdown``, and ``None`` for anything
        else (previously this case raised ``UnboundLocalError``).
    """
    if dropdown is None or isinstance(dropdown, str):
        return dropdown
    if isinstance(dropdown, gr.Dropdown):
        return dropdown.constructor_args.get('value')
    # Unsupported input type: report "no value" instead of crashing.
    return None
|
41 |
+
|
42 |
+
def get_dropdowns(source_dropdown, input_text):
    """Recompute the source and target dropdowns for the current UI state.

    Args:
        source_dropdown: Current source selection (string value or
            ``gr.Dropdown`` component).
        input_text: Text currently entered by the user.

    Returns:
        A ``(source_dropdown, target_dropdown)`` tuple. The source dropdown
        is replaced by a "Detected - ..." dropdown when auto-detection ran,
        otherwise it is passed through unchanged.
    """
    # A cleared dropdown yields None/""; treat that as "Auto Detect" so the
    # string checks and the dictionary lookup downstream do not crash.
    source_language_name = get_dropdown_value(source_dropdown) or "Auto Detect"

    # Same precedence as the original unparenthesised expression, spelled out:
    # (text present AND "Auto Detect" selected) OR a "Detected - ..." label.
    auto_detect_requested = bool(input_text) and source_language_name == "Auto Detect"
    showing_detected_label = source_language_name.startswith("Detected")
    if auto_detect_requested or showing_detected_label:
        source_dropdown, source_language_name = get_auto_detect_source_dropdown(input_text)

    target_dropdown = get_target_dropdown(source_language_name=source_language_name,
                                          input_text=input_text)
    return source_dropdown, target_dropdown
|
49 |
+
|
50 |
+
def input_changed(source_language_dropdown, input_text=""):
    """Event handler: refresh both dropdowns after the text or source changes.

    Args:
        source_language_dropdown: Current source-language selection.
        input_text: Current content of the input textbox.

    Returns:
        Whatever ``get_dropdowns`` returns for these arguments.
    """
    refreshed_dropdowns = get_dropdowns(source_language_dropdown, input_text)
    return refreshed_dropdowns
|
53 |
|
54 |
def translate(input_text, source, target):
    """Translate ``input_text`` with the matching Helsinki-NLP opus-mt model.

    Args:
        input_text: Text to translate.
        source: Source selection from the UI: a language name,
            ``"Auto Detect"`` or a ``"Detected - ..."`` label.
        target: Human-readable target language name from the UI.

    Returns:
        A ``(translated_text, error_message)`` tuple. Exactly one of the two
        is non-empty: the translation on success, or an error message when a
        ``KeyError`` signals an unsupported translation direction.
    """
    source_readable = source
    if source == "Auto Detect" or source.startswith("Detected"):
        source, _ = auto_detect_language_code(input_text)
    # Map a human-readable name to its code; leave codes untouched.
    source = source_lang_dict.get(source, source)
    # Local name on purpose: do not shadow the module-level target_lang_dict.
    available_targets, _ = get_target_languages(source)
    try:
        # Keep ``target`` readable so the error message below never shows the
        # internal code, whichever line raised the KeyError.
        target_code = available_targets[target]
        model = f"Helsinki-NLP/opus-mt-{source}-{target_code}"
        # NOTE(review): the pipeline is rebuilt on every call; consider
        # caching it per model name if load time becomes a problem.
        pipe = pipeline("translation", model=model)
        translation = pipe(input_text)
        return translation[0]['translation_text'], ""
    except KeyError:
        return "", f"Error: Translation direction {source_readable} to {target} is not supported by Helsinki Translation Models"
|
69 |
|
70 |
|
71 |
with gr.Blocks() as demo:
|
|
|
92 |
value="English",
|
93 |
label="Target Language")
|
94 |
translated_textbox = gr.Textbox(lines=5, placeholder="", label="Translated Text")
|
95 |
+
info_label = gr.HTML("")
|
96 |
btn = gr.Button("Translate")
|
97 |
+
source_language_dropdown.change(input_changed, inputs=[source_language_dropdown, input_textbox], outputs=[source_language_dropdown, target_language_dropdown])
|
98 |
+
input_textbox.change(input_changed, inputs=[source_language_dropdown, input_textbox], outputs=[source_language_dropdown, target_language_dropdown])
|
99 |
btn.click(translate, inputs=[input_textbox,
|
100 |
source_language_dropdown,
|
101 |
target_language_dropdown],
|
102 |
+
outputs=[translated_textbox, info_label])
|
103 |
+
gr.Examples(example_sentences, inputs=[input_textbox])
|
|
|
104 |
|
105 |
if __name__ == "__main__":
|
106 |
demo.launch()
|
examples.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
example_sentences = [
|
2 |
+
"Je te rencontre au café", "Répétez s'il vous plaît.",
|
3 |
+
"The mountains stand tall, embracing the clouds with their majestic peaks.",
|
4 |
+
"सितारों का आकाश में खोया होने का एहसास मन को अद्वितीय सुख देता है।",
|
5 |
+
"ਜਟ ਦਾ ਮੁਕਾਬਲਾ ਦਸ ਮੈਨੂੰ ਕਿਥੇ ਆ ਨੀ।",
|
6 |
+
"Il profumo dei fiori primaverili riempie l'aria, portando gioia e speranza.",
|
7 |
+
"Güneş batarken, gökyüzünü altın rengine boyuyor ve doğayı sihirli bir atmosfere bürüyor.",
|
8 |
+
"De wind fluistert door de bomen, een symfonie van rust en harmonie.",
|
9 |
+
"눈이 하얗게 내리고, 숲은 고요로움으로 가득 차 있습니다.",
|
10 |
+
"הכוכבים מאירים בשמי הלילה, משאירים את הלב פתוח לקסמם.",
|
11 |
+
"Hương hoa lan tỏa trong không khí, mang lại cảm giác êm đềm và sự bình yên.",
|
12 |
+
"Regnet faller mjukt mot marken, skapar en känsla av förnyelse och friskhet.",
|
13 |
+
"Η θάλασσα χτυπά την ακτή με απαλές κύματα, φέρνοντας ηρεμία και γαλήνη στην ψυχή.",
|
14 |
+
]
|
iso639_wrapper.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
from iso639 import Lang, iter_langs
|
|
|
2 |
|
3 |
|
4 |
langs = [lang for lang in iter_langs()]
|
@@ -24,6 +25,27 @@ iso5_name_to_code = {lg.name: lg.pt5 for lg in langs}
|
|
24 |
# https://github.com/Helsinki-NLP/Tatoeba-Challenge/blob/master/README.md#in-more-detail
|
25 |
helsinki_precendence = ["iso3", "iso5", "iso1", "iso2t", "iso2b"]
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
def get_name_from_iso_code(iso_code, precedence=helsinki_precendence):
|
28 |
for code_type in precedence:
|
29 |
if code_type == "iso1" and iso_code in iso1_code_to_name.keys():
|
|
|
1 |
from iso639 import Lang, iter_langs
|
2 |
+
from regex import R
|
3 |
|
4 |
|
5 |
langs = [lang for lang in iter_langs()]
|
|
|
25 |
# https://github.com/Helsinki-NLP/Tatoeba-Challenge/blob/master/README.md#in-more-detail
|
26 |
helsinki_precendence = ["iso3", "iso5", "iso1", "iso2t", "iso2b"]
|
27 |
|
28 |
+
# Display-name overrides: name as produced upstream -> name shown to users.
rename_dict = {"Panjabi": "Punjabi"}


def rename_languages(language):
    """Return the preferred display name for ``language``.

    Names listed in ``rename_dict`` are replaced by their override; every
    other value is returned unchanged.
    """
    return rename_dict.get(language, language)
|
34 |
+
|
35 |
+
def rename_return_value(func):
    """Decorator that applies ``rename_languages`` to a function's result.

    Strings are renamed directly, lists element-wise and dicts value-wise
    (keys are kept). Any other result type is returned untouched.

    Args:
        func: The function whose return value should be post-processed.

    Returns:
        A wrapper with the same call signature and metadata as ``func``.
    """
    # Local import keeps this block self-contained in the module.
    from functools import wraps

    @wraps(func)  # preserve __name__/__doc__ of the decorated function
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        if isinstance(result, str):
            return rename_languages(result)
        if isinstance(result, list):
            return [rename_languages(item) for item in result]
        if isinstance(result, dict):
            return {key: rename_languages(value) for key, value in result.items()}
        return result

    return wrapper
|
47 |
+
|
48 |
+
@rename_return_value
|
49 |
def get_name_from_iso_code(iso_code, precedence=helsinki_precendence):
|
50 |
for code_type in precedence:
|
51 |
if code_type == "iso1" and iso_code in iso1_code_to_name.keys():
|
language_directions.py
CHANGED
@@ -2,12 +2,12 @@ from helsinki_models import helsinki_models, get_clearly_formatted_langauge_dire
|
|
2 |
from iso639_wrapper import get_name_from_iso_code
|
3 |
from language_detection import detect_language
|
4 |
from collections import OrderedDict
|
5 |
-
from utils import convert_keys_to_lowercase
|
6 |
|
7 |
|
8 |
def get_all_source_languages():
|
9 |
"""
|
10 |
-
Returns a human-readable `dict
|
11 |
based on the available models.
|
12 |
"""
|
13 |
source_languages = {}
|
@@ -23,6 +23,9 @@ def get_all_source_languages():
|
|
23 |
{ **{'Auto Detect' : 'Auto Detect'}, **source_languages}
|
24 |
return all_source_langs_including_auto_detect
|
25 |
|
|
|
|
|
|
|
26 |
def get_target_languages(source_language_code, input_text=None):
|
27 |
"""
|
28 |
Returns a human-readable `dict of target languages names to codes`
|
@@ -40,26 +43,24 @@ def get_target_languages(source_language_code, input_text=None):
|
|
40 |
target_language_name = get_name_from_iso_code(target_language)
|
41 |
if target_language_name:
|
42 |
target_languages[target_language_name] = target_language
|
43 |
-
return OrderedDict(sorted(target_languages.items()))
|
44 |
|
45 |
def auto_detect_language_code(input_text):
|
|
|
|
|
46 |
if not input_text:
|
47 |
-
return
|
48 |
-
|
49 |
-
if
|
50 |
-
|
51 |
-
return "unknown", True
|
52 |
-
elif language in list(get_all_source_languages().keys())\
|
53 |
-
or language.lower() in [k.lower() for k in list(get_all_source_languages().keys())]:
|
54 |
-
source_languages_dict = convert_keys_to_lowercase(get_all_source_languages())
|
55 |
-
source_language_code = source_languages_dict.get(language.lower())
|
56 |
-
return source_language_code, False
|
57 |
-
elif language in list(get_all_source_languages().values())\
|
58 |
-
or language.lower() in [k.lower() for k in list(get_all_source_languages().values())]:
|
59 |
-
source_language_code = language
|
60 |
-
return source_language_code, False
|
61 |
else:
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
|
65 |
# Example usage:
|
|
|
2 |
from iso639_wrapper import get_name_from_iso_code
|
3 |
from language_detection import detect_language
|
4 |
from collections import OrderedDict
|
5 |
+
from utils import convert_keys_to_lowercase, match_in_keys, match_in_values
|
6 |
|
7 |
|
8 |
def get_all_source_languages():
|
9 |
"""
|
10 |
+
Returns a human-readable `dict source_languages_names:codes`
|
11 |
based on the available models.
|
12 |
"""
|
13 |
source_languages = {}
|
|
|
23 |
{ **{'Auto Detect' : 'Auto Detect'}, **source_languages}
|
24 |
return all_source_langs_including_auto_detect
|
25 |
|
26 |
+
def update_source_languages_dict(source_languages_dict, auto_detected_language):
    """Register a detected-language label in ``source_languages_dict``.

    The label is stored in place as a key mapped to ``"Auto Detect"``.
    Nothing is returned.
    """
    source_languages_dict.update({auto_detected_language: "Auto Detect"})
|
28 |
+
|
29 |
def get_target_languages(source_language_code, input_text=None):
|
30 |
"""
|
31 |
Returns a human-readable `dict of target languages names to codes`
|
|
|
43 |
target_language_name = get_name_from_iso_code(target_language)
|
44 |
if target_language_name:
|
45 |
target_languages[target_language_name] = target_language
|
46 |
+
return OrderedDict(sorted(target_languages.items())), source_language_code
|
47 |
|
48 |
def auto_detect_language_code(input_text):
    """Detect the source language of ``input_text``.

    Args:
        input_text: Text to analyse; may be empty or ``None``.

    Returns:
        A ``(language, used_default)`` tuple. ``language`` is the match found
        among the available source languages, or ``"en"`` when there is no
        input, detection reports ``"unknown"`` or nothing matches.
        ``used_default`` is ``True`` exactly when that fallback was returned.
    """
    DEFAULT_SOURCE_LANGUAGE = "en"
    if not input_text:
        return DEFAULT_SOURCE_LANGUAGE, True

    language_or_code = detect_language(input_text)
    # Guard against an empty/None detector result as well as "unknown".
    if not language_or_code or language_or_code == "unknown":
        return DEFAULT_SOURCE_LANGUAGE, True

    # Build the source-language mapping once and reuse it for both lookups.
    source_languages = get_all_source_languages()
    # NOTE(review): match_in_keys also prefix-matches names, so a short code
    # can hit an unrelated language name before the values are consulted --
    # confirm what detect_language returns.
    detected_language_string = (
        match_in_keys(source_languages, language_or_code)
        or match_in_values(source_languages, language_or_code)
    )
    if detected_language_string:
        return detected_language_string, False
    return DEFAULT_SOURCE_LANGUAGE, True
|
64 |
|
65 |
|
66 |
# Example usage:
|
project-notes.md
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
# Scope of project
|
2 |
1. Enable translation between multiple languages based on Helsinki models. ✅
|
3 |
2. Enable automatic language detection ✅
|
4 |
-
3. Show error message instead of gradio error
|
|
|
|
|
|
1 |
# Scope of project
|
2 |
1. Enable translation between multiple languages based on Helsinki models. ✅
|
3 |
2. Enable automatic language detection ✅
|
4 |
+
3. Show error message instead of gradio error ✅
|
5 |
+
4. Add examples ✅
|
6 |
+
5. Auto detect on text change ✅
|
utils.py
CHANGED
@@ -1,2 +1,18 @@
|
|
|
|
|
|
|
|
1 |
def convert_keys_to_lowercase(input_dict):
|
2 |
-
return {key.lower(): value for key, value in input_dict.items()}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from functools import cache
|
2 |
+
|
3 |
+
|
4 |
def convert_keys_to_lowercase(input_dict):
    """Return a new dict with every key lower-cased and values unchanged.

    When several keys differ only by case, the value of the last one in
    iteration order is kept.
    """
    lowered = {}
    for key, value in input_dict.items():
        lowered[key.lower()] = value
    return lowered
|
6 |
+
|
7 |
+
def match_in_keys(dictionary, search_string):
    """Look up ``search_string`` among the keys of ``dictionary``.

    Matching is case-insensitive. An exact key match wins; otherwise the
    first key that starts with ``search_string`` is used.

    Args:
        dictionary: Mapping with string keys.
        search_string: Text to look for; may be empty or ``None``.

    Returns:
        The value stored under the matching key, or ``None`` when nothing
        matches or ``search_string`` is empty.
    """
    # An empty prefix would "match" every key and silently return the first
    # entry, so refuse to search for nothing.
    if not search_string:
        return None
    needle = search_string.lower()
    lowercase_dict = {key.lower(): value for key, value in dictionary.items()}
    if needle in lowercase_dict:
        return lowercase_dict[needle]
    for l_key, value in lowercase_dict.items():
        if l_key.startswith(needle):
            return value
    return None
|
14 |
+
|
15 |
+
def match_in_values(dictionary, search_string):
    """Look up ``search_string`` among the values of ``dictionary``.

    The search string is lower-cased before being compared with the stored
    values.

    Args:
        dictionary: Mapping whose values are searched.
        search_string: Text to look for; may be empty or ``None``.

    Returns:
        The matching stored value (i.e. the lower-cased search string), or
        ``None`` when no value matches or ``search_string`` is empty.
    """
    if not search_string:
        return None
    needle = search_string.lower()
    # Return the value as stored rather than the caller's original spelling,
    # so "EN" resolves to the canonical "en".
    if needle in dictionary.values():
        return needle
    return None
|