Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,43 +1,49 @@
|
|
1 |
# 1. Libraries
|
|
|
2 |
import gradio as gr
|
3 |
import torch
|
4 |
from transformers import AutoModelForSeq2SeqLM, NllbTokenizer
|
5 |
import pandas as pd
|
6 |
-
|
|
|
7 |
|
8 |
# 2. Constants
|
9 |
-
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
-
# LANGUAGE = pd.DataFrame({"language": ["Къарачай-Малкъар тил", "Русский язык", "English", "Türk dili"], "token": ["krc_Cyrl", "rus_Cyrl", "eng_Latn", "tur_Latn"]})
|
13 |
LANGUAGE = pd.DataFrame({"language": ["Къарачай-Малкъар тил", "Русский язык"], "token": ["krc_Cyrl", "rus_Cyrl"]})
|
14 |
DIALECT = pd.DataFrame({"dialect": ["дж\ч", "ж\ч", "з\ц"], "short_name": ["qrc", "hlm", "mqr"]})
|
15 |
-
TYPE = pd.DataFrame({"krc": ["Кёчюрюўчю", "Сёзлюк"], "rus": ["Переводчик", "Словарь"], "eng": ["Translator", "Dictionary"], "tur": ["Çevirmen", "Sözlük
|
16 |
|
17 |
SYSTEM_LANG = "rus"
|
18 |
-
# NAMES = pd.DataFrame({
|
19 |
-
# "id": ["title", "from", "to", "your_sent", "transl_sent", "dialect", "translate", "annotation"],
|
20 |
-
# "krc": ["# Къарачай-Малкъар кёчюрюўчю", "тилден", "тилге", "Мында джаз...", "Кёчюрюлгени", "Къарачай-Малкъарны диалекти", "Кёчюр","Къарачай-Малкъар тилде биринчи кёчюрюўчюдю. [Богдан Теўуналаны](https://t.me/bogdan_tewunalany), [Али Берберлени](https://t.me/ali_berberov) къурагъандыла\n\nМодель Орус бла Къарачай-Малкъар тилледе юйрене тургъаны себебли, Къарачай-Малкъар кёчюрюў башха тиллеге да осал болургъа боллукъду."],
|
21 |
-
# "rus": ["# Карачаево-Балкарский переводчик", "из", "на", "Напишите здесь...", "Переведённый текст", "Карачаево-Балкарский диалект", "Перевести","Первый переводчик на карачаево-балкарский язык. Создан [Богданом Теунаевым](https://t.me/bogdan_tewunalany), [Али Берберовым](https://t.me/ali_berberov)\n\nТак как модель обучалась на парах Русский и Карачаево-Балкарский, то Карачаево-Балкарский перевод для остальных языков может быть хуже."],
|
22 |
-
# "tur": ["# Karaçay-Malkar tercümanı", "dilden", "dile", "Buraya yaz...", "Çevrilmiş metin burada", "Karaçay-Malkar lehçesi", "Tercüme edin", "İlk çevirmen. [Bogdan Tewunalanı](https://t.me/bogdan_tewunalany), [Ali Berberov](https://t.me/ali_berberov) tarafından oluşturuldu\n\nModel Rusça ve Karaçay-Malkar çiftleri halinde eğitildiğinden, diğer diller için Karaçay-Malkar çevirisi daha kötü olabilir."],
|
23 |
-
# "eng": ["# Qarachay-Malqar translator", "from", "to", "Write here...", "Translated text is here", "Qarachay-Malqar dialect", "Translate", "The first translator. Created by [Bogdan Tewunalany](https://t.me/bogdan_tewunalany), [Ali Berberov](https://t.me/ali_berberov)\n\nSince the model was trained in pairs of Russian and Qarachay-Malqar, the Qarachay-Malqar translation for other languages may be worse."]
|
24 |
-
# })
|
25 |
NAMES = pd.DataFrame({
|
26 |
-
"id": ["title", "type", "from", "to", "your_sent", "transl_sent", "dialect", "translate", "annotation", "word_absence"],
|
27 |
-
"krc": ["# Къарачай-Малкъар сёзлюк бла кёчюрюўчю", "Тюрлюсю", "тилден", "тилге", "Мында джаз...", "Кёчюрюлгени", "Къарачай-Малкъарны диалекти", "Кёчюр","Къарачай-малкъар, орус тиллени арасында биринчи кёчюрюўчюдю. Сёзлюк да ичине салыннганды.\n\n[Богдан Теўуналаны](https://t.me/bogdan_tewunalany), [Али Берберлени](https://t.me/ali_berberov) къурагъандыла\n\nСоинвестированиени эмда спонсорлукъ болушлукъну юсюнден [Али Берберовгъа](https://t.me/ali_berberov) соругъуз", "Сорулгъаны сёзлюкде табылмагъанды."],
|
28 |
-
"rus": ["# Карачаево-балкарский словарь и переводчик", "Тип", "из", "на", "Напишите здесь...", "Переведённый текст", "Карачаево-балкарский диалект", "Перевести","Первый переводчик между карачаево-балкарским и русским языками.
|
29 |
-
"tur": ["# Karaçayca-Balkarca sözlük ve çevirmen", "Tür", "dilden", "dile", "Buraya yaz...", "Çevrilmiş metin burada", "Karaçay-Malkar lehçesi", "Tercüme edin", "Karaçay-Balkarca ve Rusça dilleri arasındaki ilk çevirmen. Tek tek kelimeler veya kısa ifadeler için bir sözlük de yerleşiktir.\n\nGeliştiriciler: [Bogdan Tewunalanı](https://t.me/bogdan_tewunalany), [Ali Berberov](https://t.me/ali_berberov)\n\nOrtak yatırım ve sponsorluk ile ilgili sorularınız için [Ali Berberov](https://t.me/ali_berberov) ile iletişime geçin", "Sorge sözlükte bulunmuyor."],
|
30 |
-
"eng": ["# Qarachay-Malqar dictionary and translator", "Type", "from", "to", "Write here...", "Translated text is here", "Qarachay-Malqar dialect", "Translate", "The first translator between Qarachay-Malqar and Russian languages.
|
31 |
})
|
32 |
|
33 |
|
34 |
-
|
35 |
-
|
36 |
-
FILEPATH_SOURCE_PREPARED = "1.Data/Dictionary"
|
37 |
-
# dictionary = pd.read_csv("%s/dictionary.csv" % FILEPATH_SOURCE_PREPARED, sep = ";")
|
38 |
|
39 |
# 3. Upload
|
40 |
-
|
|
|
41 |
dictionary = pd.DataFrame(dictionary['train'])
|
42 |
|
43 |
dictionary["soz"] = dictionary.soz.str.upper()
|
@@ -47,9 +53,17 @@ dictionary["belgi_l"] = dictionary.belgi.str.lower()
|
|
47 |
dictionary_qm = dictionary[dictionary.til == "krc"]
|
48 |
dictionary_ru = dictionary[dictionary.til == "rus"]
|
49 |
|
|
|
|
|
|
|
50 |
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
# 4. Fix tokenizer
|
55 |
def fixTokenizer(tokenizer, new_lang='krc_Cyrl'):
|
@@ -362,9 +376,9 @@ def translatePy(text, src_lang='rus_Cyrl', tgt_lang='krc_Cyrl',
|
|
362 |
text, return_tensors='pt', padding=True, truncation=True,
|
363 |
max_length=max_input_length
|
364 |
)
|
365 |
-
|
366 |
-
result =
|
367 |
-
**inputs.to(
|
368 |
forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
|
369 |
max_new_tokens=int(a + b * inputs.input_ids.shape[1]),
|
370 |
num_beams=num_beams, **kwargs
|
@@ -372,22 +386,41 @@ def translatePy(text, src_lang='rus_Cyrl', tgt_lang='krc_Cyrl',
|
|
372 |
return tokenizer.batch_decode(result, skip_special_tokens=True)[0]
|
373 |
|
374 |
|
375 |
-
def
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
381 |
|
382 |
-
|
|
|
383 |
|
384 |
-
|
385 |
-
|
|
|
|
|
386 |
|
387 |
-
|
388 |
|
389 |
# 7. Dictionary function
|
390 |
-
def dictionaryDisp(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
391 |
str_l = text.lower()
|
392 |
filter_ = r"\W+" + str_l + r"|^" + str_l
|
393 |
|
@@ -416,39 +449,26 @@ def dictionaryDisp(from_, text):
|
|
416 |
sozluk = [x.soz + " ----- " + x.belgi + "\n\n----------\n\n" for x in sozluk.itertuples()]
|
417 |
sozluk = "".join(sozluk)
|
418 |
|
|
|
|
|
|
|
419 |
return sozluk
|
420 |
# len(sozluk)
|
421 |
|
422 |
|
423 |
-
# 8.
|
424 |
-
def
|
425 |
-
|
426 |
-
|
427 |
-
if dialect == "" or dialect is None:
|
428 |
-
dialect = "дж\ч"
|
429 |
-
if from_ == "" or from_ is None:
|
430 |
-
from_ = "Русский язык"
|
431 |
-
if to == "" or to is None:
|
432 |
-
to = "Къарачай-Малкъар тил"
|
433 |
-
if type_ == "" or type_ is None:
|
434 |
-
type_ = "Кёчюрюўчю"
|
435 |
-
type_col = "krc"
|
436 |
-
|
437 |
-
from_ = "".join(LANGUAGE[LANGUAGE.language == from_].token.to_list())
|
438 |
-
to = "".join(LANGUAGE[LANGUAGE.language == to].token.to_list())
|
439 |
-
dialect = "".join(DIALECT[DIALECT.dialect == dialect].short_name.to_list())
|
440 |
-
type_ = "".join(TYPE[TYPE[type_col] == type_].short_name.to_list())
|
441 |
-
|
442 |
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
return(str_)
|
452 |
|
453 |
# 9. Definition ui
|
454 |
_title = "".join(NAMES[NAMES.id == "title"][SYSTEM_LANG].to_list())
|
@@ -456,49 +476,77 @@ _type = "".join(NAMES[NAMES.id == "type"][SYSTEM_LANG].to_list())
|
|
456 |
_from = "".join(NAMES[NAMES.id == "from"][SYSTEM_LANG].to_list())
|
457 |
_to = "".join(NAMES[NAMES.id == "to"][SYSTEM_LANG].to_list())
|
458 |
_your_sent = "".join(NAMES[NAMES.id == "your_sent"][SYSTEM_LANG].to_list())
|
|
|
459 |
_transl_sent = "".join(NAMES[NAMES.id == "transl_sent"][SYSTEM_LANG].to_list())
|
460 |
_dialect = "".join(NAMES[NAMES.id == "dialect"][SYSTEM_LANG].to_list())
|
461 |
_translate = "".join(NAMES[NAMES.id == "translate"][SYSTEM_LANG].to_list())
|
462 |
_annotation = "".join(NAMES[NAMES.id == "annotation"][SYSTEM_LANG].to_list())
|
|
|
463 |
|
|
|
464 |
with gr.Blocks() as demo:
|
465 |
gr.Markdown(_title)
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
choices = LANGUAGE.language.to_list(), label=_to, value = "
|
482 |
-
|
483 |
dialect = gr.Dropdown(
|
484 |
-
choices = DIALECT.dialect.to_list(), label=_dialect, value = "дж\ч")
|
485 |
-
|
486 |
-
with gr.Row():
|
487 |
-
with gr.Column():
|
488 |
-
text_input = gr.Textbox(lines=15, placeholder=_your_sent, label = "", show_copy_button=True)
|
489 |
-
|
490 |
-
with gr.Column():
|
491 |
-
text_output = gr.Textbox(lines=15, placeholder=_transl_sent, label = "", autoscroll=False, show_copy_button=True)
|
492 |
-
|
493 |
-
text_button = gr.Button(_translate, variant = 'primary')
|
494 |
-
|
495 |
-
text_button.click(out, inputs=[text_input, choice_input, choice_output, dialect, choice_type], outputs=[text_output]) # text, from, to, dialect
|
496 |
-
|
497 |
-
gr.Markdown(_annotation)
|
498 |
|
499 |
-
|
500 |
-
|
501 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
502 |
|
|
|
|
|
|
|
|
|
|
|
503 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
504 |
|
|
|
|
|
|
|
|
|
|
1 |
# 1. Libraries
|
2 |
+
from datasets import load_dataset
|
3 |
import gradio as gr
|
4 |
import torch
|
5 |
from transformers import AutoModelForSeq2SeqLM, NllbTokenizer
|
6 |
import pandas as pd
|
7 |
+
import random
|
8 |
+
import string
|
9 |
|
10 |
# 2. Constants
|
11 |
+
# Translation
|
12 |
+
MODEL_TRANSLATE_PATH = 'TSjB/NLLB-201-600M-QM-V2'
|
13 |
+
|
14 |
+
# Dictionary
|
15 |
+
DATA_DICTIONARY_PATH = "TSjB/dictionary_krc_rus"
|
16 |
+
OUTPUT_ROW_BY_EVERY_DICTIONARY = 15
|
17 |
+
|
18 |
+
# TTS
|
19 |
+
LANGUAGE_KRC_TTS = 'cyrillic'
|
20 |
+
MODEL_ID_KRC_TTS = 'v4_cyrillic'
|
21 |
+
|
22 |
+
SAMPLE_RATE_TTS = 48000
|
23 |
+
SPEAKER_KRC_TTS = 'b_krc'
|
24 |
+
|
25 |
+
REPO_TTS_PATH = "snakers4/silero-models"
|
26 |
+
MODEL_TTS_PATH = "silero_tts"
|
27 |
|
|
|
28 |
LANGUAGE = pd.DataFrame({"language": ["Къарачай-Малкъар тил", "Русский язык"], "token": ["krc_Cyrl", "rus_Cyrl"]})
|
29 |
# Dialect choices offered in the UI ("dialect") and the short code each one maps to
# ("short_name"). The backslashes are escaped explicitly: "\ч" and "\ц" are not valid
# escape sequences, so the unescaped form only works by accident and triggers a
# warning on recent Python versions. The resulting string values are unchanged.
DIALECT = pd.DataFrame({"dialect": ["дж\\ч", "ж\\ч", "з\\ц"], "short_name": ["qrc", "hlm", "mqr"]})
|
30 |
+
TYPE = pd.DataFrame({"krc": ["Кёчюрюўчю", "Сёзлюк", "Сёлешиўчю"], "rus": ["Переводчик", "Словарь", "Озвучка"], "eng": ["Translator", "Dictionary", "Voice"], "tur": ["Çevirmen", "Sözlük", "Seslendirme"], "short_name": ["translator", "dictionary", "tts"]})
|
31 |
|
32 |
SYSTEM_LANG = "rus"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
# UI text table. The "id" column names each interface element; every other column
# holds the text for that element in one interface language (krc, rus, tur, eng).
# All lists are aligned by position with "id", so they must keep the same length.
NAMES = pd.DataFrame({
    "id": [
        "title",
        "type",
        "from",
        "to",
        "your_sent",
        "your_sent_tts",
        "transl_sent",
        "dialect",
        "translate",
        "annotation",
        "word_absence",
        "sound",
    ],
    "krc": [
        "# Къарачай-Малкъар сёзлюк бла кёчюрюўчю",
        "Тюрлюсю",
        "тилден",
        "тилге",
        "Мында джаз...",
        "Къарачай-Малкъарча мында джаз...",
        "Кёчюрюлгени",
        "Къарачай-Малкъарны диалекти",
        "Кёчюр",
        "Къарачай-малкъар, орус тиллени арасында биринчи кёчюрюўчюдю. Сёзлюк да эмда Къарачай-Малкъар сёлешиўчю ичине салыннганды.\n\n[Богдан Теўуналаны](https://t.me/bogdan_tewunalany), [Али Берберлени](https://t.me/ali_berberov) къурагъандыла\n\nСоинвестированиени эмда спонсорлукъ болушлукъну юсюнден [Али Берберовгъа](https://t.me/ali_berberov) соругъуз",
        "Сорулгъаны сёзлюкде табылмагъанды.",
        "Сёлешдир",
    ],
    "rus": [
        "# Карачаево-балкарский словарь и переводчик",
        "Тип",
        "из",
        "на",
        "Напишите здесь...",
        "Напиши здесь по-карачаево-балкарски...",
        "Переведённый текст",
        "Карачаево-балкарский диалект",
        "Перевести",
        "Первый переводчик между карачаево-балкарским и русским языками. Встроен словарь для отдельных слов или коротких фраз и озвучка карачаево-балкарского текста.\n\nРазработчики: [Богдан Теунаев](https://t.me/bogdan_tewunalany), [Али Берберов](https://t.me/ali_berberov)\n\nПо вопросам соинвестирования и спонсорской поддержки обращайтесь к [Али Берберову](https://t.me/ali_berberov)",
        "Запрашиваемое в словаре не найдено.",
        "Озвучить",
    ],
    "tur": [
        "# Karaçayca-Balkarca sözlük ve çevirmen",
        "Tür",
        "dilden",
        "dile",
        "Buraya yaz...",
        "Buraya Karaçay-Balkarca yaz...",
        "Çevrilmiş metin burada",
        "Karaçay-Malkar lehçesi",
        "Tercüme edin",
        "Karaçay-Balkarca ve Rusça dilleri arasındaki ilk çevirmen. Tek tek kelimeler veya kısa ifadeler için bir sözlük ve Karaçay-Balkar metninin seslendirmesi de yerleşiktir.\n\nGeliştiriciler: [Bogdan Tewunalanı](https://t.me/bogdan_tewunalany), [Ali Berberov](https://t.me/ali_berberov)\n\nOrtak yatırım ve sponsorluk ile ilgili sorularınız için [Ali Berberov](https://t.me/ali_berberov) ile iletişime geçin",
        "Sorge sözlükte bulunmuyor.",
        "Ses vermek",
    ],
    "eng": [
        "# Qarachay-Malqar dictionary and translator",
        "Type",
        "from",
        "to",
        "Write here...",
        "Write here in Karachay-Balkar...",
        "Translated text is here",
        "Qarachay-Malqar dialect",
        "Translate",
        # The contact link must have no space between "]" and "(", otherwise Markdown
        # renders it as plain text instead of a link.
        "The first translator between Qarachay-Malqar and Russian languages. There is also a built-in dictionary for individual words or short phrases and voice acting of the Karachay-Balkar text.\n\nDevelopers: [Bogdan Tewunalany](https://t.me/bogdan_tewunalany), [Ali Berberov](https://t.me/ali_berberov)\n\nFor co-investment and sponsorship, please contact [Ali Berberov](https://t.me/ali_berberov)",
        "The requested was not found in the dictionary.",
        "Voice over",
    ],
})
|
40 |
|
41 |
|
42 |
+
device = torch.device('cpu')
|
|
|
|
|
|
|
43 |
|
44 |
# 3. Upload
|
45 |
+
# Dictionary
|
46 |
+
dictionary = load_dataset(DATA_DICTIONARY_PATH)
|
47 |
dictionary = pd.DataFrame(dictionary['train'])
|
48 |
|
49 |
dictionary["soz"] = dictionary.soz.str.upper()
|
|
|
53 |
dictionary_qm = dictionary[dictionary.til == "krc"]
|
54 |
dictionary_ru = dictionary[dictionary.til == "rus"]
|
55 |
|
56 |
+
# Translation
|
57 |
+
tokenizer = NllbTokenizer.from_pretrained(MODEL_TRANSLATE_PATH)
|
58 |
+
model_translate = AutoModelForSeq2SeqLM.from_pretrained(MODEL_TRANSLATE_PATH)
|
59 |
|
60 |
+
# TTS
|
61 |
+
model_tts, _ = torch.hub.load(repo_or_dir = REPO_TTS_PATH,
|
62 |
+
model = MODEL_TTS_PATH,
|
63 |
+
language = LANGUAGE_KRC_TTS,
|
64 |
+
speaker = MODEL_ID_KRC_TTS)
|
65 |
+
|
66 |
+
model_tts.to(device)
|
67 |
|
68 |
# 4. Fix tokenizer
|
69 |
def fixTokenizer(tokenizer, new_lang='krc_Cyrl'):
|
|
|
376 |
text, return_tensors='pt', padding=True, truncation=True,
|
377 |
max_length=max_input_length
|
378 |
)
|
379 |
+
model_translate.eval() # turn off training mode
|
380 |
+
result = model_translate.generate(
|
381 |
+
**inputs.to(model_translate.device),
|
382 |
forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
|
383 |
max_new_tokens=int(a + b * inputs.input_ids.shape[1]),
|
384 |
num_beams=num_beams, **kwargs
|
|
|
386 |
return tokenizer.batch_decode(result, skip_special_tokens=True)[0]
|
387 |
|
388 |
|
389 |
+
def translateDisp(text, from_, to, dialect):
    """Translate ``text`` using the language and dialect labels picked in the UI.

    Args:
        text: Text to translate.
        from_: Source language label as listed in ``LANGUAGE.language``.
            Empty or ``None`` falls back to ``LANGUAGE.language[1]``.
        to: Target language label as listed in ``LANGUAGE.language``.
            Empty or ``None`` falls back to ``LANGUAGE.language[0]``.
        dialect: Dialect label as listed in ``DIALECT.dialect``.
            Empty or ``None`` falls back to ``DIALECT.dialect[0]``.

    Returns:
        The value produced by ``translatePy``, passed through ``fromModel``
        when the target token is ``'krc_Cyrl'``.
    """
    # Substitute defaults for any dropdown that delivered no value.
    if dialect is None or dialect == "":
        dialect = DIALECT.dialect[0]
    if from_ is None or from_ == "":
        from_ = LANGUAGE.language[1]
    if to is None or to == "":
        to = LANGUAGE.language[0]

    # Map the display labels to model tokens / dialect codes. A label that is
    # not present in the table yields an empty string.
    source_token = "".join(LANGUAGE[LANGUAGE.language == from_].token.to_list())
    target_token = "".join(LANGUAGE[LANGUAGE.language == to].token.to_list())
    dialect_code = "".join(DIALECT[DIALECT.dialect == dialect].short_name.to_list())

    # Karachay-Balkar input goes through toModel before translation.
    if source_token == 'krc_Cyrl':
        text = toModel(text)

    translated = translatePy(text, src_lang=source_token, tgt_lang=target_token)

    # Karachay-Balkar output goes through fromModel with the selected dialect.
    if target_token == 'krc_Cyrl':
        translated = fromModel(translated, dialect=dialect_code)

    return translated
|
414 |
|
415 |
# 7. Dictionary function
|
416 |
+
def dictionaryDisp(text, from_):
|
417 |
+
|
418 |
+
if from_ == "" or from_ is None:
|
419 |
+
from_ = LANGUAGE.language[1] # "Русский язык"
|
420 |
+
|
421 |
+
from_ = "".join(LANGUAGE[LANGUAGE.language == from_].token.to_list())
|
422 |
+
|
423 |
+
|
424 |
str_l = text.lower()
|
425 |
filter_ = r"\W+" + str_l + r"|^" + str_l
|
426 |
|
|
|
449 |
sozluk = [x.soz + " ----- " + x.belgi + "\n\n----------\n\n" for x in sozluk.itertuples()]
|
450 |
sozluk = "".join(sozluk)
|
451 |
|
452 |
+
if(len(sozluk) == 0):
|
453 |
+
sozluk = NAMES[NAMES.id == "word_absence"][SYSTEM_LANG].values[0]
|
454 |
+
|
455 |
return sozluk
|
456 |
# len(sozluk)
|
457 |
|
458 |
|
459 |
+
# 8. Voice function
|
460 |
+
def tts(text):
    """Synthesize ``text`` into a WAV file and return the path of that file.

    Args:
        text: Text to voice.

    Returns:
        Path of the generated ``.wav`` file, or ``None`` when ``text`` is
        ``None``, empty or whitespace-only (nothing is synthesized then).
    """
    # Local import keeps this block self-contained; the file does not import
    # tempfile at module level.
    import tempfile

    # Nothing to voice: skip the model call instead of handing it blank input.
    if text is None or not text.strip():
        return None

    # Reserve a unique file in the system temp directory. This avoids name
    # collisions between requests and keeps generated audio out of the
    # application's working directory. The handle is closed before the model
    # writes to the path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        file_voice = handle.name

    model_tts.save_wav(
        audio_path=file_voice,
        text=text,
        speaker=SPEAKER_KRC_TTS,
        sample_rate=SAMPLE_RATE_TTS,
    )

    return file_voice
|
|
|
472 |
|
473 |
# 9. Definition ui
|
474 |
_title = "".join(NAMES[NAMES.id == "title"][SYSTEM_LANG].to_list())
|
|
|
476 |
_from = "".join(NAMES[NAMES.id == "from"][SYSTEM_LANG].to_list())
|
477 |
_to = "".join(NAMES[NAMES.id == "to"][SYSTEM_LANG].to_list())
|
478 |
_your_sent = "".join(NAMES[NAMES.id == "your_sent"][SYSTEM_LANG].to_list())
|
479 |
+
_your_sent_tts = "".join(NAMES[NAMES.id == "your_sent_tts"][SYSTEM_LANG].to_list())
|
480 |
_transl_sent = "".join(NAMES[NAMES.id == "transl_sent"][SYSTEM_LANG].to_list())
|
481 |
_dialect = "".join(NAMES[NAMES.id == "dialect"][SYSTEM_LANG].to_list())
|
482 |
_translate = "".join(NAMES[NAMES.id == "translate"][SYSTEM_LANG].to_list())
|
483 |
_annotation = "".join(NAMES[NAMES.id == "annotation"][SYSTEM_LANG].to_list())
|
484 |
+
_sound = "".join(NAMES[NAMES.id == "sound"][SYSTEM_LANG].to_list())
|
485 |
|
486 |
+
|
487 |
with gr.Blocks() as demo:
|
488 |
gr.Markdown(_title)
|
489 |
+
|
490 |
+
# Translation
|
491 |
+
with gr.Tab(TYPE[SYSTEM_LANG][0]):
|
492 |
+
with gr.Row():
|
493 |
+
with gr.Column():
|
494 |
+
with gr.Row():
|
495 |
+
# choice_type = gr.Dropdown(
|
496 |
+
# choices = TYPE[SYSTEM_LANG].to_list(), label=_type, value = TYPE[SYSTEM_LANG][0])
|
497 |
+
translate_lang_input = gr.Dropdown(
|
498 |
+
choices = LANGUAGE.language.to_list(), label=_from, value = LANGUAGE["language"][1])
|
|
|
499 |
|
500 |
+
with gr.Column():
|
501 |
+
with gr.Row():
|
502 |
+
translate_lang_output = gr.Dropdown(
|
503 |
+
choices = LANGUAGE.language.to_list(), label=_to, value = LANGUAGE["language"][0])
|
504 |
+
|
505 |
dialect = gr.Dropdown(
|
506 |
+
# choices = DIALECT.dialect.to_list(), label=_dialect, value = "дж\ч")
|
507 |
+
choices = DIALECT.dialect.to_list(), label=_dialect, value = DIALECT["dialect"][0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
508 |
|
509 |
+
with gr.Row():
|
510 |
+
with gr.Column():
|
511 |
+
translate_text_input = gr.Textbox(lines=15, placeholder=_your_sent, label = "", show_copy_button=True)
|
512 |
+
with gr.Column():
|
513 |
+
translate_text_output = gr.Textbox(lines=15, placeholder=_transl_sent, label = "", autoscroll=False, show_copy_button=True)
|
514 |
+
|
515 |
+
translate_button = gr.Button(_translate, variant = 'primary')
|
516 |
+
|
517 |
+
# Dictionary
|
518 |
+
with gr.Tab(TYPE[SYSTEM_LANG][1]):
|
519 |
+
with gr.Row():
|
520 |
+
with gr.Column():
|
521 |
+
with gr.Row():
|
522 |
+
dict_lang_input = gr.Dropdown(
|
523 |
+
choices = LANGUAGE.language.to_list(), label=_from, value = LANGUAGE["language"][1])
|
524 |
+
|
525 |
|
526 |
+
with gr.Row():
|
527 |
+
with gr.Column():
|
528 |
+
dict_text_input = gr.Textbox(lines=15, placeholder=_your_sent, label = "", show_copy_button=True)
|
529 |
+
with gr.Column():
|
530 |
+
dict_text_output = gr.Textbox(lines=15, placeholder=_transl_sent, label = "", autoscroll=False, show_copy_button=True)
|
531 |
|
532 |
+
dict_button = gr.Button(_translate, variant = 'primary')
|
533 |
+
|
534 |
+
# TTS
|
535 |
+
with gr.Tab(TYPE[SYSTEM_LANG][2]):
|
536 |
+
with gr.Row():
|
537 |
+
with gr.Column():
|
538 |
+
tts_text_input = gr.Textbox(lines=3, placeholder=_your_sent_tts, label = "", show_copy_button=True)
|
539 |
+
with gr.Column():
|
540 |
+
tts_text_output = gr.Audio(label = "", type = 'filepath')
|
541 |
+
|
542 |
+
tts_button = gr.Button(_sound, variant = 'primary')
|
543 |
+
|
544 |
+
|
545 |
+
translate_button.click(translateDisp, inputs=[translate_text_input, translate_lang_input, translate_lang_output, dialect], outputs=[translate_text_output]) # text, from, to, dialect
|
546 |
+
dict_button.click(dictionaryDisp, inputs=[dict_text_input, dict_lang_input], outputs=[dict_text_output]) # text, from
|
547 |
+
tts_button.click(tts, inputs=[tts_text_input], outputs=[tts_text_output]) # text
|
548 |
|
549 |
+
gr.Markdown(_annotation)
|
550 |
+
|
551 |
+
# 10. Launch
|
552 |
+
demo.launch()
|