jisukim8873 committed on
Commit
0a7cea6
•
1 Parent(s): 59db409

translation

Browse files
Files changed (2) hide show
  1. app.py +38 -85
  2. requirements.txt +3 -0
app.py CHANGED
@@ -1,104 +1,57 @@
1
- # import os
2
- # import gradio as gr
3
- # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
4
-
5
- # en2ko = "KoJLabs/nllb-finetuned-en2ko"
6
- # ko2en = "KoJLabs/nllb-finetuned-ko2en"
7
- # style = "KoJLabs/bart-speech-style-converter"
8
-
9
- # en2ko_model = AutoModelForSeq2SeqLM.from_pretrained(en2ko)
10
- # ko2en_model = AutoModelForSeq2SeqLM.from_pretrained(ko2en)
11
- # style_model = AutoModelForSeq2SeqLM.from_pretrained(style)
12
-
13
- # en2ko_tokenizer = AutoTokenizer.from_pretrained(en2ko)
14
- # ko2en_tokenizer = AutoTokenizer.from_pretrained(ko2en)
15
- # style_tokenizer = AutoTokenizer.from_pretrained(style)
16
-
17
- # def translate(source, target, text):
18
- # formats = {"English":"eng_Latn", "Korean":"kor_Hang"}
19
- # src = formats[source]
20
- # tgt = formats[target]
21
-
22
- # if src == "eng_Latn":
23
- # translator = pipeline(
24
- # 'translation',
25
- # model=en2ko_model,
26
- # tokenizer=ko2en_tokenizer,
27
- # src_lang=src,
28
- # tgt_lang=tgt,
29
- # )
30
-
31
- # if src == "kor_Hang":
32
- # translator = pipeline(
33
- # 'translation',
34
- # model=ko2en_model,
35
- # tokenizer=en2ko_tokenizer,
36
- # src_lang=src,
37
- # tgt_lang=tgt
38
- # )
39
-
40
- # output = translator(text)
41
- # translated_text = output[0]['translation_text']
42
-
43
- # return translated_text
44
-
45
- # title = 'KoTAN Translator & Speech-style converter'
46
- # lang = ['English','Korean']
47
-
48
- # translator_app = gr.Interface(
49
- # fn=translate,
50
- # inputs=[gr.inputs.Dropdown(choices=lang, label='Source Language'), gr.inputs.Dropdown(choices=lang, label='Target Language'), gr.inputs.Textbox(lines=5, label='Text to Translate')],
51
- # outputs=[gr.outputs.Textbox(label='Translated Text')],
52
- # title=title,
53
- # description = 'KoTAN: Korean Translation and Augmentation with fine-tuned NLLB. If you want to download as pip package, please visit our github. (https://github.com/KoJLabs/KoTAN)',
54
- # article='Jisu, Kim. Juhwan, Lee',
55
- # enable_queue=True,
56
- # )
57
-
58
- # translator_app.launch()
59
-
60
  import os
61
  import gradio as gr
62
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
63
 
 
 
 
64
 
65
- checkpoint = 'facebook/nllb-200-distilled-1.3B'
66
- model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
67
- tokenizer = AutoTokenizer.from_pretrained(checkpoint)
 
 
 
 
68
 
69
  def translate(source, target, text):
70
- formats = {'English':'eng_Latn', 'Asante': 'aka_Latn', 'Akuapem': 'twi_Latn', 'Ewe':'ewe_Latn', 'Hausa': 'hau_Latn'}
71
- source_fmt = formats[source]
72
- target_fmt = formats[target]
73
- translator = pipeline('translation',
74
- model=model,
75
- tokenizer=tokenizer,
76
- src_lang=source_fmt,
77
- tgt_lang=target_fmt,
78
- max_length = 400)
79
-
 
 
 
 
 
 
 
 
 
 
 
 
80
  output = translator(text)
81
  translated_text = output[0]['translation_text']
 
82
  return translated_text
83
 
84
- # hf_token = os.getenv('HF_TOKEN')
85
- title = 'Ananse AI | Ghanaian Language Translator'
86
- # hf_writer = gr.HuggingFaceDatasetSaver(hf_token, 'crowdsourced-GLT')
87
- lang = ['Akuapem','Asante', 'English', 'Ewe','Hausa']
88
  translator_app = gr.Interface(
89
  fn=translate,
90
  inputs=[gr.inputs.Dropdown(choices=lang, label='Source Language'), gr.inputs.Dropdown(choices=lang, label='Target Language'), gr.inputs.Textbox(lines=5, label='Text to Translate')],
91
  outputs=[gr.outputs.Textbox(label='Translated Text')],
92
  title=title,
93
- description = 'The Ghanaian Languages considered now are the Asante, Akuapem, Ewe, and Hausa. Kindly use the Flag button to rate (👍🏼, 👎🏼) your translation to help us improve. Thanks',
94
- article='Ananse AI | hnmensah',
95
- examples = [['English','Asante','Kwame went to Kaneshie to buy tomates.'],
96
- ['English','Ewe','The event should be hosted at the Accra Mall.'],
97
- ['English','Akuapem','The trader is suffering from Malaria so she did not go to work.'],
98
- ['English','Hausa','The last person to get to the class will be sacked.']],
99
- #allow_flagging='manual',
100
- #flagging_options=['👍🏼','👎🏼'],
101
- #flagging_callback=hf_writer,
102
  enable_queue=True,
103
  )
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import gradio as gr
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
4
 
5
+ en2ko = "KoJLabs/nllb-finetuned-en2ko"
6
+ ko2en = "KoJLabs/nllb-finetuned-ko2en"
7
+ style = "KoJLabs/bart-speech-style-converter"
8
 
9
+ en2ko_model = AutoModelForSeq2SeqLM.from_pretrained(en2ko)
10
+ ko2en_model = AutoModelForSeq2SeqLM.from_pretrained(ko2en)
11
+ style_model = AutoModelForSeq2SeqLM.from_pretrained(style)
12
+
13
+ en2ko_tokenizer = AutoTokenizer.from_pretrained(en2ko)
14
+ ko2en_tokenizer = AutoTokenizer.from_pretrained(ko2en)
15
+ style_tokenizer = AutoTokenizer.from_pretrained(style)
16
 
17
  def translate(source, target, text):
18
+ formats = {"English":"eng_Latn", "Korean":"kor_Hang"}
19
+ src = formats[source]
20
+ tgt = formats[target]
21
+
22
+ if src == "eng_Latn":
23
+ translator = pipeline(
24
+ 'translation',
25
+ model=en2ko_model,
26
+ tokenizer=ko2en_tokenizer,
27
+ src_lang=src,
28
+ tgt_lang=tgt,
29
+ )
30
+
31
+ if src == "kor_Hang":
32
+ translator = pipeline(
33
+ 'translation',
34
+ model=ko2en_model,
35
+ tokenizer=en2ko_tokenizer,
36
+ src_lang=src,
37
+ tgt_lang=tgt
38
+ )
39
+
40
  output = translator(text)
41
  translated_text = output[0]['translation_text']
42
+
43
  return translated_text
44
 
45
+ title = 'KoTAN Translator & Speech-style converter'
46
+ lang = ['English','Korean']
47
+
 
48
  translator_app = gr.Interface(
49
  fn=translate,
50
  inputs=[gr.inputs.Dropdown(choices=lang, label='Source Language'), gr.inputs.Dropdown(choices=lang, label='Target Language'), gr.inputs.Textbox(lines=5, label='Text to Translate')],
51
  outputs=[gr.outputs.Textbox(label='Translated Text')],
52
  title=title,
53
+ description = 'KoTAN: Korean Translation and Augmentation with fine-tuned NLLB. If you want to download as pip package, please visit our github. (https://github.com/KoJLabs/KoTAN)',
54
+ article='Jisu, Kim. Juhwan, Lee',
 
 
 
 
 
 
 
55
  enable_queue=True,
56
  )
57
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ transformers
2
+ torch
3
+ sentencepiece