JJFrancisco committed on
Commit
5a542b6
1 Parent(s): e60666a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +224 -0
app.py CHANGED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ import gradio as gr
4
+ from AinaTheme import theme
5
+ from huggingface_hub import snapshot_download
6
+ import subprocess
7
+ import os
8
+
9
+ from translate import translate_nos
10
+
11
# Load environment variables via python-dotenv before any setting is read.
load_dotenv()

# Folder that receives the downloaded model snapshots, and the cache folder
# handed to the Hugging Face download helper.
MODELS_PATH = "./models"
HF_CACHE_DIR = "./hf_cache"

# Maximum number of input characters the UI accepts; can be overridden with
# the MAX_INPUT_CHARACTERS environment variable (defaults to 1000).
MAX_INPUT_CHARACTERS = int(os.getenv("MAX_INPUT_CHARACTERS", 1000))

# Source languages dispatched by translate() to translate_without_subwording()
# and translate_with_subwording() respectively.
LANGS_WITHOUT_SUBWORDING = ["English", "Spanish", "Galician"]
LANGS_WITH_SUBWORDING = ["Catalan", "Basque"]
18
+
19
+ # Model paths and available languages -----------------------------------------------------------
20
+
21
def download_model(repo_id, revision="main"):
    """Download a snapshot of a Hugging Face repository into MODELS_PATH.

    Args:
        repo_id: Repository identifier, e.g. "proxectonos/Nos_MT-OpenNMT-ca-gl".
        revision: Git revision of the repository to fetch.

    Returns:
        Whatever ``snapshot_download`` returns for the request (the location
        of the downloaded snapshot).
    """
    # Each repository gets its own sub-folder under MODELS_PATH.
    destination = os.path.join(MODELS_PATH, repo_id)
    return snapshot_download(
        repo_id=repo_id,
        revision=revision,
        local_dir=destination,
        cache_dir=HF_CACHE_DIR,
    )
23
+
24
def write_text_to_file(filename, text):
    """Write ``text`` to ``filename``, replacing any previous content.

    The file is always encoded as UTF-8 so that accented and other non-ASCII
    characters are written the same way regardless of the platform's default
    locale encoding.

    Args:
        filename: Path of the file to create or overwrite.
        text: String to store in the file.
    """
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(text)
27
+
# Model downloads ---------------------------------------------------------------------------------
# Only the ca-gl and eu-gl models are downloaded at the moment. The downloads
# below are currently disabled (they used to live in a dead string literal):
#   model_dir_gl_es = download_model("proxectonos/Nos_MT-OpenNMT-gl-es", revision="main")
#   model_dir_es_gl = download_model("proxectonos/Nos_MT-OpenNMT-es-gl", revision="main")
#   model_dir_gl_en = download_model("proxectonos/Nos_MT-OpenNMT-gl-en", revision="main")
#   model_dir_en_gl = download_model("proxectonos/Nos_MT-OpenNMT-en-gl", revision="main")
# Their directory variables (plus gl-ca and gl-eu) keep a blank placeholder so
# that the `directions` table further down can still be built.
model_dir_gl_es = model_dir_es_gl = model_dir_gl_en = model_dir_en_gl = model_dir_gl_ca = model_dir_gl_eu = " "

print("Downloading model ca-gl...")
model_dir_ca_gl = download_model("proxectonos/Nos_MT-OpenNMT-ca-gl", revision="main")
print("Downloading model eu-gl...")
model_dir_eu_gl = download_model("proxectonos/Nos_MT-OpenNMT-eu-gl", revision="main")
print("Models downloaded correctly!")

# Debug output: show where the eu-gl snapshot landed and what it contains.
# download_model() already places the snapshot under MODELS_PATH, so the
# returned path is used as-is instead of being joined with MODELS_PATH again
# (the extra join is only harmless when the returned path is absolute).
print(model_dir_eu_gl)
print(os.listdir(model_dir_eu_gl))
50
+
51
# Translation table restricted to the Catalan->Galician and Basque->Galician
# directions. Each "model" value is a pair of paths inside the model folder;
# presumably (subword codes file, translation model) -- confirm against
# translate_nos. This table is not referenced anywhere else in the visible file.
_reduced_ca_gl_root = os.path.join(MODELS_PATH, model_dir_ca_gl)
_reduced_eu_gl_root = os.path.join(MODELS_PATH, model_dir_eu_gl)

directions_reduced = {
    "Catalan": {
        "target": {
            "Galician": {
                "model": (
                    f"{_reduced_ca_gl_root}/ca-detok10k.code",
                    f"{_reduced_ca_gl_root}/ct2_detok-ca-gl_sint_10k",
                ),
            },
        },
    },
    "Basque": {
        "target": {
            "Galician": {
                "model": (
                    f"{_reduced_eu_gl_root}/gl-detok10k.code",
                    f"{_reduced_eu_gl_root}/eu_gl.ct2_10k",
                ),
            },
        },
    },
}
63
+
64
# Full translation table: source language -> "target" -> target language ->
# entry. Every entry carries a "model" pair of paths; index 1 is what
# translate_without_subwording() passes to `onmt_translate -model`, and the
# whole pair is handed to translate_nos() by translate_with_subwording().
#
# NOTE(review): in the visible file the gl-es, es-gl, gl-en, en-gl, gl-ca and
# gl-eu model directories are blank placeholders, so the Galician, Spanish and
# English rows do not point at downloaded models -- confirm whether the UI
# should be built from `directions_reduced` instead.
_root_gl_es = os.path.join(MODELS_PATH, model_dir_gl_es)
_root_gl_en = os.path.join(MODELS_PATH, model_dir_gl_en)
_root_gl_ca = os.path.join(MODELS_PATH, model_dir_gl_ca)
_root_gl_eu = os.path.join(MODELS_PATH, model_dir_gl_eu)
_root_es_gl = os.path.join(MODELS_PATH, model_dir_es_gl)
_root_en_gl = os.path.join(MODELS_PATH, model_dir_en_gl)
_root_ca_gl = os.path.join(MODELS_PATH, model_dir_ca_gl)
_root_eu_gl = os.path.join(MODELS_PATH, model_dir_eu_gl)

directions = {
    "Galician": {
        "target": {
            "Spanish": {
                "src": "gl",
                "tgt": "es",
                "model": (f"{_root_gl_es}/bpe/es.code", _root_gl_es),
            },
            "English": {"model": (f"{_root_gl_en}/bpe/en.code", _root_gl_en)},
            "Catalan": {"model": (f"{_root_gl_ca}/bpe/ca.code", _root_gl_ca)},
            "Basque": {"model": (f"{_root_gl_eu}/bpe/eu.code", _root_gl_eu)},
        },
    },
    "Spanish": {
        "target": {
            "Galician": {
                "src": "es",
                "tgt": "gl",
                "model": (f"{_root_es_gl}/bpe/gl.code", _root_es_gl),
            },
        },
    },
    "English": {
        "target": {
            "Galician": {"model": (f"{_root_en_gl}/bpe/gl.code", _root_en_gl)},
        },
    },
    "Catalan": {
        "target": {
            "Galician": {
                "model": (
                    f"{_root_ca_gl}/ca-detok10k.code",
                    f"{_root_ca_gl}/ct2_detok-ca-gl_sint_10k",
                ),
            },
        },
    },
    "Basque": {
        "target": {
            "Galician": {
                "model": (
                    f"{_root_eu_gl}/gl-detok10k.code",
                    f"{_root_eu_gl}/eu_gl.ct2_10k",
                ),
            },
        },
    },
}

# The first source language listed in the table is preselected in the UI.
DEFAULT_SOURCE_LANGUAGE = next(iter(directions))
97
+
98
+ # Translation functions ------------------------------------------------------------------------------
99
def get_target_languages(source_language):
    """Return the target languages configured for ``source_language``.

    Args:
        source_language: Key of the ``directions`` table.

    Returns:
        List of target language names; empty when the source language is
        unknown or has no "target" section.
    """
    source_entry = directions.get(source_language, {})
    targets = source_entry.get("target", {})
    return list(targets)
101
+
102
+
103
def get_target_language_model(source_language, target_language):
    """Look up the ``directions`` entry for a translation direction.

    Args:
        source_language: Source language name (key of ``directions``).
        target_language: Target language name.

    Returns:
        The whole entry dict of the direction (callers read its "model" key),
        or an empty dict when the direction is not configured.
    """
    available_targets = directions.get(source_language, {}).get("target", {})
    return available_targets.get(target_language, {})
106
+
107
+
108
def translate(input, source_language, target_language):
    """Translate ``input`` from ``source_language`` into ``target_language``.

    This function backs both the Submit button and the public "translate" API
    endpoint, so the input is validated here as well as in the browser.

    Args:
        input: Text to translate. ``None`` or blank text yields an empty
            translation without invoking any model.
        source_language: Source language name; selects the translation backend.
        target_language: Target language name.

    Returns:
        The translated text, or "" for blank input.

    Raises:
        gr.Error: If the stripped input exceeds MAX_INPUT_CHARACTERS.
        Exception: If ``source_language`` is not in either language list.
    """
    # Nothing to translate: skip the (expensive) model call altogether.
    if input is None or not input.strip():
        return ""

    # The UI only disables the Submit button; API callers bypass that, so the
    # same limit used by change_interactive() is enforced server-side.
    if len(input.strip()) > MAX_INPUT_CHARACTERS:
        raise gr.Error(f"Max length {MAX_INPUT_CHARACTERS} characters.")

    if source_language in LANGS_WITHOUT_SUBWORDING:  # ES, GL, EN
        return translate_without_subwording(input, source_language, target_language)
    if source_language in LANGS_WITH_SUBWORDING:  # CA, EU
        return translate_with_subwording(input, source_language, target_language)
    raise Exception(f"Language {source_language} not supported")
117
+
118
+
119
def translate_without_subwording(input, source_language, target_language):
    """Translate ``input`` by running the ``onmt_translate`` command-line tool.

    The text is written to ``input.txt``, ``onmt_translate`` is run with the
    model configured for the direction, and the result is read back from
    ``./output_file.txt``.

    Args:
        input: Text to translate.
        source_language: Source language name.
        target_language: Target language name.

    Returns:
        The content of the output file produced by ``onmt_translate``.

    Raises:
        Exception: If no model is configured for the direction, or if
            ``onmt_translate`` exits with a non-zero status (the message
            carries its stderr output).
    """
    write_text_to_file('input.txt', input)
    target_language_model = get_target_language_model(source_language, target_language)

    model_paths = target_language_model.get('model')
    if not model_paths:
        # Fail with a readable message instead of a TypeError on None[1].
        raise Exception(f"No model configured for {source_language} -> {target_language}")

    # Argument list instead of a shell string: no shell is spawned and model
    # paths containing spaces or shell metacharacters are passed verbatim.
    command = [
        "onmt_translate",
        "-src", "input.txt",
        "-model", model_paths[1],
        "--output", "./output_file.txt",
        "--replace_unk",
    ]
    print("Comando: ", " ".join(command))
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode != 0:
        raise Exception(f"Error occurred: {process.stderr.decode(errors='replace').strip()}")

    # Read the result as UTF-8 rather than with the platform's locale encoding.
    with open('./output_file.txt', 'r', encoding='utf-8') as f:
        return f.read()
131
+
132
def translate_with_subwording(input, source_language, target_language):
    """Translate ``input`` through ``translate_nos``.

    Args:
        input: Text to translate.
        source_language: Source language name.
        target_language: Target language name.

    Returns:
        Whatever ``translate_nos`` returns for the text and the "model" value
        of the direction's entry.
    """
    direction_entry = get_target_language_model(source_language, target_language)
    return translate_nos(input, direction_entry.get('model'))
136
+
137
+ # Gradio UI ------------------------------------------------------------------------------
138
def clear():
    """Return a pair of ``None`` values, one per component being reset.

    Wired to the Clear button, whose outputs are the input and output
    textboxes.
    """
    input_value = output_value = None
    return input_value, output_value
140
+
141
def change_interactive(text):
    """Enable or disable the buttons according to the current input length.

    Args:
        text: Current content of the input textbox; ``None`` (for example
            after the textbox has been cleared) is treated as empty text.

    Returns:
        Two ``gr.update`` objects, in the order the handler's outputs are
        wired (Clear button, Submit button): the first always enables its
        component, the second enables it only while the stripped text does
        not exceed MAX_INPUT_CHARACTERS.
    """
    # clear() resets the textbox to None; guard against it before stripping.
    stripped = (text or "").strip()
    within_limit = len(stripped) <= MAX_INPUT_CHARACTERS
    return gr.update(interactive=True), gr.update(interactive=within_limit)
145
+
146
def update_target_languages_dropdown(source_language):
    """Refresh the target-language selector after the source language changes.

    Args:
        source_language: Newly selected source language.

    Returns:
        A ``gr.update`` carrying the available target languages as choices
        and the first of them as the selected value. When the source language
        has no configured targets the selection is reset to ``None`` instead
        of raising ``IndexError``.
    """
    output_languages = get_target_languages(source_language)
    selected = output_languages[0] if output_languages else None
    return gr.update(choices=output_languages, value=selected, interactive=True)
149
+
150
+
151
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation -- confirm the layout against the running app.
with gr.Blocks(theme=theme) as app:
    # Top panel: source side on the left, target side on the right.
    with gr.Row(variant="panel"):
        with gr.Column(scale=2):
            # Hidden textbox that carries the character limit into the
            # client-side counter script.
            placeholder_max_token = gr.Textbox(
                visible=False,
                interactive=False,
                value=MAX_INPUT_CHARACTERS,
            )
            source_language = gr.Dropdown(
                label="Source Language",
                choices=list(directions.keys()),
                value=DEFAULT_SOURCE_LANGUAGE,
            )
            input = gr.Textbox(
                placeholder="Enter a text here to translate.",
                max_lines=100,
                lines=12,
                show_label=False,
                interactive=True,
            )
            # Warning text (left) and "current / max" character counter (right).
            with gr.Row(variant="panel", equal_height=True):
                gr.HTML("""<span id="countertext" style="display: flex; justify-content: start; color:#ef4444; font-weight: bold;"></span>""")
                gr.HTML(f"""<span id="counter" style="display: flex; justify-content: end;"> <span id="inputlenght">0</span>&nbsp;/&nbsp;{MAX_INPUT_CHARACTERS}</span>""")

        with gr.Column(scale=2):
            target_outputs = get_target_languages(DEFAULT_SOURCE_LANGUAGE)
            target_language = gr.Radio(
                choices=target_outputs,
                label="Target Language",
                value=target_outputs[0],
            )
            output = gr.Textbox(
                max_lines=100,
                lines=12,
                show_label=False,
                interactive=False,
                show_copy_button=True,
            )

    # Bottom panel: action buttons.
    with gr.Row(variant="panel"):
        clear_btn = gr.Button("Clear")
        submit_btn = gr.Button("Submit", variant="primary")

    # Keep the target selector in sync with the chosen source language.
    source_language.change(
        fn=update_target_languages_dropdown,
        inputs=[source_language],
        outputs=target_language,
    )

    # Server-side: toggle the buttons depending on the input length.
    input.change(
        fn=change_interactive,
        inputs=[input],
        outputs=[clear_btn, submit_btn],
        api_name=False,
    )

    # Client-side: show the warning text once the input is too long.
    input.change(
        fn=None,
        inputs=[input],
        js=f"""(i) => document.getElementById('countertext').textContent = i.length > {MAX_INPUT_CHARACTERS} && 'Max length {MAX_INPUT_CHARACTERS} characters. ' || '' """,
        api_name=False,
    )

    # Client-side: update the character counter and colour it when over the limit.
    input.change(
        fn=None,
        inputs=[input, placeholder_max_token],
        js="""(i, m) => {
            document.getElementById('inputlenght').textContent = i.length + ' '
            document.getElementById('inputlenght').style.color = (i.length > m) ? "#ef4444" : "";
        }""",
        api_name=False,
    )

    # Clear resets both textboxes.
    clear_btn.click(
        fn=clear,
        inputs=[],
        outputs=[input, output],
        queue=False,
        api_name=False,
    )

    # Submit runs the translation; also published as the "translate" API endpoint.
    submit_btn.click(
        fn=translate,
        inputs=[input, source_language, target_language],
        outputs=[output],
        api_name="translate",
        concurrency_limit=1,
    )

app.launch(show_api=True)