Merge pull request #217 from Prasanta-Hembram/Mymemory-add-lang
Browse files
- deep_translator/constants.py +325 -0
- deep_translator/mymemory.py +2 -1
- tests/test_mymemory.py +3 -3
deep_translator/constants.py
CHANGED
@@ -216,6 +216,331 @@ LINGUEE_LANGUAGES_TO_CODES = {
|
|
216 |
"japanese": "japanese",
|
217 |
}
|
218 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
DEEPL_LANGUAGE_TO_CODE = {
|
220 |
"bulgarian": "bg",
|
221 |
"czech": "cs",
|
|
|
216 |
"japanese": "japanese",
|
217 |
}
|
218 |
|
219 |
+
MY_MEMORY_LANGUAGES_TO_CODES = {
|
220 |
+
"acehnese": "ace-ID",
|
221 |
+
"afrikaans": "af-ZA",
|
222 |
+
"akan": "ak-GH",
|
223 |
+
"albanian": "sq-AL",
|
224 |
+
"amharic": "am-ET",
|
225 |
+
"antigua and barbuda creole english": "aig-AG",
|
226 |
+
"arabic": "ar-SA",
|
227 |
+
"arabic egyptian": "ar-EG",
|
228 |
+
"aragonese": "an-ES",
|
229 |
+
"armenian": "hy-AM",
|
230 |
+
"assamese": "as-IN",
|
231 |
+
"asturian": "ast-ES",
|
232 |
+
"austrian german": "de-AT",
|
233 |
+
"awadhi": "awa-IN",
|
234 |
+
"ayacucho quechua": "quy-PE",
|
235 |
+
"azerbaijani": "az-AZ",
|
236 |
+
"bahamas creole english": "bah-BS",
|
237 |
+
"bajan": "bjs-BB",
|
238 |
+
"balinese": "ban-ID",
|
239 |
+
"balkan gipsy": "rm-RO",
|
240 |
+
"bambara": "bm-ML",
|
241 |
+
"banjar": "bjn-ID",
|
242 |
+
"bashkir": "ba-RU",
|
243 |
+
"basque": "eu-ES",
|
244 |
+
"belarusian": "be-BY",
|
245 |
+
"belgian french": "fr-BE",
|
246 |
+
"bemba": "bem-ZM",
|
247 |
+
"bengali": "bn-IN",
|
248 |
+
"bhojpuri": "bho-IN",
|
249 |
+
"bihari": "bh-IN",
|
250 |
+
"bislama": "bi-VU",
|
251 |
+
"borana": "gax-KE",
|
252 |
+
"bosnian": "bs-BA",
|
253 |
+
"bosnian (cyrillic)": "bs-Cyrl-BA",
|
254 |
+
"breton": "br-FR",
|
255 |
+
"buginese": "bug-ID",
|
256 |
+
"bulgarian": "bg-BG",
|
257 |
+
"burmese": "my-MM",
|
258 |
+
"catalan": "ca-ES",
|
259 |
+
"catalan valencian": "cav-ES",
|
260 |
+
"cebuano": "ceb-PH",
|
261 |
+
"central atlas tamazight": "tzm-MA",
|
262 |
+
"central aymara": "ayr-BO",
|
263 |
+
"central kanuri (latin script)": "knc-NG",
|
264 |
+
"chadian arabic": "shu-TD",
|
265 |
+
"chamorro": "ch-GU",
|
266 |
+
"cherokee": "chr-US",
|
267 |
+
"chhattisgarhi": "hne-IN",
|
268 |
+
"chinese simplified": "zh-CN",
|
269 |
+
"chinese trad. (hong kong)": "zh-HK",
|
270 |
+
"chinese traditional": "zh-TW",
|
271 |
+
"chinese traditional macau": "zh-MO",
|
272 |
+
"chittagonian": "ctg-BD",
|
273 |
+
"chokwe": "cjk-AO",
|
274 |
+
"classical greek": "grc-GR",
|
275 |
+
"comorian ngazidja": "zdj-KM",
|
276 |
+
"coptic": "cop-EG",
|
277 |
+
"crimean tatar": "crh-RU",
|
278 |
+
"crioulo upper guinea": "pov-GW",
|
279 |
+
"croatian": "hr-HR",
|
280 |
+
"czech": "cs-CZ",
|
281 |
+
"danish": "da-DK",
|
282 |
+
"dari": "prs-AF",
|
283 |
+
"dimli": "diq-TR",
|
284 |
+
"dutch": "nl-NL",
|
285 |
+
"dyula": "dyu-CI",
|
286 |
+
"dzongkha": "dz-BT",
|
287 |
+
"eastern yiddish": "ydd-US",
|
288 |
+
"emakhuwa": "vmw-MZ",
|
289 |
+
"english": "en-GB",
|
290 |
+
"english australia": "en-AU",
|
291 |
+
"english canada": "en-CA",
|
292 |
+
"english india": "en-IN",
|
293 |
+
"english ireland": "en-IE",
|
294 |
+
"english new zealand": "en-NZ",
|
295 |
+
"english singapore": "en-SG",
|
296 |
+
"english south africa": "en-ZA",
|
297 |
+
"english us": "en-US",
|
298 |
+
"esperanto": "eo-EU",
|
299 |
+
"estonian": "et-EE",
|
300 |
+
"ewe": "ee-GH",
|
301 |
+
"fanagalo": "fn-FNG",
|
302 |
+
"faroese": "fo-FO",
|
303 |
+
"fijian": "fj-FJ",
|
304 |
+
"filipino": "fil-PH",
|
305 |
+
"finnish": "fi-FI",
|
306 |
+
"flemish": "nl-BE",
|
307 |
+
"fon": "fon-BJ",
|
308 |
+
"french": "fr-FR",
|
309 |
+
"french canada": "fr-CA",
|
310 |
+
"french swiss": "fr-CH",
|
311 |
+
"friulian": "fur-IT",
|
312 |
+
"fula": "ff-FUL",
|
313 |
+
"galician": "gl-ES",
|
314 |
+
"gamargu": "mfi-NG",
|
315 |
+
"garo": "grt-IN",
|
316 |
+
"georgian": "ka-GE",
|
317 |
+
"german": "de-DE",
|
318 |
+
"gilbertese": "gil-KI",
|
319 |
+
"glavda": "glw-NG",
|
320 |
+
"greek": "el-GR",
|
321 |
+
"grenadian creole english": "gcl-GD",
|
322 |
+
"guarani": "gn-PY",
|
323 |
+
"gujarati": "gu-IN",
|
324 |
+
"guyanese creole english": "gyn-GY",
|
325 |
+
"haitian creole french": "ht-HT",
|
326 |
+
"halh mongolian": "khk-MN",
|
327 |
+
"hausa": "ha-NE",
|
328 |
+
"hawaiian": "haw-US",
|
329 |
+
"hebrew": "he-IL",
|
330 |
+
"higi": "hig-NG",
|
331 |
+
"hiligaynon": "hil-PH",
|
332 |
+
"hill mari": "mrj-RU",
|
333 |
+
"hindi": "hi-IN",
|
334 |
+
"hmong": "hmn-CN",
|
335 |
+
"hungarian": "hu-HU",
|
336 |
+
"icelandic": "is-IS",
|
337 |
+
"igbo ibo": "ibo-NG",
|
338 |
+
"igbo ig": "ig-NG",
|
339 |
+
"ilocano": "ilo-PH",
|
340 |
+
"indonesian": "id-ID",
|
341 |
+
"inuktitut greenlandic": "kl-GL",
|
342 |
+
"irish gaelic": "ga-IE",
|
343 |
+
"italian": "it-IT",
|
344 |
+
"italian swiss": "it-CH",
|
345 |
+
"jamaican creole english": "jam-JM",
|
346 |
+
"japanese": "ja-JP",
|
347 |
+
"javanese": "jv-ID",
|
348 |
+
"jingpho": "kac-MM",
|
349 |
+
"k'iche'": "quc-GT",
|
350 |
+
"kabiyè": "kbp-TG",
|
351 |
+
"kabuverdianu": "kea-CV",
|
352 |
+
"kabylian": "kab-DZ",
|
353 |
+
"kalenjin": "kln-KE",
|
354 |
+
"kamba": "kam-KE",
|
355 |
+
"kannada": "kn-IN",
|
356 |
+
"kanuri": "kr-KAU",
|
357 |
+
"karen": "kar-MM",
|
358 |
+
"kashmiri (devanagari script)": "ks-IN",
|
359 |
+
"kashmiri (arabic script)": "kas-IN",
|
360 |
+
"kazakh": "kk-KZ",
|
361 |
+
"khasi": "kha-IN",
|
362 |
+
"khmer": "km-KH",
|
363 |
+
"kikuyu kik": "kik-KE",
|
364 |
+
"kikuyu ki": "ki-KE",
|
365 |
+
"kimbundu": "kmb-AO",
|
366 |
+
"kinyarwanda": "rw-RW",
|
367 |
+
"kirundi": "rn-BI",
|
368 |
+
"kisii": "guz-KE",
|
369 |
+
"kongo": "kg-CG",
|
370 |
+
"konkani": "kok-IN",
|
371 |
+
"korean": "ko-KR",
|
372 |
+
"northern kurdish": "kmr-TR",
|
373 |
+
"kurdish sorani": "ckb-IQ",
|
374 |
+
"kyrgyz": "ky-KG",
|
375 |
+
"lao": "lo-LA",
|
376 |
+
"latgalian": "ltg-LV",
|
377 |
+
"latin": "la-XN",
|
378 |
+
"latvian": "lv-LV",
|
379 |
+
"ligurian": "lij-IT",
|
380 |
+
"limburgish": "li-NL",
|
381 |
+
"lingala": "ln-LIN",
|
382 |
+
"lithuanian": "lt-LT",
|
383 |
+
"lombard": "lmo-IT",
|
384 |
+
"luba-kasai": "lua-CD",
|
385 |
+
"luganda": "lg-UG",
|
386 |
+
"luhya": "luy-KE",
|
387 |
+
"luo": "luo-KE",
|
388 |
+
"luxembourgish": "lb-LU",
|
389 |
+
"maa": "mas-KE",
|
390 |
+
"macedonian": "mk-MK",
|
391 |
+
"magahi": "mag-IN",
|
392 |
+
"maithili": "mai-IN",
|
393 |
+
"malagasy": "mg-MG",
|
394 |
+
"malay": "ms-MY",
|
395 |
+
"malayalam": "ml-IN",
|
396 |
+
"maldivian": "dv-MV",
|
397 |
+
"maltese": "mt-MT",
|
398 |
+
"mandara": "mfi-CM",
|
399 |
+
"manipuri": "mni-IN",
|
400 |
+
"manx gaelic": "gv-IM",
|
401 |
+
"maori": "mi-NZ",
|
402 |
+
"marathi": "mr-IN",
|
403 |
+
"margi": "mrt-NG",
|
404 |
+
"mari": "mhr-RU",
|
405 |
+
"marshallese": "mh-MH",
|
406 |
+
"mende": "men-SL",
|
407 |
+
"meru": "mer-KE",
|
408 |
+
"mijikenda": "nyf-KE",
|
409 |
+
"minangkabau": "min-ID",
|
410 |
+
"mizo": "lus-IN",
|
411 |
+
"mongolian": "mn-MN",
|
412 |
+
"montenegrin": "sr-ME",
|
413 |
+
"morisyen": "mfe-MU",
|
414 |
+
"moroccan arabic": "ar-MA",
|
415 |
+
"mossi": "mos-BF",
|
416 |
+
"ndau": "ndc-MZ",
|
417 |
+
"ndebele": "nr-ZA",
|
418 |
+
"nepali": "ne-NP",
|
419 |
+
"nigerian fulfulde": "fuv-NG",
|
420 |
+
"niuean": "niu-NU",
|
421 |
+
"north azerbaijani": "azj-AZ",
|
422 |
+
"sesotho": "nso-ZA",
|
423 |
+
"northern uzbek": "uzn-UZ",
|
424 |
+
"norwegian bokmål": "nb-NO",
|
425 |
+
"norwegian nynorsk": "nn-NO",
|
426 |
+
"nuer": "nus-SS",
|
427 |
+
"nyanja": "ny-MW",
|
428 |
+
"occitan": "oc-FR",
|
429 |
+
"occitan aran": "oc-ES",
|
430 |
+
"odia": "or-IN",
|
431 |
+
"oriya": "ory-IN",
|
432 |
+
"urdu": "ur-PK",
|
433 |
+
"palauan": "pau-PW",
|
434 |
+
"pali": "pi-IN",
|
435 |
+
"pangasinan": "pag-PH",
|
436 |
+
"papiamentu": "pap-CW",
|
437 |
+
"pashto": "ps-PK",
|
438 |
+
"persian": "fa-IR",
|
439 |
+
"pijin": "pis-SB",
|
440 |
+
"plateau malagasy": "plt-MG",
|
441 |
+
"polish": "pl-PL",
|
442 |
+
"portuguese": "pt-PT",
|
443 |
+
"portuguese brazil": "pt-BR",
|
444 |
+
"potawatomi": "pot-US",
|
445 |
+
"punjabi": "pa-IN",
|
446 |
+
"punjabi (pakistan)": "pnb-PK",
|
447 |
+
"quechua": "qu-PE",
|
448 |
+
"rohingya": "rhg-MM",
|
449 |
+
"rohingyalish": "rhl-MM",
|
450 |
+
"romanian": "ro-RO",
|
451 |
+
"romansh": "roh-CH",
|
452 |
+
"rundi": "run-BI",
|
453 |
+
"russian": "ru-RU",
|
454 |
+
"saint lucian creole french": "acf-LC",
|
455 |
+
"samoan": "sm-WS",
|
456 |
+
"sango": "sg-CF",
|
457 |
+
"sanskrit": "sa-IN",
|
458 |
+
"santali": "sat-IN",
|
459 |
+
"sardinian": "sc-IT",
|
460 |
+
"scots gaelic": "gd-GB",
|
461 |
+
"sena": "seh-ZW",
|
462 |
+
"serbian cyrillic": "sr-Cyrl-RS",
|
463 |
+
"serbian latin": "sr-Latn-RS",
|
464 |
+
"seselwa creole french": "crs-SC",
|
465 |
+
"setswana (south africa)": "tn-ZA",
|
466 |
+
"shan": "shn-MM",
|
467 |
+
"shona": "sn-ZW",
|
468 |
+
"sicilian": "scn-IT",
|
469 |
+
"silesian": "szl-PL",
|
470 |
+
"sindhi snd": "snd-PK",
|
471 |
+
"sindhi sd": "sd-PK",
|
472 |
+
"sinhala": "si-LK",
|
473 |
+
"slovak": "sk-SK",
|
474 |
+
"slovenian": "sl-SI",
|
475 |
+
"somali": "so-SO",
|
476 |
+
"sotho southern": "st-LS",
|
477 |
+
"south azerbaijani": "azb-AZ",
|
478 |
+
"southern pashto": "pbt-PK",
|
479 |
+
"southwestern dinka": "dik-SS",
|
480 |
+
"spanish": "es-ES",
|
481 |
+
"spanish argentina": "es-AR",
|
482 |
+
"spanish colombia": "es-CO",
|
483 |
+
"spanish latin america": "es-419",
|
484 |
+
"spanish mexico": "es-MX",
|
485 |
+
"spanish united states": "es-US",
|
486 |
+
"sranan tongo": "srn-SR",
|
487 |
+
"standard latvian": "lvs-LV",
|
488 |
+
"standard malay": "zsm-MY",
|
489 |
+
"sundanese": "su-ID",
|
490 |
+
"swahili": "sw-KE",
|
491 |
+
"swati": "ss-SZ",
|
492 |
+
"swedish": "sv-SE",
|
493 |
+
"swiss german": "de-CH",
|
494 |
+
"syriac (aramaic)": "syc-TR",
|
495 |
+
"tagalog": "tl-PH",
|
496 |
+
"tahitian": "ty-PF",
|
497 |
+
"tajik": "tg-TJ",
|
498 |
+
"tamashek (tuareg)": "tmh-DZ",
|
499 |
+
"tamasheq": "taq-ML",
|
500 |
+
"tamil india": "ta-IN",
|
501 |
+
"tamil sri lanka": "ta-LK",
|
502 |
+
"taroko": "trv-TW",
|
503 |
+
"tatar": "tt-RU",
|
504 |
+
"telugu": "te-IN",
|
505 |
+
"tetum": "tet-TL",
|
506 |
+
"thai": "th-TH",
|
507 |
+
"tibetan": "bo-CN",
|
508 |
+
"tigrinya": "ti-ET",
|
509 |
+
"tok pisin": "tpi-PG",
|
510 |
+
"tokelauan": "tkl-TK",
|
511 |
+
"tongan": "to-TO",
|
512 |
+
"tosk albanian": "als-AL",
|
513 |
+
"tsonga": "ts-ZA",
|
514 |
+
"tswa": "tsc-MZ",
|
515 |
+
"tswana": "tn-BW",
|
516 |
+
"tumbuka": "tum-MW",
|
517 |
+
"turkish": "tr-TR",
|
518 |
+
"turkmen": "tk-TM",
|
519 |
+
"tuvaluan": "tvl-TV",
|
520 |
+
"twi": "tw-GH",
|
521 |
+
"udmurt": "udm-RU",
|
522 |
+
"ukrainian": "uk-UA",
|
523 |
+
"uma": "ppk-ID",
|
524 |
+
"umbundu": "umb-AO",
|
525 |
+
"uyghur uig": "uig-CN",
|
526 |
+
"uyghur ug": "ug-CN",
|
527 |
+
"uzbek": "uz-UZ",
|
528 |
+
"venetian": "vec-IT",
|
529 |
+
"vietnamese": "vi-VN",
|
530 |
+
"vincentian creole english": "svc-VC",
|
531 |
+
"virgin islands creole english": "vic-US",
|
532 |
+
"wallisian": "wls-WF",
|
533 |
+
"waray (philippines)": "war-PH",
|
534 |
+
"welsh": "cy-GB",
|
535 |
+
"west central oromo": "gaz-ET",
|
536 |
+
"western persian": "pes-IR",
|
537 |
+
"wolof": "wo-SN",
|
538 |
+
"xhosa": "xh-ZA",
|
539 |
+
"yiddish": "yi-YD",
|
540 |
+
"yoruba": "yo-NG",
|
541 |
+
"zulu": "zu-ZA",
|
542 |
+
}
|
543 |
+
|
544 |
DEEPL_LANGUAGE_TO_CODE = {
|
545 |
"bulgarian": "bg",
|
546 |
"czech": "cs",
|
deep_translator/mymemory.py
CHANGED
@@ -9,7 +9,7 @@ from typing import List, Optional, Union
|
|
9 |
import requests
|
10 |
|
11 |
from deep_translator.base import BaseTranslator
|
12 |
-
from deep_translator.constants import BASE_URLS
|
13 |
from deep_translator.exceptions import (
|
14 |
RequestError,
|
15 |
TooManyRequests,
|
@@ -41,6 +41,7 @@ class MyMemoryTranslator(BaseTranslator):
|
|
41 |
source=source,
|
42 |
target=target,
|
43 |
payload_key="q",
|
|
|
44 |
)
|
45 |
|
46 |
def translate(
|
|
|
9 |
import requests
|
10 |
|
11 |
from deep_translator.base import BaseTranslator
|
12 |
+
from deep_translator.constants import BASE_URLS, MY_MEMORY_LANGUAGES_TO_CODES
|
13 |
from deep_translator.exceptions import (
|
14 |
RequestError,
|
15 |
TooManyRequests,
|
|
|
41 |
source=source,
|
42 |
target=target,
|
43 |
payload_key="q",
|
44 |
+
languages=MY_MEMORY_LANGUAGES_TO_CODES,
|
45 |
)
|
46 |
|
47 |
def translate(
|
tests/test_mymemory.py
CHANGED
@@ -9,7 +9,7 @@ from deep_translator import MyMemoryTranslator, exceptions
|
|
9 |
|
10 |
@pytest.fixture
|
11 |
def mymemory():
|
12 |
-
return MyMemoryTranslator(source="en", target="fr")
|
13 |
|
14 |
|
15 |
def test_content(mymemory):
|
@@ -27,9 +27,9 @@ def test_inputs():
|
|
27 |
MyMemoryTranslator(source="auto", target="")
|
28 |
|
29 |
with pytest.raises(exceptions.InvalidSourceOrTargetLanguage):
|
30 |
-
MyMemoryTranslator(source="", target="en")
|
31 |
|
32 |
-
m1 = MyMemoryTranslator("en", "fr")
|
33 |
m2 = MyMemoryTranslator("english", "french")
|
34 |
assert m1._source == m2._source
|
35 |
assert m1._target == m2._target
|
|
|
9 |
|
10 |
@pytest.fixture
|
11 |
def mymemory():
|
12 |
+
return MyMemoryTranslator(source="en-GB", target="fr-FR")
|
13 |
|
14 |
|
15 |
def test_content(mymemory):
|
|
|
27 |
MyMemoryTranslator(source="auto", target="")
|
28 |
|
29 |
with pytest.raises(exceptions.InvalidSourceOrTargetLanguage):
|
30 |
+
MyMemoryTranslator(source="", target="en-GB")
|
31 |
|
32 |
+
m1 = MyMemoryTranslator("en-GB", "fr-FR")
|
33 |
m2 = MyMemoryTranslator("english", "french")
|
34 |
assert m1._source == m2._source
|
35 |
assert m1._target == m2._target
|