Nidhal Baccouri committed on
Commit
4522b30
·
unverified ·
2 Parent(s): 4328b37 bfee80b

Merge pull request #217 from Prasanta-Hembram/Mymemory-add-lang

Browse files
deep_translator/constants.py CHANGED
@@ -216,6 +216,331 @@ LINGUEE_LANGUAGES_TO_CODES = {
216
  "japanese": "japanese",
217
  }
218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  DEEPL_LANGUAGE_TO_CODE = {
220
  "bulgarian": "bg",
221
  "czech": "cs",
 
216
  "japanese": "japanese",
217
  }
218
 
219
+ MY_MEMORY_LANGUAGES_TO_CODES = {
220
+ "acehnese": "ace-ID",
221
+ "afrikaans": "af-ZA",
222
+ "akan": "ak-GH",
223
+ "albanian": "sq-AL",
224
+ "amharic": "am-ET",
225
+ "antigua and barbuda creole english": "aig-AG",
226
+ "arabic": "ar-SA",
227
+ "arabic egyptian": "ar-EG",
228
+ "aragonese": "an-ES",
229
+ "armenian": "hy-AM",
230
+ "assamese": "as-IN",
231
+ "asturian": "ast-ES",
232
+ "austrian german": "de-AT",
233
+ "awadhi": "awa-IN",
234
+ "ayacucho quechua": "quy-PE",
235
+ "azerbaijani": "az-AZ",
236
+ "bahamas creole english": "bah-BS",
237
+ "bajan": "bjs-BB",
238
+ "balinese": "ban-ID",
239
+ "balkan gipsy": "rm-RO",
240
+ "bambara": "bm-ML",
241
+ "banjar": "bjn-ID",
242
+ "bashkir": "ba-RU",
243
+ "basque": "eu-ES",
244
+ "belarusian": "be-BY",
245
+ "belgian french": "fr-BE",
246
+ "bemba": "bem-ZM",
247
+ "bengali": "bn-IN",
248
+ "bhojpuri": "bho-IN",
249
+ "bihari": "bh-IN",
250
+ "bislama": "bi-VU",
251
+ "borana": "gax-KE",
252
+ "bosnian": "bs-BA",
253
+ "bosnian (cyrillic)": "bs-Cyrl-BA",
254
+ "breton": "br-FR",
255
+ "buginese": "bug-ID",
256
+ "bulgarian": "bg-BG",
257
+ "burmese": "my-MM",
258
+ "catalan": "ca-ES",
259
+ "catalan valencian": "cav-ES",
260
+ "cebuano": "ceb-PH",
261
+ "central atlas tamazight": "tzm-MA",
262
+ "central aymara": "ayr-BO",
263
+ "central kanuri (latin script)": "knc-NG",
264
+ "chadian arabic": "shu-TD",
265
+ "chamorro": "ch-GU",
266
+ "cherokee": "chr-US",
267
+ "chhattisgarhi": "hne-IN",
268
+ "chinese simplified": "zh-CN",
269
+ "chinese trad. (hong kong)": "zh-HK",
270
+ "chinese traditional": "zh-TW",
271
+ "chinese traditional macau": "zh-MO",
272
+ "chittagonian": "ctg-BD",
273
+ "chokwe": "cjk-AO",
274
+ "classical greek": "grc-GR",
275
+ "comorian ngazidja": "zdj-KM",
276
+ "coptic": "cop-EG",
277
+ "crimean tatar": "crh-RU",
278
+ "crioulo upper guinea": "pov-GW",
279
+ "croatian": "hr-HR",
280
+ "czech": "cs-CZ",
281
+ "danish": "da-DK",
282
+ "dari": "prs-AF",
283
+ "dimli": "diq-TR",
284
+ "dutch": "nl-NL",
285
+ "dyula": "dyu-CI",
286
+ "dzongkha": "dz-BT",
287
+ "eastern yiddish": "ydd-US",
288
+ "emakhuwa": "vmw-MZ",
289
+ "english": "en-GB",
290
+ "english australia": "en-AU",
291
+ "english canada": "en-CA",
292
+ "english india": "en-IN",
293
+ "english ireland": "en-IE",
294
+ "english new zealand": "en-NZ",
295
+ "english singapore": "en-SG",
296
+ "english south africa": "en-ZA",
297
+ "english us": "en-US",
298
+ "esperanto": "eo-EU",
299
+ "estonian": "et-EE",
300
+ "ewe": "ee-GH",
301
+ "fanagalo": "fn-FNG",
302
+ "faroese": "fo-FO",
303
+ "fijian": "fj-FJ",
304
+ "filipino": "fil-PH",
305
+ "finnish": "fi-FI",
306
+ "flemish": "nl-BE",
307
+ "fon": "fon-BJ",
308
+ "french": "fr-FR",
309
+ "french canada": "fr-CA",
310
+ "french swiss": "fr-CH",
311
+ "friulian": "fur-IT",
312
+ "fula": "ff-FUL",
313
+ "galician": "gl-ES",
314
+ "gamargu": "mfi-NG",
315
+ "garo": "grt-IN",
316
+ "georgian": "ka-GE",
317
+ "german": "de-DE",
318
+ "gilbertese": "gil-KI",
319
+ "glavda": "glw-NG",
320
+ "greek": "el-GR",
321
+ "grenadian creole english": "gcl-GD",
322
+ "guarani": "gn-PY",
323
+ "gujarati": "gu-IN",
324
+ "guyanese creole english": "gyn-GY",
325
+ "haitian creole french": "ht-HT",
326
+ "halh mongolian": "khk-MN",
327
+ "hausa": "ha-NE",
328
+ "hawaiian": "haw-US",
329
+ "hebrew": "he-IL",
330
+ "higi": "hig-NG",
331
+ "hiligaynon": "hil-PH",
332
+ "hill mari": "mrj-RU",
333
+ "hindi": "hi-IN",
334
+ "hmong": "hmn-CN",
335
+ "hungarian": "hu-HU",
336
+ "icelandic": "is-IS",
337
+ "igbo ibo": "ibo-NG",
338
+ "igbo ig": "ig-NG",
339
+ "ilocano": "ilo-PH",
340
+ "indonesian": "id-ID",
341
+ "inuktitut greenlandic": "kl-GL",
342
+ "irish gaelic": "ga-IE",
343
+ "italian": "it-IT",
344
+ "italian swiss": "it-CH",
345
+ "jamaican creole english": "jam-JM",
346
+ "japanese": "ja-JP",
347
+ "javanese": "jv-ID",
348
+ "jingpho": "kac-MM",
349
+ "k'iche'": "quc-GT",
350
+ "kabiyè": "kbp-TG",
351
+ "kabuverdianu": "kea-CV",
352
+ "kabylian": "kab-DZ",
353
+ "kalenjin": "kln-KE",
354
+ "kamba": "kam-KE",
355
+ "kannada": "kn-IN",
356
+ "kanuri": "kr-KAU",
357
+ "karen": "kar-MM",
358
+ "kashmiri (devanagari script)": "ks-IN",
359
+ "kashmiri (arabic script)": "kas-IN",
360
+ "kazakh": "kk-KZ",
361
+ "khasi": "kha-IN",
362
+ "khmer": "km-KH",
363
+ "kikuyu kik": "kik-KE",
364
+ "kikuyu ki": "ki-KE",
365
+ "kimbundu": "kmb-AO",
366
+ "kinyarwanda": "rw-RW",
367
+ "kirundi": "rn-BI",
368
+ "kisii": "guz-KE",
369
+ "kongo": "kg-CG",
370
+ "konkani": "kok-IN",
371
+ "korean": "ko-KR",
372
+ "northern kurdish": "kmr-TR",
373
+ "kurdish sorani": "ckb-IQ",
374
+ "kyrgyz": "ky-KG",
375
+ "lao": "lo-LA",
376
+ "latgalian": "ltg-LV",
377
+ "latin": "la-XN",
378
+ "latvian": "lv-LV",
379
+ "ligurian": "lij-IT",
380
+ "limburgish": "li-NL",
381
+ "lingala": "ln-LIN",
382
+ "lithuanian": "lt-LT",
383
+ "lombard": "lmo-IT",
384
+ "luba-kasai": "lua-CD",
385
+ "luganda": "lg-UG",
386
+ "luhya": "luy-KE",
387
+ "luo": "luo-KE",
388
+ "luxembourgish": "lb-LU",
389
+ "maa": "mas-KE",
390
+ "macedonian": "mk-MK",
391
+ "magahi": "mag-IN",
392
+ "maithili": "mai-IN",
393
+ "malagasy": "mg-MG",
394
+ "malay": "ms-MY",
395
+ "malayalam": "ml-IN",
396
+ "maldivian": "dv-MV",
397
+ "maltese": "mt-MT",
398
+ "mandara": "mfi-CM",
399
+ "manipuri": "mni-IN",
400
+ "manx gaelic": "gv-IM",
401
+ "maori": "mi-NZ",
402
+ "marathi": "mr-IN",
403
+ "margi": "mrt-NG",
404
+ "mari": "mhr-RU",
405
+ "marshallese": "mh-MH",
406
+ "mende": "men-SL",
407
+ "meru": "mer-KE",
408
+ "mijikenda": "nyf-KE",
409
+ "minangkabau": "min-ID",
410
+ "mizo": "lus-IN",
411
+ "mongolian": "mn-MN",
412
+ "montenegrin": "sr-ME",
413
+ "morisyen": "mfe-MU",
414
+ "moroccan arabic": "ar-MA",
415
+ "mossi": "mos-BF",
416
+ "ndau": "ndc-MZ",
417
+ "ndebele": "nr-ZA",
418
+ "nepali": "ne-NP",
419
+ "nigerian fulfulde": "fuv-NG",
420
+ "niuean": "niu-NU",
421
+ "north azerbaijani": "azj-AZ",
422
+ "sesotho": "nso-ZA",
423
+ "northern uzbek": "uzn-UZ",
424
+ "norwegian bokmål": "nb-NO",
425
+ "norwegian nynorsk": "nn-NO",
426
+ "nuer": "nus-SS",
427
+ "nyanja": "ny-MW",
428
+ "occitan": "oc-FR",
429
+ "occitan aran": "oc-ES",
430
+ "odia": "or-IN",
431
+ "oriya": "ory-IN",
432
+ "urdu": "ur-PK",
433
+ "palauan": "pau-PW",
434
+ "pali": "pi-IN",
435
+ "pangasinan": "pag-PH",
436
+ "papiamentu": "pap-CW",
437
+ "pashto": "ps-PK",
438
+ "persian": "fa-IR",
439
+ "pijin": "pis-SB",
440
+ "plateau malagasy": "plt-MG",
441
+ "polish": "pl-PL",
442
+ "portuguese": "pt-PT",
443
+ "portuguese brazil": "pt-BR",
444
+ "potawatomi": "pot-US",
445
+ "punjabi": "pa-IN",
446
+ "punjabi (pakistan)": "pnb-PK",
447
+ "quechua": "qu-PE",
448
+ "rohingya": "rhg-MM",
449
+ "rohingyalish": "rhl-MM",
450
+ "romanian": "ro-RO",
451
+ "romansh": "roh-CH",
452
+ "rundi": "run-BI",
453
+ "russian": "ru-RU",
454
+ "saint lucian creole french": "acf-LC",
455
+ "samoan": "sm-WS",
456
+ "sango": "sg-CF",
457
+ "sanskrit": "sa-IN",
458
+ "santali": "sat-IN",
459
+ "sardinian": "sc-IT",
460
+ "scots gaelic": "gd-GB",
461
+ "sena": "seh-ZW",
462
+ "serbian cyrillic": "sr-Cyrl-RS",
463
+ "serbian latin": "sr-Latn-RS",
464
+ "seselwa creole french": "crs-SC",
465
+ "setswana (south africa)": "tn-ZA",
466
+ "shan": "shn-MM",
467
+ "shona": "sn-ZW",
468
+ "sicilian": "scn-IT",
469
+ "silesian": "szl-PL",
470
+ "sindhi snd": "snd-PK",
471
+ "sindhi sd": "sd-PK",
472
+ "sinhala": "si-LK",
473
+ "slovak": "sk-SK",
474
+ "slovenian": "sl-SI",
475
+ "somali": "so-SO",
476
+ "sotho southern": "st-LS",
477
+ "south azerbaijani": "azb-AZ",
478
+ "southern pashto": "pbt-PK",
479
+ "southwestern dinka": "dik-SS",
480
+ "spanish": "es-ES",
481
+ "spanish argentina": "es-AR",
482
+ "spanish colombia": "es-CO",
483
+ "spanish latin america": "es-419",
484
+ "spanish mexico": "es-MX",
485
+ "spanish united states": "es-US",
486
+ "sranan tongo": "srn-SR",
487
+ "standard latvian": "lvs-LV",
488
+ "standard malay": "zsm-MY",
489
+ "sundanese": "su-ID",
490
+ "swahili": "sw-KE",
491
+ "swati": "ss-SZ",
492
+ "swedish": "sv-SE",
493
+ "swiss german": "de-CH",
494
+ "syriac (aramaic)": "syc-TR",
495
+ "tagalog": "tl-PH",
496
+ "tahitian": "ty-PF",
497
+ "tajik": "tg-TJ",
498
+ "tamashek (tuareg)": "tmh-DZ",
499
+ "tamasheq": "taq-ML",
500
+ "tamil india": "ta-IN",
501
+ "tamil sri lanka": "ta-LK",
502
+ "taroko": "trv-TW",
503
+ "tatar": "tt-RU",
504
+ "telugu": "te-IN",
505
+ "tetum": "tet-TL",
506
+ "thai": "th-TH",
507
+ "tibetan": "bo-CN",
508
+ "tigrinya": "ti-ET",
509
+ "tok pisin": "tpi-PG",
510
+ "tokelauan": "tkl-TK",
511
+ "tongan": "to-TO",
512
+ "tosk albanian": "als-AL",
513
+ "tsonga": "ts-ZA",
514
+ "tswa": "tsc-MZ",
515
+ "tswana": "tn-BW",
516
+ "tumbuka": "tum-MW",
517
+ "turkish": "tr-TR",
518
+ "turkmen": "tk-TM",
519
+ "tuvaluan": "tvl-TV",
520
+ "twi": "tw-GH",
521
+ "udmurt": "udm-RU",
522
+ "ukrainian": "uk-UA",
523
+ "uma": "ppk-ID",
524
+ "umbundu": "umb-AO",
525
+ "uyghur uig": "uig-CN",
526
+ "uyghur ug": "ug-CN",
527
+ "uzbek": "uz-UZ",
528
+ "venetian": "vec-IT",
529
+ "vietnamese": "vi-VN",
530
+ "vincentian creole english": "svc-VC",
531
+ "virgin islands creole english": "vic-US",
532
+ "wallisian": "wls-WF",
533
+ "waray (philippines)": "war-PH",
534
+ "welsh": "cy-GB",
535
+ "west central oromo": "gaz-ET",
536
+ "western persian": "pes-IR",
537
+ "wolof": "wo-SN",
538
+ "xhosa": "xh-ZA",
539
+ "yiddish": "yi-YD",
540
+ "yoruba": "yo-NG",
541
+ "zulu": "zu-ZA",
542
+ }
543
+
544
  DEEPL_LANGUAGE_TO_CODE = {
545
  "bulgarian": "bg",
546
  "czech": "cs",
deep_translator/mymemory.py CHANGED
@@ -9,7 +9,7 @@ from typing import List, Optional, Union
9
  import requests
10
 
11
  from deep_translator.base import BaseTranslator
12
- from deep_translator.constants import BASE_URLS
13
  from deep_translator.exceptions import (
14
  RequestError,
15
  TooManyRequests,
@@ -41,6 +41,7 @@ class MyMemoryTranslator(BaseTranslator):
41
  source=source,
42
  target=target,
43
  payload_key="q",
 
44
  )
45
 
46
  def translate(
 
9
  import requests
10
 
11
  from deep_translator.base import BaseTranslator
12
+ from deep_translator.constants import BASE_URLS, MY_MEMORY_LANGUAGES_TO_CODES
13
  from deep_translator.exceptions import (
14
  RequestError,
15
  TooManyRequests,
 
41
  source=source,
42
  target=target,
43
  payload_key="q",
44
+ languages=MY_MEMORY_LANGUAGES_TO_CODES,
45
  )
46
 
47
  def translate(
tests/test_mymemory.py CHANGED
@@ -9,7 +9,7 @@ from deep_translator import MyMemoryTranslator, exceptions
9
 
10
  @pytest.fixture
11
  def mymemory():
12
- return MyMemoryTranslator(source="en", target="fr")
13
 
14
 
15
  def test_content(mymemory):
@@ -27,9 +27,9 @@ def test_inputs():
27
  MyMemoryTranslator(source="auto", target="")
28
 
29
  with pytest.raises(exceptions.InvalidSourceOrTargetLanguage):
30
- MyMemoryTranslator(source="", target="en")
31
 
32
- m1 = MyMemoryTranslator("en", "fr")
33
  m2 = MyMemoryTranslator("english", "french")
34
  assert m1._source == m2._source
35
  assert m1._target == m2._target
 
9
 
10
  @pytest.fixture
11
  def mymemory():
12
+ return MyMemoryTranslator(source="en-GB", target="fr-FR")
13
 
14
 
15
  def test_content(mymemory):
 
27
  MyMemoryTranslator(source="auto", target="")
28
 
29
  with pytest.raises(exceptions.InvalidSourceOrTargetLanguage):
30
+ MyMemoryTranslator(source="", target="en-GB")
31
 
32
+ m1 = MyMemoryTranslator("en-GB", "fr-FR")
33
  m2 = MyMemoryTranslator("english", "french")
34
  assert m1._source == m2._source
35
  assert m1._target == m2._target