anzorq committed on
Commit
dfd6c72
·
1 Parent(s): 273178c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -46
app.py CHANGED
@@ -1,61 +1,42 @@
1
  import gradio as gr
2
-
3
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
4
 
5
- model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
6
- src_lang="ru"
7
- tgt_lang="zu"
8
 
9
- # tokenizer = AutoTokenizer.from_pretrained(model_path, src_lang=src_lang)
10
- tokenizer = AutoTokenizer.from_pretrained(model_path)
11
- model = AutoModelForSeq2SeqLM.from_pretrained(model_path, use_safetensors=True)#, load_in_4bit=True, device_map="auto")
12
 
13
  def translate(text, num_beams=4, num_return_sequences=4):
14
- inputs = tokenizer(text, return_tensors="pt")
15
-
16
- num_return_sequences = min(num_return_sequences, num_beams)
17
-
18
- translated_tokens = model.generate(
19
- **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang], num_beams=num_beams, num_return_sequences=num_return_sequences
20
- )
21
-
22
- translations = []
23
- for translation in tokenizer.batch_decode(translated_tokens, skip_special_tokens=True):
24
- translations.append(translation)
25
-
26
- # result = {"input":text, "translations":translations}
27
- return text, translations
28
-
29
- output = gr.Textbox()
30
- # with gr.Accordion("Advanced Options"):
31
- num_beams = gr.inputs.Slider(2, 10, step=1, label="Number of beams", default=4)
32
- num_return_sequences = gr.inputs.Slider(2, 10, step=1, label="Number of returned sentences", default=4)
33
-
34
 
35
  title = "Russian-Circassian translator demo"
36
  article = "<p style='text-align: center'>Want to help? Join the <a href='https://discord.gg/cXwv495r' target='_blank'>Discord server</a></p>"
37
 
38
- examples = [
39
- ["Мы идем домой"],
40
- ["Сегодня хорошая погода"],
41
- ["Дети играют во дворе"],
42
- ["We live in a big house"],
43
- ["Tu es une bonne personne."],
44
- ["أين تعيش؟"],
45
- ["Bir şeyler yapmak istiyorum."],
46
- ["– Если я его отпущу, то ты вовек не сможешь его поймать, – заявил Сосруко."],
47
- ["Как только старик ушел, Сатаней пошла к Саусырыко."],
48
- ["我永远不会放弃你。"],
49
- ["우리는 소치에 살고 있습니다."],
50
- ]
51
 
52
  gr.Interface(
53
- fn=translate,
54
- inputs=["text", num_beams, num_return_sequences],
55
- outputs=["text", output],
56
- title=title,
57
- # examples=examples,
58
- article=article).launch()
59
 
60
  # import gradio as gr
61
 
 
1
  import gradio as gr
 
2
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
+ import fasttext
4
 
5
+ # Initialize fastText model
6
+ model_path = 'lid.323.ftz'
7
+ language_model = fasttext.load_model(model_path)
8
 
9
+ model_path_translation = "anzorq/m2m100_418M_ft_ru-kbd_44K"
10
+ tokenizer = AutoTokenizer.from_pretrained(model_path_translation)
11
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_path_translation, use_safetensors=True)
12
 
13
  def translate(text, num_beams=4, num_return_sequences=4):
14
+ # Detect language
15
+ languages, _ = language_model.predict(text, k=1)
16
+ detected_language = languages[0].replace("__label__", "")
17
+
18
+ inputs = tokenizer(text, return_tensors="pt")
19
+ num_return_sequences = min(num_return_sequences, num_beams)
20
+ translated_tokens = model.generate(
21
+ **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang], num_beams=num_beams, num_return_sequences=num_return_sequences
22
+ )
23
+ translations = [tokenizer.decode(translation, skip_special_tokens=True) for translation in translated_tokens]
24
+
25
+ return detected_language, text, translations
 
 
 
 
 
 
 
 
26
 
27
  title = "Russian-Circassian translator demo"
28
  article = "<p style='text-align: center'>Want to help? Join the <a href='https://discord.gg/cXwv495r' target='_blank'>Discord server</a></p>"
29
 
30
# Advanced generation controls shown next to the text input.
# The top-level gr.Slider replaces the deprecated gr.inputs.Slider namespace;
# the initial position is given with `value` rather than `default`.
num_beams = gr.Slider(
    minimum=2, maximum=10, step=1, value=4, label="Number of beams"
)
num_return_sequences = gr.Slider(
    minimum=2, maximum=10, step=1, value=4, label="Number of returned sentences"
)
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  gr.Interface(
34
+ fn=translate,
35
+ inputs=["text", num_beams, num_return_sequences],
36
+ outputs=["text", "text", gr.Textbox()],
37
+ titles=["Detected Language", "Input", "Translations"],
38
+ title=title,
39
+ article=article).launch()
40
 
41
  # import gradio as gr
42