lampongyuen committed on
Commit
c509ad0
·
1 Parent(s): b3c8be6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -0
app.py CHANGED
@@ -2,6 +2,33 @@ import gradio as gr
2
  from transformers import pipeline
3
# Speech-to-text pipeline (wav2vec2, English) used by transcribe() below.
p = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
def transcribe(audio):
    """Run the module-level ASR pipeline `p` on `audio` and return the transcript text."""
    return p(audio)["text"]
 
2
  from transformers import pipeline
3
# Speech-to-text pipeline (wav2vec2, English) used by transcribe() below.
p = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
4
 
5
+
6
+
7
+ import gradio as gr
8
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
9
+ import torch
10
+
11
# this model was loaded from https://hf.co/models
# NLLB-200 distilled 600M: multilingual machine-translation model.
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
# transformers pipeline device convention: GPU index 0 if CUDA is available, -1 for CPU.
device = 0 if torch.cuda.is_available() else -1
# FLORES-200 language codes offered for translation (see link below).
LANGS = ["ace_Arab", "eng_Latn", "fra_Latn", "spa_Latn", "yue_Hant","zho_Hans","zho_Hant"]
# Source language choices — fixed to English here.
LANGS_source = ["eng_Latn"]

# Yue Chinese - yue_Hant, Chinese (Simplified)-Zho_Hans, Chinese(Traditional)-zho_Hant
# https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200
21
def translate(text, src_lang, tgt_lang):
    """
    Translate `text` from `src_lang` to `tgt_lang` (FLORES-200 language codes).

    Fix: the original rebuilt a `transformers` translation pipeline on every
    call, which is expensive (it rewires the model/tokenizer each time).
    Pipelines are now cached per (src_lang, tgt_lang) pair and reused.
    Relies on the module-level `model`, `tokenizer`, and `device` globals.
    """
    # Function-attribute cache: one pipeline per language pair.
    cache = translate.__dict__.setdefault("_pipelines", {})
    key = (src_lang, tgt_lang)
    if key not in cache:
        cache[key] = pipeline(
            "translation",
            model=model,
            tokenizer=tokenizer,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
            max_length=400,
            device=device,
        )
    result = cache[key](text)
    return result[0]['translation_text']
28
+
29
+
30
+
31
+
32
def transcribe(audio):
    """Transcribe speech in `audio` to text via the module-level ASR pipeline `p`."""
    recognition = p(audio)
    return recognition["text"]