lampongyuen commited on
Commit
837541f
·
1 Parent(s): 7b62b82

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +55 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# https://github.com/openai/whisper/discussions/categories/show-and-tell
#
# Gradio demo: record speech from the microphone, transcribe it with
# OpenAI Whisper, then translate the English transcript to Simplified
# Chinese with Meta's NLLB-200 model.

import datetime
import os

import ffmpeg
import gradio as gr
import torch
import wavio as wv
import whisper
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# NOTE(review): the previous `p = pipeline("automatic-speech-recognition")`
# eagerly downloaded and loaded a full ASR model that was never used —
# Whisper below does the transcription — so it has been removed to cut
# startup time and memory.

# Load environment variables from a .env file sitting next to this script.
basedir = os.path.abspath(os.path.dirname(__file__))
load_dotenv(os.path.join(basedir, '.env'))
# Read but not used anywhere in this file — presumably kept for later use;
# confirm it is still needed.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Speech-to-text model ("base" is the smallest general-purpose Whisper size).
whisper_model = whisper.load_model("base")

# Translation model — this model was loaded from https://hf.co/models
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
device = 0 if torch.cuda.is_available() else -1  # GPU index 0 if available, else CPU

# FLORES-200 language codes understood by NLLB:
# Yue Chinese - yue_Hant, Chinese (Simplified) - zho_Hans,
# Chinese (Traditional) - zho_Hant
# https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200
LANGS = ["ace_Arab", "eng_Latn", "fra_Latn", "spa_Latn", "yue_Hant", "zho_Hans", "zho_Hant"]
LANGS_source = ["eng_Latn"]
# Cache of constructed translation pipelines, keyed by (src_lang, tgt_lang).
# Building a Hugging Face pipeline is expensive; reuse one per language pair
# instead of reconstructing it on every request.
_translation_pipelines = {}


def translate(text, src_lang, tgt_lang):
    """
    Translate the text from source lang to target lang.

    Parameters
    ----------
    text : str
        Text to translate.
    src_lang, tgt_lang : str
        FLORES-200 language codes (e.g. "eng_Latn", "zho_Hans").

    Returns
    -------
    str
        The translated text.
    """
    key = (src_lang, tgt_lang)
    translation_pipeline = _translation_pipelines.get(key)
    if translation_pipeline is None:
        translation_pipeline = pipeline(
            "translation", model=model, tokenizer=tokenizer,
            src_lang=src_lang, tgt_lang=tgt_lang,
            max_length=400, device=device)
        _translation_pipelines[key] = translation_pipeline
    result = translation_pipeline(text)
    return result[0]['translation_text']
def transcribe(audio, src_lang="eng_Latn", tgt_lang="zho_Hans"):
    """
    Transcribe an audio file with Whisper, then translate the transcript.

    Parameters
    ----------
    audio : str
        Path to the recorded audio file (Gradio passes a filepath).
    src_lang : str, optional
        FLORES-200 code of the spoken language (default English).
    tgt_lang : str, optional
        FLORES-200 code of the output language (default Simplified Chinese).

    Returns
    -------
    str
        The translated transcript.
    """
    # text_audio = p(audio)["text"]
    whisper_result = whisper_model.transcribe(audio)
    transcript = whisper_result["text"]
    return translate(transcript, src_lang, tgt_lang)
# Wire the microphone recording to the transcribe-and-translate function
# and start the Gradio web UI.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
)
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ python-dotenv
2
+ torch
3
+ transformers
4
+ openai
5
+ tiktoken
6
+ ffmpeg-python
7
+ git+https://github.com/openai/whisper.git
8
+ git+https://github.com/spatialaudio/python-sounddevice/
9
+ wavio
10
+ gradio  # imported by app.py; stdlib "datetime" needs no pip install