lampongyuen committed
Commit 97747cc · Parent(s): 4b635d4

Update app.py

Files changed (1)
  1. app.py +52 -14
app.py CHANGED
@@ -1,15 +1,53 @@
-# https://www.gradio.app/guides/quickstart
-# https://www.gradio.app/guides/key-features
+# https://github.com/openai/whisper/discussions/categories/show-and-tell
+import wavio as wv
+import datetime
+
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+import torch
+
+from dotenv import load_dotenv
+import os
+import whisper
+import ffmpeg
+
 import gradio as gr
-import time
-def greet(name):
-    message_to_user="start here"
-    # gr.Info("message1")
-    # time.sleep(4)
-    # gr.Info("message2")
-    # gr.Warning("warning")
-    return "Hello " + name + "!"
-
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-
-demo.launch()
+from transformers import pipeline
+p = pipeline("automatic-speech-recognition")
+
+basedir = os.path.abspath(os.path.dirname(__file__))
+load_dotenv(os.path.join(basedir, '.env'))
+OPENAI_API_KEY=os.getenv("OPENAI_API_KEY")
+
+whisper_model = whisper.load_model("base")
+
+# this model was loaded from https://hf.co/models
+model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
+tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
+device = 0 if torch.cuda.is_available() else -1
+LANGS = ["ace_Arab", "eng_Latn", "fra_Latn", "spa_Latn", "yue_Hant","zho_Hans","zho_Hant"]
+LANGS_source = ["eng_Latn"]
+
+# Yue Chinese - yue_Hant, Chinese (Simplified) - zho_Hans, Chinese (Traditional) - zho_Hant
+# https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200
+
+def translate(text, src_lang, tgt_lang):
+    """
+    Translate the text from source lang to target lang
+    """
+    translation_pipeline = pipeline("translation", model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang, max_length=400, device=device)
+    result = translation_pipeline(text)
+    return result[0]['translation_text']
+
+def transcribe(audio):
+    # text_audio = p(audio)["text"]
+    text_for_audio = whisper_model.transcribe(audio)
+    text_from_whisper = text_for_audio["text"]
+    text = translate(text_from_whisper, "eng_Latn", "zho_Hans")
+    return text
+
+gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(source="microphone", type="filepath"),
+    outputs="text").launch()
+
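For context (not part of the commit): the updated app records from the microphone, transcribes the audio with Whisper, and translates the English transcript to Simplified Chinese with NLLB-200. Below is a minimal standalone sketch of just that translation step, reusing the same facebook/nllb-200-distilled-600M checkpoint and the FLORES-200 codes listed in LANGS; the sample sentence is made up for illustration.

# Standalone sketch of the NLLB translation step used in transcribe() above.
# Assumes the transformers and torch packages from the diff; sample text is illustrative only.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
device = 0 if torch.cuda.is_available() else -1

def translate(text, src_lang, tgt_lang):
    # Build a translation pipeline for one source/target pair of FLORES-200 codes.
    translator = pipeline("translation", model=model, tokenizer=tokenizer,
                          src_lang=src_lang, tgt_lang=tgt_lang,
                          max_length=400, device=device)
    return translator(text)[0]["translation_text"]

# FLORES-200 codes: zho_Hans = Simplified Chinese, zho_Hant = Traditional Chinese, yue_Hant = Yue/Cantonese.
print(translate("Hello, how are you?", "eng_Latn", "zho_Hans"))
print(translate("Hello, how are you?", "eng_Latn", "zho_Hant"))

The app itself only uses the hard-coded "eng_Latn" to "zho_Hans" pair inside transcribe(); any other combination from LANGS could be passed the same way.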