muttalib1326 committed on
Commit
d570d59
·
1 Parent(s): c19ff52

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +45 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ import gradio as gr
3
+ import time
4
+ model = whisper.load_model("base")
5
+ #from transformers import pipeline
6
+ #es_en_translator = pipeline("translation_es_to_en")
7
+
8
def transcribe(audio):
    """Transcribe one audio clip with the module-level Whisper ``model``.

    Args:
        audio: Path of the audio file to transcribe, or ``None`` when there
            is nothing to transcribe.

    Returns:
        The decoded text, or an empty string when ``audio`` is ``None``.
    """
    # Gradio can invoke the callback with None (for example when a live
    # recording is cleared); return empty text instead of letting
    # whisper.load_audio fail on a missing path.
    if audio is None:
        return ""

    # Load the audio and pad/trim it to fit 30 seconds.
    waveform = whisper.pad_or_trim(whisper.load_audio(audio))

    # Make the log-Mel spectrogram and move it to the same device as the model.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Detect the spoken language; it is only logged, not returned.
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode the audio with half precision disabled.
    # NOTE: passing task="translate" to DecodingOptions was previously
    # sketched here as a way to get English output; it is not enabled.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    return result.text
33
+
34
+
35
+
36
# Build the speech-to-text UI: microphone audio is handed to `transcribe`
# as a file path and the returned text is shown in a textbox. `live=True`
# re-runs the callback whenever the input changes.
demo = gr.Interface(
    fn=transcribe,
    title='SPEECH TO TEXT',
    inputs=[gr.inputs.Audio(source="microphone", type="filepath")],
    outputs=["textbox"],
    live=True,
)
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ tensorflow
2
+ git+https://github.com/openai/whisper.git
3
+ transformers