FredBonux committed on
Commit
edf52c8
·
1 Parent(s): 4496618

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -14
app.py CHANGED
@@ -1,21 +1,47 @@
1
- import gradio as gr
2
  from transformers import pipeline
 
 
 
 
 
3
 
 
 
 
 
4
  pipe = pipeline("automatic-speech-recognition", model="FredBonux/whisper-small-it")
5
 
6
  def transcribe(audio):
7
- text = pipe(audio)["text"]
8
- return text
9
-
10
- # demo = gr.Interface.from_pipeline(pipe)
11
- # demo.launch()
12
-
13
- iface = gr.Interface(
14
- fn=transcribe,
15
- inputs=gr.Audio(sources=["microphone"], type="filepath"),
16
- outputs="text",
17
- title="Whisper Small Italian",
18
- description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  )
20
 
21
- iface.launch()
 
 
 
 
1
  from transformers import pipeline
2
+ import gradio as gr
3
+ import os
4
+ import deepl
5
+ import openai
6
+ from pytube import YouTube
7
 
8
# Language code handed to DeepL as the translation target.
TARGET_LANG = "EN-GB"

# The DeepL auth key is read from the environment; it is None when the
# DEEPL_KEY variable is not set.
deepl_key = os.getenv("DEEPL_KEY")
translator = deepl.Translator(deepl_key)

# Speech-recognition pipeline backed by the fine-tuned Italian Whisper model.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="FredBonux/whisper-small-it",
)
13
 
14
  def transcribe(audio):
15
+ ita = pipe(audio)["text"]
16
+ eng = translator.translate_text(ita, target_lang=TARGET_LANG).text
17
+ print(f"{ita} -> {text_en}")
18
+ return ira, eng
19
+
20
def transcribe_url(url):
    """Transcribe the audio track of a YouTube video and translate it to English.

    Args:
        url: Address of the YouTube video; it is converted with ``str()``
            before being handed to pytube.

    Returns:
        A ``(italian_text, english_text)`` tuple: the ``"text"`` field of the
        speech-recognition result and the ``.text`` of its DeepL translation
        into ``TARGET_LANG``.

    Raises:
        gr.Error: If the video exposes no audio-only stream.
    """
    youtube = YouTube(str(url))
    stream = youtube.streams.filter(only_audio=True).first()
    if stream is None:
        # first() yields None on an empty query; fail with a readable message
        # instead of an AttributeError on `.download`.
        raise gr.Error("No audio-only stream is available for this video.")

    # The download result is what gets fed to the recognition pipeline.
    audio = stream.download(output_path="yt_video")
    text_it = pipe(audio)["text"]
    # The previous code read the undefined name `text_sv` here and in the
    # return statement, so every call raised NameError.
    text_en = translator.translate_text(text_it, target_lang=TARGET_LANG).text
    return text_it, text_en
26
+
27
# Tab 1: takes a video URL as text and shows the transcript and translation.
url_demo = gr.Interface(
    fn=transcribe_url,
    inputs="text",
    outputs=[
        gr.Textbox(label="Transcribed text"),
        gr.Textbox(label="English translation"),
    ],
    title="Italian video to english text",
    description="Transcribing italian video to text and translating it to english!",
)

# Tab 2: records from the microphone and hands the recording to `transcribe`
# as a file path.
# NOTE(review): the previous revision of this file passed
# `sources=["microphone"]`; which keyword gr.Audio accepts depends on the
# installed Gradio version - confirm against the pinned dependency.
voice_demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=[
        gr.Textbox(label="Transcribed text"),
        gr.Textbox(label="English translation"),
    ],
    title="Italian recorded speech to english text",
    description="Transcribing italian speech to text and translating it to english!",
)

# Combine both interfaces into one tabbed app and start serving it.
app = gr.TabbedInterface(
    interface_list=[url_demo, voice_demo],
    tab_names=["Video to English Text", "Audio to English Text"],
)

app.launch()