Edward Nagy committed on
Commit
32ebaf4
·
unverified ·
1 Parent(s): 623ac6b

Add microphone functionality as well

Browse files
Files changed (1) hide show
  1. app.py +40 -23
app.py CHANGED
@@ -6,36 +6,53 @@ import os
6
 
7
  # pipe = pipeline(model="esnagy/whisper-small-hu")
8
 
9
- def transcribe(video_url):
10
- # Download the video from the URL
11
- video_filename = "temp_video.mp4"
12
- with open(video_filename, 'wb') as f:
13
- response = requests.get(video_url)
14
- f.write(response.content)
15
 
16
- # Load the video using moviepy
17
- video = VideoFileClip(video_filename)
18
- audio = video.audio
 
 
 
 
19
 
20
- audio_file = "temp_audio.wav"
21
- audio.write_audiofile(audio_file, codec='pcm_s16le')
 
22
 
23
- # Transcribe the audio
24
- text = "Test text"
25
- # text = pipe(audio_file)["text"]
26
 
27
- # Remove temporary files
28
- os.remove(video_filename)
29
- os.remove(audio_file)
30
 
31
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- iface = gr.Interface(
34
  fn=transcribe,
35
- inputs=gr.Textbox(label="Enter video URL"),
36
  outputs="text",
37
- title="Whisper Small Hungarian",
38
- description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL to transcribe its audio.",
39
  )
40
 
41
- iface.launch()
 
 
6
 
7
  # pipe = pipeline(model="esnagy/whisper-small-hu")
8
 
9
+ def transcribe_audio(audio_file):
10
+ text = "Test text"
11
+ # text = pipe(audio_file)["text"]
12
+ os.remove(audio_file) # Remove temporary audio file
13
+ return text
 
14
 
15
+ def transcribe(video_url, audio=None):
16
+ if video_url:
17
+ # Download the video from the URL
18
+ video_filename = "temp_video.mp4"
19
+ with open(video_filename, 'wb') as f:
20
+ response = requests.get(video_url)
21
+ f.write(response.content)
22
 
23
+ # Load the video using moviepy
24
+ video = VideoFileClip(video_filename)
25
+ audio = video.audio
26
 
27
+ audio_file = "temp_audio.wav"
28
+ audio.write_audiofile(audio_file, codec='pcm_s16le')
 
29
 
30
+ text = transcribe_audio(audio_file)
 
 
31
 
32
+ # Remove temporary files
33
+ os.remove(video_filename)
34
+ os.remove(audio_file)
35
+
36
+ return text
37
+
38
+ elif audio:
39
+ return transcribe_audio(audio)
40
+
41
+ iface_video = gr.Interface(
42
+ fn=transcribe,
43
+ inputs=gr.Textbox(label="Enter video URL", placeholder="Or leave empty to use microphone"),
44
+ outputs="text",
45
+ title="Whisper Small Hungarian - Video",
46
+ description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL to transcribe its audio."
47
+ )
48
 
49
+ iface_audio = gr.Interface(
50
  fn=transcribe,
51
+ inputs=gr.Audio(sources=["microphone"], type="file", label="Or record your voice"),
52
  outputs="text",
53
+ title="Whisper Small Hungarian - Microphone",
54
+ description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Record your voice to transcribe."
55
  )
56
 
57
+ iface_video.launch(share=True)
58
+ iface_audio.launch(share=True)