thak123 committed
Commit 9aedf57 · verified · 1 Parent(s): cf79af4

Update app.py

Files changed (1)
  1. app.py +54 -19
app.py CHANGED
@@ -15,25 +15,60 @@ pipe = pipeline(model="thak123/gom-stt-v3", #"thak123/whisper-small-LDC-V1", #"t
 # )
 # )
 
-def transcribe(audio):
-    # text = pipe(audio)["text"]
-    # pipe(audio)
-    text = pipe(audio)
-    print("op",text)
-    return text#pipe(audio) #text
-
-iface = gr.Interface(
-    fn=transcribe,
-    inputs=[gr.Audio(sources=["microphone", "upload"])],
-    outputs="text",
-    examples=[
-        [os.path.join(os.path.dirname("."),"audio/chalyaami.mp3")],
-        [os.path.join(os.path.dirname("."),"audio/ekdonteen.flac")],
-        [os.path.join(os.path.dirname("."),"audio/heyatachadjaale.mp3")],
-    ],
-    title="Whisper Konkani",
-    description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
+def transcribe_speech(filepath):
+    output = pipe(
+        filepath,
+        max_new_tokens=256,
+        generate_kwargs={
+            "task": "transcribe",
+            "language": "konkani",
+        },  # update with the language you've fine-tuned on
+        chunk_length_s=30,
+        batch_size=8,
+    )
+    return output["text"]
+
+
+demo = gr.Blocks()
+
+mic_transcribe = gr.Interface(
+    fn=transcribe_speech,
+    inputs=gr.Audio(sources="microphone", type="filepath"),
+    outputs=gr.components.Textbox(),
+)
+
+file_transcribe = gr.Interface(
+    fn=transcribe_speech,
+    inputs=gr.Audio(sources="upload", type="filepath"),
+    outputs=gr.components.Textbox(),
 )
+with demo:
+    gr.TabbedInterface(
+        [mic_transcribe, file_transcribe],
+        ["Transcribe Microphone", "Transcribe Audio File"],
+    )
+
+demo.launch(debug=True)
+
+# def transcribe(audio):
+#     # text = pipe(audio)["text"]
+#     # pipe(audio)
+#     text = pipe(audio)
+#     print("op",text)
+#     return text#pipe(audio) #text
+
+# iface = gr.Interface(
+#     fn=transcribe,
+#     inputs=[gr.Audio(sources=["microphone", "upload"])],
+#     outputs="text",
+#     examples=[
+#         [os.path.join(os.path.dirname("."),"audio/chalyaami.mp3")],
+#         [os.path.join(os.path.dirname("."),"audio/ekdonteen.flac")],
+#         [os.path.join(os.path.dirname("."),"audio/heyatachadjaale.mp3")],
+#     ],
+#     title="Whisper Konkani",
+#     description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
+# )
 
 
-iface.launch()
+# iface.launch()
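
For reference, below is a minimal sketch (not part of the commit) of how the updated script is presumed to fit together when run outside the Gradio UI. The pipeline construction is truncated in the hunk header above, so the "automatic-speech-recognition" task string is an assumption; the audio path is one of the example clips referenced in the now commented-out demo.

# Minimal standalone sketch, assuming the pipeline is built roughly as the
# truncated context line suggests; only the model id is visible in the diff.
from transformers import pipeline

pipe = pipeline(
    "automatic-speech-recognition",  # assumed task for this Whisper checkpoint
    model="thak123/gom-stt-v3",
)

def transcribe_speech(filepath):
    # Same body as the function added in this commit.
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "konkani",
        },
        chunk_length_s=30,  # long-form audio is split into 30 s chunks
        batch_size=8,
    )
    return output["text"]

if __name__ == "__main__":
    # Example clip shipped with the Space (path taken from the old demo).
    print(transcribe_speech("audio/chalyaami.mp3"))

Compared with the old single gr.Interface, the new gr.Blocks + gr.TabbedInterface layout keeps microphone input and file upload as separate tabs that share the same transcribe_speech function.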