thak123 committed
Commit 9aedf57 · verified · 1 Parent(s): cf79af4

Update app.py

Files changed (1)
  1. app.py +54 -19
app.py CHANGED
@@ -15,25 +15,60 @@ pipe = pipeline(model="thak123/gom-stt-v3", #"thak123/whisper-small-LDC-V1", #"t
 # )
 # )
 
-def transcribe(audio):
-    # text = pipe(audio)["text"]
-    # pipe(audio)
-    text = pipe(audio)
-    print("op",text)
-    return text#pipe(audio) #text
-
-iface = gr.Interface(
-    fn=transcribe,
-    inputs=[gr.Audio(sources=["microphone", "upload"])],
-    outputs="text",
-    examples=[
-        [os.path.join(os.path.dirname("."),"audio/chalyaami.mp3")],
-        [os.path.join(os.path.dirname("."),"audio/ekdonteen.flac")],
-        [os.path.join(os.path.dirname("."),"audio/heyatachadjaale.mp3")],
-    ],
-    title="Whisper Konkani",
-    description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
+def transcribe_speech(filepath):
+    output = pipe(
+        filepath,
+        max_new_tokens=256,
+        generate_kwargs={
+            "task": "transcribe",
+            "language": "konkani",
+        },  # update with the language you've fine-tuned on
+        chunk_length_s=30,
+        batch_size=8,
+    )
+    return output["text"]
+
+
+demo = gr.Blocks()
+
+mic_transcribe = gr.Interface(
+    fn=transcribe_speech,
+    inputs=gr.Audio(sources="microphone", type="filepath"),
+    outputs=gr.components.Textbox(),
+)
+
+file_transcribe = gr.Interface(
+    fn=transcribe_speech,
+    inputs=gr.Audio(sources="upload", type="filepath"),
+    outputs=gr.components.Textbox(),
 )
+with demo:
+    gr.TabbedInterface(
+        [mic_transcribe, file_transcribe],
+        ["Transcribe Microphone", "Transcribe Audio File"],
+    )
+
+demo.launch(debug=True)
+
+# def transcribe(audio):
+#     # text = pipe(audio)["text"]
+#     # pipe(audio)
+#     text = pipe(audio)
+#     print("op",text)
+#     return text#pipe(audio) #text
+
+# iface = gr.Interface(
+#     fn=transcribe,
+#     inputs=[gr.Audio(sources=["microphone", "upload"])],
+#     outputs="text",
+#     examples=[
+#         [os.path.join(os.path.dirname("."),"audio/chalyaami.mp3")],
+#         [os.path.join(os.path.dirname("."),"audio/ekdonteen.flac")],
+#         [os.path.join(os.path.dirname("."),"audio/heyatachadjaale.mp3")],
+#     ],
+#     title="Whisper Konkani",
+#     description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
+# )
 
 
-iface.launch()
+# iface.launch()
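
For reference, below is a minimal sketch (not part of the commit) of how the updated script is presumed to fit together when run outside the Gradio UI. The pipeline construction is truncated in the hunk header above, so the "automatic-speech-recognition" task string is an assumption; the audio path is one of the example clips referenced in the now commented-out demo.

# Minimal standalone sketch, assuming the pipeline is built roughly as the
# truncated context line suggests; only the model id is visible in the diff.
from transformers import pipeline

pipe = pipeline(
    "automatic-speech-recognition",  # assumed task for this Whisper checkpoint
    model="thak123/gom-stt-v3",
)

def transcribe_speech(filepath):
    # Same body as the function added in this commit.
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "konkani",
        },
        chunk_length_s=30,  # long-form audio is split into 30 s chunks
        batch_size=8,
    )
    return output["text"]

if __name__ == "__main__":
    # Example clip shipped with the Space (path taken from the old demo).
    print(transcribe_speech("audio/chalyaami.mp3"))

Compared with the old single gr.Interface, the new gr.Blocks + gr.TabbedInterface layout keeps microphone input and file upload as separate tabs that share the same transcribe_speech function.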