whisper-asr-uz

Running

mrmuminov committed on Apr 30

Commit

9e4dfaa

verified ·

1 Parent(s): c3e624b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -50,19 +50,29 @@ def transcribe(audio_file, task):
     # Read audio using ffmpeg_read (correcting input format)
     audio_array = ffmpeg_read(audio_data, pipe.feature_extractor.sampling_rate)
     # Convert to proper format
     inputs = {
-        "raw": np.array(audio_array),
         "sampling_rate": pipe.feature_extractor.sampling_rate
     }
     # Perform transcription
     result = pipe(
         inputs,
         batch_size=BATCH_SIZE,
-        generate_kwargs={"task": task},
-        return_timestamps=True
     )
     return result["text"]
@@ -143,6 +153,6 @@ yt_transcribe = gr.Interface(
 )
 with demo:
-    gr.TabbedInterface([file_transcribe, yt_transcribe], ["Audio file", "YouTube"])
 demo.launch()

     # Read audio using ffmpeg_read (correcting input format)
     audio_array = ffmpeg_read(audio_data, pipe.feature_extractor.sampling_rate)
+    duration = len(audio_array) / pipe.feature_extractor.sampling_rate
+    print(f"Audio duration: {duration:.2f} seconds")
     # Convert to proper format
     inputs = {
+        "array": np.array(audio_array),
         "sampling_rate": pipe.feature_extractor.sampling_rate
     }
+    generate_kwargs = {
+        "task": task,
+        "no_speech_threshold": 0.3,
+        "logprob_threshold": -1.0,
+        "compression_ratio_threshold": 2.4
+    }
     # Perform transcription
     result = pipe(
         inputs,
         batch_size=BATCH_SIZE,
+        generate_kwargs=generate_kwargs,
+        return_timestamps="word"
     )
     return result["text"]
 )
 with demo:
+    gr.TabbedInterface([file_transcribe], ["Audio file"])
 demo.launch()