mrmuminov committed on
Commit
9e4dfaa
·
verified ·
1 Parent(s): c3e624b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -5
app.py CHANGED
@@ -50,19 +50,29 @@ def transcribe(audio_file, task):
50
 
51
  # Read audio using ffmpeg_read (correcting input format)
52
  audio_array = ffmpeg_read(audio_data, pipe.feature_extractor.sampling_rate)
 
 
 
53
 
54
  # Convert to proper format
55
  inputs = {
56
- "raw": np.array(audio_array),
57
  "sampling_rate": pipe.feature_extractor.sampling_rate
58
  }
59
-
 
 
 
 
 
 
 
60
  # Perform transcription
61
  result = pipe(
62
  inputs,
63
  batch_size=BATCH_SIZE,
64
- generate_kwargs={"task": task},
65
- return_timestamps=True
66
  )
67
 
68
  return result["text"]
@@ -143,6 +153,6 @@ yt_transcribe = gr.Interface(
143
  )
144
 
145
  with demo:
146
- gr.TabbedInterface([file_transcribe, yt_transcribe], ["Audio file", "YouTube"])
147
 
148
  demo.launch()
 
50
 
51
  # Read audio using ffmpeg_read (correcting input format)
52
  audio_array = ffmpeg_read(audio_data, pipe.feature_extractor.sampling_rate)
53
+
54
+ duration = len(audio_array) / pipe.feature_extractor.sampling_rate
55
+ print(f"Audio duration: {duration:.2f} seconds")
56
 
57
  # Convert to proper format
58
  inputs = {
59
+ "array": np.array(audio_array),
60
  "sampling_rate": pipe.feature_extractor.sampling_rate
61
  }
62
+
63
+ generate_kwargs = {
64
+ "task": task,
65
+ "no_speech_threshold": 0.3,
66
+ "logprob_threshold": -1.0,
67
+ "compression_ratio_threshold": 2.4
68
+ }
69
+
70
  # Perform transcription
71
  result = pipe(
72
  inputs,
73
  batch_size=BATCH_SIZE,
74
+ generate_kwargs=generate_kwargs,
75
+ return_timestamps="word"
76
  )
77
 
78
  return result["text"]
 
153
  )
154
 
155
  with demo:
156
+ gr.TabbedInterface([file_transcribe], ["Audio file"])
157
 
158
  demo.launch()