Deepakkori45 committed on
Commit
9759e24
·
verified ·
1 Parent(s): 2fcb804

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -1
app.py CHANGED
@@ -50,18 +50,41 @@ def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=
50
  status_placeholder.info(f"Audio split into {len(chunks)} chunks.")
51
  return chunks
52
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def transcribe(audio_file):
54
  """
55
  Transcribe an audio file using the locally loaded Whisper model from Hugging Face.
56
  This uses librosa to load and resample the audio as required.
 
 
 
 
 
 
 
57
  """
58
  # Load audio with librosa at 16kHz (as required by Whisper)
59
  speech, sr = librosa.load(audio_file, sr=16000)
60
  input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
61
- predicted_ids = model.generate(input_features)
 
 
62
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
63
  return transcription
64
 
 
 
65
  def transcribe_chunk(chunk, index, min_length_ms=100):
66
  """
67
  Transcribe an individual audio chunk.
 
50
  status_placeholder.info(f"Audio split into {len(chunks)} chunks.")
51
  return chunks
52
 
53
+ # def transcribe(audio_file):
54
+ # """
55
+ # Transcribe an audio file using the locally loaded Whisper model from Hugging Face.
56
+ # This uses librosa to load and resample the audio as required.
57
+ # """
58
+ # # Load audio with librosa at 16kHz (as required by Whisper)
59
+ # speech, sr = librosa.load(audio_file, sr=16000)
60
+ # input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
61
+ # predicted_ids = model.generate(input_features)
62
+ # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
63
+ # return transcription
64
+
65
  def transcribe(audio_file):
66
  """
67
  Transcribe an audio file using the locally loaded Whisper model from Hugging Face.
68
  This uses librosa to load and resample the audio as required.
69
+ The transcription is forced to be in English.
70
+
71
+ Args:
72
+ audio_file (str): Path to the audio file.
73
+
74
+ Returns:
75
+ str: Transcribed text in English.
76
  """
77
  # Load audio with librosa at 16kHz (as required by Whisper)
78
  speech, sr = librosa.load(audio_file, sr=16000)
79
  input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
80
+ # Force the transcription output to be in English:
81
+ forced_ids = processor.get_decoder_prompt_ids(language="en", task="transcribe")
82
+ predicted_ids = model.generate(input_features, forced_decoder_ids=forced_ids)
83
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
84
  return transcription
85
 
86
+
87
+
88
  def transcribe_chunk(chunk, index, min_length_ms=100):
89
  """
90
  Transcribe an individual audio chunk.