roman committed on
Commit
19a40bb
·
1 Parent(s): cbb91a2
Files changed (1) hide show
  1. app.py +8 -1
app.py CHANGED
@@ -21,7 +21,7 @@ def map_to_pred(file_path):
21
  audio, _ = librosa.load(file_path)
22
 
23
  # preprocess audio and generate standard
24
- input_features = processor([audio], return_tensors="pt", sampling_rate=32000).input_features
25
  generated_ids = model.generate(inputs=input_features)
26
  transcription = processor.batch_decode(generated_ids, normalize=True, skip_special_tokens=True)
27
  text = processor.tokenizer._normalize(transcription[0])
@@ -33,6 +33,13 @@ if uploaded_file is not None:
33
  with open(file_path, 'wb') as f:
34
  f.write(uploaded_file.getbuffer())
35
 
 
 
 
 
 
 
 
36
  text = map_to_pred(file_path)
37
 
38
  # display results
 
21
  audio, _ = librosa.load(file_path)
22
 
23
  # preprocess audio and generate standard
24
+ input_features = processor([audio], return_tensors="pt", sampling_rate=16000).input_features
25
  generated_ids = model.generate(inputs=input_features)
26
  transcription = processor.batch_decode(generated_ids, normalize=True, skip_special_tokens=True)
27
  text = processor.tokenizer._normalize(transcription[0])
 
33
  with open(file_path, 'wb') as f:
34
  f.write(uploaded_file.getbuffer())
35
 
36
+ # Convert audio file to a format supported by Whisper (if necessary)
37
+ audio = AudioSegment.from_file(temp_file_path)
38
+ temp_wav_path = tempfile.mktemp(suffix=".wav")
39
+ audio.export(temp_wav_path, format="wav")
40
+
41
+ st.audio(uploaded_file, format="audio/wav")
42
+
43
  text = map_to_pred(file_path)
44
 
45
  # display results