roman
committed on
Commit
·
19a40bb
1
Parent(s):
cbb91a2
16000
Browse files
app.py
CHANGED
@@ -21,7 +21,7 @@ def map_to_pred(file_path):
|
|
21 |
audio, _ = librosa.load(file_path)
|
22 |
|
23 |
# preprocess audio and generate standard
|
24 |
-
input_features = processor([audio], return_tensors="pt", sampling_rate=
|
25 |
generated_ids = model.generate(inputs=input_features)
|
26 |
transcription = processor.batch_decode(generated_ids, normalize=True, skip_special_tokens=True)
|
27 |
text = processor.tokenizer._normalize(transcription[0])
|
@@ -33,6 +33,13 @@ if uploaded_file is not None:
|
|
33 |
with open(file_path, 'wb') as f:
|
34 |
f.write(uploaded_file.getbuffer())
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
text = map_to_pred(file_path)
|
37 |
|
38 |
# display results
|
|
|
21 |
audio, _ = librosa.load(file_path)
|
22 |
|
23 |
# preprocess audio and generate standard
|
24 |
+
input_features = processor([audio], return_tensors="pt", sampling_rate=16000).input_features
|
25 |
generated_ids = model.generate(inputs=input_features)
|
26 |
transcription = processor.batch_decode(generated_ids, normalize=True, skip_special_tokens=True)
|
27 |
text = processor.tokenizer._normalize(transcription[0])
|
|
|
33 |
with open(file_path, 'wb') as f:
|
34 |
f.write(uploaded_file.getbuffer())
|
35 |
|
36 |
+
# Convert audio file to a format supported by Whisper (if necessary)
|
37 |
+
audio = AudioSegment.from_file(temp_file_path)
|
38 |
+
temp_wav_path = tempfile.mktemp(suffix=".wav")
|
39 |
+
audio.export(temp_wav_path, format="wav")
|
40 |
+
|
41 |
+
st.audio(uploaded_file, format="audio/wav")
|
42 |
+
|
43 |
text = map_to_pred(file_path)
|
44 |
|
45 |
# display results
|