Update app.py
app.py
CHANGED
@@ -1,39 +1,15 @@
 import streamlit as st
 import requests
-import pyaudio
-import wave
-import os
-import tempfile
+from io import BytesIO
+import base64
 
 API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
 headers = {"Authorization": f"Bearer {st.secrets['hf_token']}"}
 
-def query(data):
-
-    response = requests.post(API_URL, headers=headers, data=data)
+def query(audio_bytes):
+    response = requests.post(API_URL, headers=headers, data=audio_bytes)
     return response.json()
 
-def record_audio(duration=5, sample_rate=44100, chunk=1024, channels=1):
-    p = pyaudio.PyAudio()
-    stream = p.open(format=pyaudio.paInt16,
-                    channels=channels,
-                    rate=sample_rate,
-                    input=True,
-                    frames_per_buffer=chunk)
-
-    st.info(f"Recording for {duration} seconds...")
-    frames = []
-    for i in range(0, int(sample_rate / chunk * duration)):
-        data = stream.read(chunk)
-        frames.append(data)
-    st.info("Recording finished.")
-
-    stream.stop_stream()
-    stream.close()
-    p.terminate()
-
-    return frames, sample_rate
-
 st.title("Speech Recognition with Whisper")
 
 option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))
@@ -42,27 +18,69 @@ if option == 'Upload File':
     uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
     if uploaded_file is not None:
         st.audio(uploaded_file, format='audio/wav')
-
+        audio_bytes = uploaded_file.read()
 else:
-    [... old in-app microphone recording flow; content not preserved ...]
+    st.write("Click the button below and allow microphone access to start recording")
+
+    # JavaScript to handle audio recording
+    js_code = """
+    var audioData = null;
+    var recorder = null;
+    var audioContext = null;
+
+    function startRecording() {
+        navigator.mediaDevices.getUserMedia({ audio: true })
+            .then(stream => {
+                audioContext = new AudioContext();
+                var input = audioContext.createMediaStreamSource(stream);
+                recorder = new Recorder(input);
+                recorder.record();
+                document.getElementById('startButton').style.display = 'none';
+                document.getElementById('stopButton').style.display = 'inline-block';
+            });
+    }
+
+    function stopRecording() {
+        recorder.stop();
+        document.getElementById('startButton').style.display = 'inline-block';
+        document.getElementById('stopButton').style.display = 'none';
+        recorder.exportWAV(function(blob) {
+            var reader = new FileReader();
+            reader.readAsDataURL(blob);
+            reader.onloadend = function() {
+                var base64data = reader.result;
+                audioData = base64data.split(',')[1]; // Remove the "data:audio/wav;base64," part
+                document.getElementById('audioData').value = audioData;
+                document.getElementById('submitButton').click();
+            }
+        });
+    }
+    """
+
+    # HTML for buttons
+    html_code = """
+    <script src="https://cdn.rawgit.com/mattdiamond/Recorderjs/08e7abd9/dist/recorder.js"></script>
+    <button id="startButton" onclick="startRecording()">Start Recording</button>
+    <button id="stopButton" style="display: none;" onclick="stopRecording()">Stop Recording</button>
+    <input type="hidden" id="audioData" name="audioData">
+    """
+
+    st.components.v1.html(html_code + f'<script>{js_code}</script>', height=100)
+
+    audio_data = st.text_input("Audio data", key="audioData", type="password")
+    submit_button = st.empty()
+
+    if submit_button.button("Submit", key="submitButton"):
+        if audio_data:
+            audio_bytes = base64.b64decode(audio_data)
+            st.audio(audio_bytes, format="audio/wav")
+        else:
+            st.warning("No audio recorded. Please record audio before submitting.")
 
-if 'file_to_transcribe' in locals():
+if 'audio_bytes' in locals():
     if st.button('Transcribe'):
         with st.spinner('Transcribing...'):
-            result = query([...])
+            result = query(audio_bytes)
 
             if 'text' in result:
                 st.success("Transcription completed!")
@@ -73,8 +91,5 @@ if 'file_to_transcribe' in locals():
                 st.write("Error details:")
                 st.write(result)
 
-if option == 'Record from Microphone':
-    os.unlink(file_to_transcribe.name)
-
 st.markdown("---")
 st.write("Note: This app uses the Whisper API from Hugging Face.")
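For reference, a minimal standalone sketch of the Inference API call this app relies on (assumptions: a valid Hugging Face token exported as `HF_TOKEN` and a local `sample.wav`; outside Streamlit, `st.secrets` is not available):

```python
import os
import requests

API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

# Post raw audio bytes; on success the endpoint returns JSON with a "text" field.
with open("sample.wav", "rb") as f:
    result = requests.post(API_URL, headers=headers, data=f.read()).json()

print(result.get("text", result))  # transcription, or the error payload for debugging
```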