Dmtlant committed on
Commit
0fef32f
·
verified ·
1 Parent(s): 1a848a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -48
app.py CHANGED
@@ -1,39 +1,15 @@
1
  import streamlit as st
2
  import requests
3
- import pyaudio
4
- import wave
5
- import os
6
- import tempfile
7
 
8
  API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
9
  headers = {"Authorization": f"Bearer {st.secrets['hf_token']}"}
10
 
11
- def query(file):
12
- data = file.read()
13
- response = requests.post(API_URL, headers=headers, data=data)
14
  return response.json()
15
 
16
- def record_audio(duration=5, sample_rate=44100, chunk=1024, channels=1):
17
- p = pyaudio.PyAudio()
18
- stream = p.open(format=pyaudio.paInt16,
19
- channels=channels,
20
- rate=sample_rate,
21
- input=True,
22
- frames_per_buffer=chunk)
23
-
24
- st.info(f"Recording for {duration} seconds...")
25
- frames = []
26
- for i in range(0, int(sample_rate / chunk * duration)):
27
- data = stream.read(chunk)
28
- frames.append(data)
29
- st.info("Recording finished.")
30
-
31
- stream.stop_stream()
32
- stream.close()
33
- p.terminate()
34
-
35
- return frames, sample_rate
36
-
37
  st.title("Speech Recognition with Whisper")
38
 
39
  option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))
@@ -42,27 +18,69 @@ if option == 'Upload File':
42
  uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
43
  if uploaded_file is not None:
44
  st.audio(uploaded_file, format='audio/wav')
45
- file_to_transcribe = uploaded_file
46
  else:
47
- duration = st.slider("Recording duration (seconds)", 1, 30, 5)
48
- if st.button('Start Recording'):
49
- frames, sample_rate = record_audio(duration=duration)
50
-
51
- with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmpfile:
52
- wf = wave.open(tmpfile.name, 'wb')
53
- wf.setnchannels(1)
54
- wf.setsampwidth(pyaudio.PyAudio().get_sample_size(pyaudio.paInt16))
55
- wf.setframerate(sample_rate)
56
- wf.writeframes(b''.join(frames))
57
- wf.close()
58
-
59
- st.audio(tmpfile.name, format='audio/wav')
60
- file_to_transcribe = open(tmpfile.name, 'rb')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- if 'file_to_transcribe' in locals():
63
  if st.button('Transcribe'):
64
  with st.spinner('Transcribing...'):
65
- result = query(file_to_transcribe)
66
 
67
  if 'text' in result:
68
  st.success("Transcription completed!")
@@ -73,8 +91,5 @@ if 'file_to_transcribe' in locals():
73
  st.write("Error details:")
74
  st.write(result)
75
 
76
- if option == 'Record from Microphone':
77
- os.unlink(file_to_transcribe.name)
78
-
79
  st.markdown("---")
80
  st.write("Note: This app uses the Whisper API from Hugging Face.")
 
1
  import streamlit as st
2
  import requests
3
+ from io import BytesIO
4
+ import base64
 
 
5
 
6
  API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
7
  headers = {"Authorization": f"Bearer {st.secrets['hf_token']}"}
8
 
9
def query(audio_bytes, timeout=120):
    """Send audio to the Whisper inference endpoint and return its JSON reply.

    Args:
        audio_bytes: Encoded audio payload; posted unchanged as the request body.
        timeout: Seconds to wait for the endpoint before giving up. Without a
            timeout a stalled connection would block the app indefinitely.

    Returns:
        The decoded JSON response. If the request itself fails, or the reply
        is not valid JSON, a dict with an "error" key is returned instead, so
        callers that check for a 'text' key fall through to their error path
        rather than crashing.
    """
    try:
        response = requests.post(
            API_URL, headers=headers, data=audio_bytes, timeout=timeout
        )
    except requests.exceptions.RequestException as exc:
        # Connection errors, timeouts, invalid URLs, etc.
        return {"error": f"Request failed: {exc}"}

    try:
        return response.json()
    except ValueError:
        # The JSON decode errors raised by requests are ValueError subclasses;
        # this happens e.g. when a gateway answers with an HTML error page.
        return {
            "error": "Response was not valid JSON",
            "status_code": response.status_code,
            "body": response.text[:500],
        }
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  st.title("Speech Recognition with Whisper")
14
 
15
  option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))
 
18
  uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
19
  if uploaded_file is not None:
20
  st.audio(uploaded_file, format='audio/wav')
21
+ audio_bytes = uploaded_file.read()
22
  else:
23
+ st.write("Click the button below and allow microphone access to start recording")
24
+
25
+ # JavaScript to handle audio recording
26
+ js_code = """
27
var audioData = null;
var recorder = null;
var audioContext = null;
// Kept so the microphone can be released when recording stops.
var micStream = null;

// Toggle which of the two recording buttons is visible.
function setRecordingUi(isRecording) {
    document.getElementById('startButton').style.display = isRecording ? 'none' : 'inline-block';
    document.getElementById('stopButton').style.display = isRecording ? 'inline-block' : 'none';
}

// Request microphone access and begin capturing audio with Recorder.js.
function startRecording() {
    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
        alert('Microphone recording is not supported in this browser.');
        return;
    }
    navigator.mediaDevices.getUserMedia({ audio: true })
        .then(stream => {
            micStream = stream;
            audioContext = new AudioContext();
            var input = audioContext.createMediaStreamSource(stream);
            recorder = new Recorder(input);
            recorder.record();
            setRecordingUi(true);
        })
        .catch(err => {
            // Permission denied or no input device: report it instead of
            // leaving an unhandled promise rejection.
            console.error('Could not start recording:', err);
            alert('Could not access the microphone: ' + err.message);
        });
}

// Stop capturing, release the microphone, and export the take as base64 WAV
// into the hidden 'audioData' input.
function stopRecording() {
    if (!recorder) {
        // Nothing was started, so there is nothing to stop or export.
        return;
    }
    recorder.stop();
    if (micStream) {
        // Stop the tracks so the browser drops its "microphone in use" state.
        micStream.getTracks().forEach(function(track) { track.stop(); });
        micStream = null;
    }
    setRecordingUi(false);
    recorder.exportWAV(function(blob) {
        var reader = new FileReader();
        // Attach the handler before starting the read.
        reader.onloadend = function() {
            var base64data = reader.result;
            audioData = base64data.split(',')[1]; // Remove the "data:audio/wav;base64," part
            document.getElementById('audioData').value = audioData;
            // NOTE(review): the accompanying HTML defines no element with id
            // 'submitButton', so guard the lookup instead of calling click()
            // on null. Getting the data back to Python still needs a real
            // bridge - confirm the intended mechanism.
            var submit = document.getElementById('submitButton');
            if (submit) {
                submit.click();
            }
        };
        reader.readAsDataURL(blob);
        if (audioContext) {
            // A new AudioContext is created per recording; close the old one
            // so repeated takes do not accumulate open contexts.
            audioContext.close();
            audioContext = null;
        }
    });
}
58
+ """
59
+
60
+ # HTML for buttons
61
+ html_code = """
62
+ <script src="https://cdn.rawgit.com/mattdiamond/Recorderjs/08e7abd9/dist/recorder.js"></script>
63
+ <button id="startButton" onclick="startRecording()">Start Recording</button>
64
+ <button id="stopButton" style="display: none;" onclick="stopRecording()">Stop Recording</button>
65
+ <input type="hidden" id="audioData" name="audioData">
66
+ """
67
+
68
+ st.components.v1.html(html_code + f'<script>{js_code}</script>', height=100)
69
+
70
+ audio_data = st.text_input("Audio data", key="audioData", type="password")
71
+ submit_button = st.empty()
72
+
73
+ if submit_button.button("Submit", key="submitButton"):
74
+ if audio_data:
75
+ audio_bytes = base64.b64decode(audio_data)
76
+ st.audio(audio_bytes, format="audio/wav")
77
+ else:
78
+ st.warning("No audio recorded. Please record audio before submitting.")
79
 
80
+ if 'audio_bytes' in locals():
81
  if st.button('Transcribe'):
82
  with st.spinner('Transcribing...'):
83
+ result = query(audio_bytes)
84
 
85
  if 'text' in result:
86
  st.success("Transcription completed!")
 
91
  st.write("Error details:")
92
  st.write(result)
93
 
 
 
 
94
  st.markdown("---")
95
  st.write("Note: This app uses the Whisper API from Hugging Face.")