Spaces:

Dmtlant
/

Image

Sleeping

App Files Community

Dmtlant committed on Nov 7, 2024

Commit

0fef32f

verified ·

1 Parent(s): 1a848a0

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -48

app.py CHANGED Viewed

@@ -1,39 +1,15 @@
 import streamlit as st
 import requests
-import pyaudio
-import wave
-import os
-import tempfile
 API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
 headers = {"Authorization": f"Bearer {st.secrets['hf_token']}"}
-def query(file):
-    data = file.read()
-    response = requests.post(API_URL, headers=headers, data=data)
     return response.json()
-def record_audio(duration=5, sample_rate=44100, chunk=1024, channels=1):
-    p = pyaudio.PyAudio()
-    stream = p.open(format=pyaudio.paInt16,
-                    channels=channels,
-                    rate=sample_rate,
-                    input=True,
-                    frames_per_buffer=chunk)
-    st.info(f"Recording for {duration} seconds...")
-    frames = []
-    for i in range(0, int(sample_rate / chunk * duration)):
-        data = stream.read(chunk)
-        frames.append(data)
-    st.info("Recording finished.")
-    stream.stop_stream()
-    stream.close()
-    p.terminate()
-    return frames, sample_rate
 st.title("Speech Recognition with Whisper")
 option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))
@@ -42,27 +18,69 @@ if option == 'Upload File':
     uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
     if uploaded_file is not None:
         st.audio(uploaded_file, format='audio/wav')
-        file_to_transcribe = uploaded_file
 else:
-    duration = st.slider("Recording duration (seconds)", 1, 30, 5)
-    if st.button('Start Recording'):
-        frames, sample_rate = record_audio(duration=duration)
-        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmpfile:
-            wf = wave.open(tmpfile.name, 'wb')
-            wf.setnchannels(1)
-            wf.setsampwidth(pyaudio.PyAudio().get_sample_size(pyaudio.paInt16))
-            wf.setframerate(sample_rate)
-            wf.writeframes(b''.join(frames))
-            wf.close()
-            st.audio(tmpfile.name, format='audio/wav')
-            file_to_transcribe = open(tmpfile.name, 'rb')
-if 'file_to_transcribe' in locals():
     if st.button('Transcribe'):
         with st.spinner('Transcribing...'):
-            result = query(file_to_transcribe)
             if 'text' in result:
                 st.success("Transcription completed!")
@@ -73,8 +91,5 @@ if 'file_to_transcribe' in locals():
                 st.write("Error details:")
                 st.write(result)
-        if option == 'Record from Microphone':
-            os.unlink(file_to_transcribe.name)
 st.markdown("---")
 st.write("Note: This app uses the Whisper API from Hugging Face.")

 import streamlit as st
 import requests
+from io import BytesIO
+import base64
 API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
 headers = {"Authorization": f"Bearer {st.secrets['hf_token']}"}
+def query(audio_bytes):
+    response = requests.post(API_URL, headers=headers, data=audio_bytes)
     return response.json()
 st.title("Speech Recognition with Whisper")
 option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))
     uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
     if uploaded_file is not None:
         st.audio(uploaded_file, format='audio/wav')
+        audio_bytes = uploaded_file.read()
 else:
+    st.write("Click the button below and allow microphone access to start recording")
+    # JavaScript to handle audio recording
+    js_code = """
+    var audioData = null;
+    var recorder = null;
+    var audioContext = null;
+    function startRecording() {
+        navigator.mediaDevices.getUserMedia({ audio: true })
+            .then(stream => {
+                audioContext = new AudioContext();
+                var input = audioContext.createMediaStreamSource(stream);
+                recorder = new Recorder(input);
+                recorder.record();
+                document.getElementById('startButton').style.display = 'none';
+                document.getElementById('stopButton').style.display = 'inline-block';
+            });
+    }
+    function stopRecording() {
+        recorder.stop();
+        document.getElementById('startButton').style.display = 'inline-block';
+        document.getElementById('stopButton').style.display = 'none';
+        recorder.exportWAV(function(blob) {
+            var reader = new FileReader();
+            reader.readAsDataURL(blob);
+            reader.onloadend = function() {
+                var base64data = reader.result;
+                audioData = base64data.split(',')[1];  // Remove the "data:audio/wav;base64," part
+                document.getElementById('audioData').value = audioData;
+                document.getElementById('submitButton').click();
+            }
+        });
+    }
+    """
+    # HTML for buttons
+    html_code = """
+    <script src="https://cdn.rawgit.com/mattdiamond/Recorderjs/08e7abd9/dist/recorder.js"></script>
+    <button id="startButton" onclick="startRecording()">Start Recording</button>
+    <button id="stopButton" style="display: none;" onclick="stopRecording()">Stop Recording</button>
+    <input type="hidden" id="audioData" name="audioData">
+    """
+    st.components.v1.html(html_code + f'<script>{js_code}</script>', height=100)
+    audio_data = st.text_input("Audio data", key="audioData", type="password")
+    submit_button = st.empty()
+    if submit_button.button("Submit", key="submitButton"):
+        if audio_data:
+            audio_bytes = base64.b64decode(audio_data)
+            st.audio(audio_bytes, format="audio/wav")
+        else:
+            st.warning("No audio recorded. Please record audio before submitting.")
+if 'audio_bytes' in locals():
     if st.button('Transcribe'):
         with st.spinner('Transcribing...'):
+            result = query(audio_bytes)
             if 'text' in result:
                 st.success("Transcription completed!")
                 st.write("Error details:")
                 st.write(result)
 st.markdown("---")
 st.write("Note: This app uses the Whisper API from Hugging Face.")