Dmtlant committed on
Commit
dfb92f3
·
verified ·
1 Parent(s): 337a446

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -6
app.py CHANGED
@@ -1,25 +1,68 @@
1
  import streamlit as st
2
  import requests
 
 
3
  import os
 
4
 
5
  API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
6
- headers = {"Authorization": f"Bearer {st.secrets['HF_API_KEY']}"}
7
 
8
  def query(file):
9
  data = file.read()
10
  response = requests.post(API_URL, headers=headers, data=data)
11
  return response.json()
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  st.title("Speech Recognition with Whisper")
14
 
15
- uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
16
 
17
- if uploaded_file is not None:
18
- st.audio(uploaded_file, format='audio/wav')
19
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  if st.button('Transcribe'):
21
  with st.spinner('Transcribing...'):
22
- result = query(uploaded_file)
23
 
24
  if 'text' in result:
25
  st.success("Transcription completed!")
@@ -30,5 +73,8 @@ if uploaded_file is not None:
30
  st.write("Error details:")
31
  st.write(result)
32
 
 
 
 
33
  st.markdown("---")
34
  st.write("Note: This app uses the Whisper API from Hugging Face.")
 
1
  import streamlit as st
2
  import requests
3
+ import pyaudio
4
+ import wave
5
  import os
6
+ import tempfile
7
 
8
  API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
9
+ headers = {"Authorization": f"Bearer {st.secrets['hf_token']}"}
10
 
11
def query(file, timeout=60):
    """Send an audio file to the Hugging Face Whisper inference API.

    Parameters
    ----------
    file : file-like object
        Opened binary audio stream (Streamlit upload or a local ``open(..., 'rb')``
        handle). Read in full and posted as the request body.
        NOTE(review): assumes the stream position is at the start when called —
        confirm nothing upstream has consumed it.
    timeout : float, optional
        Seconds to wait for the API. ``requests`` has *no* default timeout, so
        without this an unresponsive server would hang the app indefinitely.

    Returns
    -------
    dict
        The API's parsed JSON payload: ``{'text': ...}`` on success, or an
        error object (callers check for the ``'text'`` key).
    """
    data = file.read()
    response = requests.post(API_URL, headers=headers, data=data, timeout=timeout)
    return response.json()
15
 
16
def record_audio(duration=5, sample_rate=44100, chunk=1024, channels=1):
    """Capture raw 16-bit PCM audio from the default microphone.

    Opens a PyAudio input stream, reads ``duration`` seconds of audio in
    ``chunk``-sized buffers, then tears the stream down again.

    Returns
    -------
    tuple[list[bytes], int]
        The captured frame buffers and the sample rate they were recorded
        at, ready to be written out with the ``wave`` module.
    """
    audio = pyaudio.PyAudio()
    mic = audio.open(format=pyaudio.paInt16,
                     channels=channels,
                     rate=sample_rate,
                     input=True,
                     frames_per_buffer=chunk)

    st.info(f"Recording for {duration} seconds...")
    # Number of chunk-sized reads needed to cover the requested duration.
    total_reads = int(sample_rate / chunk * duration)
    frames = [mic.read(chunk) for _ in range(total_reads)]
    st.info("Recording finished.")

    mic.stop_stream()
    mic.close()
    audio.terminate()

    return frames, sample_rate
36
+
37
st.title("Speech Recognition with Whisper")

# Let the user choose between uploading a file and recording live audio.
option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))

if option == 'Upload File':
    uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
    if uploaded_file is not None:
        st.audio(uploaded_file, format='audio/wav')
        file_to_transcribe = uploaded_file
else:
    duration = st.slider("Recording duration (seconds)", 1, 30, 5)
    if st.button('Start Recording'):
        frames, sample_rate = record_audio(duration=duration)

        # Persist the captured PCM frames as a WAV file. Writing through the
        # already-open handle (wave.open(tmpfile, ...) rather than re-opening
        # tmpfile.name) avoids the Windows restriction where a
        # NamedTemporaryFile cannot be opened a second time by name while the
        # first handle is still open; the context managers also guarantee both
        # the wave writer and the temp handle are closed before playback.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmpfile:
            with wave.open(tmpfile, 'wb') as wf:
                wf.setnchannels(1)
                wf.setsampwidth(pyaudio.PyAudio().get_sample_size(pyaudio.paInt16))
                wf.setframerate(sample_rate)
                wf.writeframes(b''.join(frames))

        st.audio(tmpfile.name, format='audio/wav')
        # NOTE(review): Streamlit re-runs the whole script on every widget
        # interaction, so this open handle (and the local name) is lost before
        # a separate 'Transcribe' button press arrives — presumably
        # st.session_state is needed to carry it across reruns; confirm.
        file_to_transcribe = open(tmpfile.name, 'rb')
61
+
62
+ if 'file_to_transcribe' in locals():
63
  if st.button('Transcribe'):
64
  with st.spinner('Transcribing...'):
65
+ result = query(file_to_transcribe)
66
 
67
  if 'text' in result:
68
  st.success("Transcription completed!")
 
73
  st.write("Error details:")
74
  st.write(result)
75
 
76
+ if option == 'Record from Microphone':
77
+ os.unlink(file_to_transcribe.name)
78
+
79
  st.markdown("---")
80
  st.write("Note: This app uses the Whisper API from Hugging Face.")