Dmtlant committed on
Commit be50a26 · verified
1 Parent(s): 58aa94c

Update app.py

Files changed (1)
  1. app.py +41 -161
app.py CHANGED
@@ -1,164 +1,44 @@
  import streamlit as st
- import asyncio
- import aiohttp
- from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription
- import av
+ import requests
  import json
- import numpy as np
- import plotly.graph_objects as go
- from scipy.fft import fft

- # Disable XSRF protection
- st.set_page_config(page_title="Real-time Speech Recognition", page_icon="🎤")
- st.config.set_option("server.enableXsrfProtection", False)
-
- API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
- headers = {"Authorization": f"Bearer {st.secrets['HF_API_KEY']}"}
-
- class AudioTranscriber:
-     def __init__(self):
-         self.buffer = []
-         self.text = ""
-         self.spectrum_data = None
-
-     async def transcribe(self, audio_data):
-         async with aiohttp.ClientSession() as session:
-             async with session.post(API_URL, headers=headers, data=audio_data) as response:
-                 result = await response.json()
-                 if 'text' in result:
-                     self.text += result['text'] + " "
-                     st.session_state.transcription = self.text
-
-     def update_spectrum(self, audio_data):
-         audio_array = np.frombuffer(audio_data, dtype=np.int16)
-         fft_result = fft(audio_array)
-         self.spectrum_data = np.abs(fft_result[:len(fft_result)//2])
-
- class AudioTrack(MediaStreamTrack):
-     kind = "audio"
-
-     def __init__(self, track, transcriber):
-         super().__init__()
-         self.track = track
-         self.transcriber = transcriber
-
-     async def recv(self):
-         frame = await self.track.recv()
-         if len(self.transcriber.buffer) < 5:  # Collect 5 frames of audio before transcribing
-             self.transcriber.buffer.append(frame.to_ndarray())
-         else:
-             audio_data = b''.join([av.AudioFrame.from_ndarray(buf).to_bytes() for buf in self.transcriber.buffer])
-             self.transcriber.update_spectrum(audio_data)
-             asyncio.create_task(self.transcriber.transcribe(audio_data))
-             self.transcriber.buffer = []
-         return frame
-
- async def process_offer(offer, transcriber):
-     pc = RTCPeerConnection()
-     pc.addTransceiver("audio", direction="recvonly")
-
-     @pc.on("track")
-     def on_track(track):
-         if track.kind == "audio":
-             pc.addTrack(AudioTrack(track, transcriber))
-
-     await pc.setRemoteDescription(RTCSessionDescription(sdp=offer["sdp"], type=offer["type"]))
-     answer = await pc.createAnswer()
-     await pc.setLocalDescription(answer)
-
-     return {"sdp": pc.localDescription.sdp, "type": pc.localDescription.type}
-
- st.title("Real-time Speech Recognition with Whisper")
-
- if 'transcription' not in st.session_state:
-     st.session_state.transcription = ""
-
- if 'recording' not in st.session_state:
-     st.session_state.recording = False
-
- transcriber = AudioTranscriber()
-
- js = """
- var pc = null;
- var audioStream = null;
-
- function startRecording() {
-     navigator.mediaDevices.getUserMedia({audio: true}).then(stream => {
-         audioStream = stream;
-         pc = new RTCPeerConnection();
-         stream.getTracks().forEach(track => pc.addTrack(track, stream));
-         pc.createOffer().then(offer => {
-             pc.setLocalDescription(offer);
-             fetch('', {
-                 method: 'POST',
-                 body: JSON.stringify({offer: {sdp: offer.sdp, type: offer.type}})
-             }).then(response => response.json()).then(answer => {
-                 pc.setRemoteDescription(new RTCSessionDescription(answer));
-             });
-         });
-     });
-     document.getElementById('startButton').style.display = 'none';
-     document.getElementById('stopButton').style.display = 'inline-block';
- }
-
- function stopRecording() {
-     if (pc) {
-         pc.close();
-         pc = null;
-     }
-     if (audioStream) {
-         audioStream.getTracks().forEach(track => track.stop());
-         audioStream = null;
-     }
-     document.getElementById('startButton').style.display = 'inline-block';
-     document.getElementById('stopButton').style.display = 'none';
- }
- """
-
- st.components.v1.html(f"""
- <script>{js}</script>
- <button onclick="startRecording()" id="startButton">Start Recording</button>
- <button onclick="stopRecording()" id="stopButton" style="display:none;">Stop Recording</button>
- """, height=50)
-
- if st.button("Start Transcription", key="start_transcription"):
-     st.session_state.recording = True
-     st.rerun()
-
- if st.button("Stop Transcription", key="stop_transcription"):
-     st.session_state.recording = False
-     st.rerun()
-
- if st.session_state.recording:
-     offer = st.query_params.get('offer')
-     if offer:
-         answer = asyncio.run(process_offer(json.loads(offer), transcriber))
-         st.query_params['answer'] = json.dumps(answer)
-
- st.markdown("### Transcription")
- st.text_area("Transcribed Text", st.session_state.transcription, height=200, key="transcription_area")
-
- # Spectrum visualization with Plotly
- if transcriber.spectrum_data is not None:
-     st.markdown("### Audio Spectrum")
-     fig = go.Figure(data=go.Scatter(y=transcriber.spectrum_data, mode='lines'))
-     fig.update_layout(
-         title='Audio Spectrum',
-         xaxis_title='Frequency',
-         yaxis_title='Magnitude'
-     )
-     st.plotly_chart(fig, use_container_width=True)
-
- # Show an empty chart if the spectrum is not yet available
- else:
-     st.markdown("### Audio Spectrum")
-     fig = go.Figure()
-     fig.update_layout(
-         title='Audio Spectrum (No data yet)',
-         xaxis_title='Frequency',
-         yaxis_title='Magnitude'
-     )
-     st.plotly_chart(fig, use_container_width=True)
-
- st.markdown("---")
- st.write("Note: This app uses the Whisper API from Hugging Face for real-time speech recognition.")
+ # Read the API token from secrets.toml
+ try:
+     api_token = st.secrets["huggingface_api_token"]
+ except KeyError:
+     st.error("Hugging Face API token not found in secrets.toml. Please add it.")
+     st.stop()  # Stop execution if the token is missing
+
+ API_URL = "https://api-inference.huggingface.co/models/tencent/Tencent-Hunyuan-Large"
+ headers = {"Authorization": f"Bearer {api_token}"}
+
+ def query(payload):
+     # The request itself must sit inside the try block, otherwise
+     # requests.exceptions.RequestException can never be caught here.
+     try:
+         response = requests.post(API_URL, headers=headers, json=payload)
+         return response.json()
+     except json.JSONDecodeError as e:
+         # Checked first: requests' own JSONDecodeError also subclasses RequestException.
+         st.error(f"Invalid JSON response: {e}")
+         return None
+     except requests.exceptions.RequestException as e:
+         st.error(f"An error occurred: {e}")
+         return None
+
+ st.title("Tencent HunYuan Large Language Model")
+
+ user_input = st.text_area("Enter your text here:", height=150)
+
+ if st.button("Submit"):
+     if not user_input:
+         st.warning("Please enter some text.")
+     else:
+         with st.spinner("Generating response..."):
+             output = query({"inputs": user_input})
+             if output:
+                 try:
+                     # The API may return a list of generations or a single dict;
+                     # fall back to the raw output if neither shape matches.
+                     if isinstance(output, list) and output and "generated_text" in output[0]:
+                         response_text = output[0]["generated_text"]
+                     elif isinstance(output, dict) and "generated_text" in output:
+                         response_text = output["generated_text"]
+                     else:
+                         response_text = str(output)
+                     st.success("Response:")
+                     st.write(response_text)
+                 except (KeyError, IndexError, TypeError) as e:
+                     st.error(f"Unexpected response format: {e}. Response: {output}")
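For anyone deploying this Space: the new code reads the token from Streamlit's secrets store, so .streamlit/secrets.toml must define the exact key the code looks up. A minimal sketch, with a placeholder value:

# .streamlit/secrets.toml (key name must match st.secrets["huggingface_api_token"])
huggingface_api_token = "hf_xxxxxxxxxxxxxxxx"  # placeholder; use your own token

Text-generation endpoints on the Hugging Face Inference API typically reply with a list of the form [{"generated_text": "..."}], which is why the response handling above accepts both list and dict shapes before falling back to str(output).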