Dmtlant committed on
Commit be50a26 · verified
1 Parent(s): 58aa94c

Update app.py

Files changed (1)
  1. app.py +41 -161
app.py CHANGED
@@ -1,164 +1,44 @@
  import streamlit as st
- import asyncio
- import aiohttp
- from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription
- import av
+ import requests
  import json
- import numpy as np
- import plotly.graph_objects as go
- from scipy.fft import fft

- # Disable XSRF protection
- st.set_page_config(page_title="Real-time Speech Recognition", page_icon="🎤")
- st.config.set_option("server.enableXsrfProtection", False)
-
- API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
- headers = {"Authorization": f"Bearer {st.secrets['HF_API_KEY']}"}
-
- class AudioTranscriber:
-     def __init__(self):
-         self.buffer = []
-         self.text = ""
-         self.spectrum_data = None
-
-     async def transcribe(self, audio_data):
-         async with aiohttp.ClientSession() as session:
-             async with session.post(API_URL, headers=headers, data=audio_data) as response:
-                 result = await response.json()
-                 if 'text' in result:
-                     self.text += result['text'] + " "
-                     st.session_state.transcription = self.text
-
-     def update_spectrum(self, audio_data):
-         audio_array = np.frombuffer(audio_data, dtype=np.int16)
-         fft_result = fft(audio_array)
-         self.spectrum_data = np.abs(fft_result[:len(fft_result)//2])
-
- class AudioTrack(MediaStreamTrack):
-     kind = "audio"
-
-     def __init__(self, track, transcriber):
-         super().__init__()
-         self.track = track
-         self.transcriber = transcriber
-
-     async def recv(self):
-         frame = await self.track.recv()
-         if len(self.transcriber.buffer) < 5:  # Collect 5 frames of audio before transcribing
-             self.transcriber.buffer.append(frame.to_ndarray())
-         else:
-             audio_data = b''.join([av.AudioFrame.from_ndarray(buf).to_bytes() for buf in self.transcriber.buffer])
-             self.transcriber.update_spectrum(audio_data)
-             asyncio.create_task(self.transcriber.transcribe(audio_data))
-             self.transcriber.buffer = []
-         return frame
-
- async def process_offer(offer, transcriber):
-     pc = RTCPeerConnection()
-     pc.addTransceiver("audio", direction="recvonly")
-
-     @pc.on("track")
-     def on_track(track):
-         if track.kind == "audio":
-             pc.addTrack(AudioTrack(track, transcriber))
-
-     await pc.setRemoteDescription(RTCSessionDescription(sdp=offer["sdp"], type=offer["type"]))
-     answer = await pc.createAnswer()
-     await pc.setLocalDescription(answer)
-
-     return {"sdp": pc.localDescription.sdp, "type": pc.localDescription.type}
-
- st.title("Real-time Speech Recognition with Whisper")
-
- if 'transcription' not in st.session_state:
-     st.session_state.transcription = ""
-
- if 'recording' not in st.session_state:
-     st.session_state.recording = False
-
- transcriber = AudioTranscriber()
-
- js = """
- var pc = null;
- var audioStream = null;
-
- function startRecording() {
-     navigator.mediaDevices.getUserMedia({audio: true}).then(stream => {
-         audioStream = stream;
-         pc = new RTCPeerConnection();
-         stream.getTracks().forEach(track => pc.addTrack(track, stream));
-         pc.createOffer().then(offer => {
-             pc.setLocalDescription(offer);
-             fetch('', {
-                 method: 'POST',
-                 body: JSON.stringify({offer: {sdp: offer.sdp, type: offer.type}})
-             }).then(response => response.json()).then(answer => {
-                 pc.setRemoteDescription(new RTCSessionDescription(answer));
-             });
-         });
-     });
-     document.getElementById('startButton').style.display = 'none';
-     document.getElementById('stopButton').style.display = 'inline-block';
- }
-
- function stopRecording() {
-     if (pc) {
-         pc.close();
-         pc = null;
-     }
-     if (audioStream) {
-         audioStream.getTracks().forEach(track => track.stop());
-         audioStream = null;
-     }
-     document.getElementById('startButton').style.display = 'inline-block';
-     document.getElementById('stopButton').style.display = 'none';
- }
- """
-
- st.components.v1.html(f"""
- <script>{js}</script>
- <button onclick="startRecording()" id="startButton">Start Recording</button>
- <button onclick="stopRecording()" id="stopButton" style="display:none;">Stop Recording</button>
- """, height=50)
-
- if st.button("Start Transcription", key="start_transcription"):
-     st.session_state.recording = True
-     st.rerun()
-
- if st.button("Stop Transcription", key="stop_transcription"):
-     st.session_state.recording = False
-     st.rerun()
-
- if st.session_state.recording:
-     offer = st.query_params.get('offer')
-     if offer:
-         answer = asyncio.run(process_offer(json.loads(offer), transcriber))
-         st.query_params['answer'] = json.dumps(answer)
-
- st.markdown("### Transcription")
- st.text_area("Transcribed Text", st.session_state.transcription, height=200, key="transcription_area")
-
- # Spectrum visualization with Plotly
- if transcriber.spectrum_data is not None:
-     st.markdown("### Audio Spectrum")
-     fig = go.Figure(data=go.Scatter(y=transcriber.spectrum_data, mode='lines'))
-     fig.update_layout(
-         title='Audio Spectrum',
-         xaxis_title='Frequency',
-         yaxis_title='Magnitude'
-     )
-     st.plotly_chart(fig, use_container_width=True)
-
- # Show an empty chart if the spectrum is not yet available
- else:
-     st.markdown("### Audio Spectrum")
-     fig = go.Figure()
-     fig.update_layout(
-         title='Audio Spectrum (No data yet)',
-         xaxis_title='Frequency',
-         yaxis_title='Magnitude'
-     )
-     st.plotly_chart(fig, use_container_width=True)
-
- st.markdown("---")
- st.write("Note: This app uses the Whisper API from Hugging Face for real-time speech recognition.")
+ # Read the API token from secrets.toml
+ try:
+     api_token = st.secrets["huggingface_api_token"]
+ except KeyError:
+     st.error("Hugging Face API token not found in secrets.toml. Please add it.")
+     st.stop()  # Stop execution if the token is missing
+
+ API_URL = "https://api-inference.huggingface.co/models/tencent/Tencent-Hunyuan-Large"
+ headers = {"Authorization": f"Bearer {api_token}"}
+
+ def query(payload):
+     # The request itself must sit inside the try block, otherwise
+     # requests.exceptions.RequestException can never be caught here.
+     try:
+         response = requests.post(API_URL, headers=headers, json=payload)
+         return response.json()
+     except json.JSONDecodeError as e:
+         # Checked first: requests' own JSONDecodeError also subclasses RequestException.
+         st.error(f"Invalid JSON response: {e}")
+         return None
+     except requests.exceptions.RequestException as e:
+         st.error(f"An error occurred: {e}")
+         return None
+
+ st.title("Tencent HunYuan Large Language Model")
+
+ user_input = st.text_area("Enter your text here:", height=150)
+
+ if st.button("Submit"):
+     if not user_input:
+         st.warning("Please enter some text.")
+     else:
+         with st.spinner("Generating response..."):
+             output = query({"inputs": user_input})
+             if output:
+                 try:
+                     # The API may return a list of generations or a single dict;
+                     # fall back to the raw output if neither shape matches.
+                     if isinstance(output, list) and output and "generated_text" in output[0]:
+                         response_text = output[0]["generated_text"]
+                     elif isinstance(output, dict) and "generated_text" in output:
+                         response_text = output["generated_text"]
+                     else:
+                         response_text = str(output)
+                     st.success("Response:")
+                     st.write(response_text)
+                 except (KeyError, IndexError, TypeError) as e:
+                     st.error(f"Unexpected response format: {e}. Response: {output}")
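For anyone deploying this Space: the new code reads the token from Streamlit's secrets store, so .streamlit/secrets.toml must define the exact key the code looks up. A minimal sketch, with a placeholder value:

# .streamlit/secrets.toml (key name must match st.secrets["huggingface_api_token"])
huggingface_api_token = "hf_xxxxxxxxxxxxxxxx"  # placeholder; use your own token

Text-generation endpoints on the Hugging Face Inference API typically reply with a list of the form [{"generated_text": "..."}], which is why the response handling above accepts both list and dict shapes before falling back to str(output).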