|
import streamlit as st |
|
import requests |
|
import asyncio |
|
import aiohttp |
|
from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription |
|
from aiortc.contrib.media import MediaBlackhole, MediaRecorder |
|
import av |
|
import base64 |
|
import json |
|
|
|
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo" |
|
headers = {"Authorization": f"Bearer {st.secrets['hf_token']}"} |
|
|
|
class AudioTranscriber:
    """Accumulates a running transcript by sending audio chunks to the
    Hugging Face Whisper inference API.

    Attributes:
        buffer: pending audio chunks collected by the WebRTC track callback.
        text: full transcript so far; each API response is appended with a
            trailing space.
    """

    def __init__(self):
        # Pending audio awaiting transcription (filled by AudioTrack.recv).
        self.buffer = []
        # Running transcript accumulated across API calls.
        self.text = ""

    async def transcribe(self, audio_data):
        """POST raw audio bytes to the inference API and append the result.

        Malformed or non-JSON responses are skipped rather than crashing
        the media pipeline; explicit API errors are surfaced to the UI.
        """
        async with aiohttp.ClientSession() as session:
            async with session.post(API_URL, headers=headers, data=audio_data) as response:
                # The HF inference API returns JSON even for failures,
                # e.g. {"error": "Model ... is currently loading"} — but a
                # proxy/5xx may return non-JSON, which .json() raises on.
                try:
                    result = await response.json()
                except (aiohttp.ContentTypeError, ValueError):
                    return
                if isinstance(result, dict) and 'text' in result:
                    self.text += result['text'] + " "
                    # NOTE(review): calling Streamlit from a background
                    # asyncio task only renders during an active script
                    # run — confirm this actually updates the page.
                    st.text_area("Transcription", self.text, height=200)
                elif isinstance(result, dict) and 'error' in result:
                    # Previously errors were dropped silently; show them.
                    st.error(f"Transcription API error: {result['error']}")
|
|
|
class AudioTrack(MediaStreamTrack):
    """Pass-through audio track that batches incoming frames' PCM data
    and hands each batch to an AudioTranscriber."""

    kind = "audio"

    # Number of frames to accumulate before firing a transcription request.
    _BATCH_SIZE = 5

    def __init__(self, track, transcriber):
        super().__init__()
        # Upstream aiortc track we pull frames from.
        self.track = track
        # Shared transcriber that receives batched audio bytes.
        self.transcriber = transcriber

    async def recv(self):
        """Pull the next frame, buffer its PCM bytes, and flush a full batch.

        Fixes two defects in the original: the frame that triggered the
        flush was dropped (its audio never buffered), and the flush built
        bytes via av.AudioFrame.from_ndarray(...).to_bytes() — PyAV's
        AudioFrame has no .to_bytes() method, so that path raised at
        runtime. We store raw bytes up front instead.
        """
        frame = await self.track.recv()
        # Buffer every frame, including the one that fills the batch.
        self.transcriber.buffer.append(frame.to_ndarray().tobytes())
        if len(self.transcriber.buffer) >= self._BATCH_SIZE:
            audio_data = b''.join(self.transcriber.buffer)
            self.transcriber.buffer = []
            # NOTE(review): the HF endpoint expects an encoded audio file
            # (wav/flac/ogg); raw PCM may need a WAV header — verify.
            # Fire-and-forget so transcription never blocks the pipeline.
            asyncio.create_task(self.transcriber.transcribe(audio_data))
        return frame
|
|
|
async def process_offer(offer, transcriber):
    """Answer a WebRTC offer and start draining inbound audio into
    ``transcriber``.

    Args:
        offer: dict with ``"sdp"`` and ``"type"`` keys from the remote peer.
        transcriber: AudioTranscriber that receives the batched audio.

    Returns:
        dict with the local answer's ``"sdp"`` and ``"type"``.
    """
    pc = RTCPeerConnection()
    pc.addTransceiver("audio", direction="recvonly")

    # A consumer is required: AudioTrack.recv() is only called when
    # something pulls frames from it. The original re-added the track via
    # pc.addTrack(), but with a recvonly transceiver nothing drains it,
    # so no audio was ever transcribed. MediaBlackhole (imported above)
    # is aiortc's standard "pull frames and discard" sink.
    recorder = MediaBlackhole()

    @pc.on("track")
    def on_track(track):
        if track.kind == "audio":
            recorder.addTrack(AudioTrack(track, transcriber))
            # Start pulling frames; scheduled on the running loop since
            # this callback is synchronous.
            asyncio.ensure_future(recorder.start())

    await pc.setRemoteDescription(RTCSessionDescription(sdp=offer["sdp"], type=offer["type"]))
    answer = await pc.createAnswer()
    await pc.setLocalDescription(answer)

    # NOTE(review): `pc` is never closed; the connection lives until GC.
    return {"sdp": pc.localDescription.sdp, "type": pc.localDescription.type}
|
|
|
st.title("Real-time Speech Recognition with Whisper")

# This flag is Streamlit's XSRF-protection setting, not a WebRTC context —
# the original name `webrtc_ctx` was misleading. XSRF protection blocks
# the manual SDP exchange below.
xsrf_enabled = st.config.get_option("server.enableXsrfProtection")
if xsrf_enabled:
    st.warning("To use this app, you need to disable XSRF protection. Set server.enableXsrfProtection=false in your Streamlit config.")
else:
    offer_sdp = st.text_input("Paste the offer SDP here")
    if offer_sdp:
        # User-pasted text: guard against invalid JSON instead of letting
        # json.loads raise and crash the script run.
        try:
            offer = json.loads(offer_sdp)
        except json.JSONDecodeError:
            st.error("Invalid offer: expected a JSON object with 'sdp' and 'type' fields.")
        else:
            transcriber = AudioTranscriber()
            # NOTE(review): asyncio.run() tears down its event loop when
            # process_offer returns, which cancels background transcription
            # tasks — negotiation completes, but long-lived media tasks
            # need a persistent loop. Confirm intended lifecycle.
            answer = asyncio.run(process_offer(offer, transcriber))
            st.text_area("Answer SDP", json.dumps(answer))
            st.write("Speak into your microphone. The transcription will appear below.")

st.markdown("---")
st.write("Note: This app uses the Whisper API from Hugging Face for real-time speech recognition.")