Saiyaswanth007 committed
Commit e1de00e · Parent: b9d6018

Removed complex code

Files changed (1): ui.py +431 -462
ui.py CHANGED
@@ -1,506 +1,475 @@
  import gradio as gr
- from fastapi import FastAPI
- from shared import DEFAULT_CHANGE_THRESHOLD, DEFAULT_MAX_SPEAKERS, ABSOLUTE_MAX_SPEAKERS, FINAL_TRANSCRIPTION_MODEL, REALTIME_TRANSCRIPTION_MODEL
- print(gr.__version__)
- # Connection configuration (separate signaling server from model server)
- # These will be replaced at deployment time with the correct URLs
- RENDER_SIGNALING_URL = "wss://render-signal-audio.onrender.com/stream"
- HF_SPACE_URL = "https://androidguy-speaker-diarization.hf.space"
-
- def build_ui():
-     """Build Gradio UI for speaker diarization"""
-     with gr.Blocks(title="Real-time Speaker Diarization", theme=gr.themes.Soft()) as demo:
-         # Add configuration variables to page using custom component
-         gr.HTML(
-             f"""
-             <!-- Configuration parameters -->
-             <script>
-                 window.RENDER_SIGNALING_URL = "{RENDER_SIGNALING_URL}";
-                 window.HF_SPACE_URL = "{HF_SPACE_URL}";
-             </script>
-             """
-         )
-
-         # Header and description
-         gr.Markdown("# 🎤 Live Speaker Diarization")
-         gr.Markdown(f"Real-time speech recognition with automatic speaker identification")
-
-         # Add transcription model info
-         gr.Markdown(f"**Using Models:** Final: {FINAL_TRANSCRIPTION_MODEL}, Realtime: {REALTIME_TRANSCRIPTION_MODEL}")
-
-         # Status indicator
-         connection_status = gr.HTML(
-             """<div class="status-indicator">
-                 <span id="status-text" style="color:#888;">Waiting to connect...</span>
-                 <span id="status-icon" style="width:10px; height:10px; display:inline-block;
-                     background-color:#888; border-radius:50%; margin-left:5px;"></span>
-             </div>"""
-         )
-
-         with gr.Row():
-             with gr.Column(scale=2):
-                 # Conversation display with embedded JavaScript for WebRTC and audio handling
-                 conversation_display = gr.HTML(
-                     """
-                     <div class='output' id="conversation" style='padding:20px; background:#111; border-radius:10px;
-                         min-height:400px; font-family:Arial; font-size:16px; line-height:1.5; overflow-y:auto;'>
-                         <i>Click 'Start Listening' to begin...</i>
-                     </div>
-
-                     <script>
-                     // Global variables
-                     let rtcConnection;
-                     let mediaStream;
-                     let wsConnection;
-                     let statusUpdateInterval;
-
-                     // Check connection to HF space
-                     async function checkHfConnection() {
-                         try {
-                             let response = await fetch(`${window.HF_SPACE_URL}/health`);
-                             return response.ok;
-                         } catch (err) {
-                             return false;
-                         }
-                     }
-
-                     // Start the connection and audio streaming
-                     async function startStreaming() {
-                         try {
-                             // Update status
-                             updateStatus('connecting');
-
-                             // Request microphone access
-                             mediaStream = await navigator.mediaDevices.getUserMedia({audio: {
-                                 echoCancellation: true,
-                                 noiseSuppression: true,
-                                 autoGainControl: true
-                             }});
-
-                             // Set up WebRTC connection to Render signaling server
-                             await setupWebRTC();
-
-                             // Also connect WebSocket directly to HF Space for conversation updates
-                             setupWebSocket();
-
-                             // Start status update interval
-                             statusUpdateInterval = setInterval(updateConnectionInfo, 5000);
-
-                             // Update status
-                             updateStatus('connected');
-
-                             document.getElementById("conversation").innerHTML = "<i>Connected! Start speaking...</i>";
-                         } catch (err) {
-                             console.error('Error starting stream:', err);
-                             updateStatus('error', err.message);
-                         }
-                     }
-
-                     // Set up WebRTC connection to Render signaling server
-                     async function setupWebRTC() {
-                         try {
-                             if (rtcConnection) {
-                                 rtcConnection.close();
-                             }
-
-                             // Use FastRTC's connection approach
-                             const pc = new RTCPeerConnection({
-                                 iceServers: [{ urls: 'stun:stun.l.google.com:19302' }]
-                             });
-
-                             // Add audio track
-                             mediaStream.getAudioTracks().forEach(track => {
-                                 pc.addTrack(track, mediaStream);
-                             });
-
-                             // Connect to FastRTC signaling via WebSocket
-                             const signalWs = new WebSocket(window.RENDER_SIGNALING_URL.replace('wss://', 'wss://'));
-
-                             // Handle signaling messages
-                             signalWs.onmessage = async (event) => {
-                                 const message = JSON.parse(event.data);
-
-                                 if (message.type === 'offer') {
-                                     await pc.setRemoteDescription(new RTCSessionDescription(message));
-                                     const answer = await pc.createAnswer();
-                                     await pc.setLocalDescription(answer);
-                                     signalWs.send(JSON.stringify(pc.localDescription));
-                                 } else if (message.type === 'candidate') {
-                                     if (message.candidate) {
-                                         await pc.addIceCandidate(new RTCIceCandidate(message));
-                                     }
-                                 }
-                             };
-
-                             // Send ICE candidates
-                             pc.onicecandidate = (event) => {
-                                 if (event.candidate) {
-                                     signalWs.send(JSON.stringify({
-                                         type: 'candidate',
-                                         candidate: event.candidate
-                                     }));
-                                 }
-                             };
-
-                             // Keep connection reference
-                             rtcConnection = pc;
-
-                             // Wait for connection to be established
-                             await new Promise((resolve, reject) => {
-                                 const timeout = setTimeout(() => reject(new Error("WebRTC connection timeout")), 10000);
-                                 pc.onconnectionstatechange = () => {
-                                     if (pc.connectionState === 'connected') {
-                                         clearTimeout(timeout);
-                                         resolve();
-                                     } else if (pc.connectionState === 'failed' || pc.connectionState === 'disconnected') {
-                                         clearTimeout(timeout);
-                                         reject(new Error("WebRTC connection failed"));
-                                     }
-                                 };
-                             });
-
-                             updateStatus('connected');
-                         } catch (err) {
-                             console.error('WebRTC setup error:', err);
-                             updateStatus('error', 'WebRTC setup failed: ' + err.message);
-                         }
-                     }
-
-                     // Set up WebSocket connection to HF Space for conversation updates
-                     function setupWebSocket() {
-                         const wsUrl = window.RENDER_SIGNALING_URL.replace('stream', 'ws_relay');
-                         wsConnection = new WebSocket(wsUrl);
-
-                         wsConnection.onopen = () => {
-                             console.log('WebSocket connection established');
-                         };
-
-                         wsConnection.onmessage = (event) => {
-                             try {
-                                 // Parse the JSON message
-                                 const message = JSON.parse(event.data);
-
-                                 // Process different message types
-                                 switch(message.type) {
-                                     case 'transcription':
-                                         // Handle transcription data
-                                         if (message && message.data && typeof message.data === 'object') {
-                                             document.getElementById("conversation").innerHTML = message.data.conversation_html ||
-                                                 JSON.stringify(message.data);
-                                         }
-                                         break;
-
-                                     case 'processing_result':
-                                         // Handle individual audio chunk processing result
-                                         console.log('Processing result:', message.data);
-
-                                         // Update status info if needed
-                                         if (message.data && message.data.status === "processed") {
-                                             const statusElem = document.getElementById('status-text');
-                                             if (statusElem) {
-                                                 const speakerId = message.data.speaker_id !== undefined ?
-                                                     `Speaker ${message.data.speaker_id + 1}` : '';
-
-                                                 if (speakerId) {
-                                                     statusElem.textContent = `Connected - ${speakerId} active`;
-                                                 }
-                                             }
-                                         } else if (message.data && message.data.status === "error") {
-                                             updateStatus('error', message.data.message || 'Processing error');
-                                         }
-                                         break;
-
-                                     case 'connection':
-                                         console.log('Connection status:', message.status);
-                                         updateStatus(message.status === 'connected' ? 'connected' : 'warning');
-                                         break;
-
-                                     case 'connection_established':
-                                         console.log('Connection established:', message);
-                                         updateStatus('connected');
-
-                                         // If initial conversation is provided, display it
-                                         if (message.conversation) {
-                                             document.getElementById("conversation").innerHTML = message.conversation;
-                                         }
-                                         break;
-
-                                     case 'conversation_update':
-                                         if (message.conversation_html) {
-                                             document.getElementById("conversation").innerHTML = message.conversation_html;
-                                         }
-                                         break;
-
-                                     case 'conversation_cleared':
-                                         document.getElementById("conversation").innerHTML =
-                                             "<i>Conversation cleared. Start speaking again...</i>";
-                                         break;
-
-                                     case 'error':
-                                         console.error('Error message from server:', message.message);
-                                         updateStatus('warning', message.message);
-                                         break;
-
-                                     default:
-                                         // If it's just HTML content without proper JSON structure (legacy format)
-                                         document.getElementById("conversation").innerHTML = event.data;
-                                 }
-
-                                 // Auto-scroll to bottom
-                                 const container = document.getElementById("conversation");
-                                 container.scrollTop = container.scrollHeight;
-                             } catch (e) {
-                                 // Fallback for non-JSON messages (legacy format)
-                                 document.getElementById("conversation").innerHTML = event.data;
-
-                                 // Auto-scroll to bottom
-                                 const container = document.getElementById("conversation");
-                                 container.scrollTop = container.scrollHeight;
-                             }
-                         };
-
-                         wsConnection.onerror = (error) => {
-                             console.error('WebSocket error:', error);
-                             updateStatus('warning', 'WebSocket error');
-                         };
-
-                         wsConnection.onclose = () => {
-                             console.log('WebSocket connection closed');
-                             // Try to reconnect after a delay
-                             setTimeout(setupWebSocket, 3000);
-                         };
-                     }
-
-                     // Update connection info in the UI
-                     async function updateConnectionInfo() {
-                         try {
-                             const hfConnected = await checkHfConnection();
-                             if (!hfConnected) {
-                                 updateStatus('warning', 'HF Space connection issue');
-                             } else if (rtcConnection?.connectionState === 'connected' ||
-                                 rtcConnection?.iceConnectionState === 'connected') {
-                                 updateStatus('connected');
-                             } else {
-                                 updateStatus('warning', 'Connection unstable');
-                             }
-                         } catch (err) {
-                             console.error('Error updating connection info:', err);
-                         }
-                     }
-
-                     // Update status indicator
-                     function updateStatus(status, message = '') {
-                         const statusText = document.getElementById('status-text');
-                         const statusIcon = document.getElementById('status-icon');
-
-                         switch(status) {
-                             case 'connected':
-                                 statusText.textContent = 'Connected';
-                                 statusIcon.style.backgroundColor = '#4CAF50';
-                                 break;
-                             case 'connecting':
-                                 statusText.textContent = 'Connecting...';
-                                 statusIcon.style.backgroundColor = '#FFC107';
-                                 break;
-                             case 'disconnected':
-                                 statusText.textContent = 'Disconnected';
-                                 statusIcon.style.backgroundColor = '#9E9E9E';
-                                 break;
-                             case 'error':
-                                 statusText.textContent = 'Error: ' + message;
-                                 statusIcon.style.backgroundColor = '#F44336';
-                                 break;
-                             case 'warning':
-                                 statusText.textContent = 'Warning: ' + message;
-                                 statusIcon.style.backgroundColor = '#FF9800';
-                                 break;
-                             default:
-                                 statusText.textContent = 'Unknown';
-                                 statusIcon.style.backgroundColor = '#9E9E9E';
-                         }
-                     }
-
-                     // Stop streaming and clean up
-                     function stopStreaming() {
-                         // Close WebRTC connection
-                         if (rtcConnection) {
-                             rtcConnection.close();
-                             rtcConnection = null;
-                         }
-
-                         // Close WebSocket
-                         if (wsConnection) {
-                             wsConnection.close();
-                             wsConnection = null;
-                         }
-
-                         // Stop all tracks in media stream
-                         if (mediaStream) {
-                             mediaStream.getTracks().forEach(track => track.stop());
-                             mediaStream = null;
-                         }
-
-                         // Clear interval
-                         if (statusUpdateInterval) {
-                             clearInterval(statusUpdateInterval);
-                             statusUpdateInterval = null;
-                         }
-
-                         // Update status
-                         updateStatus('disconnected');
-                     }
-
-                     // Set up event listeners when the DOM is loaded
-                     document.addEventListener('DOMContentLoaded', () => {
-                         updateStatus('disconnected');
-                     });
-                     </script>
-                     """,
-                     label="Live Conversation"
-                 )
-
                  # Control buttons
                  with gr.Row():
-                     start_btn = gr.Button("▶️ Start Listening", variant="primary", size="lg")
-                     stop_btn = gr.Button("⏹️ Stop", variant="stop", size="lg")
-                     clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="lg")
-
-                 # Status display
-                 status_output = gr.Markdown(
-                     """
-                     ## System Status
-                     Waiting to connect...
-
-                     *Click Start Listening to begin*
-                     """,
-                     label="Status Information"
-                 )

              with gr.Column(scale=1):
-                 # Settings
                  gr.Markdown("## ⚙️ Settings")

                  threshold_slider = gr.Slider(
                      minimum=0.3,
                      maximum=0.9,
                      step=0.05,
-                     value=DEFAULT_CHANGE_THRESHOLD,
                      label="Speaker Change Sensitivity",
-                     info="Lower = more sensitive (more speaker changes)"
                  )

                  max_speakers_slider = gr.Slider(
                      minimum=2,
-                     maximum=ABSOLUTE_MAX_SPEAKERS,
                      step=1,
-                     value=DEFAULT_MAX_SPEAKERS,
                      label="Maximum Speakers"
                  )

-                 update_btn = gr.Button("Update Settings", variant="secondary")
-
                  # Instructions
                  gr.Markdown("""
-                 ## 📋 Instructions
-                 1. **Start Listening** - allows browser to access microphone
-                 2. **Speak** - system will transcribe and identify speakers
                  3. **Stop** when finished
                  4. **Clear** to reset conversation

                  ## 🎨 Speaker Colors
-                 - 🔴 Speaker 1 (Red)
-                 - 🟢 Speaker 2 (Teal)
-                 - 🔵 Speaker 3 (Blue)
-                 - 🟡 Speaker 4 (Green)
-                 - ⭐ Speaker 5 (Yellow)
-                 - 🟣 Speaker 6 (Plum)
-                 - 🟤 Speaker 7 (Mint)
-                 - 🟠 Speaker 8 (Gold)
                  """)

-         # JavaScript to connect buttons to the script functions
-         gr.HTML("""
-         <script>
-         // Wait for Gradio to fully load
-         document.addEventListener('DOMContentLoaded', () => {
-             // Wait a bit for Gradio buttons to be created
-             setTimeout(() => {
-                 // Get the buttons
-                 const startBtn = document.querySelector('button[aria-label="Start Listening"]');
-                 const stopBtn = document.querySelector('button[aria-label="Stop"]');
-                 const clearBtn = document.querySelector('button[aria-label="Clear"]');
-
-                 if (startBtn) startBtn.onclick = () => startStreaming();
-                 if (stopBtn) stopBtn.onclick = () => stopStreaming();
-                 if (clearBtn) clearBtn.onclick = () => {
-                     // Make API call to clear conversation
-                     fetch(`${window.HF_SPACE_URL}/clear`, {
-                         method: 'POST'
-                     }).then(resp => resp.json())
-                     .then(data => {
-                         document.getElementById("conversation").innerHTML =
-                             "<i>Conversation cleared. Start speaking again...</i>";
-                     });
-                 }
-
-                 // Set up settings update
-                 const updateBtn = document.querySelector('button[aria-label="Update Settings"]');
-                 if (updateBtn) updateBtn.onclick = () => {
-                     const threshold = document.querySelector('input[aria-label="Speaker Change Sensitivity"]').value;
-                     const maxSpeakers = document.querySelector('input[aria-label="Maximum Speakers"]').value;
-
-                     fetch(`${window.HF_SPACE_URL}/settings?threshold=${threshold}&max_speakers=${maxSpeakers}`, {
-                         method: 'POST'
-                     }).then(resp => resp.json())
-                     .then(data => {
-                         const statusOutput = document.querySelector('.prose');
-                         if (statusOutput) {
-                             statusOutput.innerHTML = `
-                                 <h2>System Status</h2>
-                                 <p>Settings updated:</p>
-                                 <ul>
-                                     <li>Threshold: ${threshold}</li>
-                                     <li>Max Speakers: ${maxSpeakers}</li>
-                                 </ul>
-                                 <p>Transcription Models:</p>
-                                 <ul>
-                                     <li>Final: ${window.FINAL_TRANSCRIPTION_MODEL || "distil-large-v3"}</li>
-                                     <li>Realtime: ${window.REALTIME_TRANSCRIPTION_MODEL || "distil-small.en"}</li>
-                                 </ul>
-                             `;
-                         }
-                     });
-                 }
-             }, 1000);
-         });
-         </script>
-         """)

-         # Set up periodic status updates
-         def get_status():
-             """API call to get system status - called periodically"""
-             import requests
              try:
-                 resp = requests.get(f"{HF_SPACE_URL}/status")
-                 if resp.status_code == 200:
-                     return resp.json().get('status', 'No status information')
-                 return "Error getting status"
-             except Exception as e:
-                 return f"Connection error: {str(e)}"
-
-         status_timer = gr.Timer(5)
-         status_timer.tick(fn=get_status, outputs=status_output)
-
-     return demo

- # Create Gradio interface
- demo = build_ui()

- def mount_ui(app: FastAPI):
-     """Mount Gradio app to FastAPI"""
-     app.mount("/ui", demo.app)

- # For standalone testing
  if __name__ == "__main__":
-     demo.launch()
 
  import gradio as gr
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect
+ from fastapi.responses import JSONResponse
+ import asyncio
+ import json
+ import logging
+ from typing import Dict, List, Optional
+ import os
+ from datetime import datetime
+ import httpx
+ import websockets
+
+ # Configuration - use environment variables for deployment
+ class Config:
+     def __init__(self):
+         self.hf_space_url = os.getenv("HF_SPACE_URL", "https://your-space.hf.space")
+         self.render_url = os.getenv("RENDER_URL", "https://your-app.onrender.com")
+         self.default_threshold = float(os.getenv("DEFAULT_THRESHOLD", "0.7"))
+         self.default_max_speakers = int(os.getenv("DEFAULT_MAX_SPEAKERS", "4"))
+         self.max_speakers_limit = int(os.getenv("MAX_SPEAKERS_LIMIT", "8"))
+
+ config = Config()
+ logger = logging.getLogger(__name__)
+
+ class ConnectionManager:
+     """Manage WebSocket connections"""
+     def __init__(self):
+         self.active_connections: List[WebSocket] = []
+         self.conversation_history: List[Dict] = []
+
+     async def connect(self, websocket: WebSocket):
+         await websocket.accept()
+         self.active_connections.append(websocket)
+         logger.info(f"Client connected. Total connections: {len(self.active_connections)}")
+
+     def disconnect(self, websocket: WebSocket):
+         if websocket in self.active_connections:
+             self.active_connections.remove(websocket)
+             logger.info(f"Client disconnected. Total connections: {len(self.active_connections)}")
+
+     async def send_personal_message(self, message: str, websocket: WebSocket):
+         try:
+             await websocket.send_text(message)
+         except Exception as e:
+             logger.error(f"Error sending message: {e}")
+             self.disconnect(websocket)
+
+     async def broadcast(self, message: str):
+         """Send message to all connected clients"""
+         disconnected = []
+         for connection in self.active_connections:
+             try:
+                 await connection.send_text(message)
+             except Exception as e:
+                 logger.error(f"Error broadcasting to connection: {e}")
+                 disconnected.append(connection)
+
+         # Clean up disconnected clients
+         for conn in disconnected:
+             self.disconnect(conn)
+
+ manager = ConnectionManager()
+
+ def create_gradio_app():
+     """Create the Gradio interface"""
+
+     def get_client_js():
+         """Return the client-side JavaScript"""
+         return f"""
+         <script>
+         class SpeakerDiarizationClient {{
+             constructor() {{
+                 this.ws = null;
+                 this.mediaStream = null;
+                 this.mediaRecorder = null;
+                 this.isRecording = false;
+                 this.baseUrl = '{config.hf_space_url}';
+                 this.wsUrl = this.baseUrl.replace('https://', 'wss://').replace('http://', 'ws://') + '/ws';
+             }}
+
+             async startRecording() {{
+                 try {{
+                     // Request microphone access
+                     this.mediaStream = await navigator.mediaDevices.getUserMedia({{
+                         audio: {{
+                             echoCancellation: true,
+                             noiseSuppression: true,
+                             autoGainControl: true,
+                             sampleRate: 16000
+                         }}
+                     }});
+
+                     // Set up WebSocket connection
+                     await this.connectWebSocket();
+
+                     // Set up MediaRecorder for audio chunks
+                     this.mediaRecorder = new MediaRecorder(this.mediaStream, {{
+                         mimeType: 'audio/webm;codecs=opus'
+                     }});
+
+                     this.mediaRecorder.ondataavailable = (event) => {{
+                         if (event.data.size > 0 && this.ws && this.ws.readyState === WebSocket.OPEN) {{
+                             // Send audio chunk to server
+                             this.ws.send(event.data);
+                         }}
+                     }};
+
+                     // Start recording with chunks every 1 second
+                     this.mediaRecorder.start(1000);
+                     this.isRecording = true;
+
+                     this.updateStatus('connected', 'Recording started');
+
+                 }} catch (error) {{
+                     console.error('Error starting recording:', error);
+                     this.updateStatus('error', `Failed to start: ${{error.message}}`);
+                 }}
+             }}
+
+             async connectWebSocket() {{
+                 return new Promise((resolve, reject) => {{
+                     this.ws = new WebSocket(this.wsUrl);
+
+                     this.ws.onopen = () => {{
+                         console.log('WebSocket connected');
+                         resolve();
+                     }};
+
+                     this.ws.onmessage = (event) => {{
+                         try {{
+                             const data = JSON.parse(event.data);
+                             this.handleServerMessage(data);
+                         }} catch (e) {{
+                             console.error('Error parsing message:', e);
+                         }}
+                     }};
+
+                     this.ws.onerror = (error) => {{
+                         console.error('WebSocket error:', error);
+                         reject(error);
+                     }};
+
+                     this.ws.onclose = () => {{
+                         console.log('WebSocket closed');
+                         if (this.isRecording) {{
+                             // Try to reconnect after a delay
+                             setTimeout(() => this.connectWebSocket(), 3000);
+                         }}
+                     }};
+                 }});
+             }}
+
+             handleServerMessage(data) {{
+                 switch(data.type) {{
+                     case 'transcription':
+                         this.updateConversation(data.conversation_html);
+                         break;
+                     case 'speaker_update':
+                         this.updateStatus('connected', `Active: ${{data.speaker}}`);
+                         break;
+                     case 'error':
+                         this.updateStatus('error', data.message);
+                         break;
+                     case 'status':
+                         this.updateStatus(data.status, data.message);
+                         break;
+                 }}
+             }}
+
+             stopRecording() {{
+                 this.isRecording = false;
+
+                 if (this.mediaRecorder && this.mediaRecorder.state !== 'inactive') {{
+                     this.mediaRecorder.stop();
+                 }}
+
+                 if (this.mediaStream) {{
+                     this.mediaStream.getTracks().forEach(track => track.stop());
+                     this.mediaStream = null;
+                 }}
+
+                 if (this.ws) {{
+                     this.ws.close();
+                     this.ws = null;
+                 }}
+
+                 this.updateStatus('disconnected', 'Recording stopped');
+             }}
+
+             async clearConversation() {{
+                 try {{
+                     const response = await fetch(`${{this.baseUrl}}/clear`, {{
+                         method: 'POST'
+                     }});
+
+                     if (response.ok) {{
+                         this.updateConversation('<i>Conversation cleared. Start speaking...</i>');
+                     }}
+                 }} catch (error) {{
+                     console.error('Error clearing conversation:', error);
+                 }}
+             }}
+
+             updateConversation(html) {{
+                 const elem = document.getElementById('conversation');
+                 if (elem) {{
+                     elem.innerHTML = html;
+                     elem.scrollTop = elem.scrollHeight;
+                 }}
+             }}
+
+             updateStatus(status, message = '') {{
+                 const statusText = document.getElementById('status-text');
+                 const statusIcon = document.getElementById('status-icon');
+
+                 if (!statusText || !statusIcon) return;
+
+                 const colors = {{
+                     'connected': '#4CAF50',
+                     'connecting': '#FFC107',
+                     'disconnected': '#9E9E9E',
+                     'error': '#F44336',
+                     'warning': '#FF9800'
+                 }};
+
+                 const labels = {{
+                     'connected': 'Connected',
+                     'connecting': 'Connecting...',
+                     'disconnected': 'Disconnected',
+                     'error': 'Error',
+                     'warning': 'Warning'
+                 }};
+
+                 statusText.textContent = message ? `${{labels[status]}}: ${{message}}` : labels[status];
+                 statusIcon.style.backgroundColor = colors[status] || '#9E9E9E';
+             }}
+         }}
+
+         // Global client instance
+         window.diarizationClient = new SpeakerDiarizationClient();
+
+         // Button event handlers
+         function startListening() {{
+             window.diarizationClient.startRecording();
+         }}
+
+         function stopListening() {{
+             window.diarizationClient.stopRecording();
+         }}
+
+         function clearConversation() {{
+             window.diarizationClient.clearConversation();
+         }}
+
+         // Initialize on page load
+         document.addEventListener('DOMContentLoaded', () => {{
+             window.diarizationClient.updateStatus('disconnected');
+         }});
+         </script>
+         """
+
+     with gr.Blocks(
+         title="Real-time Speaker Diarization",
+         theme=gr.themes.Soft(),
+         css="""
+         .status-indicator { margin: 10px 0; }
+         .conversation-display {
+             background: #f8f9fa;
+             border: 1px solid #dee2e6;
+             border-radius: 8px;
+             padding: 20px;
+             min-height: 400px;
+             font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+             overflow-y: auto;
+         }
+         """
+     ) as demo:
+
+         # Inject client-side JavaScript
+         gr.HTML(get_client_js())
+
+         # Header
+         gr.Markdown("# 🎤 Real-time Speaker Diarization")
+         gr.Markdown("Advanced speech recognition with automatic speaker identification")
+
+         # Status indicator
+         gr.HTML("""
+         <div class="status-indicator">
+             <span id="status-text" style="color:#666;">Ready to connect</span>
+             <span id="status-icon" style="width:12px; height:12px; display:inline-block;
+                 background-color:#9E9E9E; border-radius:50%; margin-left:8px;"></span>
+         </div>
+         """)
+
+         with gr.Row():
+             with gr.Column(scale=2):
+                 # Conversation display
+                 gr.HTML("""
+                 <div id="conversation" class="conversation-display">
+                     <i>Click 'Start Listening' to begin real-time transcription...</i>
+                 </div>
+                 """)
+
                  # Control buttons
                  with gr.Row():
+                     gr.Button(
+                         "▶️ Start Listening",
+                         variant="primary",
+                         size="lg",
+                         elem_id="start-btn"
+                     ).click(fn=None, js="() => startListening()")
+
+                     gr.Button(
+                         "⏹️ Stop",
+                         variant="stop",
+                         size="lg",
+                         elem_id="stop-btn"
+                     ).click(fn=None, js="() => stopListening()")
+
+                     gr.Button(
+                         "🗑️ Clear",
+                         variant="secondary",
+                         size="lg",
+                         elem_id="clear-btn"
+                     ).click(fn=None, js="() => clearConversation()")
+
              with gr.Column(scale=1):
                  gr.Markdown("## ⚙️ Settings")

                  threshold_slider = gr.Slider(
                      minimum=0.3,
                      maximum=0.9,
                      step=0.05,
+                     value=config.default_threshold,
                      label="Speaker Change Sensitivity",
+                     info="Lower = more sensitive to speaker changes"
                  )

                  max_speakers_slider = gr.Slider(
                      minimum=2,
+                     maximum=config.max_speakers_limit,
                      step=1,
+                     value=config.default_max_speakers,
                      label="Maximum Speakers"
                  )

                  # Instructions
                  gr.Markdown("""
+                 ## 📋 How to Use
+                 1. **Start Listening** - Grant microphone access
+                 2. **Speak** - System transcribes and identifies speakers
                  3. **Stop** when finished
                  4. **Clear** to reset conversation

                  ## 🎨 Speaker Colors
+                 - 🔴 Speaker 1 - 🟢 Speaker 2 - 🔵 Speaker 3 - 🟡 Speaker 4
+                 - ⭐ Speaker 5 - 🟣 Speaker 6 - 🟤 Speaker 7 - 🟠 Speaker 8
                  """)
+
+     return demo
+
+ def create_fastapi_app():
+     """Create the FastAPI backend"""
+     app = FastAPI(title="Speaker Diarization API")
+
+     @app.websocket("/ws")
+     async def websocket_endpoint(websocket: WebSocket):
+         await manager.connect(websocket)
+         try:
+             while True:
+                 # Receive audio data
+                 data = await websocket.receive_bytes()
+
+                 # Process audio data here
+                 # This is where you'd integrate your actual speaker diarization model
+                 result = await process_audio_chunk(data)
+
+                 # Send result back to client
+                 await manager.send_personal_message(
+                     json.dumps(result),
+                     websocket
+                 )
+
+         except WebSocketDisconnect:
+             manager.disconnect(websocket)
+         except Exception as e:
+             logger.error(f"WebSocket error: {e}")
+             manager.disconnect(websocket)
+
+     @app.post("/clear")
+     async def clear_conversation():
+         """Clear the conversation history"""
+         manager.conversation_history.clear()
+         await manager.broadcast(json.dumps({
+             "type": "conversation_cleared"
+         }))
+         return {"status": "cleared"}
+
+     @app.get("/health")
+     async def health_check():
+         """Health check endpoint"""
+         return {
+             "status": "healthy",
+             "timestamp": datetime.now().isoformat(),
+             "active_connections": len(manager.active_connections)
+         }
+
+     @app.get("/status")
+     async def get_status():
+         """Get system status"""
+         return {
+             "status": "online",
+             "connections": len(manager.active_connections),
+             "conversation_length": len(manager.conversation_history)
+         }
+
+     return app
+
+ async def process_audio_chunk(audio_data: bytes) -> dict:
+     """
+     Process an audio chunk and return the diarization result by sending it
+     to the speaker diarization backend.
+     """
+     try:
+         # Convert WebM audio to appropriate format if needed
+         # This step may require additional processing depending on your backend requirements
+
+         # Connect to the speaker diarization backend via WebSocket
+         websocket_url = f"wss://{config.hf_space_url.replace('https://', '').replace('http://', '')}/ws_inference"
+         logger.info(f"Connecting to diarization backend at {websocket_url}")
+
+         async with websockets.connect(websocket_url) as websocket:
+             # Send audio data
+             await websocket.send(audio_data)
+
+             # Receive the response (may need to handle multiple messages)
+             response = await websocket.recv()
+
+             # Parse the response
              try:
+                 result = json.loads(response)
+
+                 # Add to conversation history if it's a transcription
+                 if result.get("type") in ("transcription", "conversation_update"):
+                     if "conversation_html" in result:
+                         manager.conversation_history.append({
+                             "timestamp": datetime.now().isoformat(),
+                             "html": result["conversation_html"]
+                         })
+
+                 return result
+             except json.JSONDecodeError:
+                 logger.error(f"Invalid JSON response: {response}")
+                 return {
+                     "type": "error",
+                     "error": "Invalid response from backend",
+                     "timestamp": datetime.now().isoformat()
+                 }
+     except Exception as e:
+         logger.exception(f"Error processing audio chunk: {e}")
+         return {
+             "type": "error",
+             "error": str(e),
+             "timestamp": datetime.now().isoformat()
+         }
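The conversion flagged at the top of process_audio_chunk ("Convert WebM audio to appropriate format if needed") is left unimplemented in this commit. A minimal sketch of what that step could look like, assuming the /ws_inference backend expects 16 kHz mono 16-bit PCM and that ffmpeg is available on PATH (both are assumptions; the backend's contract is not shown here). webm_to_pcm16k is a hypothetical helper, not part of the commit:

    # Hypothetical helper (assumption: backend wants 16 kHz mono s16le PCM).
    # Caveat: MediaRecorder chunks after the first lack the WebM initialization
    # segment, so a real implementation would buffer the stream or re-prepend it.
    import asyncio

    async def webm_to_pcm16k(webm_bytes: bytes) -> bytes:
        proc = await asyncio.create_subprocess_exec(
            "ffmpeg", "-i", "pipe:0",               # read WebM/Opus from stdin
            "-f", "s16le", "-acodec", "pcm_s16le",  # raw signed 16-bit PCM out
            "-ac", "1", "-ar", "16000",             # downmix to mono, resample to 16 kHz
            "pipe:1",                               # write raw PCM to stdout
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.DEVNULL,
        )
        pcm, _ = await proc.communicate(webm_bytes)
        return pcm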
+ # Create both apps
+ fastapi_app = create_fastapi_app()
+ gradio_app = create_gradio_app()
+
+ # Mount Gradio app to FastAPI (mount_gradio_app wires the Blocks into the
+ # ASGI app; gradio_app.app only exists after launch(), so it can't be mounted directly)
+ fastapi_app = gr.mount_gradio_app(fastapi_app, gradio_app, path="/")

  if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(fastapi_app, host="0.0.0.0", port=7860)
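For a quick smoke test of the merged app, run ui.py and exercise the endpoints from a second process. This snippet is illustrative only (it assumes the server above is running locally on port 7860); httpx and websockets are already imported by ui.py, so no new dependencies are needed:

    import asyncio
    import json

    import httpx
    import websockets

    async def main():
        async with httpx.AsyncClient(base_url="http://localhost:7860") as client:
            print((await client.get("/health")).json())   # expect {"status": "healthy", ...}
            print((await client.post("/clear")).json())   # expect {"status": "cleared"}

        # One dummy binary frame over /ws; real input would be WebM/Opus bytes
        # from MediaRecorder, so expect an error-type result back.
        async with websockets.connect("ws://localhost:7860/ws") as ws:
            await ws.send(b"\x00" * 320)
            print(json.loads(await ws.recv()))

    asyncio.run(main())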