Prathamesh Sarjerao Vaidya committed on
Commit
cb974bb
·
1 Parent(s): 3543d68

made some changes

Browse files
Files changed (4) hide show
  1. Dockerfile +20 -18
  2. README.md +18 -4
  3. templates/index.html +31 -54
  4. web_app.py +135 -98
Dockerfile CHANGED
@@ -1,9 +1,7 @@
1
  FROM python:3.9-slim
2
 
3
- # Set working directory
4
  WORKDIR /app
5
 
6
- # Install system dependencies
7
  RUN apt-get update && apt-get install -y \
8
  ffmpeg \
9
  git \
@@ -11,31 +9,35 @@ RUN apt-get update && apt-get install -y \
11
  curl \
12
  && rm -rf /var/lib/apt/lists/*
13
 
14
- # Copy requirements first for better caching
15
  COPY requirements.txt .
16
 
17
- # Install Python dependencies
18
  RUN pip install --no-cache-dir -r requirements.txt
19
 
20
- # Copy application code
21
  COPY . .
22
 
23
- # Create necessary directories
24
- RUN mkdir -p templates static uploads outputs model_cache
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- # Set environment variables for HuggingFace Spaces
27
- ENV PYTHONPATH=/app
28
- ENV GRADIO_ANALYTICS_ENABLED=False
29
-
30
- # Preload models during build time (optional - comment out if build time is too long)
31
- # RUN python model_preloader.py
32
-
33
- # Expose port
34
  EXPOSE 7860
35
 
36
- # Health check
37
  HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
38
  CMD curl -f http://localhost:7860/api/system-info || exit 1
39
 
40
- # Start command for HuggingFace Spaces
41
- CMD ["python", "-c", "import subprocess; subprocess.run(['python', 'model_preloader.py']); import uvicorn; uvicorn.run('web_app:app', host='0.0.0.0', port=7860, workers=1)"]
 
1
  FROM python:3.9-slim
2
 
 
3
  WORKDIR /app
4
 
 
5
  RUN apt-get update && apt-get install -y \
6
  ffmpeg \
7
  git \
 
9
  curl \
10
  && rm -rf /var/lib/apt/lists/*
11
 
 
12
  COPY requirements.txt .
13
 
 
14
  RUN pip install --no-cache-dir -r requirements.txt
15
 
 
16
  COPY . .
17
 
18
+ # Create necessary directories & fix permissions
19
+ RUN mkdir -p templates static uploads outputs model_cache temp_files demo_results \
20
+ && chmod -R 777 templates static uploads outputs model_cache temp_files demo_results
21
+
22
+ # Environment variables
23
+ ENV PYTHONPATH=/app \
24
+ GRADIO_ANALYTICS_ENABLED=False \
25
+ HF_MODELS_CACHE=/app/model_cache \
26
+ OUTPUT_DIR=./outputs \
27
+ TEMP_DIR=./temp_files \
28
+ WHISPER_MODEL_SIZE=small \
29
+ TARGET_LANGUAGE=en \
30
+ MAX_WORKERS=1 \
31
+ USE_GPU=false \
32
+ HF_HOME=/app/model_cache \
33
+ TRANSFORMERS_CACHE=/app/model_cache \
34
+ TORCH_HOME=/app/model_cache \
35
+ XDG_CACHE_HOME=/app/model_cache \
36
+ MPLCONFIGDIR=/tmp/matplotlib
37
 
 
 
 
 
 
 
 
 
38
  EXPOSE 7860
39
 
 
40
  HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
41
  CMD curl -f http://localhost:7860/api/system-info || exit 1
42
 
43
+ CMD ["python", "-c", "import subprocess; subprocess.run(['python', 'model_preloader.py']); import uvicorn; uvicorn.run('web_app:app', host='0.0.0.0', port=7860, workers=1)"]
 
README.md CHANGED
@@ -1,3 +1,13 @@
 
 
 
 
 
 
 
 
 
 
1
  # 🎵 Multilingual Audio Intelligence System
2
 
3
  ![Multilingual Audio Intelligence System Banner](/static/imgs/banner.png)
@@ -24,11 +34,11 @@ The Multilingual Audio Intelligence System is an advanced AI-powered platform th
24
 
25
  #### 🎬 Demo Banner
26
 
27
- ![Demo Banner](/static/imgs/demo_banner.png)
28
 
29
  #### 📝 Transcript with Translation
30
 
31
- ![Transcript with Translation](/static/imgs/demo_res_transcript_translate.png)
32
 
33
  #### 📊 Visual Representation
34
 
@@ -38,7 +48,7 @@ The Multilingual Audio Intelligence System is an advanced AI-powered platform th
38
 
39
  #### 🧠 Summary Output
40
 
41
- ![Summary Output](/static/imgs/demo_res_summary.png)
42
 
43
  ## Demo & Documentation
44
 
@@ -168,4 +178,8 @@ uvicorn web_app:app --host 0.0.0.0 --port 8000
168
 
169
  - **Documentation**: Check `/api/docs` endpoint
170
  - **System Info**: Use the info button in the web interface
171
- - **Logs**: Monitor terminal output for detailed information
 
 
 
 
 
1
+ ---
2
+ title: Multilingual Audio Intelligence System
3
+ emoji: 🎵
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ short_description: AI system for multilingual transcription and translation
9
+ ---
10
+
11
  # 🎵 Multilingual Audio Intelligence System
12
 
13
  ![Multilingual Audio Intelligence System Banner](/static/imgs/banner.png)
 
34
 
35
  #### 🎬 Demo Banner
36
 
37
+ <img src="static/imgs/demo_banner.png" alt="Demo Banner"/>
38
 
39
  #### 📝 Transcript with Translation
40
 
41
+ <img src="static/imgs/demo_res_transcript_translate.png" alt="Transcript with Translation"/>
42
 
43
  #### 📊 Visual Representation
44
 
 
48
 
49
  #### 🧠 Summary Output
50
 
51
+ <img src="static/imgs/demo_res_summary.png" alt="Summary Output"/>
52
 
53
  ## Demo & Documentation
54
 
 
178
 
179
  - **Documentation**: Check `/api/docs` endpoint
180
  - **System Info**: Use the info button in the web interface
181
+ - **Logs**: Monitor terminal output for detailed information
182
+
183
+ ---
184
+
185
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
templates/index.html CHANGED
@@ -67,8 +67,14 @@
67
  <i class="fas fa-cog mr-2"></i>
68
  Full Processing
69
  </button>
70
- <span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-green-100 text-green-800">
71
- Operational
 
 
 
 
 
 
72
  </span>
73
  <button id="system-info-btn" class="text-gray-500 hover:text-gray-700">
74
  <i class="fas fa-info-circle"></i>
@@ -423,7 +429,6 @@
423
  <div class="grid grid-cols-1 gap-6">
424
  <div id="language-chart" style="width:100%;height:300px;"></div>
425
  <div id="speaker-timeline" style="width:100%;height:300px;"></div>
426
- <div id="confidence-chart" style="width:100%;height:300px;"></div>
427
  </div>
428
  </div>
429
 
@@ -485,6 +490,28 @@
485
  const processingModeBtn = document.getElementById('processing-mode-btn');
486
  const processingModeIndicator = document.getElementById('processing-mode-indicator');
487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
  // Navigation handling
489
  function showHome() {
490
  homeSection.classList.add('active');
@@ -579,7 +606,7 @@
579
  'yuri_kizaki': {
580
  name: 'Yuri Kizaki - Japanese Audio',
581
  filename: 'Yuri_Kizaki.mp3',
582
- duration: 22.8
583
  },
584
  'film_podcast': {
585
  name: 'French Film Podcast',
@@ -908,8 +935,6 @@
908
  // Speaker Timeline
909
  createSpeakerTimeline(segments);
910
 
911
- // Confidence Analysis
912
- createConfidenceChart(segments);
913
  }
914
 
915
  function createLanguageChart(segments) {
@@ -985,45 +1010,6 @@
985
  Plotly.newPlot('speaker-timeline', data, layout, {responsive: true});
986
  }
987
 
988
- function createConfidenceChart(segments) {
989
- const confidenceRanges = {
990
- 'High (90-100%)': 0,
991
- 'Medium (70-89%)': 0,
992
- 'Low (50-69%)': 0,
993
- 'Very Low (<50%)': 0
994
- };
995
-
996
- segments.forEach(seg => {
997
- const confidence = seg.confidence * 100;
998
- if (confidence >= 90) confidenceRanges['High (90-100%)']++;
999
- else if (confidence >= 70) confidenceRanges['Medium (70-89%)']++;
1000
- else if (confidence >= 50) confidenceRanges['Low (50-69%)']++;
1001
- else confidenceRanges['Very Low (<50%)']++;
1002
- });
1003
-
1004
- const data = [{
1005
- x: Object.keys(confidenceRanges),
1006
- y: Object.values(confidenceRanges),
1007
- type: 'bar',
1008
- marker: {
1009
- color: ['#10B981', '#F59E0B', '#EF4444', '#6B7280']
1010
- }
1011
- }];
1012
-
1013
- const layout = {
1014
- title: {
1015
- text: '📊 Recognition Confidence Distribution',
1016
- font: { size: 18, family: 'Arial, sans-serif' }
1017
- },
1018
- xaxis: { title: 'Confidence Level' },
1019
- yaxis: { title: 'Number of Segments' },
1020
- height: 300,
1021
- margin: { t: 50, b: 80, l: 50, r: 20 }
1022
- };
1023
-
1024
- Plotly.newPlot('confidence-chart', data, layout, {responsive: true});
1025
- }
1026
-
1027
  function populateTranscript(segments) {
1028
  const transcriptContent = document.getElementById('transcript-content');
1029
  transcriptContent.innerHTML = '';
@@ -1032,12 +1018,6 @@
1032
  const segmentDiv = document.createElement('div');
1033
  segmentDiv.className = 'mb-6 p-4 border border-gray-200 rounded-lg bg-white shadow-sm';
1034
 
1035
- // Ensure confidence is a positive percentage
1036
- const confidencePercent = Math.round(Math.abs(segment.confidence * 100));
1037
- const confidenceColor = confidencePercent >= 90 ? 'bg-green-100 text-green-800' :
1038
- confidencePercent >= 70 ? 'bg-yellow-100 text-yellow-800' :
1039
- 'bg-red-100 text-red-800';
1040
-
1041
  segmentDiv.innerHTML = `
1042
  <div class="flex justify-between items-start mb-3">
1043
  <span class="inline-flex items-center px-3 py-1 rounded-full text-sm font-medium bg-blue-100 text-blue-800">
@@ -1053,9 +1033,6 @@
1053
  <div class="flex items-center mb-2">
1054
  <i class="fas fa-microphone text-gray-600 mr-2"></i>
1055
  <span class="text-sm font-medium text-gray-700">Original (${segment.language.toUpperCase()})</span>
1056
- <span class="ml-2 inline-flex items-center px-2 py-0.5 rounded text-xs font-medium ${confidenceColor}">
1057
- ${confidencePercent}% confidence
1058
- </span>
1059
  </div>
1060
  <p class="text-gray-800 leading-relaxed">${segment.text}</p>
1061
  </div>
 
67
  <i class="fas fa-cog mr-2"></i>
68
  Full Processing
69
  </button>
70
+ <!-- <span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-green-100 text-green-800">
71
+ <!-- <i class="fas fa-circle w-2 h-2 mr-2"></i> -->
72
+ <!-- <span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-green-100 text-green-800"> -->
73
+ <!-- <i class="fas fa-circle w-2 h-2 mr-1"></i> -->
74
+ <!--⬤ Operational
75
+ </span> -->
76
+ <span id="server-status" class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium">
77
+ ⬤ Checking...
78
  </span>
79
  <button id="system-info-btn" class="text-gray-500 hover:text-gray-700">
80
  <i class="fas fa-info-circle"></i>
 
429
  <div class="grid grid-cols-1 gap-6">
430
  <div id="language-chart" style="width:100%;height:300px;"></div>
431
  <div id="speaker-timeline" style="width:100%;height:300px;"></div>
 
432
  </div>
433
  </div>
434
 
 
490
  const processingModeBtn = document.getElementById('processing-mode-btn');
491
  const processingModeIndicator = document.getElementById('processing-mode-indicator');
492
 
493
/**
 * Refresh the "server-status" badge from the backend health endpoint.
 *
 * - "Live"        : /health answered with a 2xx status and a JSON body
 *                   whose `status` field is "ok".
 * - "Error"       : the server answered, but with a non-2xx status, a
 *                   non-JSON body, or a `status` other than "ok".
 * - "Server Down" : the request itself failed (fetch() rejected).
 */
async function updateServerStatus() {
    const el = document.getElementById("server-status");
    if (!el) return; // badge not present on this page; nothing to update

    const baseClasses = "inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium";
    const show = (label, colorClasses) => {
        el.textContent = `⬤ ${label}`;
        el.className = `${baseClasses} ${colorClasses}`;
    };

    let res;
    try {
        res = await fetch("/health");
    } catch (err) {
        // fetch() only rejects when no HTTP response was obtained at all,
        // so a rejection means the server could not be reached.
        show("Server Down", "bg-red-100 text-red-800");
        return;
    }

    // The health endpoint may report problems in the JSON body while still
    // answering with a 2xx status, so check the payload as well as res.ok.
    let healthy = res.ok;
    if (healthy) {
        try {
            const body = await res.json();
            healthy = Boolean(body) && body.status === "ok";
        } catch (err) {
            healthy = false; // body was not valid JSON
        }
    }

    if (healthy) {
        show("Live", "bg-green-100 text-green-800");
    } else {
        show("Error", "bg-yellow-100 text-yellow-800");
    }
}
511
+ // setInterval(updateServerStatus, 5000);
512
+ // updateServerStatus();
513
+ document.addEventListener("DOMContentLoaded", updateServerStatus);
514
+
515
  // Navigation handling
516
  function showHome() {
517
  homeSection.classList.add('active');
 
606
  'yuri_kizaki': {
607
  name: 'Yuri Kizaki - Japanese Audio',
608
  filename: 'Yuri_Kizaki.mp3',
609
+ duration: 23.0
610
  },
611
  'film_podcast': {
612
  name: 'French Film Podcast',
 
935
  // Speaker Timeline
936
  createSpeakerTimeline(segments);
937
 
 
 
938
  }
939
 
940
  function createLanguageChart(segments) {
 
1010
  Plotly.newPlot('speaker-timeline', data, layout, {responsive: true});
1011
  }
1012
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1013
  function populateTranscript(segments) {
1014
  const transcriptContent = document.getElementById('transcript-content');
1015
  transcriptContent.innerHTML = '';
 
1018
  const segmentDiv = document.createElement('div');
1019
  segmentDiv.className = 'mb-6 p-4 border border-gray-200 rounded-lg bg-white shadow-sm';
1020
 
 
 
 
 
 
 
1021
  segmentDiv.innerHTML = `
1022
  <div class="flex justify-between items-start mb-3">
1023
  <span class="inline-flex items-center px-3 py-1 rounded-full text-sm font-medium bg-blue-100 text-blue-800">
 
1033
  <div class="flex items-center mb-2">
1034
  <i class="fas fa-microphone text-gray-600 mr-2"></i>
1035
  <span class="text-sm font-medium text-gray-700">Original (${segment.language.toUpperCase()})</span>
 
 
 
1036
  </div>
1037
  <p class="text-gray-800 leading-relaxed">${segment.text}</p>
1038
  </div>
web_app.py CHANGED
@@ -128,6 +128,26 @@ DEMO_FILES = {
128
  }
129
  }
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  # Demo results cache
132
  demo_results_cache = {}
133
 
@@ -146,7 +166,7 @@ class DemoManager:
146
  file_path = self.demo_dir / config["filename"]
147
  results_path = self.results_dir / f"{demo_id}_results.json"
148
 
149
- # Check if file exists
150
  if not file_path.exists():
151
  logger.info(f"Downloading demo file: {config['filename']}")
152
  try:
@@ -155,13 +175,13 @@ class DemoManager:
155
  logger.error(f"Failed to download {config['filename']}: {e}")
156
  continue
157
 
158
- # Check if results exist
159
  if not results_path.exists():
160
- logger.info(f"Preprocessing demo file: {config['filename']}")
161
  try:
162
- await self.preprocess_demo_file(demo_id, file_path, results_path)
163
  except Exception as e:
164
- logger.error(f"Failed to preprocess {config['filename']}: {e}")
165
  continue
166
 
167
  # Load results into cache
@@ -181,109 +201,126 @@ class DemoManager:
181
 
182
  logger.info(f"Downloaded demo file: {file_path.name}")
183
 
184
- async def preprocess_demo_file(self, demo_id: str, file_path: Path, results_path: Path):
185
- """Preprocess demo file and cache results."""
186
- config = DEMO_FILES[demo_id]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
- # Create realistic demo results based on the actual content
189
- if demo_id == "yuri_kizaki":
190
- segments = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  {
192
  "speaker": "Speaker 1",
193
  "start_time": 0.0,
194
- "end_time": 8.5,
195
- "text": "音声メッセージが既存のウェブサイトを超えたコミュニケーションを実現。目で見るだけだったウェブサイトに音声情報をインクルードすることで、",
196
- "translated_text": "Audio messages enable communication beyond existing websites. By incorporating audio information into visually-driven websites,",
197
- "language": "ja",
198
- "confidence": 0.94
199
- },
200
- {
201
- "speaker": "Speaker 1",
202
- "start_time": 8.5,
203
- "end_time": 16.2,
204
- "text": "情報に新しい価値を与え、他者との差別化に効果を発揮します。また、文字やグラフィックだけでは伝えることの難しかった感情やニュアンスを表現し、",
205
- "translated_text": "you can add new value to the information and effectively differentiate from others. They also express emotions and nuances that are difficult to convey with text and graphics alone,",
206
- "language": "ja",
207
- "confidence": 0.96
208
- },
209
- {
210
- "speaker": "Speaker 1",
211
- "start_time": 16.2,
212
- "end_time": 22.8,
213
- "text": "ユーザーの興味と理解を深めます。見る、聞く、理解するウェブサイトへ。音声メッセージが人の心を動かします。",
214
- "translated_text": "deepening user interest and understanding. Turn your website into a place of sight, hearing, and understanding. Audio messages move people's hearts.",
215
- "language": "ja",
216
- "confidence": 0.95
217
  }
218
  ]
219
- duration = 22.8
 
 
 
220
 
221
- elif demo_id == "film_podcast":
222
- segments = [
 
 
 
 
 
223
  {
224
- "speaker": "Speaker 1",
225
  "start_time": 0.0,
226
- "end_time": 5.0,
227
- "text": "Le film intitulé The Social Network traite de la création du site Facebook par Mark Zuckerberg",
228
- "translated_text": "The film The Social Network deals with the creation of Facebook by Mark Zuckerberg",
229
- "language": "fr",
230
- "confidence": 0.97
231
- },
232
- {
233
- "speaker": "Speaker 1",
234
- "start_time": 5.0,
235
- "end_time": 14.0,
236
- "text": "et des problèmes judiciaires que cela a comporté pour le créateur de ce site.",
237
- "translated_text": "and the legal problems this caused for the creator of this site.",
238
- "language": "fr",
239
- "confidence": 0.95
240
- },
241
- {
242
- "speaker": "Speaker 1",
243
- "start_time": 14.0,
244
- "end_time": 19.0,
245
- "text": "Ce film est très réaliste et très intéressant.",
246
- "translated_text": "This film is very realistic and very interesting.",
247
- "language": "fr",
248
- "confidence": 0.98
249
- },
250
- {
251
- "speaker": "Speaker 1",
252
- "start_time": 19.0,
253
- "end_time": 25.0,
254
- "text": "La semaine dernière, j'ai été au cinéma voir Paranormal Activity 2.",
255
- "translated_text": "Last week, I went to the cinema to see Paranormal Activity 2.",
256
- "language": "fr",
257
- "confidence": 0.96
258
  }
259
- ]
260
- duration = 25.0
261
-
262
- # Create comprehensive results
263
- results = {
264
- "segments": segments,
265
  "summary": {
266
- "total_duration": duration,
267
- "num_speakers": len(set(seg["speaker"] for seg in segments)),
268
- "num_segments": len(segments),
269
- "languages": [segments[0]["language"]],
270
- "processing_time": 0.5,
271
- "file_path": str(file_path),
272
- "demo_id": demo_id
273
- },
274
- "metadata": {
275
- "original_filename": config["filename"],
276
- "display_name": config["display_name"],
277
- "language": config["language"],
278
- "description": config["description"]
279
  }
280
  }
281
-
282
- # Save results
283
- with open(results_path, 'w', encoding='utf-8') as f:
284
- json.dump(results, f, indent=2, ensure_ascii=False)
285
-
286
- logger.info(f"Preprocessed demo file: {config['filename']}")
287
 
288
  # Initialize demo manager
289
  demo_manager = DemoManager()
@@ -456,6 +493,9 @@ async def startup_event():
456
  logger.info("Demo files initialization complete")
457
  except Exception as e:
458
  logger.error(f"Demo files initialization failed: {e}")
 
 
 
459
 
460
 
461
  @app.get("/", response_class=HTMLResponse)
@@ -559,7 +599,6 @@ async def get_results(task_id: str):
559
  "text": seg.original_text if hasattr(seg, 'original_text') else "",
560
  "translated_text": seg.translated_text if hasattr(seg, 'translated_text') else "",
561
  "language": seg.original_language if hasattr(seg, 'original_language') else "unknown",
562
- "confidence": seg.confidence_transcription if hasattr(seg, 'confidence_transcription') else 0.0
563
  })
564
 
565
  # Extract summary information
@@ -589,7 +628,6 @@ async def get_results(task_id: str):
589
  "end_time": 5.0,
590
  "text": f"Processed audio from file. Full results processing encountered an error: {str(e)}",
591
  "language": "en",
592
- "confidence": 0.8
593
  }
594
  ],
595
  "summary": {
@@ -619,7 +657,6 @@ async def get_results(task_id: str):
619
  "end_time": 1.0,
620
  "text": "Audio processing completed but results are not available for display.",
621
  "language": "en",
622
- "confidence": 1.0
623
  }
624
  ],
625
  "summary": {
 
128
  }
129
  }
130
 
131
@app.get("/health")
async def health():
    """Simple health check endpoint.

    Returns ``{"status": "ok"}`` with HTTP 200 when at least 50 MB of disk
    space is free in the working directory and ``app.state.models_loaded``
    is set and truthy.

    Any failure is reported as ``{"status": "error", "detail": ...}`` with
    HTTP 503, so clients that only look at the status code (for example
    ``fetch(...).ok`` or ``curl -f``) also see the service as unhealthy.
    """
    # Function-scope imports keep this block self-contained.
    import shutil

    from fastapi.responses import JSONResponse

    min_free_bytes = 50 * 1024 * 1024  # 50 MB

    def unhealthy(detail):
        # Same JSON shape as before, but with a failing HTTP status code.
        return JSONResponse(
            status_code=503,
            content={"status": "error", "detail": detail},
        )

    try:
        # Basic system check
        if shutil.disk_usage(".").free < min_free_bytes:
            return unhealthy("Low disk space")

        # The flag is absent until startup sets it; treat absence as not loaded.
        if not getattr(app.state, "models_loaded", False):
            return unhealthy("Models not loaded")

        return {"status": "ok"}

    except Exception as e:
        return unhealthy(str(e))
150
+
151
  # Demo results cache
152
  demo_results_cache = {}
153
 
 
166
  file_path = self.demo_dir / config["filename"]
167
  results_path = self.results_dir / f"{demo_id}_results.json"
168
 
169
+ # Check if file exists, download if not
170
  if not file_path.exists():
171
  logger.info(f"Downloading demo file: {config['filename']}")
172
  try:
 
175
  logger.error(f"Failed to download {config['filename']}: {e}")
176
  continue
177
 
178
+ # Check if results exist, process if not
179
  if not results_path.exists():
180
+ logger.info(f"Processing demo file: {config['filename']}")
181
  try:
182
+ await self.process_demo_file(demo_id, file_path, results_path)
183
  except Exception as e:
184
+ logger.error(f"Failed to process {config['filename']}: {e}")
185
  continue
186
 
187
  # Load results into cache
 
201
 
202
  logger.info(f"Downloaded demo file: {file_path.name}")
203
 
204
async def process_demo_file(self, demo_id: str, file_path: Path, results_path: Path):
    """Process a demo file with the actual pipeline and cache the results.

    Args:
        demo_id: Identifier of the demo being processed.
        file_path: Location of the demo audio file.
        results_path: JSON file the formatted results are written to.

    If any step fails, fallback results describing the error are written
    to ``results_path`` instead, so a results file exists either way.
    """
    # NOTE(review): pipeline.process_audio is called synchronously inside
    # this coroutine, so it presumably blocks the event loop while it runs.
    try:
        # Initialize pipeline for demo processing
        pipeline = AudioIntelligencePipeline(
            whisper_model_size="small",
            target_language="en",
            device="auto",
            hf_token=os.getenv('HUGGINGFACE_TOKEN'),
            output_dir="./outputs"
        )

        # Process the actual audio file
        logger.info(f"Processing demo file: {file_path}")
        results = pipeline.process_audio(
            str(file_path),
            save_outputs=True,
            output_formats=['json', 'srt_original', 'srt_translated', 'text', 'summary']
        )

        # Format results for demo display
        formatted_results = self.format_demo_results(results, demo_id)

        # Save formatted results
        with open(results_path, 'w', encoding='utf-8') as f:
            json.dump(formatted_results, f, indent=2, ensure_ascii=False)

        # Log the file name taken from the path. The previous code read an
        # undefined `config` variable here; the resulting NameError was
        # caught below and replaced the real results with fallback results.
        logger.info(f"Demo file processed and cached: {file_path.name}")

    except Exception as e:
        logger.error(f"Failed to process demo file {demo_id}: {e}")
        # Create fallback results if processing fails
        fallback_results = self.create_fallback_results(demo_id, str(e))
        with open(results_path, 'w', encoding='utf-8') as f:
            json.dump(fallback_results, f, indent=2, ensure_ascii=False)
239
+
240
def format_demo_results(self, results: Dict, demo_id: str) -> Dict:
    """Convert raw pipeline output into the structure the demo UI consumes.

    The returned dict has a ``segments`` list and a ``summary`` dict. If
    anything goes wrong while reading ``results``, a single placeholder
    segment describing the error is returned instead.
    """
    summary = {
        "total_duration": 0,
        "num_speakers": 0,
        "num_segments": 0,
        "languages": [],
        "processing_time": 0
    }
    formatted_results = {"segments": [], "summary": summary}

    try:
        # Segments: read each attribute, falling back to a default when absent
        for item in results.get('processed_segments', ()) if 'processed_segments' in results else ():
            formatted_results["segments"].append({
                "speaker": getattr(item, 'speaker_id', "Speaker 1"),
                "start_time": getattr(item, 'start_time', 0),
                "end_time": getattr(item, 'end_time', 0),
                "text": getattr(item, 'original_text', ""),
                "translated_text": getattr(item, 'translated_text', ""),
                "language": getattr(item, 'original_language', "unknown")
            })

        # Duration and timing, when the pipeline reported them
        if 'audio_metadata' in results:
            summary["total_duration"] = results['audio_metadata'].get('duration_seconds', 0)

        if 'processing_stats' in results:
            summary["processing_time"] = results['processing_stats'].get('total_time', 0)

        # Statistics derived from the collected segments
        collected = formatted_results["segments"]
        summary["num_segments"] = len(collected)
        summary["num_speakers"] = len({entry["speaker"] for entry in collected})
        known_languages = {
            entry["language"] for entry in collected if entry["language"] != 'unknown'
        }
        summary["languages"] = list(known_languages) if known_languages else ["unknown"]

    except Exception as e:
        logger.error(f"Error formatting demo results: {e}")
        # Replace whatever was collected with one placeholder segment
        formatted_results["segments"] = [
            {
                "speaker": "Speaker 1",
                "start_time": 0.0,
                "end_time": 5.0,
                "text": f"Demo processing completed. Error in formatting: {str(e)}",
                "translated_text": f"Demo processing completed. Error in formatting: {str(e)}",
                "language": "en"
            }
        ]
        summary["total_duration"] = 5.0
        summary["num_segments"] = 1
        summary["num_speakers"] = 1
        summary["languages"] = ["en"]

    return formatted_results
301
+
302
def create_fallback_results(self, demo_id: str, error_msg: str) -> Dict:
    """Create fallback results when demo processing fails.

    Args:
        demo_id: Identifier of the demo that failed. Kept for interface
            compatibility; it is not used to build the payload.
        error_msg: Description of the failure, embedded in the segment text.

    Returns:
        A results dict with one "System" segment carrying the error
        message and a matching summary.
    """
    # The previous version looked up DEMO_FILES[demo_id] without using the
    # result. That lookup could raise KeyError while the caller was already
    # handling an error, so it has been removed.
    failure_text = f"Demo processing failed: {error_msg}"
    return {
        "segments": [
            {
                "speaker": "System",
                "start_time": 0.0,
                "end_time": 1.0,
                "text": failure_text,
                "translated_text": failure_text,
                "language": "en"
            }
        ],
        "summary": {
            "total_duration": 1.0,
            "num_speakers": 1,
            "num_segments": 1,
            "languages": ["en"],
            "processing_time": 0.1
        }
    }
 
 
 
 
 
 
324
 
325
  # Initialize demo manager
326
  demo_manager = DemoManager()
 
493
  logger.info("Demo files initialization complete")
494
  except Exception as e:
495
  logger.error(f"Demo files initialization failed: {e}")
496
+
497
+ # Set models loaded flag for health check
498
+ app.state.models_loaded = True
499
 
500
 
501
  @app.get("/", response_class=HTMLResponse)
 
599
  "text": seg.original_text if hasattr(seg, 'original_text') else "",
600
  "translated_text": seg.translated_text if hasattr(seg, 'translated_text') else "",
601
  "language": seg.original_language if hasattr(seg, 'original_language') else "unknown",
 
602
  })
603
 
604
  # Extract summary information
 
628
  "end_time": 5.0,
629
  "text": f"Processed audio from file. Full results processing encountered an error: {str(e)}",
630
  "language": "en",
 
631
  }
632
  ],
633
  "summary": {
 
657
  "end_time": 1.0,
658
  "text": "Audio processing completed but results are not available for display.",
659
  "language": "en",
 
660
  }
661
  ],
662
  "summary": {