Amr-h committed on
Commit e654589 · 1 Parent(s): 70f7db6

ignore cache

Files changed (2)
  1. .gitignore +3 -0
  2. chunck_time.py +0 -261
.gitignore CHANGED
@@ -106,3 +106,6 @@ env/
 # Misc
 *.bak
 *.swp
+
+chunk_time.py
+analyze.txt
chunck_time.py DELETED
@@ -1,261 +0,0 @@
-import os
-import sys
-import warnings
-import time
-import statistics
-from collections import Counter
-
-import torch
-import torchaudio
-from speechbrain.inference.classifiers import EncoderClassifier
-
-from audio_extractor import extract_audio_from_video_url
-
-warnings.filterwarnings("ignore")
-os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
-
-def create_chunks_by_size(waveform, sample_rate, chunk_length_sec):
-    """Create chunks of specific size"""
-    chunk_samples = chunk_length_sec * sample_rate
-    total_samples = waveform.size(1)
-    chunks = []
-
-    for start in range(0, total_samples, chunk_samples):
-        end = min(start + chunk_samples, total_samples)
-        chunk = waveform[:, start:end]
-        if chunk.size(1) > sample_rate * 2:  # minimum 2 seconds
-            chunks.append(chunk)
-    return chunks
-
-def predict_chunks_timing(chunks, classifier):
-    """Time the prediction process for chunks"""
-    if not chunks:
-        return [], 0.0
-
-    start_time = time.time()
-
-    # Pad to same length
-    max_len = max(chunk.size(1) for chunk in chunks)
-    padded_chunks = [torch.nn.functional.pad(chunk, (0, max_len - chunk.size(1))) for chunk in chunks]
-    batch = torch.cat(padded_chunks, dim=0).unsqueeze(1)
-    batch = batch.squeeze(1)
-
-    out_prob, score, index, text_lab = classifier.classify_batch(batch)
-
-    end_time = time.time()
-    prediction_time = end_time - start_time
-
-    results = []
-    for i in range(len(chunks)):
-        results.append({
-            "accent": text_lab[i],
-            "confidence": score[i].item(),
-        })
-
-    return results, prediction_time
-
-def analyze_chunk_size_performance(video_url, chunk_sizes=[10, 15, 20, 30, 60]):
-    """Analyze performance for different chunk sizes"""
-    print("🔍 Starting Chunk Size Performance Analysis")
-    print("=" * 60)
-
-    # Extract and prepare audio once
-    print("🎵 Extracting and preparing audio...")
-    audio_start = time.time()
-
-    audio_path = extract_audio_from_video_url(video_url)
-    waveform, sample_rate = torchaudio.load(audio_path)
-
-    if sample_rate != 16000:
-        waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
-        sample_rate = 16000
-
-    if waveform.shape[0] > 1:
-        waveform = torch.mean(waveform, dim=0, keepdim=True)
-
-    # # Apply VAD
-    # waveform = simple_vad(waveform, sample_rate)
-
-    audio_end = time.time()
-    audio_prep_time = audio_end - audio_start
-
-    duration_minutes = waveform.size(1) / sample_rate / 60
-    print(f"✅ Audio prepared in {audio_prep_time:.2f}s | Duration: {duration_minutes:.1f} minutes")
-
-    # Load model once
-    print("🧠 Loading model...")
-    model_start = time.time()
-    classifier = EncoderClassifier.from_hparams(source="Jzuluaga/accent-id-commonaccent_ecapa")
-    model_end = time.time()
-    model_load_time = model_end - model_start
-    print(f"✅ Model loaded in {model_load_time:.2f}s")
-
-    print("\n" + "=" * 60)
-    print("📊 CHUNK SIZE ANALYSIS RESULTS")
-    print("=" * 60)
-
-    results = []
-
-    for chunk_size in chunk_sizes:
-        print(f"\n🧩 Testing {chunk_size}-second chunks...")
-
-        # Create chunks
-        chunk_start = time.time()
-        chunks = create_chunks_by_size(waveform, sample_rate, chunk_size)
-        chunk_end = time.time()
-        chunking_time = chunk_end - chunk_start
-
-        if not chunks:
-            print(f"❌ No valid chunks created for {chunk_size}s size")
-            continue
-
-        # Predict
-        predictions, prediction_time = predict_chunks_timing(chunks, classifier)
-
-        # Calculate statistics
-        confidences = [p["confidence"] for p in predictions]
-        accents = [p["accent"] for p in predictions]
-
-        avg_confidence = statistics.mean(confidences) if confidences else 0
-        max_confidence = max(confidences) if confidences else 0
-        min_confidence = min(confidences) if confidences else 0
-        std_confidence = statistics.stdev(confidences) if len(confidences) > 1 else 0
-
-        # Most common accent
-        accent_counts = Counter(accents)
-        most_common_accent = accent_counts.most_common(1)[0] if accent_counts else ("Unknown", 0)
-
-        # Calculate processing rates
-        total_processing_time = chunking_time + prediction_time
-        chunks_per_second = len(chunks) / total_processing_time if total_processing_time > 0 else 0
-        seconds_per_chunk = total_processing_time / len(chunks) if len(chunks) > 0 else 0
-
-        result = {
-            "chunk_size": chunk_size,
-            "num_chunks": len(chunks),
-            "chunking_time": chunking_time,
-            "prediction_time": prediction_time,
-            "total_time": total_processing_time,
-            "avg_confidence": avg_confidence,
-            "max_confidence": max_confidence,
-            "min_confidence": min_confidence,
-            "std_confidence": std_confidence,
-            "most_common_accent": most_common_accent[0],
-            "accent_occurrence": most_common_accent[1],
-            "chunks_per_second": chunks_per_second,
-            "seconds_per_chunk": seconds_per_chunk,
-            "confidence_consistency": 1 - (std_confidence / avg_confidence) if avg_confidence > 0 else 0
-        }
-
-        results.append(result)
-
-        # Print results for this chunk size
-        print(f" 📦 Chunks created: {len(chunks)}")
-        print(f" ⏱️ Chunking time: {chunking_time:.3f}s")
-        print(f" 🧠 Prediction time: {prediction_time:.3f}s")
-        print(f" 🔄 Total processing: {total_processing_time:.3f}s")
-        print(f" ⚡ Processing rate: {chunks_per_second:.1f} chunks/sec")
-        print(f" 📈 Avg confidence: {avg_confidence:.3f}")
-        print(f" 🎯 Most common: {most_common_accent[0]} ({most_common_accent[1]} times)")
-        print(f" 📊 Confidence range: {min_confidence:.3f} - {max_confidence:.3f}")
-
-    # Print summary comparison
-    print("\n" + "=" * 80)
-    print("📈 PERFORMANCE COMPARISON SUMMARY")
-    print("=" * 80)
-
-    if results:
-        print(f"{'Size':<6} {'Chunks':<8} {'Total Time':<12} {'Rate':<12} {'Avg Conf':<10} {'Consistency':<12} {'Winner'}")
-        print("-" * 80)
-
-        for r in results:
-            consistency = f"{r['confidence_consistency']:.2f}"
-            print(f"{r['chunk_size']:<6} {r['num_chunks']:<8} {r['total_time']:<12.3f} {r['chunks_per_second']:<12.1f} {r['avg_confidence']:<10.3f} {consistency:<12} {r['most_common_accent']}")
-
-    # Recommendations
-    print("\n" + "=" * 60)
-    print("🏆 RECOMMENDATIONS")
-    print("=" * 60)
-
-    if results:
-        # Find best for speed
-        fastest = min(results, key=lambda x: x['total_time'])
-        print(f"⚡ Fastest processing: {fastest['chunk_size']}s chunks ({fastest['total_time']:.2f}s total)")
-
-        # Find best for accuracy (highest average confidence)
-        most_accurate = max(results, key=lambda x: x['avg_confidence'])
-        print(f"🎯 Highest accuracy: {most_accurate['chunk_size']}s chunks ({most_accurate['avg_confidence']:.3f} avg confidence)")
-
-        # Find most consistent
-        most_consistent = max(results, key=lambda x: x['confidence_consistency'])
-        print(f"📊 Most consistent: {most_consistent['chunk_size']}s chunks ({most_consistent['confidence_consistency']:.3f} consistency)")
-
-        # Find best balance (speed + accuracy)
-        for r in results:
-            r['balance_score'] = (r['chunks_per_second'] * 0.4) + (r['avg_confidence'] * 100 * 0.6)
-
-        best_balance = max(results, key=lambda x: x['balance_score'])
-        print(f"⚖️ Best balance: {best_balance['chunk_size']}s chunks (score: {best_balance['balance_score']:.1f})")
-
-    return results
-
-def quick_test_multiple_videos(video_urls, chunk_sizes=[10, 15, 20, 30]):
-    """Quick test on multiple videos to get average performance"""
-    print("🔍 MULTI-VIDEO CHUNK SIZE ANALYSIS")
-    print("=" * 60)
-
-    all_results = {size: [] for size in chunk_sizes}
-
-    for i, video_url in enumerate(video_urls, 1):
-        print(f"\n📹 Testing Video {i}/{len(video_urls)}")
-        try:
-            video_results = analyze_chunk_size_performance(video_url, chunk_sizes)
-            for result in video_results:
-                all_results[result['chunk_size']].append(result)
-        except Exception as e:
-            print(f"❌ Error with video {i}: {str(e)}")
-            continue
-
-    # Calculate averages
-    print("\n" + "=" * 60)
-    print("📊 AVERAGE PERFORMANCE ACROSS ALL VIDEOS")
-    print("=" * 60)
-
-    avg_results = []
-    for chunk_size in chunk_sizes:
-        if all_results[chunk_size]:
-            results = all_results[chunk_size]
-            avg_result = {
-                'chunk_size': chunk_size,
-                'avg_total_time': statistics.mean([r['total_time'] for r in results]),
-                'avg_chunks_per_sec': statistics.mean([r['chunks_per_second'] for r in results]),
-                'avg_confidence': statistics.mean([r['avg_confidence'] for r in results]),
-                'avg_consistency': statistics.mean([r['confidence_consistency'] for r in results]),
-                'sample_count': len(results)
-            }
-            avg_results.append(avg_result)
-
-    if avg_results:
-        print(f"{'Size':<6} {'Samples':<8} {'Avg Time':<10} {'Avg Rate':<10} {'Avg Conf':<10} {'Consistency'}")
-        print("-" * 60)
-        for r in avg_results:
-            print(f"{r['chunk_size']:<6} {r['sample_count']:<8} {r['avg_total_time']:<10.2f} {r['avg_chunks_per_sec']:<10.1f} {r['avg_confidence']:<10.3f} {r['avg_consistency']:.3f}")
-
-    return avg_results
-
-if __name__ == "__main__":
-    # Test with single video
-    video_url = "https://www.youtube.com/watch?v=-JTq1BFBwmo&list=PLDN4rrl48XKpZkf03iYFl-O29szjTrs_O&index=2"
-
-    print("🚀 Starting Single Video Analysis...")
-    results = analyze_chunk_size_performance(video_url)
-
-    # Uncomment below to test multiple videos
-    # print("\n" + "="*60)
-    # print("🚀 Starting Multi-Video Analysis...")
-    # video_urls = [
-    #     "https://www.youtube.com/watch?v=VIDEO1",
-    #     "https://www.youtube.com/watch?v=VIDEO2",
-    #     # Add more video URLs here
-    # ]
-    # multi_results = quick_test_multiple_videos(video_urls)