richard-su committed on
Commit 76f9cd2 · verified · 1 Parent(s): 2b70d67

Upload folder using huggingface_hub
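The commit message indicates the files were pushed with the `huggingface_hub` client. A minimal sketch of that workflow is shown below; the `repo_id` and `repo_type` values are placeholders for illustration, not taken from this commit.

```python
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path=".",                    # local folder to upload
    repo_id="richard-su/<repo-name>",   # placeholder, not the actual repo_id
    repo_type="space",                  # adjust to "model" or "dataset" as appropriate
    commit_message="Upload folder using huggingface_hub",
)
```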

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +3 -0
  2. tests/README_SPEAKER_TESTS.md +163 -0
  3. tests/__init__.py +3 -0
  4. tests/__pycache__/__init__.cpython-310.pyc +0 -0
  5. tests/__pycache__/conftest.cpython-310-pytest-8.4.0.pyc +0 -0
  6. tests/__pycache__/test_01_podcast_download.cpython-310-pytest-8.4.0.pyc +0 -0
  7. tests/__pycache__/test_02_remote_transcription.cpython-310-pytest-8.4.0.pyc +0 -0
  8. tests/__pycache__/test_03_transcription_file_management.cpython-310-pytest-8.4.0.pyc +0 -0
  9. tests/__pycache__/test_04_mp3_file_management.cpython-310-pytest-8.4.0.pyc +0 -0
  10. tests/__pycache__/test_05_real_world_integration.cpython-310-pytest-8.4.0.pyc +0 -0
  11. tests/__pycache__/test_06_modal_improvements.cpython-310-pytest-8.4.0.pyc +0 -0
  12. tests/__pycache__/test_07_modal_final_improvements.cpython-310-pytest-8.4.0.pyc +0 -0
  13. tests/__pycache__/test_08_speaker_diarization_integration.cpython-310-pytest-8.4.0.pyc +0 -0
  14. tests/__pycache__/test_09_storage_config_unit_tests.cpython-310-pytest-8.4.0.pyc +0 -0
  15. tests/__pycache__/test_concurrent_processing.cpython-310-pytest-8.4.0.pyc +0 -0
  16. tests/__pycache__/test_segmentation_fallback.cpython-310-pytest-8.4.0.pyc +0 -0
  17. tests/__pycache__/test_services.cpython-310-pytest-8.4.0.pyc +0 -0
  18. tests/__pycache__/test_speaker_embedding_integration.cpython-310-pytest-8.4.0.pyc +0 -0
  19. tests/__pycache__/test_speaker_embedding_service.cpython-310-pytest-8.4.0.pyc +0 -0
  20. tests/__pycache__/test_speaker_integration.cpython-310-pytest-8.4.0.pyc +0 -0
  21. tests/__pycache__/test_speaker_segmentation.cpython-310-pytest-8.4.0.pyc +0 -0
  22. tests/__pycache__/test_speaker_segmentation_advanced.cpython-310-pytest-8.4.0.pyc +0 -0
  23. tests/cache/apple_podcast_episode.mp3 +3 -0
  24. tests/cache/apple_podcast_episode.srt +875 -0
  25. tests/cache/apple_podcast_episode.txt +1 -0
  26. tests/cache/synthetic_multi_speaker.srt +3 -0
  27. tests/cache/synthetic_multi_speaker.txt +1 -0
  28. tests/cache/synthetic_multi_speaker.wav +3 -0
  29. tests/cache/transcribe/speaker_diarization/apple_podcast_episode/with_speaker_diarization_result.json +16 -0
  30. tests/cache/transcribe/speaker_diarization/apple_podcast_episode/without_speaker_diarization_result.json +13 -0
  31. tests/cache/transcribe/speaker_diarization/comprehensive_test_results.json +108 -0
  32. tests/cache/transcribe/speaker_diarization/download_log.json +1 -0
  33. tests/cache/transcribe/speaker_diarization/environment_status.json +40 -0
  34. tests/cache/transcribe/speaker_diarization/local_vs_modal_comparison.json +80 -0
  35. tests/cache/transcribe/speaker_diarization/pipeline_test.json +4 -0
  36. tests/cache/transcribe/speaker_diarization/speaker_diarization_report.json +83 -0
  37. tests/cache/transcribe/speaker_diarization/speaker_diarization_report.md +69 -0
  38. tests/cache/transcribe/speaker_diarization/test_summary.json +18 -0
  39. tests/cache/transcribe/speaker_diarization/xyz_podcast_episode/with_speaker_diarization_result.json +16 -0
  40. tests/cache/transcribe/speaker_diarization/xyz_podcast_episode/without_speaker_diarization_result.json +13 -0
  41. tests/cache/transcribe/xyz_podcast_episode.srt +584 -0
  42. tests/cache/transcribe/xyz_podcast_episode.txt +1 -0
  43. tests/cache/xyz_podcast_episode.mp3 +3 -0
  44. tests/cache/xyz_podcast_episode.srt +1231 -0
  45. tests/cache/xyz_podcast_episode.txt +1 -0
  46. tests/conftest.py +97 -0
  47. tests/playwright_mcp_testing_guide.md +439 -0
  48. tests/run_all_tests.py +92 -0
  49. tests/test_01_podcast_download.py +177 -0
  50. tests/test_02_remote_transcription.py +241 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+ tests/cache/apple_podcast_episode.mp3 filter=lfs diff=lfs merge=lfs -text
+ tests/cache/synthetic_multi_speaker.wav filter=lfs diff=lfs merge=lfs -text
+ tests/cache/xyz_podcast_episode.mp3 filter=lfs diff=lfs merge=lfs -text
tests/README_SPEAKER_TESTS.md ADDED
@@ -0,0 +1,163 @@
+ # Speaker Segmentation Tests
+
+ This directory contains a comprehensive test suite for the Speaker Segmentation feature.
+
+ ## 📁 Test File Structure
+
+ ```
+ tests/
+ ├── test_speaker_segmentation.py            # Basic functionality tests
+ ├── test_speaker_segmentation_advanced.py   # Advanced scenarios and performance tests
+ ├── test_speaker_integration.py             # Integration tests
+ └── README_SPEAKER_TESTS.md                 # Test documentation (this file)
+ ```
+
+ ## 🔧 Refactoring Overview
+
+ ### Core Refactoring
+
+ We refactored the speaker segmentation logic in `TranscriptionService`:
+
+ 1. **`_merge_speaker_segments` method** - the main merging logic
+    - Detects multiple speakers within a single transcription segment
+    - Automatically splits segments that contain more than one speaker
+    - Preserves word-boundary integrity
+
+ 2. **`_split_transcription_segment` method** - newly added splitting method
+    - Assigns text based on temporal overlap with each speaker
+    - Distributes text across speakers proportionally
+    - Uses the actual timestamps from speaker diarization
+
+ ### Key Improvements
+
+ - ✅ **Multi-speaker detection**: automatically detects and splits transcription segments that contain multiple speakers
+ - ✅ **Smart text splitting**: distributes text in proportion to each speaker's duration (see the sketch below)
+ - ✅ **Word-boundary protection**: never splits text in the middle of a word
+ - ✅ **Timestamp accuracy**: uses the actual diarization timestamps
+ - ✅ **Overlap handling**: correctly handles complex cases where speakers overlap in time
+
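+ The snippet below is a minimal, self-contained sketch of the proportional splitting idea described above. The real implementation lives in `TranscriptionService._split_transcription_segment` (not shown in this diff); the function and data-class names here are illustrative assumptions, not the actual API.
+
+ ```python
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class SpeakerSegment:
+     speaker: str
+     start: float
+     end: float
+
+
+ def split_text_by_speakers(text, seg_start, seg_end, speakers):
+     """Distribute `text` across speakers in proportion to how long each one
+     overlaps the transcription segment, without breaking words."""
+     words = text.split()
+     overlaps = []
+     for sp in speakers:
+         ov = max(0.0, min(sp.end, seg_end) - max(sp.start, seg_start))
+         if ov > 0:
+             overlaps.append((sp, ov))
+     if not words or not overlaps:
+         return [{"speaker": None, "start": seg_start, "end": seg_end, "text": text}]
+
+     total_overlap = sum(ov for _, ov in overlaps)
+     pieces, cursor = [], 0
+     for i, (sp, ov) in enumerate(overlaps):
+         if i == len(overlaps) - 1:
+             count = len(words) - cursor          # last speaker takes the remainder
+         else:
+             count = min(round(len(words) * ov / total_overlap), len(words) - cursor)
+         chunk = " ".join(words[cursor:cursor + count])
+         cursor += count
+         if chunk:
+             pieces.append({
+                 "speaker": sp.speaker,
+                 "start": max(sp.start, seg_start),
+                 "end": min(sp.end, seg_end),
+                 "text": chunk,
+             })
+     return pieces
+
+
+ if __name__ == "__main__":
+     speakers = [SpeakerSegment("HOST", 0.0, 3.0), SpeakerSegment("GUEST", 3.0, 10.0)]
+     for piece in split_text_by_speakers(
+             "Welcome to our podcast today we have a special guest", 0.0, 10.0, speakers):
+         print(piece)
+ ```
+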
+ ## 📋 Test Coverage
+
+ ### Basic Tests (`test_speaker_segmentation.py`)
+
+ | Test Case | Description | Status |
+ |-----------|-------------|--------|
+ | `test_single_speaker_segment` | Basic single-speaker case | ✅ |
+ | `test_no_speaker_detected` | No speaker detected | ✅ |
+ | `test_multiple_speakers_in_single_segment` | Multiple speakers in one segment | ✅ |
+ | `test_overlapping_speakers` | Speakers overlapping in time | ✅ |
+ | `test_partial_speaker_overlap` | Partial overlap | ✅ |
+ | `test_multiple_transcription_segments_with_speakers` | Complex multi-segment case | ✅ |
+ | `test_word_boundary_preservation` | Word-boundary protection | ✅ |
+ | `test_empty_text_handling` | Empty-text handling | ✅ |
+ | `test_split_transcription_segment_direct` | Direct test of the split method | ✅ |
+ | `test_unequal_speaker_durations` | Unequal speaker durations | ✅ |
+
+ ### Advanced Tests (`test_speaker_segmentation_advanced.py`)
+
+ | Test Case | Description | Status |
+ |-----------|-------------|--------|
+ | `test_rapid_speaker_changes` | Rapid speaker switching | ✅ |
+ | `test_very_short_speaker_segments` | Very short speaker segments | ✅ |
+ | `test_overlapping_segments_complex` | Complex overlap scenarios | ✅ |
+ | `test_performance_large_segments` | Performance with a large number of segments | ✅ |
+ | `test_no_overlap_at_all` | No overlap at all | ✅ |
+ | `test_exact_boundary_matching` | Exact boundary matching | ✅ |
+ | `test_floating_point_precision` | Floating-point precision | ✅ |
+ | `test_text_distribution_accuracy` | Text distribution accuracy | ✅ |
+ | `test_single_word_segments` | Word-level splitting | ✅ |
+ | `test_empty_speaker_segments` | Empty speaker segments | ✅ |
+ | `test_malformed_input_handling` | Malformed input handling | ✅ |
+
+ ### Performance Benchmark
+
+ | Metric | Result |
+ |--------|--------|
+ | **Processing speed** | 70,575 segments/second |
+ | **Test scenario** | 30-minute podcast, 360 transcription segments, 62 speaker segments |
+ | **Output segments** | 421 final segments |
+ | **Execution time** | 0.006 seconds |
+ | **Performance requirement** | < 2 seconds (meets real-time processing needs) |
+
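+ As a rough illustration of the benchmark scenario above, the toy harness below builds the same synthetic workload (360 transcription segments, 62 speaker segments) and asserts the < 2 second budget. `naive_merge` is a deliberately simplistic stand-in, not the real `TranscriptionService` logic; the actual benchmark lives in `TestSpeakerSegmentationBenchmark`.
+
+ ```python
+ import time
+
+
+ def naive_merge(transcription, speakers):
+     """Toy stand-in: attach overlapping speaker labels to each transcription segment."""
+     merged = []
+     for seg in transcription:
+         active = [s["speaker"] for s in speakers
+                   if s["start"] < seg["end"] and s["end"] > seg["start"]]
+         merged.append({**seg, "speakers": active})
+     return merged
+
+
+ if __name__ == "__main__":
+     transcription = [{"start": i * 5.0, "end": (i + 1) * 5.0, "text": "..."} for i in range(360)]
+     speakers = [{"speaker": f"SPEAKER_{i % 3}", "start": i * 29.0, "end": (i + 1) * 29.0}
+                 for i in range(62)]
+     start = time.perf_counter()
+     merged = naive_merge(transcription, speakers)
+     elapsed = time.perf_counter() - start
+     assert elapsed < 2.0, f"merge took {elapsed:.3f}s, over the 2 s budget"
+     print(f"Merged {len(merged)} segments in {elapsed * 1000:.2f} ms")
+ ```
+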
+ ### Integration Tests (`test_speaker_integration.py`)
+
+ | Test Scenario | Description | Status |
+ |---------------|-------------|--------|
+ | `test_speaker_segmentation_integration` | End-to-end pipeline verification | ✅ |
+ | `test_complex_conversation_splitting` | Complex conversation splitting | ✅ |
+
+ ## 🚀 Running the Tests
+
+ ### Run all tests
+ ```bash
+ cd tests
+ python -m pytest test_speaker_*.py -v
+ ```
+
+ ### Run the basic tests
+ ```bash
+ python -m pytest test_speaker_segmentation.py -v
+ ```
+
+ ### Run the advanced tests (excluding benchmarks)
+ ```bash
+ python -m pytest test_speaker_segmentation_advanced.py -v -m "not benchmark"
+ ```
+
+ ### Run the performance benchmarks
+ ```bash
+ python -m pytest test_speaker_segmentation_advanced.py::TestSpeakerSegmentationBenchmark -v -s
+ ```
+
+ ### Run the integration tests
+ ```bash
+ python test_speaker_integration.py
+ ```
+
+ ## 🎯 Sample Test Results
+
+ ### Simple conversation scenario
+ ```
+ [0.0s-3.0s] Alice: "Hello, this is Alice speaking."
+ [3.0s-8.0s] Bob: "Hi Alice, this is Bob responding to your message."
+ [8.0s-12.0s] Alice: "Great to hear from you Bob, how are you today?"
+ [12.0s-15.0s] Bob: "I'm doing well, thank you for asking Alice."
+ ```
+
+ ### Complex splitting scenario
+ ```
+ Original: "Welcome to our podcast today we have a special guest joining us to discuss..."
+ ↓ Split into 3 speakers ↓
+ [0.0s-3.0s] HOST: "Welcome to our podcast today we have a"
+ [3.0s-7.0s] GUEST: "special guest joining us to discuss the latest"
+ [7.0s-10.0s] CO_HOST: "developments in AI technology and its impact on so..."
+ ```
+
+ ## 📊 Coverage Statistics
+
+ - **Total test cases**: 22
+ - **Pass rate**: 100% ✅
+ - **Functional coverage**: complete
+ - **Edge cases**: complete
+ - **Performance tests**: passing ✅
+
+ ## 🔍 Key Verification Points
+
+ 1. **Functional correctness**: speakers are assigned to the correct text segments
+ 2. **Text integrity**: no text is lost during splitting
+ 3. **Timestamp accuracy**: timestamps stay consistent with the diarization results
+ 4. **Boundary handling**: edge cases and malformed input are handled
+ 5. **Performance**: real-time processing requirements are met
+ 6. **Integration compatibility**: fully compatible with the existing transcription pipeline
+
+ ## 🎉 Summary
+
+ After thorough test verification, the new Speaker Segmentation feature is:
+
+ - ✅ **Functionally complete**: supports all expected use cases
+ - ✅ **High-performing**: meets real-time processing requirements
+ - ✅ **Reliable**: accurate text splitting and precise timestamps
+ - ✅ **Backward compatible**: does not affect existing functionality
+ - ✅ **Robust**: correctly handles a wide range of complex edge cases
+
+ This refactoring significantly improves the transcription system's handling of multi-speaker audio, making it especially well suited to podcasts, meetings, and multi-party conversations.
tests/__init__.py ADDED
@@ -0,0 +1,3 @@
+ """
+ Integration tests for Podcast MCP Gradio application
+ """
tests/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (212 Bytes).
 
tests/__pycache__/conftest.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (3.16 kB).
 
tests/__pycache__/test_01_podcast_download.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (9.76 kB).
 
tests/__pycache__/test_02_remote_transcription.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (10.3 kB).
 
tests/__pycache__/test_03_transcription_file_management.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (11.1 kB).
 
tests/__pycache__/test_04_mp3_file_management.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (11.7 kB).
 
tests/__pycache__/test_05_real_world_integration.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (15.3 kB).
 
tests/__pycache__/test_06_modal_improvements.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (12 kB).
 
tests/__pycache__/test_07_modal_final_improvements.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (17.4 kB).
 
tests/__pycache__/test_08_speaker_diarization_integration.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (19.2 kB).
 
tests/__pycache__/test_09_storage_config_unit_tests.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (24.4 kB).
 
tests/__pycache__/test_concurrent_processing.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (7.66 kB).
 
tests/__pycache__/test_segmentation_fallback.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (7.52 kB).
 
tests/__pycache__/test_services.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (24.1 kB).
 
tests/__pycache__/test_speaker_embedding_integration.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (15.5 kB).
 
tests/__pycache__/test_speaker_embedding_service.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (24.7 kB).
 
tests/__pycache__/test_speaker_integration.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (5.89 kB).
 
tests/__pycache__/test_speaker_segmentation.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (16.7 kB).
 
tests/__pycache__/test_speaker_segmentation_advanced.cpython-310-pytest-8.4.0.pyc ADDED
Binary file (17 kB).
 
tests/cache/apple_podcast_episode.mp3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9ecef44bb14733831a1a14a6ea253e087de9f01fb9e32bd068530eca021c334c
+ size 33871323
tests/cache/apple_podcast_episode.srt ADDED
@@ -0,0 +1,875 @@
1
+ 1
2
+ 00:00:00,000 --> 00:00:06,740
3
+ This is the All Ears English Podcast, Episode 2422. Don't be loathe to speak English.
4
+
5
+ 2
6
+ 00:00:08,580 --> 00:00:15,099
7
+ Welcome to the All Ears English Podcast, downloaded more than 200 million times.
8
+
9
+ 3
10
+ 00:00:15,300 --> 00:00:21,559
11
+ Are you feeling stuck with your English? We'll show you how to become fearless and fluent by
12
+
13
+ 4
14
+ 00:00:21,559 --> 00:00:29,260
15
+ focusing on connection, not perfection, with your American host, Aubrey Carter, the IELTS whiz,
16
+
17
+ 5
18
+ 00:00:29,260 --> 00:00:37,740
19
+ and Lindsay McMahon, the English adventurer, coming to you from Arizona and Colorado, USA.
20
+
21
+ 6
22
+ 00:00:40,160 --> 00:00:46,840
23
+ Reach the top 25% of all native speakers by understanding these three ways to say that
24
+
25
+ 7
26
+ 00:00:46,840 --> 00:00:58,120
27
+ you dislike something in English. Do you ever wish that you could just hang out with native speakers
28
+
29
+ 8
30
+ 00:00:58,119 --> 00:01:03,719
31
+ and pick up real English naturally? That's what it's like here on All Ears English. We're two friends
32
+
33
+ 9
34
+ 00:01:03,719 --> 00:01:10,159
35
+ having real conversations, no scripts, no lectures, just fun, honest talk about life, culture, and how
36
+
37
+ 10
38
+ 00:01:10,159 --> 00:01:15,759
39
+ to connect in English. It's like grabbing coffee with us and leaving with better English every time.
40
+
41
+ 11
42
+ 00:01:16,120 --> 00:01:20,299
43
+ But if you're not following All Ears English, then you're probably missing some episodes.
44
+
45
+ 12
46
+ 00:01:20,299 --> 00:01:26,420
47
+ So go ahead and hit the follow button now, wherever you listen to the show. Hit follow now
48
+
49
+ 13
50
+ 00:01:26,420 --> 00:01:28,620
51
+ and come check us out five days a week.
52
+
53
+ 14
54
+ 00:01:32,840 --> 00:01:34,299
55
+ Hey there, Aubrey, what's shaking?
56
+
57
+ 15
58
+ 00:01:34,640 --> 00:01:35,659
59
+ Not much. How are you, Lindsay?
60
+
61
+ 16
62
+ 00:01:36,200 --> 00:01:39,759
63
+ Feeling great today, but I have a key question for you today. You ready?
64
+
65
+ 17
66
+ 00:01:40,140 --> 00:01:41,079
67
+ Yeah, let's hear it.
68
+
69
+ 18
70
+ 00:01:41,359 --> 00:01:44,879
71
+ So Aubrey, in your daily life or just maybe right now in this moment,
72
+
73
+ 19
74
+ 00:01:44,879 --> 00:01:47,280
75
+ is there anything you are loathe to do?
76
+
77
+ 20
78
+ 00:01:48,179 --> 00:01:54,519
79
+ You know, I am loathe to swim with my kids right now. Our pool is still ice cold,
80
+
81
+ 21
82
+ 00:01:54,819 --> 00:01:59,039
83
+ in my opinion. It's way too cold and they are ready to swim. They're getting in. They're like,
84
+
85
+ 22
86
+ 00:01:59,099 --> 00:02:04,359
87
+ mom, come swim with me. And it's way too cold for me. So I make excuses and I really, I'm like,
88
+
89
+ 23
90
+ 00:02:04,379 --> 00:02:09,560
91
+ I'll just dip my toes in. It's crazy. So I am really loathe to get in that cold swimming pool.
92
+
93
+ 24
94
+ 00:02:09,759 --> 00:02:13,560
95
+ It's funny how we lose our nerves as we get older. It becomes harder and harder to get into
96
+
97
+ 25
98
+ 00:02:13,560 --> 00:02:17,659
99
+ swimming pools or cold lakes or the ocean. When we're kids, we just run in like,
100
+
101
+ 26
102
+ 00:02:17,740 --> 00:02:22,659
103
+ we didn't mind at all. I know. Maybe our nerve endings are less sensitive when we're younger.
104
+
105
+ 27
106
+ 00:02:22,900 --> 00:02:24,060
107
+ I don't know. We just don't care.
108
+
109
+ 28
110
+ 00:02:24,560 --> 00:02:28,960
111
+ We just don't care. I don't know. We don't think about it. I don't know. We think a lot as adults,
112
+
113
+ 29
114
+ 00:02:29,099 --> 00:02:29,439
115
+ don't we?
116
+
117
+ 30
118
+ 00:02:29,699 --> 00:02:33,879
119
+ Yeah, for sure. Well, this is interesting. This word loathe came up in a recent episode.
120
+
121
+ 31
122
+ 00:02:34,560 --> 00:02:38,599
123
+ Stay to the end and we'll share which one it is in case you missed it. But I realized when
124
+
125
+ 32
126
+ 00:02:38,599 --> 00:02:43,360
127
+ proofreading the transcripts that it is spelled differently if it's an adjective or a verb.
128
+
129
+ 33
130
+ 00:02:43,560 --> 00:02:44,479
131
+ That's a really good insight.
132
+
133
+ 34
134
+ 00:02:44,500 --> 00:02:48,020
135
+ So I noticed like the misspelling and I was like, but usually I thought it did end with
136
+
137
+ 35
138
+ 00:02:48,020 --> 00:02:52,640
139
+ an E. So this is interesting. We're going to share this today. This is a common error
140
+
141
+ 36
142
+ 00:02:52,640 --> 00:02:57,979
143
+ by native speakers as well. So not something you really need to stress about. But if you can make
144
+
145
+ 37
146
+ 00:02:57,979 --> 00:03:03,680
147
+ this improvement, there will be times where people are impressed. If it's an email or a written memo or
148
+
149
+ 38
150
+ 00:03:03,680 --> 00:03:08,140
151
+ something, especially at work, and you spell this correctly, you're doing better than probably
152
+
153
+ 39
154
+ 00:03:08,139 --> 00:03:10,259
155
+ 80% of native English speakers.
156
+
157
+ 40
158
+ 00:03:10,699 --> 00:03:15,199
159
+ Oh, for sure. And I think even beyond just spelling it correctly, just using it is going
160
+
161
+ 41
162
+ 00:03:15,199 --> 00:03:21,739
163
+ to put you probably in the top 25% of native speakers, I think. I don't think this is an
164
+
165
+ 42
166
+ 00:03:21,739 --> 00:03:25,259
167
+ average word. I think this is an above average word to use, Aubrey. Don't you think?
168
+
169
+ 43
170
+ 00:03:25,619 --> 00:03:30,619
171
+ Yes, it's an impressive word. We're going to go into both the verb and the adjective. And we're also
172
+
173
+ 44
174
+ 00:03:30,619 --> 00:03:37,079
175
+ going to share a few more interesting ways to share this because connection isn't just about things you have
176
+
177
+ 45
178
+ 00:03:37,080 --> 00:03:42,580
179
+ in common and things you like. We also connect about things we loathe, things we dread, things we hate.
180
+
181
+ 46
182
+ 00:03:42,980 --> 00:03:45,920
183
+ So this is interesting too. You need to also have this vocab.
184
+
185
+ 47
186
+ 00:03:46,480 --> 00:03:51,740
187
+ Yeah. And it doesn't mean that you're a negative person. If you're always connecting, there can be
188
+
189
+ 48
190
+ 00:03:51,740 --> 00:03:55,740
191
+ very positive connection around something you don't want to do, especially if you share that in
192
+
193
+ 49
194
+ 00:03:55,740 --> 00:03:59,420
195
+ common with someone else. That can be a true connection moment as well.
196
+
197
+ 50
198
+ 00:03:59,860 --> 00:04:05,120
199
+ Yes, for sure. We also want to give a shout out. There was a comment on YouTube
200
+
201
+ 51
202
+ 00:04:05,120 --> 00:04:10,759
203
+ from Rehaman from India. And there wasn't a question, but they just said, I love all your
204
+
205
+ 52
206
+ 00:04:10,759 --> 00:04:15,960
207
+ podcasts. Could you please call out my name in your next podcast? So hello, Rehaman. Thank you
208
+
209
+ 53
210
+ 00:04:15,960 --> 00:04:20,379
211
+ for the YouTube comment. And we wanted to give you a shout out. Wonderful. And guys, don't forget to go
212
+
213
+ 54
214
+ 00:04:20,379 --> 00:04:25,040
215
+ ahead and hit that follow button wherever you're listening. Now, if you're over on YouTube, you can
216
+
217
+ 55
218
+ 00:04:25,040 --> 00:04:30,860
219
+ hit the subscribe button. Just subscribe right there. However, if you're on Apple Podcasts or Spotify,
220
+
221
+ 56
222
+ 00:04:30,860 --> 00:04:37,600
223
+ go ahead and hit follow. So you make sure you get all there's English five days a week. Okay. Yes.
224
+
225
+ 57
226
+ 00:04:37,680 --> 00:04:42,680
227
+ Awesome. All right. Let's dive in with this interesting vocabulary. So when we use the adjective
228
+
229
+ 58
230
+ 00:04:42,680 --> 00:04:50,920
231
+ loathe, it's always in this chunk loathe to, loathe to do something. And this means to intensely dislike
232
+
233
+ 59
234
+ 00:04:50,920 --> 00:04:56,980
235
+ or hate something. So like at the top of the episode, I'm loathe to swim in our pool. Or you might say,
236
+
237
+ 60
238
+ 00:04:56,980 --> 00:05:01,520
239
+ I'm loathe to go through haunted houses. I really don't like them. This is true for me.
240
+
241
+ 61
242
+ 00:05:01,759 --> 00:05:06,120
243
+ I don't like jump scares. I don't want to be like, that's not fun. Do you like haunted houses?
244
+
245
+ 62
246
+ 00:05:06,120 --> 00:05:11,560
247
+ I'm kind of the same way. I love, opposite word, I love going on ghost tours.
248
+
249
+ 63
250
+ 00:05:12,060 --> 00:05:12,960
251
+ Oh yeah, that I would like.
252
+
253
+ 64
254
+ 00:05:13,400 --> 00:05:17,560
255
+ Yeah. You could be on the sidewalk and you're safe and it's happening in that house, right? It's not
256
+
257
+ 65
258
+ 00:05:17,560 --> 00:05:21,819
259
+ happening to you. It's very different when you're in a haunted house and things are jumping out at you.
260
+
261
+ 66
262
+ 00:05:22,220 --> 00:05:26,720
263
+ Right. I think this is for my brother would do this when I was young. He would jump out from behind a door,
264
+
265
+ 67
266
+ 00:05:26,720 --> 00:05:31,520
267
+ and grab you and scream. And I really hate that now. I'm like, please don't jump scare.
268
+
269
+ 68
270
+ 00:05:31,940 --> 00:05:33,400
271
+ Yeah, no jump scare. I love it.
272
+
273
+ 69
274
+ 00:05:33,920 --> 00:05:38,300
275
+ But what's interesting is loathe this way as an adjective is spelled without an E. It's just
276
+
277
+ 70
278
+ 00:05:38,300 --> 00:05:45,220
279
+ L-O-A-T-H, loathe to do something. So that's tricky. What's another example of using it this way?
280
+
281
+ 71
282
+ 00:05:45,220 --> 00:05:51,320
283
+ So someone's habits, right? They're nocturnal. How awake are they? What are their habits? He has always
284
+
285
+ 72
286
+ 00:05:51,319 --> 00:05:57,599
287
+ been loathe to get up early. Okay. Yes. He's just a nighttime person, not a morning person.
288
+
289
+ 73
290
+ 00:05:58,040 --> 00:06:02,360
291
+ Exactly. Right. And like Lindsay said, this is a little less common, but it is impressive. It's
292
+
293
+ 74
294
+ 00:06:02,360 --> 00:06:06,759
295
+ impressive vocabulary. You will hear it. You definitely can use it, but the spelling's a
296
+
297
+ 75
298
+ 00:06:06,759 --> 00:06:11,379
299
+ little tricky because there's no E there. What about when there is an E, Lindsay? This is when
300
+
301
+ 76
302
+ 00:06:11,379 --> 00:06:15,939
303
+ it's a verb in the sentence. To be honest, this is a good review for me too, right? Remembering
304
+
305
+ 77
306
+ 00:06:15,939 --> 00:06:23,360
307
+ where the E goes. So we put an E at the end in this case, right? L-O-A-T-H-E, and that becomes a
308
+
309
+ 78
310
+ 00:06:23,360 --> 00:06:30,980
311
+ verb. Okay. So for example, I loathe driving in snowy weather. And I think native speakers tend to
312
+
313
+ 79
314
+ 00:06:30,980 --> 00:06:37,500
315
+ punch that loathe too. I agree. That almost would be strange because it's like intense hatred or dread.
316
+
317
+ 80
318
+ 00:06:37,500 --> 00:06:41,199
319
+ So it would be strange to be like, I loathe driving in snowy weather. Yeah.
320
+
321
+ 81
322
+ 00:06:41,980 --> 00:06:44,399
323
+ Emphasize it. I loathe driving in snowy weather.
324
+
325
+ 82
326
+ 00:06:44,759 --> 00:06:49,459
327
+ And it's also one of those kind of full mouth words. You need everything happening. So we really
328
+
329
+ 83
330
+ 00:06:49,459 --> 00:06:55,300
331
+ indulge in saying that word. Okay. I agree. Absolutely. Or maybe she loathes doing homework,
332
+
333
+ 84
334
+ 00:06:55,639 --> 00:06:59,160
335
+ right? This has, it can't be something that you just sort of don't like a little bit.
336
+
337
+ 85
338
+ 00:06:59,279 --> 00:07:01,540
339
+ It really has to be a pretty intense feeling.
340
+
341
+ 86
342
+ 00:07:02,680 --> 00:07:06,620
343
+ Love it. Love it. All right, Aubrey, where to now? What else do we need to know?
344
+
345
+ 87
346
+ 00:07:06,620 --> 00:07:11,540
347
+ Yeah. So just the main thing is that these are pronounced exactly the same. So when you're
348
+
349
+ 88
350
+ 00:07:11,540 --> 00:07:15,639
351
+ speaking, you don't have to worry about this at all. It's like effect and effect that we've talked
352
+
353
+ 89
354
+ 00:07:15,639 --> 00:07:20,819
355
+ about recently. Yes. Doesn't matter when you're speaking, but they are spelled differently. So
356
+
357
+ 90
358
+ 00:07:20,819 --> 00:07:25,180
359
+ when you're writing, if you see the word loathe, you need to take a second. Okay. Is this being used
360
+
361
+ 91
362
+ 00:07:25,180 --> 00:07:31,100
363
+ as an adjective or a verb? And the big pro tip here is if it's loathe to, loathe to do something,
364
+
365
+ 92
366
+ 00:07:31,240 --> 00:07:33,280
367
+ that's when there's no E because it's an adjective.
368
+
369
+ 93
370
+ 00:07:33,279 --> 00:07:36,939
371
+ Hmm. Really good to know. So if we're taking the IELTS exam, for example,
372
+
373
+ 94
374
+ 00:07:37,359 --> 00:07:41,579
375
+ or if we're just writing a business email, we need to know, or even a text message to a friend,
376
+
377
+ 95
378
+ 00:07:41,699 --> 00:07:45,659
379
+ we need to know the difference here. Okay. Absolutely. But let's share some other
380
+
381
+ 96
382
+ 00:07:45,659 --> 00:07:51,199
383
+ interesting options because if you're talking about being upset about something or angry or
384
+
385
+ 97
386
+ 00:07:51,199 --> 00:07:56,399
387
+ something you dread or hate doing, there are so many interesting words to ways to say this,
388
+
389
+ 98
390
+ 00:07:56,459 --> 00:08:02,479
391
+ right? Yes. Yes. I really like the word abhor. Yeah. This is actually even stronger than loathe,
392
+
393
+ 99
394
+ 00:08:02,480 --> 00:08:06,860
395
+ if you can believe it, because loathe is pretty strong. But if you say you abhor something,
396
+
397
+ 100
398
+ 00:08:06,860 --> 00:08:10,960
399
+ that's like the most intense dislike that you can get.
400
+
401
+ 101
402
+ 00:08:11,439 --> 00:08:17,460
403
+ Ooh. Okay. So for example, I abhor small spaces. I'm definitely claustrophobic. Are you
404
+
405
+ 102
406
+ 00:08:17,460 --> 00:08:20,340
407
+ claustrophobic, Aubrey? Do you mind being in an elevator?
408
+
409
+ 103
410
+ 00:08:20,340 --> 00:08:21,520
411
+ I don't think so. What about you?
412
+
413
+ 104
414
+ 00:08:21,920 --> 00:08:25,700
415
+ No, I don't think so. I had an apartment in New York or Tokyo too.
416
+
417
+ 105
418
+ 00:08:25,699 --> 00:08:29,039
419
+ Right. If ever you lived in New York, you can't be claustrophobic.
420
+
421
+ 106
422
+ 00:08:30,079 --> 00:08:33,819
423
+ Basically living in a closet. Yeah, for sure. But if I had to be in an elevator for
424
+
425
+ 107
426
+ 00:08:33,819 --> 00:08:38,600
427
+ hours and hours, then I might start getting claustrophobic. I can imagine the length of
428
+
429
+ 108
430
+ 00:08:38,600 --> 00:08:41,799
431
+ time definitely would affect that. Yeah. Because your mind would start running,
432
+
433
+ 109
434
+ 00:08:42,019 --> 00:08:42,860
435
+ right? That's the key. Absolutely.
436
+
437
+ 110
438
+ 00:08:42,860 --> 00:08:44,379
439
+ Okay. What's another?
440
+
441
+ 111
442
+ 00:08:44,500 --> 00:08:49,680
443
+ Or maybe this is about Michelle. She pours olives. She doesn't even like the smell of them.
444
+
445
+ 112
446
+ 00:08:50,379 --> 00:08:54,500
447
+ I know. I think Michelle doesn't love olives. I love them so much.
448
+
449
+ 113
450
+ 00:08:54,779 --> 00:08:59,820
451
+ And I'm sure a lot of our listeners love. I mean, it's a Mediterranean food. I love olives. I can't
452
+
453
+ 114
454
+ 00:08:59,820 --> 00:09:03,980
455
+ imagine a life without olives or olive oil. I mean, I eat so much olive oil.
456
+
457
+ 115
458
+ 00:09:04,159 --> 00:09:08,080
459
+ I have a feeling Michelle does eat olive oil. I think it's just the taste of like actual olives,
460
+
461
+ 116
462
+ 00:09:08,080 --> 00:09:12,779
463
+ a bowl of green and Kalamata olives, but she is missing out. I love them so much.
464
+
465
+ 117
466
+ 00:09:13,759 --> 00:09:19,480
467
+ So abhor is a good one. And then disdain is good. This is a noun, right?
468
+
469
+ 118
470
+ 00:09:19,700 --> 00:09:23,899
471
+ Well, this is tricky. It exists as a noun and a verb. So let's go over the noun first. So this
472
+
473
+ 119
474
+ 00:09:23,899 --> 00:09:27,980
475
+ is the feeling that something isn't worthy of respect or consideration. So maybe like
476
+
477
+ 120
478
+ 00:09:27,980 --> 00:09:35,000
479
+ she looked at him with disdain. So this is a noun here, right? It's what she's using to look that her
480
+
481
+ 121
482
+ 00:09:35,000 --> 00:09:40,300
483
+ expression has disdain in it. Disdain, like not giving the person a chance, very close-minded,
484
+
485
+ 122
486
+ 00:09:40,299 --> 00:09:42,579
487
+ very negative, right? Absolutely.
488
+
489
+ 123
490
+ 00:09:43,199 --> 00:09:46,979
491
+ Or he's always treated her with disdain. Oh, strange word.
492
+
493
+ 124
494
+ 00:09:47,279 --> 00:09:50,719
495
+ Yes. Yeah, it is kind of a strange word. And it also exists as a verb. They are spelled the
496
+
497
+ 125
498
+ 00:09:50,719 --> 00:09:52,919
499
+ same, luckily, unlike loathe. Good.
500
+
501
+ 126
502
+ 00:09:52,919 --> 00:09:58,139
503
+ So this is like, if you treat someone like they're not worthy of respect, you can use this as a verb.
504
+
505
+ 127
506
+ 00:09:58,620 --> 00:10:05,699
507
+ So the main way I've seen this is if you disdain to answer questions. This is very rare
508
+
509
+ 128
510
+ 00:10:05,699 --> 00:10:10,039
511
+ in English that we use this as a verb. When I saw this in the dictionary, there's a verb and
512
+
513
+ 129
514
+ 00:10:10,039 --> 00:10:14,319
515
+ a noun. I had to think, I'm like, I don't hear that very often. So maybe if someone say like,
516
+
517
+ 130
518
+ 00:10:14,320 --> 00:10:19,820
519
+ I disdained to answer the questions or in present. And this is like, because you dislike the questions
520
+
521
+ 131
522
+ 00:10:19,820 --> 00:10:25,680
523
+ or maybe if it were like on a crime show, be like, she disdained to answer. But this is definitely a
524
+
525
+ 132
526
+ 00:10:25,680 --> 00:10:31,500
527
+ connotation of you are not answering them because you disagree with them. Something like that,
528
+
529
+ 133
530
+ 00:10:31,560 --> 00:10:32,379
531
+ right? Interesting.
532
+
533
+ 134
534
+ 00:10:32,379 --> 00:10:34,879
535
+ You don't respect the questions for whatever reason.
536
+
537
+ 135
538
+ 00:10:35,180 --> 00:10:38,640
539
+ Yeah. I mean, the courtroom idea, I know in a courtroom, well, at least on courtroom dramas,
540
+
541
+ 136
542
+ 00:10:38,639 --> 00:10:43,360
543
+ we hear the word sustained a lot, right? A different word, but maybe in a similar family
544
+
545
+ 137
546
+ 00:10:43,360 --> 00:10:44,519
547
+ sort of thing. I don't know.
548
+
549
+ 138
550
+ 00:10:44,720 --> 00:10:47,799
551
+ Good point. Good point. So this is really interesting too, to think about.
552
+
553
+ 139
554
+ 00:10:48,360 --> 00:10:54,460
555
+ Some of these we use more often in the past tense, right? We wouldn't really use abhor in
556
+
557
+ 140
558
+ 00:10:54,460 --> 00:11:00,319
559
+ the past tense. We're like, I abhorred that film. I loathed that restaurant. Not really, right?
560
+
561
+ 141
562
+ 00:11:00,500 --> 00:11:06,120
563
+ Instead, we usually use these in the present because they're such strong feelings. It's more like,
564
+
565
+ 142
566
+ 00:11:06,120 --> 00:11:11,960
567
+ this is something I always will always abhor or loathe because it's a strong, it's hard to like
568
+
569
+ 143
570
+ 00:11:11,960 --> 00:11:17,320
571
+ hate something that strongly that just happened once, like one trip to a restaurant or something,
572
+
573
+ 144
574
+ 00:11:17,460 --> 00:11:20,820
575
+ right? I see what you're saying. So it's almost like a state of being, this feeling.
576
+
577
+ 145
578
+ 00:11:20,820 --> 00:11:22,360
579
+ Yes. Almost like state of being.
580
+
581
+ 146
582
+ 00:11:22,360 --> 00:11:26,000
583
+ Exactly. That's how strong it is, right? If it's something we don't like, we probably would say
584
+
585
+ 147
586
+ 00:11:26,000 --> 00:11:32,820
587
+ like, oh, I really hated that. I absolutely detested it. Or I would say, I couldn't stand it.
588
+
589
+ 148
590
+ 00:11:33,019 --> 00:11:33,279
591
+ Yes.
592
+
593
+ 149
594
+ 00:11:33,340 --> 00:11:35,419
595
+ I mean, I really didn't like it.
596
+
597
+ 150
598
+ 00:11:35,419 --> 00:11:39,819
599
+ Yes. I love that. That's a really important point. So it's the things that we really hate
600
+
601
+ 151
602
+ 00:11:39,819 --> 00:11:46,819
603
+ and we've always hated. We just don't identify with those things. We're using abhor or loathe,
604
+
605
+ 152
606
+ 00:11:47,519 --> 00:11:53,000
607
+ whereas it's a single experience or a movie or a meal, lighter things like hate. I mean,
608
+
609
+ 153
610
+ 00:11:53,059 --> 00:11:57,939
611
+ light is not, hate is not light, but like we don't get that passionate about like an individual
612
+
613
+ 154
614
+ 00:11:57,939 --> 00:12:03,360
615
+ experience, right? So these words, this is what's important to, to pay attention to here is
616
+
617
+ 155
618
+ 00:12:03,360 --> 00:12:09,940
619
+ these words really signify that a passionate dislike. And so it is strange if you use them.
620
+
621
+ 156
622
+ 00:12:10,080 --> 00:12:14,379
623
+ We don't even really use them jokingly to talk about how much we dislike something. I guess you
624
+
625
+ 157
626
+ 00:12:14,379 --> 00:12:21,120
627
+ could be like, oh, I loathe spinach. And you're just joking, right? But for the most part, it's for
628
+
629
+ 158
630
+ 00:12:21,120 --> 00:12:24,039
631
+ things that you really hate, feel very strongly about.
632
+
633
+ 159
634
+ 00:12:24,039 --> 00:12:30,019
635
+ All right. Good to know. Excellent. Okay, Aubrey, we are back from break. Let's do a role play for
636
+
637
+ 160
638
+ 00:12:30,019 --> 00:12:35,059
639
+ today. Here we are discussing the foods that we dislike. Okay. Let's see.
640
+
641
+ 161
642
+ 00:12:35,059 --> 00:12:39,980
643
+ I'll start us out. I'm not usually picky, but I absolutely abhor eel.
644
+
645
+ 162
646
+ 00:12:40,559 --> 00:12:45,120
647
+ Really? I love eel rolls at a sushi place. Eel sauce is delicious.
648
+
649
+ 163
650
+ 00:12:45,120 --> 00:12:51,120
651
+ Yeah. It's not for me. I'm loathe to even think about eating eel. What about you? Are there any
652
+
653
+ 164
654
+ 00:12:51,120 --> 00:12:56,560
655
+ foods that you can't stand? Hmm. Yeah. I've always had disdain for masago.
656
+
657
+ 165
658
+ 00:12:57,060 --> 00:13:02,039
659
+ Oh, that's fish eggs, right? Yeah. I don't know what it is, but I loathe it.
660
+
661
+ 166
662
+ 00:13:02,440 --> 00:13:08,120
663
+ Okay. Nice. So this is possible that you like really dislike certain foods and we would use,
664
+
665
+ 167
666
+ 00:13:08,179 --> 00:13:12,740
667
+ you know, abhor, loathe, whatever. But if you just mean to say like, I don't really like that very much,
668
+
669
+ 168
670
+ 00:13:12,740 --> 00:13:17,519
671
+ we wouldn't use these bigger, stronger words. I think a lot of people have issues around texture,
672
+
673
+ 169
674
+ 00:13:17,799 --> 00:13:21,159
675
+ right? The texture of things. Even some people don't like the texture of eggs,
676
+
677
+ 170
678
+ 00:13:21,860 --> 00:13:25,500
679
+ but sort of fish eggs, the texture, I think people probably struggle with.
680
+
681
+ 171
682
+ 00:13:25,680 --> 00:13:29,820
683
+ That's why I like eel, but I know someone who doesn't and I think it's a texture thing.
684
+
685
+ 172
686
+ 00:13:30,279 --> 00:13:34,680
687
+ Yeah. No, eel is great. When I lived in Japan for my last meal there, my friends,
688
+
689
+ 173
690
+ 00:13:34,759 --> 00:13:39,720
691
+ my Japanese friends took me to an eel restaurant, a place like in the mountains that specialized just in
692
+
693
+ 174
694
+ 00:13:39,720 --> 00:13:45,180
695
+ eel. It was amazing. Wow. That's awesome. And I do love eel sauce. This is me. I think it's so
696
+
697
+ 175
698
+ 00:13:45,180 --> 00:13:50,300
699
+ tasty. It's a little bit sweeter on some rolls. Oh, so delicious. Yes, exactly. Let's go through
700
+
701
+ 176
702
+ 00:13:50,300 --> 00:13:56,379
703
+ this. So again, you said, I'm usually not picky, but I absolutely abhor eel. It's kind of a weird
704
+
705
+ 177
706
+ 00:13:56,379 --> 00:14:00,720
707
+ word too, because we are kind of pronouncing the H, aren't we, Aubrey? Yes, we do have to,
708
+
709
+ 178
710
+ 00:14:00,820 --> 00:14:05,160
711
+ right? You hit that H kind of hard, abhor. And this is another one where you have to like
712
+
713
+ 179
714
+ 00:14:05,159 --> 00:14:09,639
715
+ hit the word hard. You have to emphasize it just like loathe. It's a little strange. We're like,
716
+
717
+ 180
718
+ 00:14:09,679 --> 00:14:14,819
719
+ I really abhor that. No, you need the intonation needs to be powerful because the emotion,
720
+
721
+ 181
722
+ 00:14:15,000 --> 00:14:18,579
723
+ the passion about the dislike is powerful. The word implies that.
724
+
725
+ 182
726
+ 00:14:19,019 --> 00:14:21,980
727
+ Yeah. We don't want to have that flat affect, right? We want to make sure we're
728
+
729
+ 183
730
+ 00:14:21,980 --> 00:14:24,659
731
+ putting life into our words here. What else, Aubrey?
732
+
733
+ 184
734
+ 00:14:24,899 --> 00:14:30,199
735
+ Yeah. And then I said, oh, it's not for me. I'm loathe to even think about eating eel. So this is where
736
+
737
+ 185
738
+ 00:14:30,200 --> 00:14:35,740
739
+ it's an adjective. I'm describing myself, myself as loathe to think about something.
740
+
741
+ 186
742
+ 00:14:36,259 --> 00:14:38,740
743
+ Hmm. Very interesting construction too. I'm loathe to even think.
744
+
745
+ 187
746
+ 00:14:38,740 --> 00:14:40,879
747
+ Yeah. So it would be written without the E there, right?
748
+
749
+ 188
750
+ 00:14:40,920 --> 00:14:47,620
751
+ Okay. No E there. Good. And then I said, hmm, yeah, I've always had disdain for masago.
752
+
753
+ 189
754
+ 00:14:48,080 --> 00:14:52,000
755
+ Hmm. Yeah. Since I've always, and this is an interesting thing to say, like disdain,
756
+
757
+ 190
758
+ 00:14:52,000 --> 00:14:58,800
759
+ because it does often imply more of like a lack of respect. So this would be sort of joking even to
760
+
761
+ 191
762
+ 00:14:58,799 --> 00:15:03,559
763
+ say this about food. Be like, oh, I have such disdain for masago. Like I would probably laugh
764
+
765
+ 192
766
+ 00:15:03,559 --> 00:15:08,819
767
+ at that. I'd be like, that's really funny because it usually does imply more like, um,
768
+
769
+ 193
770
+ 00:15:09,199 --> 00:15:14,419
771
+ you, you, you are critical of something. You, you disrespect something.
772
+
773
+ 194
774
+ 00:15:15,120 --> 00:15:19,019
775
+ Right. For bigger things than just food. Like food is just a taste. That's all it is. It's
776
+
777
+ 195
778
+ 00:15:19,019 --> 00:15:25,019
779
+ sort of one dimensional. Right. But for example, like political parties or certain ways of thinking
780
+
781
+ 196
782
+ 00:15:25,019 --> 00:15:29,139
783
+ would be more common for disdain, right? That's where you would use it more literally,
784
+
785
+ 197
786
+ 00:15:29,340 --> 00:15:33,340
787
+ right? This is kind of more joking, but yeah, you could say you have disdain for a certain
788
+
789
+ 198
790
+ 00:15:33,340 --> 00:15:37,899
791
+ political candidate or people who agree with a certain policy. That would be more because it's
792
+
793
+ 199
794
+ 00:15:37,899 --> 00:15:42,699
795
+ more of a lack of respect for that for sure. And then the last thing that was said was, yeah,
796
+
797
+ 200
798
+ 00:15:42,699 --> 00:15:48,600
799
+ I don't know what it is, but I loathe it. Right. So they just mean like, I really hate it. Yeah.
800
+
801
+ 201
802
+ 00:15:48,600 --> 00:15:54,300
803
+ And this is because it's a verb. This would be spelled with an E L O A T H E. All right. Good
804
+
805
+ 202
806
+ 00:15:54,300 --> 00:16:00,200
807
+ high level episode for our listeners today at the B2 C1 level. I love it. Yes, absolutely. We want to
808
+
809
+ 203
810
+ 00:16:00,200 --> 00:16:06,060
811
+ make sure that you don't miss the episode that inspired this and 2402 right here on All There's
812
+
813
+ 204
814
+ 00:16:06,060 --> 00:16:11,600
815
+ English. Should you call someone cheap in English where we said sometimes people are loathe to spend
816
+
817
+ 205
818
+ 00:16:11,600 --> 00:16:17,019
819
+ money. So use it as an adjective there. So it wouldn't have an E. Yes. And the connection piece
820
+
821
+ 206
822
+ 00:16:17,019 --> 00:16:22,079
823
+ here guys for today is it's not always, it doesn't have to be about these positive, amazing things
824
+
825
+ 207
826
+ 00:16:22,079 --> 00:16:27,500
827
+ that you're connecting over. You can build just as strong of a connection over things that you loathe
828
+
829
+ 208
830
+ 00:16:27,500 --> 00:16:33,079
831
+ and have disdain for. Okay. Absolutely. Yes. Right. I feel like that would even bond you closer with
832
+
833
+ 209
834
+ 00:16:33,079 --> 00:16:38,279
835
+ someone if you are sort of willing to admit these more negative things. I don't love everything,
836
+
837
+ 210
838
+ 00:16:38,399 --> 00:16:42,699
839
+ right? The people I'm close to, I want to build a stronger connection with. I'm going to admit the things
840
+
841
+ 211
842
+ 00:16:42,700 --> 00:16:47,820
843
+ that I really abhor. Especially if it's very, like most people love this thing, but you actually
844
+
845
+ 212
846
+ 00:16:47,820 --> 00:16:53,080
847
+ have disdain for it, right? Yeah. A hot take. Yes, exactly. That could build a really strong sense
848
+
849
+ 213
850
+ 00:16:53,080 --> 00:16:57,740
851
+ of trust and connection right there. So that's what we're going for. All right. Good stuff, Aubrey.
852
+
853
+ 214
854
+ 00:16:57,840 --> 00:17:02,140
855
+ We'll see you in the next episode of All There's English and guys hit the follow button now.
856
+
857
+ 215
858
+ 00:17:02,640 --> 00:17:06,240
859
+ Yes. Awesome. We'll see you guys next time. All right. Take care. Bye. Bye.
860
+
861
+ 216
862
+ 00:17:06,240 --> 00:17:16,019
863
+ Bye. Thanks for listening to All Ears English. Would you like to know your English level?
864
+
865
+ 217
866
+ 00:17:16,259 --> 00:17:23,339
867
+ Take our two-minute quiz. Go to allearsenglish.com forward slash fluency score. And if you believe
868
+
869
+ 218
870
+ 00:17:23,339 --> 00:17:31,420
871
+ in connection, not perfection, then hit subscribe now to make sure you don't miss anything. See you next time.
872
+
873
+ 219
874
+ 00:17:31,420 --> 00:17:32,420
875
+ Bye.
tests/cache/apple_podcast_episode.txt ADDED
@@ -0,0 +1 @@
+ This is the All Ears English Podcast, Episode 2422. Don't be loathe to speak English. Welcome to the All Ears English Podcast, downloaded more than 200 million times. Are you feeling stuck with your English? We'll show you how to become fearless and fluent by focusing on connection, not perfection, with your American host, Aubrey Carter, the IELTS whiz, and Lindsay McMahon, the English adventurer, coming to you from Arizona and Colorado, USA. Reach the top 25% of all native speakers by understanding these three ways to say that you dislike something in English. Do you ever wish that you could just hang out with native speakers and pick up real English naturally? That's what it's like here on All Ears English. We're two friends having real conversations, no scripts, no lectures, just fun, honest talk about life, culture, and how to connect in English. It's like grabbing coffee with us and leaving with better English every time. But if you're not following All Ears English, then you're probably missing some episodes. So go ahead and hit the follow button now, wherever you listen to the show. Hit follow now and come check us out five days a week. Hey there, Aubrey, what's shaking? Not much. How are you, Lindsay? Feeling great today, but I have a key question for you today. You ready? Yeah, let's hear it. So Aubrey, in your daily life or just maybe right now in this moment, is there anything you are loathe to do? You know, I am loathe to swim with my kids right now. Our pool is still ice cold, in my opinion. It's way too cold and they are ready to swim. They're getting in. They're like, mom, come swim with me. And it's way too cold for me. So I make excuses and I really, I'm like, I'll just dip my toes in. It's crazy. So I am really loathe to get in that cold swimming pool. It's funny how we lose our nerves as we get older. It becomes harder and harder to get into swimming pools or cold lakes or the ocean. When we're kids, we just run in like, we didn't mind at all. I know. Maybe our nerve endings are less sensitive when we're younger. I don't know. We just don't care. We just don't care. I don't know. We don't think about it. I don't know. We think a lot as adults, don't we? Yeah, for sure. Well, this is interesting. This word loathe came up in a recent episode. Stay to the end and we'll share which one it is in case you missed it. But I realized when proofreading the transcripts that it is spelled differently if it's an adjective or a verb. That's a really good insight. So I noticed like the misspelling and I was like, but usually I thought it did end with an E. So this is interesting. We're going to share this today. This is a common error by native speakers as well. So not something you really need to stress about. But if you can make this improvement, there will be times where people are impressed. If it's an email or a written memo or something, especially at work, and you spell this correctly, you're doing better than probably 80% of native English speakers. Oh, for sure. And I think even beyond just spelling it correctly, just using it is going to put you probably in the top 25% of native speakers, I think. I don't think this is an average word. I think this is an above average word to use, Aubrey. Don't you think? Yes, it's an impressive word. We're going to go into both the verb and the adjective. And we're also going to share a few more interesting ways to share this because connection isn't just about things you have in common and things you like. 
We also connect about things we loathe, things we dread, things we hate. So this is interesting too. You need to also have this vocab. Yeah. And it doesn't mean that you're a negative person. If you're always connecting, there can be very positive connection around something you don't want to do, especially if you share that in common with someone else. That can be a true connection moment as well. Yes, for sure. We also want to give a shout out. There was a comment on YouTube from Rehaman from India. And there wasn't a question, but they just said, I love all your podcasts. Could you please call out my name in your next podcast? So hello, Rehaman. Thank you for the YouTube comment. And we wanted to give you a shout out. Wonderful. And guys, don't forget to go ahead and hit that follow button wherever you're listening. Now, if you're over on YouTube, you can hit the subscribe button. Just subscribe right there. However, if you're on Apple Podcasts or Spotify, go ahead and hit follow. So you make sure you get all there's English five days a week. Okay. Yes. Awesome. All right. Let's dive in with this interesting vocabulary. So when we use the adjective loathe, it's always in this chunk loathe to, loathe to do something. And this means to intensely dislike or hate something. So like at the top of the episode, I'm loathe to swim in our pool. Or you might say, I'm loathe to go through haunted houses. I really don't like them. This is true for me. I don't like jump scares. I don't want to be like, that's not fun. Do you like haunted houses? I'm kind of the same way. I love, opposite word, I love going on ghost tours. Oh yeah, that I would like. Yeah. You could be on the sidewalk and you're safe and it's happening in that house, right? It's not happening to you. It's very different when you're in a haunted house and things are jumping out at you. Right. I think this is for my brother would do this when I was young. He would jump out from behind a door, and grab you and scream. And I really hate that now. I'm like, please don't jump scare. Yeah, no jump scare. I love it. But what's interesting is loathe this way as an adjective is spelled without an E. It's just L-O-A-T-H, loathe to do something. So that's tricky. What's another example of using it this way? So someone's habits, right? They're nocturnal. How awake are they? What are their habits? He has always been loathe to get up early. Okay. Yes. He's just a nighttime person, not a morning person. Exactly. Right. And like Lindsay said, this is a little less common, but it is impressive. It's impressive vocabulary. You will hear it. You definitely can use it, but the spelling's a little tricky because there's no E there. What about when there is an E, Lindsay? This is when it's a verb in the sentence. To be honest, this is a good review for me too, right? Remembering where the E goes. So we put an E at the end in this case, right? L-O-A-T-H-E, and that becomes a verb. Okay. So for example, I loathe driving in snowy weather. And I think native speakers tend to punch that loathe too. I agree. That almost would be strange because it's like intense hatred or dread. So it would be strange to be like, I loathe driving in snowy weather. Yeah. Emphasize it. I loathe driving in snowy weather. And it's also one of those kind of full mouth words. You need everything happening. So we really indulge in saying that word. Okay. I agree. Absolutely. Or maybe she loathes doing homework, right? 
This has, it can't be something that you just sort of don't like a little bit. It really has to be a pretty intense feeling. Love it. Love it. All right, Aubrey, where to now? What else do we need to know? Yeah. So just the main thing is that these are pronounced exactly the same. So when you're speaking, you don't have to worry about this at all. It's like effect and effect that we've talked about recently. Yes. Doesn't matter when you're speaking, but they are spelled differently. So when you're writing, if you see the word loathe, you need to take a second. Okay. Is this being used as an adjective or a verb? And the big pro tip here is if it's loathe to, loathe to do something, that's when there's no E because it's an adjective. Hmm. Really good to know. So if we're taking the IELTS exam, for example, or if we're just writing a business email, we need to know, or even a text message to a friend, we need to know the difference here. Okay. Absolutely. But let's share some other interesting options because if you're talking about being upset about something or angry or something you dread or hate doing, there are so many interesting words to ways to say this, right? Yes. Yes. I really like the word abhor. Yeah. This is actually even stronger than loathe, if you can believe it, because loathe is pretty strong. But if you say you abhor something, that's like the most intense dislike that you can get. Ooh. Okay. So for example, I abhor small spaces. I'm definitely claustrophobic. Are you claustrophobic, Aubrey? Do you mind being in an elevator? I don't think so. What about you? No, I don't think so. I had an apartment in New York or Tokyo too. Right. If ever you lived in New York, you can't be claustrophobic. Basically living in a closet. Yeah, for sure. But if I had to be in an elevator for hours and hours, then I might start getting claustrophobic. I can imagine the length of time definitely would affect that. Yeah. Because your mind would start running, right? That's the key. Absolutely. Okay. What's another? Or maybe this is about Michelle. She pours olives. She doesn't even like the smell of them. I know. I think Michelle doesn't love olives. I love them so much. And I'm sure a lot of our listeners love. I mean, it's a Mediterranean food. I love olives. I can't imagine a life without olives or olive oil. I mean, I eat so much olive oil. I have a feeling Michelle does eat olive oil. I think it's just the taste of like actual olives, a bowl of green and Kalamata olives, but she is missing out. I love them so much. So abhor is a good one. And then disdain is good. This is a noun, right? Well, this is tricky. It exists as a noun and a verb. So let's go over the noun first. So this is the feeling that something isn't worthy of respect or consideration. So maybe like she looked at him with disdain. So this is a noun here, right? It's what she's using to look that her expression has disdain in it. Disdain, like not giving the person a chance, very close-minded, very negative, right? Absolutely. Or he's always treated her with disdain. Oh, strange word. Yes. Yeah, it is kind of a strange word. And it also exists as a verb. They are spelled the same, luckily, unlike loathe. Good. So this is like, if you treat someone like they're not worthy of respect, you can use this as a verb. So the main way I've seen this is if you disdain to answer questions. This is very rare in English that we use this as a verb. When I saw this in the dictionary, there's a verb and a noun. 
I had to think, I'm like, I don't hear that very often. So maybe if someone say like, I disdained to answer the questions or in present. And this is like, because you dislike the questions or maybe if it were like on a crime show, be like, she disdained to answer. But this is definitely a connotation of you are not answering them because you disagree with them. Something like that, right? Interesting. You don't respect the questions for whatever reason. Yeah. I mean, the courtroom idea, I know in a courtroom, well, at least on courtroom dramas, we hear the word sustained a lot, right? A different word, but maybe in a similar family sort of thing. I don't know. Good point. Good point. So this is really interesting too, to think about. Some of these we use more often in the past tense, right? We wouldn't really use abhor in the past tense. We're like, I abhorred that film. I loathed that restaurant. Not really, right? Instead, we usually use these in the present because they're such strong feelings. It's more like, this is something I always will always abhor or loathe because it's a strong, it's hard to like hate something that strongly that just happened once, like one trip to a restaurant or something, right? I see what you're saying. So it's almost like a state of being, this feeling. Yes. Almost like state of being. Exactly. That's how strong it is, right? If it's something we don't like, we probably would say like, oh, I really hated that. I absolutely detested it. Or I would say, I couldn't stand it. Yes. I mean, I really didn't like it. Yes. I love that. That's a really important point. So it's the things that we really hate and we've always hated. We just don't identify with those things. We're using abhor or loathe, whereas it's a single experience or a movie or a meal, lighter things like hate. I mean, light is not, hate is not light, but like we don't get that passionate about like an individual experience, right? So these words, this is what's important to, to pay attention to here is these words really signify that a passionate dislike. And so it is strange if you use them. We don't even really use them jokingly to talk about how much we dislike something. I guess you could be like, oh, I loathe spinach. And you're just joking, right? But for the most part, it's for things that you really hate, feel very strongly about. All right. Good to know. Excellent. Okay, Aubrey, we are back from break. Let's do a role play for today. Here we are discussing the foods that we dislike. Okay. Let's see. I'll start us out. I'm not usually picky, but I absolutely abhor eel. Really? I love eel rolls at a sushi place. Eel sauce is delicious. Yeah. It's not for me. I'm loathe to even think about eating eel. What about you? Are there any foods that you can't stand? Hmm. Yeah. I've always had disdain for masago. Oh, that's fish eggs, right? Yeah. I don't know what it is, but I loathe it. Okay. Nice. So this is possible that you like really dislike certain foods and we would use, you know, abhor, loathe, whatever. But if you just mean to say like, I don't really like that very much, we wouldn't use these bigger, stronger words. I think a lot of people have issues around texture, right? The texture of things. Even some people don't like the texture of eggs, but sort of fish eggs, the texture, I think people probably struggle with. That's why I like eel, but I know someone who doesn't and I think it's a texture thing. Yeah. No, eel is great. 
When I lived in Japan for my last meal there, my friends, my Japanese friends took me to an eel restaurant, a place like in the mountains that specialized just in eel. It was amazing. Wow. That's awesome. And I do love eel sauce. This is me. I think it's so tasty. It's a little bit sweeter on some rolls. Oh, so delicious. Yes, exactly. Let's go through this. So again, you said, I'm usually not picky, but I absolutely abhor eel. It's kind of a weird word too, because we are kind of pronouncing the H, aren't we, Aubrey? Yes, we do have to, right? You hit that H kind of hard, abhor. And this is another one where you have to like hit the word hard. You have to emphasize it just like loathe. It's a little strange. We're like, I really abhor that. No, you need the intonation needs to be powerful because the emotion, the passion about the dislike is powerful. The word implies that. Yeah. We don't want to have that flat affect, right? We want to make sure we're putting life into our words here. What else, Aubrey? Yeah. And then I said, oh, it's not for me. I'm loathe to even think about eating eel. So this is where it's an adjective. I'm describing myself, myself as loathe to think about something. Hmm. Very interesting construction too. I'm loathe to even think. Yeah. So it would be written without the E there, right? Okay. No E there. Good. And then I said, hmm, yeah, I've always had disdain for masago. Hmm. Yeah. Since I've always, and this is an interesting thing to say, like disdain, because it does often imply more of like a lack of respect. So this would be sort of joking even to say this about food. Be like, oh, I have such disdain for masago. Like I would probably laugh at that. I'd be like, that's really funny because it usually does imply more like, um, you, you, you are critical of something. You, you disrespect something. Right. For bigger things than just food. Like food is just a taste. That's all it is. It's sort of one dimensional. Right. But for example, like political parties or certain ways of thinking would be more common for disdain, right? That's where you would use it more literally, right? This is kind of more joking, but yeah, you could say you have disdain for a certain political candidate or people who agree with a certain policy. That would be more because it's more of a lack of respect for that for sure. And then the last thing that was said was, yeah, I don't know what it is, but I loathe it. Right. So they just mean like, I really hate it. Yeah. And this is because it's a verb. This would be spelled with an E L O A T H E. All right. Good high level episode for our listeners today at the B2 C1 level. I love it. Yes, absolutely. We want to make sure that you don't miss the episode that inspired this and 2402 right here on All There's English. Should you call someone cheap in English where we said sometimes people are loathe to spend money. So use it as an adjective there. So it wouldn't have an E. Yes. And the connection piece here guys for today is it's not always, it doesn't have to be about these positive, amazing things that you're connecting over. You can build just as strong of a connection over things that you loathe and have disdain for. Okay. Absolutely. Yes. Right. I feel like that would even bond you closer with someone if you are sort of willing to admit these more negative things. I don't love everything, right? The people I'm close to, I want to build a stronger connection with. I'm going to admit the things that I really abhor. 
Especially if it's very, like most people love this thing, but you actually have disdain for it, right? Yeah. A hot take. Yes, exactly. That could build a really strong sense of trust and connection right there. So that's what we're going for. All right. Good stuff, Aubrey. We'll see you in the next episode of All There's English and guys hit the follow button now. Yes. Awesome. We'll see you guys next time. All right. Take care. Bye. Bye. Bye. Thanks for listening to All Ears English. Would you like to know your English level? Take our two-minute quiz. Go to allearsenglish.com forward slash fluency score. And if you believe in connection, not perfection, then hit subscribe now to make sure you don't miss anything. See you next time. Bye.
tests/cache/synthetic_multi_speaker.srt ADDED
@@ -0,0 +1,3 @@
1
+ 1
2
+ 00:00:00,000 --> 00:00:29,980
3
+ Thank you.
tests/cache/synthetic_multi_speaker.txt ADDED
@@ -0,0 +1 @@
1
+ Thank you.
tests/cache/synthetic_multi_speaker.wav ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eb6c0b965c182d27bd29d3a29117a3617934869132097ab6665ad4f4c811064
3
+ size 960044
tests/cache/transcribe/speaker_diarization/apple_podcast_episode/with_speaker_diarization_result.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "processing_status": "success",
3
+ "txt_file_path": "/root/cache/transcribe/distributed_transcription_1749530106.txt",
4
+ "srt_file_path": "/root/cache/transcribe/distributed_transcription_1749530106.srt",
5
+ "audio_duration": 1051.44,
6
+ "segment_count": 219,
7
+ "language_detected": "en",
8
+ "model_used": "turbo",
9
+ "distributed_processing": true,
10
+ "chunks_processed": 18,
11
+ "chunks_failed": 0,
12
+ "speaker_diarization_enabled": true,
13
+ "global_speaker_count": 0,
14
+ "speakers_detected": [],
15
+ "speaker_summary": {}
16
+ }
tests/cache/transcribe/speaker_diarization/apple_podcast_episode/without_speaker_diarization_result.json ADDED
@@ -0,0 +1,13 @@
1
+ {
2
+ "processing_status": "success",
3
+ "txt_file_path": "/root/cache/transcribe/distributed_transcription_1749529970.txt",
4
+ "srt_file_path": "/root/cache/transcribe/distributed_transcription_1749529970.srt",
5
+ "audio_duration": 1051.44,
6
+ "segment_count": 222,
7
+ "language_detected": "en",
8
+ "model_used": "turbo",
9
+ "distributed_processing": true,
10
+ "chunks_processed": 18,
11
+ "chunks_failed": 0,
12
+ "speaker_diarization_enabled": false
13
+ }
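The two result files above (with and without speaker diarization) share the schema used by every cached transcription result in this directory. A minimal, illustrative sketch of loading one of these fixtures and checking its shared invariants is shown below; the helper name and the commented-out usage path are assumptions for illustration, not part of the actual test suite.

```python
import json
from pathlib import Path

def check_transcription_result(path: Path) -> dict:
    """Illustrative only: load a cached result JSON and assert the
    invariants common to these fixtures."""
    result = json.loads(path.read_text())
    assert result["processing_status"] == "success"
    assert result["audio_duration"] > 0
    assert result["segment_count"] > 0
    if result.get("speaker_diarization_enabled"):
        # Speaker fields only appear when diarization was requested.
        assert "global_speaker_count" in result
        assert "speakers_detected" in result
    return result

# Hypothetical usage:
# check_transcription_result(Path(
#     "tests/cache/transcribe/speaker_diarization/apple_podcast_episode/"
#     "with_speaker_diarization_result.json"))
```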
tests/cache/transcribe/speaker_diarization/comprehensive_test_results.json ADDED
@@ -0,0 +1,108 @@
1
+ [
2
+ {
3
+ "audio_file": "tests/cache/xyz_podcast_episode.mp3",
4
+ "file_size_mb": 11.427632331848145,
5
+ "tests": {
6
+ "without_speaker_diarization": {
7
+ "config": {
8
+ "name": "without_speaker_diarization",
9
+ "enable_speaker_diarization": false,
10
+ "model_size": "turbo",
11
+ "description": "Baseline transcription without speaker identification"
12
+ },
13
+ "result": {
14
+ "processing_status": "success",
15
+ "txt_file_path": "/root/cache/transcribe/distributed_transcription_1749529774.txt",
16
+ "srt_file_path": "/root/cache/transcribe/distributed_transcription_1749529774.srt",
17
+ "audio_duration": 749.98,
18
+ "segment_count": 232,
19
+ "language_detected": "zh",
20
+ "model_used": "turbo",
21
+ "distributed_processing": true,
22
+ "chunks_processed": 13,
23
+ "chunks_failed": 0,
24
+ "speaker_diarization_enabled": false
25
+ },
26
+ "processing_time": 60.62069916725159
27
+ },
28
+ "with_speaker_diarization": {
29
+ "config": {
30
+ "name": "with_speaker_diarization",
31
+ "enable_speaker_diarization": true,
32
+ "model_size": "turbo",
33
+ "description": "Full transcription with speaker identification"
34
+ },
35
+ "result": {
36
+ "processing_status": "success",
37
+ "txt_file_path": "/root/cache/transcribe/distributed_transcription_1749529901.txt",
38
+ "srt_file_path": "/root/cache/transcribe/distributed_transcription_1749529901.srt",
39
+ "audio_duration": 749.98,
40
+ "segment_count": 241,
41
+ "language_detected": "zh",
42
+ "model_used": "turbo",
43
+ "distributed_processing": true,
44
+ "chunks_processed": 13,
45
+ "chunks_failed": 0,
46
+ "speaker_diarization_enabled": true,
47
+ "global_speaker_count": 0,
48
+ "speakers_detected": [],
49
+ "speaker_summary": {}
50
+ },
51
+ "processing_time": 127.10918402671814
52
+ }
53
+ }
54
+ },
55
+ {
56
+ "audio_file": "tests/cache/apple_podcast_episode.mp3",
57
+ "file_size_mb": 32.30221080780029,
58
+ "tests": {
59
+ "without_speaker_diarization": {
60
+ "config": {
61
+ "name": "without_speaker_diarization",
62
+ "enable_speaker_diarization": false,
63
+ "model_size": "turbo",
64
+ "description": "Baseline transcription without speaker identification"
65
+ },
66
+ "result": {
67
+ "processing_status": "success",
68
+ "txt_file_path": "/root/cache/transcribe/distributed_transcription_1749529970.txt",
69
+ "srt_file_path": "/root/cache/transcribe/distributed_transcription_1749529970.srt",
70
+ "audio_duration": 1051.44,
71
+ "segment_count": 222,
72
+ "language_detected": "en",
73
+ "model_used": "turbo",
74
+ "distributed_processing": true,
75
+ "chunks_processed": 18,
76
+ "chunks_failed": 0,
77
+ "speaker_diarization_enabled": false
78
+ },
79
+ "processing_time": 68.2933440208435
80
+ },
81
+ "with_speaker_diarization": {
82
+ "config": {
83
+ "name": "with_speaker_diarization",
84
+ "enable_speaker_diarization": true,
85
+ "model_size": "turbo",
86
+ "description": "Full transcription with speaker identification"
87
+ },
88
+ "result": {
89
+ "processing_status": "success",
90
+ "txt_file_path": "/root/cache/transcribe/distributed_transcription_1749530106.txt",
91
+ "srt_file_path": "/root/cache/transcribe/distributed_transcription_1749530106.srt",
92
+ "audio_duration": 1051.44,
93
+ "segment_count": 219,
94
+ "language_detected": "en",
95
+ "model_used": "turbo",
96
+ "distributed_processing": true,
97
+ "chunks_processed": 18,
98
+ "chunks_failed": 0,
99
+ "speaker_diarization_enabled": true,
100
+ "global_speaker_count": 0,
101
+ "speakers_detected": [],
102
+ "speaker_summary": {}
103
+ },
104
+ "processing_time": 136.49856114387512
105
+ }
106
+ }
107
+ }
108
+ ]
tests/cache/transcribe/speaker_diarization/download_log.json ADDED
@@ -0,0 +1 @@
1
+ []
tests/cache/transcribe/speaker_diarization/environment_status.json ADDED
@@ -0,0 +1,40 @@
1
+ {
2
+ "status": "unhealthy",
3
+ "timestamp": "2025-06-10T05:11:02.289001Z",
4
+ "whisper": {
5
+ "status": "healthy",
6
+ "default_model": "turbo",
7
+ "available_models": [
8
+ "tiny.en",
9
+ "tiny",
10
+ "base.en",
11
+ "base",
12
+ "small.en",
13
+ "small",
14
+ "medium.en",
15
+ "medium",
16
+ "large-v1",
17
+ "large-v2",
18
+ "large-v3",
19
+ "large",
20
+ "large-v3-turbo",
21
+ "turbo"
22
+ ],
23
+ "model_cache_exists": false,
24
+ "model_cache_directory": null,
25
+ "model_loaded": true,
26
+ "load_source": "download",
27
+ "whisper_version": "20240930"
28
+ },
29
+ "speaker_diarization": {
30
+ "status": "disabled",
31
+ "hf_token_available": false,
32
+ "speaker_cache_exists": false,
33
+ "speaker_cache_directory": null,
34
+ "config_exists": false,
35
+ "pipeline_loaded": false,
36
+ "pipeline_error": "HF_TOKEN not available",
37
+ "model_name": "pyannote/speaker-diarization-3.1"
38
+ },
39
+ "version": "1.0.0"
40
+ }
tests/cache/transcribe/speaker_diarization/local_vs_modal_comparison.json ADDED
@@ -0,0 +1,80 @@
1
+ {
2
+ "test_audio": "tests/cache/synthetic_multi_speaker.wav",
3
+ "local_transcription": {
4
+ "result": {
5
+ "txt_file_path": "tests/cache/synthetic_multi_speaker.txt",
6
+ "srt_file_path": "tests/cache/synthetic_multi_speaker.srt",
7
+ "audio_file": "tests/cache/synthetic_multi_speaker.wav",
8
+ "model_used": "turbo",
9
+ "segment_count": 1,
10
+ "audio_duration": 29.98,
11
+ "processing_status": "success",
12
+ "saved_files": [
13
+ "tests/cache/synthetic_multi_speaker.txt",
14
+ "tests/cache/synthetic_multi_speaker.srt"
15
+ ],
16
+ "speaker_diarization_enabled": false,
17
+ "global_speaker_count": 0,
18
+ "speaker_summary": {},
19
+ "language_detected": "en",
20
+ "text": "Thank you.",
21
+ "segments": [
22
+ {
23
+ "start": 0.0,
24
+ "end": 29.98,
25
+ "text": "Thank you.",
26
+ "speaker": null
27
+ }
28
+ ]
29
+ },
30
+ "processing_time": 19.540891885757446
31
+ },
32
+ "modal_transcription": {
33
+ "result": {
34
+ "txt_file_path": "/tmp/tmplc_epgzf.txt",
35
+ "srt_file_path": "/tmp/tmplc_epgzf.srt",
36
+ "audio_file": "/tmp/tmplc_epgzf.mp3",
37
+ "model_used": "turbo",
38
+ "segment_count": 3,
39
+ "audio_duration": 26.32,
40
+ "processing_status": "success",
41
+ "saved_files": [
42
+ "/tmp/tmplc_epgzf.txt",
43
+ "/tmp/tmplc_epgzf.srt"
44
+ ],
45
+ "speaker_diarization_enabled": true,
46
+ "global_speaker_count": 0,
47
+ "speaker_summary": {},
48
+ "language_detected": "en",
49
+ "text": "One time of the Laser And a herb Tre\u00f6z Fairly Its own",
50
+ "segments": [
51
+ {
52
+ "start": 0.0,
53
+ "end": 4.0,
54
+ "text": "One time of the Laser",
55
+ "speaker": null
56
+ },
57
+ {
58
+ "start": 12.0,
59
+ "end": 14.84,
60
+ "text": "And a herb Tre\u00f6z",
61
+ "speaker": null
62
+ },
63
+ {
64
+ "start": 24.0,
65
+ "end": 26.32,
66
+ "text": "Fairly Its own",
67
+ "speaker": null
68
+ }
69
+ ],
70
+ "distributed_processing": false
71
+ },
72
+ "processing_time": 94.39337015151978
73
+ },
74
+ "comparison": {
75
+ "processing_time_difference": 74.85247826576233,
76
+ "speaker_count_match": true,
77
+ "local_speakers": 0,
78
+ "modal_speakers": 0
79
+ }
80
+ }
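The `comparison` block at the end of this file is derived from the `local_transcription` and `modal_transcription` entries above it. A sketch of that derivation, reconstructed from the values in this fixture (the actual generating script is not included in this cache), might look like:

```python
def build_comparison(local: dict, modal: dict) -> dict:
    # Reconstructed from the fixture values; an assumption, not the suite's real code.
    return {
        "processing_time_difference": modal["processing_time"] - local["processing_time"],
        "speaker_count_match": (
            local["result"]["global_speaker_count"]
            == modal["result"]["global_speaker_count"]
        ),
        "local_speakers": local["result"]["global_speaker_count"],
        "modal_speakers": modal["result"]["global_speaker_count"],
    }
```

With the values above, this yields a processing-time difference of roughly 74.85 s and a speaker-count match of 0 vs. 0, consistent with the stored block.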
tests/cache/transcribe/speaker_diarization/pipeline_test.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "status": "skipped",
3
+ "reason": "HF_TOKEN not available"
4
+ }
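This pipeline check was skipped because `HF_TOKEN` was not set, consistent with the `"disabled"` speaker-diarization status in environment_status.json. A hedged pytest-style sketch of such a guard is shown below; the marker and test names are illustrative assumptions, not taken from the test suite.

```python
import os
import pytest

# Illustrative guard: skip diarization tests when no Hugging Face token
# is configured, mirroring the "HF_TOKEN not available" reason above.
requires_hf_token = pytest.mark.skipif(
    not os.environ.get("HF_TOKEN"),
    reason="HF_TOKEN not available",
)

@requires_hf_token
def test_speaker_diarization_pipeline_loads():
    # Placeholder body; a real test would load pyannote/speaker-diarization-3.1 here.
    assert os.environ.get("HF_TOKEN")
```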
tests/cache/transcribe/speaker_diarization/speaker_diarization_report.json ADDED
@@ -0,0 +1,83 @@
1
+ {
2
+ "test_summary": {
3
+ "total_files_tested": 2,
4
+ "timestamp": "2025-06-10 12:35:02",
5
+ "test_configurations": [
6
+ "without_speaker_diarization",
7
+ "with_speaker_diarization"
8
+ ]
9
+ },
10
+ "detailed_results": {
11
+ "xyz_podcast_episode.mp3": {
12
+ "file_size_mb": 11.427632331848145,
13
+ "tests": {
14
+ "without_speaker_diarization": {
15
+ "status": "success",
16
+ "processing_time": 60.62069916725159,
17
+ "segment_count": 232,
18
+ "audio_duration": 749.98,
19
+ "language_detected": "zh",
20
+ "speaker_diarization_enabled": false
21
+ },
22
+ "with_speaker_diarization": {
23
+ "status": "success",
24
+ "processing_time": 127.10918402671814,
25
+ "segment_count": 241,
26
+ "audio_duration": 749.98,
27
+ "language_detected": "zh",
28
+ "speaker_diarization_enabled": true,
29
+ "speakers_detected": 0,
30
+ "speaker_summary": {}
31
+ }
32
+ }
33
+ },
34
+ "apple_podcast_episode.mp3": {
35
+ "file_size_mb": 32.30221080780029,
36
+ "tests": {
37
+ "without_speaker_diarization": {
38
+ "status": "success",
39
+ "processing_time": 68.2933440208435,
40
+ "segment_count": 222,
41
+ "audio_duration": 1051.44,
42
+ "language_detected": "en",
43
+ "speaker_diarization_enabled": false
44
+ },
45
+ "with_speaker_diarization": {
46
+ "status": "success",
47
+ "processing_time": 136.49856114387512,
48
+ "segment_count": 219,
49
+ "audio_duration": 1051.44,
50
+ "language_detected": "en",
51
+ "speaker_diarization_enabled": true,
52
+ "speakers_detected": 0,
53
+ "speaker_summary": {}
54
+ }
55
+ }
56
+ }
57
+ },
58
+ "performance_analysis": {
59
+ "average_processing_time": 98.13044708967209,
60
+ "total_processing_time": 392.52178835868835,
61
+ "successful_tests": 4,
62
+ "total_tests": 4
63
+ },
64
+ "speaker_detection_analysis": {
65
+ "files_with_speaker_detection": 2,
66
+ "total_speakers_detected": 0,
67
+ "average_speakers_per_file": 0.0,
68
+ "speaker_detection_details": [
69
+ {
70
+ "file": "xyz_podcast_episode.mp3",
71
+ "speakers_detected": 0,
72
+ "speaker_summary": {},
73
+ "segments_with_speakers": 0
74
+ },
75
+ {
76
+ "file": "apple_podcast_episode.mp3",
77
+ "speakers_detected": 0,
78
+ "speaker_summary": {},
79
+ "segments_with_speakers": 0
80
+ }
81
+ ]
82
+ }
83
+ }
tests/cache/transcribe/speaker_diarization/speaker_diarization_report.md ADDED
@@ -0,0 +1,69 @@
1
+ # Speaker Diarization Test Report
2
+
3
+ Generated: 2025-06-10 12:35:02
4
+
5
+ ## Summary
6
+
7
+ - **Files Tested**: 2
8
+ - **Test Configurations**: 2
9
+
10
+ ## Performance Analysis
11
+
12
+ - **Successful Tests**: 4/4
13
+ - **Average Processing Time**: 98.13 seconds
14
+ - **Total Processing Time**: 392.52 seconds
15
+
16
+ ## Speaker Detection Analysis
17
+
18
+ - **Files with Speaker Detection**: 2
19
+ - **Total Speakers Detected**: 0
20
+ - **Average Speakers per File**: 0.0
21
+
22
+ ### Speaker Detection Details
23
+
24
+ #### xyz_podcast_episode.mp3
25
+ - Speakers: 0
26
+ - Segments with speakers: 0
27
+ - Speaker summary: {}
28
+
29
+ #### apple_podcast_episode.mp3
30
+ - Speakers: 0
31
+ - Segments with speakers: 0
32
+ - Speaker summary: {}
33
+
34
+ ## Detailed Results
35
+
36
+ ### xyz_podcast_episode.mp3
37
+ - File size: 11.43 MB
38
+
39
+ #### without_speaker_diarization ✅
40
+ - Processing time: 60.62s
41
+ - Segments: 232
42
+ - Duration: 749.98s
43
+ - Language: zh
44
+ - Speaker diarization: False
45
+
46
+ #### with_speaker_diarization ✅
47
+ - Processing time: 127.11s
48
+ - Segments: 241
49
+ - Duration: 749.98s
50
+ - Language: zh
51
+ - Speaker diarization: True
52
+
53
+ ### apple_podcast_episode.mp3
54
+ - File size: 32.30 MB
55
+
56
+ #### without_speaker_diarization ✅
57
+ - Processing time: 68.29s
58
+ - Segments: 222
59
+ - Duration: 1051.44s
60
+ - Language: en
61
+ - Speaker diarization: False
62
+
63
+ #### with_speaker_diarization ✅
64
+ - Processing time: 136.50s
65
+ - Segments: 219
66
+ - Duration: 1051.44s
67
+ - Language: en
68
+ - Speaker diarization: True
69
+
tests/cache/transcribe/speaker_diarization/test_summary.json ADDED
@@ -0,0 +1,18 @@
1
+ {
2
+ "timestamp": "2025-06-10 12:36:56",
3
+ "test_files_generated": [
4
+ "speaker_diarization_report.json",
5
+ "download_log.json",
6
+ "environment_status.json",
7
+ "comprehensive_test_results.json",
8
+ "test_summary.json",
9
+ "pipeline_test.json",
10
+ "local_vs_modal_comparison.json"
11
+ ],
12
+ "results_directory": "tests/cache/transcribe/speaker_diarization",
13
+ "test_conclusions": [
14
+ "Tested 2 audio files with speaker diarization",
15
+ "Successfully detected speakers in 0 tests",
16
+ "Speaker diarization environment status: disabled"
17
+ ]
18
+ }
tests/cache/transcribe/speaker_diarization/xyz_podcast_episode/with_speaker_diarization_result.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "processing_status": "success",
3
+ "txt_file_path": "/root/cache/transcribe/distributed_transcription_1749529901.txt",
4
+ "srt_file_path": "/root/cache/transcribe/distributed_transcription_1749529901.srt",
5
+ "audio_duration": 749.98,
6
+ "segment_count": 241,
7
+ "language_detected": "zh",
8
+ "model_used": "turbo",
9
+ "distributed_processing": true,
10
+ "chunks_processed": 13,
11
+ "chunks_failed": 0,
12
+ "speaker_diarization_enabled": true,
13
+ "global_speaker_count": 0,
14
+ "speakers_detected": [],
15
+ "speaker_summary": {}
16
+ }
tests/cache/transcribe/speaker_diarization/xyz_podcast_episode/without_speaker_diarization_result.json ADDED
@@ -0,0 +1,13 @@
1
+ {
2
+ "processing_status": "success",
3
+ "txt_file_path": "/root/cache/transcribe/distributed_transcription_1749529774.txt",
4
+ "srt_file_path": "/root/cache/transcribe/distributed_transcription_1749529774.srt",
5
+ "audio_duration": 749.98,
6
+ "segment_count": 232,
7
+ "language_detected": "zh",
8
+ "model_used": "turbo",
9
+ "distributed_processing": true,
10
+ "chunks_processed": 13,
11
+ "chunks_failed": 0,
12
+ "speaker_diarization_enabled": false
13
+ }
tests/cache/transcribe/xyz_podcast_episode.srt ADDED
@@ -0,0 +1,584 @@
1
+ 1
2
+ 00:00:00,000 --> 00:00:05,040
3
+ Make detailed research work.
4
+
5
+ 2
6
+ 00:00:05,060 --> 00:00:06,440
7
+ Hello everyone.
8
+
9
+ 3
10
+ 00:00:06,440 --> 00:00:08,060
11
+ Welcome to Hocening Face
12
+
13
+ 4
14
+ 00:00:08,060 --> 00:00:13,640
15
+ Mayuya Lund
16
+
17
+ 5
18
+ 00:00:13,640 --> 00:00:14,560
19
+ Monday morning
20
+
21
+ 6
22
+ 00:00:14,560 --> 00:00:18,160
23
+ Numerous examine of Hocening Face
24
+
25
+ 7
26
+ 00:00:18,160 --> 00:00:18,760
27
+ at 28 February
28
+
29
+ 8
30
+ 00:00:18,760 --> 00:00:19,879
31
+ in Bed 7
32
+
33
+ 9
34
+ 00:00:19,879 --> 00:00:20,600
35
+ We call it
36
+
37
+ 10
38
+ 00:00:20,600 --> 00:00:24,120
39
+ globally
40
+
41
+ 11
42
+ 00:00:24,120 --> 00:00:25,219
43
+ God's grace
44
+
45
+ 12
46
+ 00:00:25,219 --> 00:00:27,240
47
+ the
48
+
49
+ 13
50
+ 00:00:27,240 --> 00:00:29,839
51
+ yun
52
+
53
+ 14
54
+ 00:00:59,840 --> 00:01:01,840
55
+ ...the women's provider labismate ITkaa and DANS-G breast massage,
56
+
57
+ 15
58
+ 00:01:01,840 --> 00:01:03,840
59
+ ...who gets MTM K timer for exam,
60
+
61
+ 16
62
+ 00:01:03,840 --> 00:01:05,840
63
+ ...sealing wisdomist your health support,
64
+
65
+ 17
66
+ 00:01:05,840 --> 00:01:08,840
67
+ ...and personal youth understanding of CARM ASS.
68
+
69
+ 18
70
+ 00:01:08,840 --> 00:01:11,840
71
+ ...This understanding of section of competition
72
+
73
+ 19
74
+ 00:01:11,840 --> 00:01:13,840
75
+ ...互仲 with potential medical infection in Maori American health care.
76
+
77
+ 20
78
+ 00:01:13,840 --> 00:01:14,840
79
+ ...and fencing aid in WIC,
80
+
81
+ 21
82
+ 00:01:14,840 --> 00:01:16,840
83
+ ...and teaching ethical media lying
84
+
85
+ 22
86
+ 00:01:16,840 --> 00:01:18,840
87
+ ...of vehicles online,
88
+
89
+ 23
90
+ 00:01:18,840 --> 00:01:19,879
91
+ ...the training staff
92
+
93
+ 24
94
+ 00:01:19,879 --> 00:01:21,840
95
+ ...ηnsад and participants,
96
+
97
+ 25
98
+ 00:01:21,840 --> 00:01:23,840
99
+ "... guitarism starts the project
100
+
101
+ 26
102
+ 00:01:23,840 --> 00:01:25,840
103
+ ...it has started in a constant meeting
104
+
105
+ 27
106
+ 00:01:25,840 --> 00:01:26,740
107
+ ....which means
108
+
109
+ 28
110
+ 00:01:26,740 --> 00:01:27,840
111
+ ...to get them medical treatment from results
112
+
113
+ 29
114
+ 00:01:57,840 --> 00:02:00,439
115
+ and gave birth to a laptop as a merged.
116
+
117
+ 30
118
+ 00:02:00,000 --> 00:02:02,359
119
+ So the back of those countries that traditionally existed throughout sea 25 years,
120
+
121
+ 31
122
+ 00:02:02,359 --> 00:02:03,980
123
+ it spoke about the North Korea, fiscal economy,
124
+
125
+ 32
126
+ 00:02:03,980 --> 00:02:05,359
127
+ Saudi Arabia, Europe, Korea, and other backiers!
128
+
129
+ 33
130
+ 00:02:05,359 --> 00:02:08,139
131
+ Test papers were strengthened using financial resources from房 cities,
132
+
133
+ 34
134
+ 00:02:08,139 --> 00:02:09,020
135
+ public land originating such incidents,
136
+
137
+ 35
138
+ 00:02:09,020 --> 00:02:12,139
139
+ maximizing medical information ratio,
140
+
141
+ 36
142
+ 00:02:12,139 --> 00:02:12,360
143
+ medium-sized government,
144
+
145
+ 37
146
+ 00:02:12,360 --> 00:02:14,819
147
+ tax Americas,
148
+
149
+ 38
150
+ 00:02:14,819 --> 00:02:17,020
151
+ communication AP soared stock economy
152
+
153
+ 39
154
+ 00:02:17,340 --> 00:02:18,479
155
+ West Air populations belong
156
+
157
+ 40
158
+ 00:02:18,479 --> 00:02:21,780
159
+ to CSA State Laramoste trade-hift China
160
+
161
+ 41
162
+ 00:02:21,860 --> 00:02:23,560
163
+ also used 조hezura interest impact
164
+
165
+ 42
166
+ 00:02:23,560 --> 00:02:26,120
167
+ In mountains,
168
+
169
+ 43
170
+ 00:02:26,120 --> 00:02:27,020
171
+ on top of it,
172
+
173
+ 44
174
+ 00:02:27,020 --> 00:02:32,879
175
+ Yeah, the information of this method isᴃ
176
+
177
+ 45
178
+ 00:02:32,879 --> 00:02:35,180
179
+ dependency Don't you serious?
180
+
181
+ 46
182
+ 00:02:35,180 --> 00:02:37,680
183
+ OK, then
184
+
185
+ 47
186
+ 00:02:37,680 --> 00:02:47,180
187
+ Well, I, ah, let's all see a way in the screen.
188
+
189
+ 48
190
+ 00:02:47,180 --> 00:02:50,840
191
+ Directora Mikhail
192
+
193
+ 49
194
+ 00:02:50,840 --> 00:02:52,819
195
+ calculator
196
+
197
+ 50
198
+ 00:02:52,819 --> 00:02:53,860
199
+ did not close whisk
200
+
201
+ 51
202
+ 00:02:53,860 --> 00:02:55,180
203
+ the
204
+
205
+ 52
206
+ 00:02:55,180 --> 00:02:57,259
207
+ This is a lot of termination.
208
+
209
+ 53
210
+ 00:02:57,500 --> 00:02:59,460
211
+ content reads기가 26ètres
212
+
213
+ 54
214
+ 00:02:59,479 --> 00:03:00,599
215
+ G Mail in assets
216
+
217
+ 55
218
+ 00:03:00,800 --> 00:03:01,139
219
+ ok
220
+
221
+ 56
222
+ 00:03:01,420 --> 00:03:02,439
223
+ Inlunes
224
+
225
+ 57
226
+ 00:03:02,719 --> 00:03:03,599
227
+ sk i
228
+
229
+ 58
230
+ 00:03:03,599 --> 00:03:03,900
231
+ expressed
232
+
233
+ 59
234
+ 00:03:03,900 --> 00:03:05,000
235
+ n snel
236
+
237
+ 60
238
+ 00:03:05,000 --> 00:03:15,400
239
+ can floats
240
+
241
+ 61
242
+ 00:03:15,599 --> 00:03:21,079
243
+ fresh
244
+
245
+ 62
246
+ 00:03:21,079 --> 00:03:23,219
247
+ can also be ordinaryru忤3-i talker
248
+
249
+ 63
250
+ 00:03:23,219 --> 00:03:25,000
251
+ from LYNT twelve to LV2's
252
+
253
+ 64
254
+ 00:03:25,000 --> 00:03:25,900
255
+ United Army
256
+
257
+ 65
258
+ 00:03:25,900 --> 00:03:26,980
259
+ hornata
260
+
261
+ 66
262
+ 00:03:26,980 --> 00:03:27,400
263
+ Motor방
264
+
265
+ 67
266
+ 00:03:40,740 --> 00:03:42,300
267
+ automatic
268
+
269
+ 68
270
+ 00:03:42,300 --> 00:03:42,480
271
+ bolt
272
+
273
+ 69
274
+ 00:03:42,480 --> 00:03:42,819
275
+ counter hop
276
+
277
+ 70
278
+ 00:03:42,980 --> 00:03:43,699
279
+ is
280
+
281
+ 71
282
+ 00:03:43,699 --> 00:03:45,139
283
+ to be ripped off
284
+
285
+ 72
286
+ 00:03:45,139 --> 00:03:46,340
287
+ more
288
+
289
+ 73
290
+ 00:03:46,340 --> 00:03:46,539
291
+ caps
292
+
293
+ 74
294
+ 00:03:46,539 --> 00:03:47,560
295
+ And
296
+
297
+ 75
298
+ 00:03:47,560 --> 00:03:48,039
299
+ LYNT
300
+
301
+ 76
302
+ 00:03:48,039 --> 00:03:50,459
303
+ ydia
304
+
305
+ 77
306
+ 00:03:50,459 --> 00:03:51,839
307
+ The anime or version of the異hammer,
308
+
309
+ 78
310
+ 00:03:51,959 --> 00:04:04,899
311
+ the
312
+
313
+ 79
314
+ 00:05:00,000 --> 00:05:04,000
315
+ the key to the development of the Air-Li-Mew-Hat-T-Going-Li-S.
316
+
317
+ 80
318
+ 00:05:06,000 --> 00:05:09,000
319
+ This is a detailed description of the development of the D-3-Pen-Lin-Wen-T-Mew-T-Mew-T-Po
320
+
321
+ 81
322
+ 00:05:39,000 --> 00:05:45,000
323
+ The core of the development of the Air-Li-Mew-T-T-Going-Li-T-Going-Li-Mew-T-T-Going-Li-T-Going-Li-Mew-T-Po
324
+
325
+ 82
326
+ 00:05:46,000 --> 00:05:52,000
327
+ The core of the development of the Air-Li-Mew-T-T-Po and the Air-Li-Mew-T-Going-T-Going-T-Going-T-G, has been developed for 3 decades
328
+
329
+ 83
330
+ 00:05:53,000 --> 00:05:55,000
331
+ The D-Pen-Lin-Wen-T-T-T-Po has been developed for 3 years
332
+
333
+ 84
334
+ 00:05:56,000 --> 00:05:57,000
335
+ The D-Pen-Lin-T-Po has even been developed for 1-5 days
336
+
337
+ 85
338
+ 00:05:57,000 --> 00:06:01,000
339
+ So we will try to reach a certain stage of the D-3-Lin-Wen-T-Po
340
+
341
+ 86
342
+ 00:06:00,000 --> 00:06:05,160
343
+ Xi'an Tавливator and Golden Kill from Greece
344
+
345
+ 87
346
+ 00:06:05,160 --> 00:06:07,300
347
+ With primitive specifications, gave away twoAND Mobile our
348
+
349
+ 88
350
+ 00:06:07,300 --> 00:06:09,379
351
+ Animal Fisheries to sit on the blanket
352
+
353
+ 89
354
+ 00:06:09,379 --> 00:06:15,379
355
+ with the instructions that PEOPLE sent it for
356
+
357
+ 90
358
+ 00:06:15,379 --> 00:06:18,279
359
+ For example, citizenship, human rights,
360
+
361
+ 91
362
+ 00:06:18,279 --> 00:06:22,279
363
+ but the stats and only dangerous times have been spread
364
+
365
+ 92
366
+ 00:06:22,279 --> 00:06:27,139
367
+ after acknowledging governments of African society
368
+
369
+ 93
370
+ 00:06:27,139 --> 00:06:33,240
371
+ food in the hospital in the non-exceptive manner
372
+
373
+ 94
374
+ 00:06:33,240 --> 00:06:36,959
375
+ birth in Indonesia just 10.7
376
+
377
+ 95
378
+ 00:06:36,959 --> 00:06:41,959
379
+ days
380
+
381
+ 96
382
+ 00:06:41,959 --> 00:06:44,399
383
+ training in 1943
384
+
385
+ 97
386
+ 00:07:14,399 --> 00:07:17,319
387
+ can go up to the sampling distance instant by technology
388
+
389
+ 98
390
+ 00:07:17,319 --> 00:07:19,420
391
+ connected to linear
392
+
393
+ 99
394
+ 00:07:19,420 --> 00:07:21,720
395
+ this can be used with software
396
+
397
+ 100
398
+ 00:07:21,720 --> 00:07:24,960
399
+ it can 약ify produce technological
400
+
401
+ 101
402
+ 00:07:24,960 --> 00:07:26,480
403
+ availability.
404
+
405
+ 102
406
+ 00:07:27,560 --> 00:07:28,600
407
+ We manage to correct this
408
+
409
+ 103
410
+ 00:07:28,600 --> 00:07:29,199
411
+ problem
412
+
413
+ 104
414
+ 00:07:29,199 --> 00:07:30,819
415
+ and to long header
416
+
417
+ 105
418
+ 00:07:30,819 --> 00:07:31,660
419
+ all realized
420
+
421
+ 106
422
+ 00:07:31,660 --> 00:07:32,300
423
+ a quick look at the
424
+
425
+ 107
426
+ 00:07:32,300 --> 00:07:33,240
427
+ complete
428
+
429
+ 108
430
+ 00:07:33,240 --> 00:07:37,960
431
+ cie
432
+
433
+ 109
434
+ 00:07:37,960 --> 00:07:42,439
435
+ you have much to
436
+
437
+ 110
438
+ 00:07:42,439 --> 00:07:43,560
439
+ adjust purpose
440
+
441
+ 111
442
+ 00:08:00,000 --> 00:08:03,339
443
+ as well as critical social denial of grasses areski
444
+
445
+ 112
446
+ 00:08:03,339 --> 00:08:14,439
447
+ in the hauppytek by the assignment of
448
+
449
+ 113
450
+ 00:08:14,439 --> 00:08:17,639
451
+ Galaxy BerryB Cookies
452
+
453
+ 114
454
+ 00:08:17,639 --> 00:08:21,019
455
+ 170g.
456
+
457
+ 115
458
+ 00:08:21,019 --> 00:08:23,240
459
+ 24
460
+
461
+ 116
462
+ 00:08:23,240 --> 00:08:25,680
463
+ but
464
+
465
+ 117
466
+ 00:08:25,680 --> 00:08:28,759
467
+ ,
468
+
469
+ 118
470
+ 00:08:28,759 --> 00:08:30,259
471
+ In the future, even though it was already built.
472
+
473
+ 119
474
+ 00:08:30,259 --> 00:08:51,360
475
+ We saw
476
+
477
+ 120
478
+ 00:08:51,360 --> 00:08:54,080
479
+ In our computer , acquainted with all the phones and phone,
480
+
481
+ 121
482
+ 00:08:54,080 --> 00:08:56,159
483
+ technology sensors navigation enlightenment
484
+
485
+ 122
486
+ 00:08:56,159 --> 00:09:01,480
487
+ the AbeBA Androidron 2010
488
+
489
+ 123
490
+ 00:09:01,480 --> 00:09:04,659
491
+ The future gradually shows all kinds of passengers
492
+
493
+ 124
494
+ 00:09:04,659 --> 00:09:08,360
495
+ I guess when we took documentaries ,
496
+
497
+ 125
498
+ 00:09:08,360 --> 00:09:10,180
499
+ we play with no phones,
500
+
501
+ 126
502
+ 00:09:10,180 --> 00:09:15,700
503
+ we focus to witness buying products
504
+
505
+ 127
506
+ 00:09:46,680 --> 00:09:49,860
507
+ This is the theme of the blockings drama,
508
+
509
+ 128
510
+ 00:09:49,860 --> 00:09:52,600
511
+ so early in the time Aladdin phased me,
512
+
513
+ 129
514
+ 00:09:52,600 --> 00:09:54,279
515
+ grandparents aimotableку
516
+
517
+ 130
518
+ 00:09:54,279 --> 00:09:54,480
519
+ её
520
+
521
+ 131
522
+ 00:09:54,480 --> 00:09:55,600
523
+ لجне
524
+
525
+ 132
526
+ 00:09:55,600 --> 00:10:12,600
527
+ ..
528
+
529
+ 133
530
+ 00:10:30,000 --> 00:10:33,600
531
+ Geez it's harder to lose its mind to think about the extremely sexy person.
532
+
533
+ 134
534
+ 00:10:33,600 --> 00:10:53,379
535
+ On explode forgive the
536
+
537
+ 135
538
+ 00:11:23,379 --> 00:11:25,860
539
+ This series's organizations' various eruptions
540
+
541
+ 136
542
+ 00:11:26,059 --> 00:11:27,220
543
+ were declared shopping in the city
544
+
545
+ 137
546
+ 00:11:27,440 --> 00:11:28,440
547
+ And for example,
548
+
549
+ 138
550
+ 00:11:28,440 --> 00:11:34,860
551
+ We will compartmentit the facebook pages
552
+
553
+ 139
554
+ 00:11:35,259 --> 00:11:38,819
555
+ The facebook page by allegedly
556
+
557
+ 140
558
+ 00:11:39,460 --> 00:11:41,299
559
+ If there is a particular error,
560
+
561
+ 141
562
+ 00:11:41,379 --> 00:11:44,179
563
+ Then they will guess
564
+
565
+ 142
566
+ 00:11:44,460 --> 00:11:45,700
567
+ ''Because it will be useless''
568
+
569
+ 143
570
+ 00:11:45,740 --> 00:11:48,379
571
+ Because of his type of fate and exceptions
572
+
573
+ 144
574
+ 00:11:48,379 --> 00:11:51,340
575
+ ...to banned the energy level.
576
+
577
+ 145
578
+ 00:11:51,460 --> 00:11:56,220
579
+ However, again it doesn't need to maintain highs.
580
+
581
+ 146
582
+ 00:11:56,299 --> 00:12:00,460
583
+ Thank you so much!
584
+
tests/cache/transcribe/xyz_podcast_episode.txt ADDED
@@ -0,0 +1 @@
1
+ Make detailed research work. Hello everyone. Welcome to Hocening Face Mayuya Lund Monday morning Numerous examine of Hocening Face at 28 February in Bed 7 We call it globally God's grace the yun ...the women's provider labismate ITkaa and DANS-G breast massage, ...who gets MTM K timer for exam, ...sealing wisdomist your health support, ...and personal youth understanding of CARM ASS. ...This understanding of section of competition ...互仲 with potential medical infection in Maori American health care. ...and fencing aid in WIC, ...and teaching ethical media lying ...of vehicles online, ...the training staff ...ηnsад and participants, "... guitarism starts the project ...it has started in a constant meeting ....which means ...to get them medical treatment from results and gave birth to a laptop as a merged. So the back of those countries that traditionally existed throughout sea 25 years, it spoke about the North Korea, fiscal economy, Saudi Arabia, Europe, Korea, and other backiers! Test papers were strengthened using financial resources from房 cities, public land originating such incidents, maximizing medical information ratio, medium-sized government, tax Americas, communication AP soared stock economy West Air populations belong to CSA State Laramoste trade-hift China also used 조hezura interest impact In mountains, on top of it, Yeah, the information of this method isᴃ dependency Don't you serious? OK, then Well, I, ah, let's all see a way in the screen. Directora Mikhail calculator did not close whisk the This is a lot of termination. content reads기가 26ètres G Mail in assets ok Inlunes sk i expressed n snel can floats fresh can also be ordinaryru忤3-i talker from LYNT twelve to LV2's United Army hornata Motor방 automatic bolt counter hop is to be ripped off more caps And LYNT ydia The anime or version of the異hammer, the the key to the development of the Air-Li-Mew-Hat-T-Going-Li-S. This is a detailed description of the development of the D-3-Pen-Lin-Wen-T-Mew-T-Mew-T-Po The core of the development of the Air-Li-Mew-T-T-Going-Li-T-Going-Li-Mew-T-T-Going-Li-T-Going-Li-Mew-T-Po The core of the development of the Air-Li-Mew-T-T-Po and the Air-Li-Mew-T-Going-T-Going-T-Going-T-G, has been developed for 3 decades The D-Pen-Lin-Wen-T-T-T-Po has been developed for 3 years The D-Pen-Lin-T-Po has even been developed for 1-5 days So we will try to reach a certain stage of the D-3-Lin-Wen-T-Po Xi'an Tавливator and Golden Kill from Greece With primitive specifications, gave away twoAND Mobile our Animal Fisheries to sit on the blanket with the instructions that PEOPLE sent it for For example, citizenship, human rights, but the stats and only dangerous times have been spread after acknowledging governments of African society food in the hospital in the non-exceptive manner birth in Indonesia just 10.7 days training in 1943 can go up to the sampling distance instant by technology connected to linear this can be used with software it can 약ify produce technological availability. We manage to correct this problem and to long header all realized a quick look at the complete cie you have much to adjust purpose as well as critical social denial of grasses areski in the hauppytek by the assignment of Galaxy BerryB Cookies 170g. 24 but , In the future, even though it was already built. 
We saw In our computer , acquainted with all the phones and phone, technology sensors navigation enlightenment the AbeBA Androidron 2010 The future gradually shows all kinds of passengers I guess when we took documentaries , we play with no phones, we focus to witness buying products This is the theme of the blockings drama, so early in the time Aladdin phased me, grandparents aimotableку её لجне .. Geez it's harder to lose its mind to think about the extremely sexy person. On explode forgive the This series's organizations' various eruptions were declared shopping in the city And for example, We will compartmentit the facebook pages The facebook page by allegedly If there is a particular error, Then they will guess ''Because it will be useless'' Because of his type of fate and exceptions ...to banned the energy level. However, again it doesn't need to maintain highs. Thank you so much!
tests/cache/xyz_podcast_episode.mp3 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59cdb1ed827a67746930b5bdd7e122a325758f4381598dd329d87e7215bed756
3
+ size 11982741
tests/cache/xyz_podcast_episode.srt ADDED
@@ -0,0 +1,1231 @@
1
+ 1
2
+ 00:00:00,000 --> 00:00:06,600
3
+ 各位听众朋友大家好
4
+
5
+ 2
6
+ 00:00:06,600 --> 00:00:09,980
7
+ 欢迎收听Hugging Face每日爱论文速递周末特辑
8
+
9
+ 3
10
+ 00:00:09,980 --> 00:00:14,280
11
+ 每周日准时为您带来一周内Hugging Face向最受欢迎的论文汇总
12
+
13
+ 4
14
+ 00:00:14,280 --> 00:00:18,379
15
+ 本期节目涵盖的时间段是2025年6月2日至6月8日
16
+
17
+ 5
18
+ 00:00:18,379 --> 00:00:25,199
19
+ 在本期节目中我们将为您精选五篇备受关注的论文内容涵盖了通过强化学习RL
20
+
21
+ 6
22
+ 00:00:25,199 --> 00:00:28,400
23
+ 提升大型语言模型LLM的自我改进
24
+
25
+ 7
26
+ 00:00:28,399 --> 00:00:33,079
27
+ 高商仇恳在推理中的应用延长的强化学习对LM推理的拓展
28
+
29
+ 8
30
+ 00:00:33,079 --> 00:00:37,859
31
+ 测试时驱动的大模型快慢思考框架以及一种经济高效的视觉
32
+
33
+ 9
34
+ 00:00:37,859 --> 00:00:39,500
35
+ 语言动作模型
36
+
37
+ 10
38
+ 00:00:39,500 --> 00:00:44,159
39
+ 接下来让我们一起深入这些前沿研究探索AI技术的最新进展
40
+
41
+ 11
42
+ 00:00:44,159 --> 00:00:45,340
43
+ 节目正式开始
44
+
45
+ 12
46
+ 00:00:45,340 --> 00:00:53,500
47
+ 本期节目的第一篇论文是反思重视奖励通过强化学习实现LM的自我提升
48
+
49
+ 13
50
+ 00:00:53,500 --> 00:00:57,039
51
+ 这篇论文在Hugging Face社区获得了169个点赞
52
+
53
+ 14
54
+ 00:00:57,039 --> 00:00:59,759
55
+ 显示出其研究价值和社区的关注度
56
+
57
+ 15
58
+ 00:00:59,759 --> 00:01:04,879
59
+ 这篇论文的核心目标是提升大型语言模型LMS的性能
60
+
61
+ 16
62
+ 00:01:04,879 --> 00:01:06,700
63
+ 通过一种名为反思
64
+
65
+ 17
66
+ 00:01:06,700 --> 00:01:07,359
67
+ 重视
68
+
69
+ 18
70
+ 00:01:07,359 --> 00:01:09,239
71
+ 奖励的新框架来实现
72
+
73
+ 19
74
+ 00:01:09,239 --> 00:01:13,219
75
+ 这个框架的关键在于让模型在任务失败后进行自我反思
76
+
77
+ 20
78
+ 00:01:13,219 --> 00:01:14,400
79
+ 分析失败原因
80
+
81
+ 21
82
+ 00:01:14,400 --> 00:01:17,799
83
+ 并在再次尝试时利用这些反思来改进表现
84
+
85
+ 22
86
+ 00:01:17,799 --> 00:01:18,759
87
+ 具体来说
88
+
89
+ 23
90
+ 00:01:18,759 --> 00:01:22,099
91
+ 模型在失败后会生成一段自我反思的评论
92
+
93
+ 24
94
+ 00:01:22,099 --> 00:01:23,579
95
+ 解释哪里出了问题
96
+
97
+ 25
98
+ 00:01:23,579 --> 00:01:25,019
99
+ 并提出改进建议
100
+
101
+ 26
102
+ 00:01:25,019 --> 00:01:28,179
103
+ 然后模型会根据这些反思再次尝试任务
104
+
105
+ 27
106
+ 00:01:28,179 --> 00:01:29,879
107
+ 如果第二次尝试成功
108
+
109
+ 28
110
+ 00:01:29,879 --> 00:01:32,140
111
+ 模型在反思阶段生成的内容
112
+
113
+ 29
114
+ 00:01:32,140 --> 00:01:34,920
115
+ 会通过一种名为Group Relative Policy Optimization
116
+
117
+ 30
118
+ 00:01:34,920 --> 00:01:36,699
119
+ Gruple的算法获得奖励
120
+
121
+ 31
122
+ 00:01:36,699 --> 00:01:39,239
123
+ 从而进一步优化其自我反思的能力
124
+
125
+ 32
126
+ 00:01:39,239 --> 00:01:42,319
127
+ 论文中使用了多个模型进行实验
128
+
129
+ 33
130
+ 00:01:42,319 --> 00:01:43,379
131
+ 包括Cornar
132
+
133
+ 34
134
+ 00:01:43,379 --> 00:01:44,519
135
+ Lama 3.1
136
+
137
+ 35
138
+ 00:01:44,519 --> 00:01:45,599
139
+ Fi 3.5
140
+
141
+ 36
142
+ 00:01:45,599 --> 00:01:46,799
143
+ Mini Instruct等
144
+
145
+ 37
146
+ 00:01:46,799 --> 00:01:48,579
147
+ 并基于两个主要数据集
148
+
149
+ 38
150
+ 00:01:48,579 --> 00:01:49,780
151
+ Epojin和Countdown
152
+
153
+ 39
154
+ 00:01:49,780 --> 00:01:52,780
155
+ Epojin数据集包含6万个高质量的函数调用
156
+
157
+ 40
158
+ 00:01:52,780 --> 00:01:55,140
159
+ 要求模型生成正确的工具调用
160
+
161
+ 41
162
+ 00:01:55,140 --> 00:01:56,299
163
+ Countdown数据集
164
+
165
+ 42
166
+ 00:01:56,299 --> 00:01:59,280
167
+ 则包含45万个数字列表和目标数字
168
+
169
+ 43
170
+ 00:01:59,280 --> 00:02:03,000
171
+ 要求模型通过这些数字生成正确的方程来达到目标
172
+
173
+ 44
174
+ 00:02:03,000 --> 00:02:04,299
175
+ 研究结果显示
176
+
177
+ 45
178
+ 00:02:04,299 --> 00:02:05,200
179
+ 这种反思
180
+
181
+ 46
182
+ 00:02:05,200 --> 00:02:05,820
183
+ 重视
184
+
185
+ 47
186
+ 00:02:05,820 --> 00:02:09,219
187
+ 奖励的方法在提升模型性能方面非常有效
188
+
189
+ 48
190
+ 00:02:09,219 --> 00:02:11,159
191
+ 特别是在Epojin数据集上
192
+
193
+ 49
194
+ 00:02:11,159 --> 00:02:13,639
195
+ 经过Gurple训练的Quin27B模型
196
+
197
+ 50
198
+ 00:02:13,639 --> 00:02:17,020
199
+ 甚至超过了未经过训练的Quin272B模型
200
+
201
+ 51
202
+ 00:02:17,020 --> 00:02:17,639
203
+ 此外
204
+
205
+ 52
206
+ 00:02:17,639 --> 00:02:21,620
207
+ 自我反思显著提升了模型在Countdown数据集上的表现
208
+
209
+ 53
210
+ 00:02:21,620 --> 00:02:24,379
211
+ 尤其是对于那些初始表现较差的模型
212
+
213
+ 54
214
+ 00:02:24,379 --> 00:02:26,000
215
+ 论文还指出
216
+
217
+ 55
218
+ 00:02:26,000 --> 00:02:30,139
219
+ 这种自我反思的方法不仅增强了模型解决复杂任务的能力
220
+
221
+ 56
222
+ 00:02:30,139 --> 00:02:33,599
223
+ 还使得较小的模型能够超越较大的未训练模型
224
+
225
+ 57
226
+ 00:02:33,599 --> 00:02:36,359
227
+ 显示出其在效率和通用性上的优势
228
+
229
+ 58
230
+ 00:02:36,359 --> 00:02:36,800
231
+ 此外
232
+
233
+ 59
234
+ 00:02:36,800 --> 00:02:39,780
235
+ 研究中几乎没有观察到灾难性遗忘的现象
236
+
237
+ 60
238
+ 00:02:39,780 --> 00:02:43,380
239
+ 表明这种方法在模型乳棒性方面也有显著提升
240
+
241
+ 61
242
+ 00:02:43,380 --> 00:02:44,219
243
+ 总的来说
244
+
245
+ 62
246
+ 00:02:44,219 --> 00:02:46,840
247
+ 这篇论文提出了一种创新的方法
248
+
249
+ 63
250
+ 00:02:46,840 --> 00:02:48,660
251
+ 通过强化学习的方式
252
+
253
+ 64
254
+ 00:02:48,660 --> 00:02:51,260
255
+ 让LLMS进行自我反思和改进
256
+
257
+ 65
258
+ 00:02:51,260 --> 00:02:53,800
259
+ 从而在复杂任务上取得更好���表现
260
+
261
+ 66
262
+ 00:02:54,500 --> 00:02:57,300
263
+ 这是本期节目的第二篇论文
264
+
265
+ 67
266
+ 00:02:57,300 --> 00:02:59,300
267
+ 题目是超越8020法则
268
+
269
+ 68
270
+ 00:02:59,300 --> 00:03:03,220
271
+ 高商少数Token驱动LLM推理的有效强化学习
272
+
273
+ 69
274
+ 00:03:03,219 --> 00:03:07,319
275
+ 这篇论文目前在Hugging Face社区获得了130个点赞
276
+
277
+ 70
278
+ 00:03:07,319 --> 00:03:10,120
279
+ 显示出它在学术界引起了广泛关注
280
+
281
+ 71
282
+ 00:03:10,120 --> 00:03:12,300
283
+ 这篇论文的核心研究问题是
284
+
285
+ 72
286
+ 00:03:12,300 --> 00:03:16,400
287
+ 在大型语言模型LLMS的验证奖励强化学习
288
+
289
+ 73
290
+ 00:03:16,400 --> 00:03:17,379
291
+ RLVR中
292
+
293
+ 74
294
+ 00:03:17,379 --> 00:03:20,120
295
+ 不同类型的Token如何影响推理性能
296
+
297
+ 75
298
+ 00:03:20,199 --> 00:03:24,680
299
+ 以及是否可以通过专注于特定类型的Token来提升RLVR的效果
300
+
301
+ 76
302
+ 00:03:24,680 --> 00:03:26,719
303
+ 研究团队提出了一个假设
304
+
305
+ 77
306
+ 00:03:26,719 --> 00:03:30,699
307
+ 高商的少数Token作为推理路径中的关键分支点
308
+
309
+ 78
310
+ 00:03:30,699 --> 00:03:34,780
311
+ 比低商的多数Token更能有效驱动RLVR他们进一步假设
312
+
313
+ 79
314
+ 00:03:34,780 --> 00:03:37,839
315
+ 通过限制策略梯度更新到这些高商Token
316
+
317
+ 80
318
+ 00:03:37,839 --> 00:03:41,699
319
+ 可以在保持或提升性能的同时提供计算上的优势
320
+
321
+ 81
322
+ 00:03:41,699 --> 00:03:43,599
323
+ 为了验证这一假设
324
+
325
+ 82
326
+ 00:03:43,599 --> 00:03:46,079
327
+ 研究团队进行了详细的实验设计
328
+
329
+ 83
330
+ 00:03:46,199 --> 00:03:51,839
331
+ 他们选择了捆3LLM家族的8B 14B和32B基础模型作为研究对象
332
+
333
+ 84
334
+ 00:03:51,839 --> 00:03:55,219
335
+ 通过链式思维COT推理中的Token商模式分析
336
+
337
+ 85
338
+ 00:03:55,219 --> 00:03:57,459
339
+ 结合控制实验来调节这根商
340
+
341
+ 86
342
+ 00:03:57,460 --> 00:04:00,620
343
+ 并在RLVR训练中选择性的更新策略梯度
344
+
345
+ 87
346
+ 00:04:00,620 --> 00:04:01,860
347
+ 数据收集方面
348
+
349
+ 88
350
+ 00:04:01,860 --> 00:04:04,939
351
+ 他们使用了M24 M25等数据集
352
+
353
+ 89
354
+ 00:04:04,939 --> 00:04:07,580
355
+ 并在多个评估数据集上进行了验证
356
+
357
+ 90
358
+ 00:04:07,580 --> 00:04:08,900
359
+ 实验结果显示
360
+
361
+ 91
362
+ 00:04:08,900 --> 00:04:11,980
363
+ 高商Token在推理过程中起到了关键作用
364
+
365
+ 92
366
+ 00:04:11,980 --> 00:04:14,760
367
+ 他们不仅连接了逻辑推理的各个环节
368
+
369
+ 93
370
+ 00:04:14,760 --> 00:04:18,319
371
+ 还能通过调节节码温度来显著影响模型的性能
372
+
373
+ 94
374
+ 00:04:18,319 --> 00:04:19,240
375
+ 具体来说
376
+
377
+ 95
378
+ 00:04:19,240 --> 00:04:21,819
379
+ 降低高商Token的温度会降低性能
380
+
381
+ 96
382
+ 00:04:21,819 --> 00:04:24,060
383
+ 而增加其温度则能提升性能
384
+
385
+ 97
386
+ 00:04:24,060 --> 00:04:24,620
387
+ 此外
388
+
389
+ 98
390
+ 00:04:24,620 --> 00:04:27,980
391
+ RLVR在训练过程中保留了基础模型的商模式
392
+
393
+ 99
394
+ 00:04:27,980 --> 00:04:30,420
395
+ 并且主要改变了高商Token的商值
396
+
397
+ 100
398
+ 00:04:30,420 --> 00:04:32,259
399
+ 最令人振奋的是
400
+
401
+ 101
402
+ 00:04:32,259 --> 00:04:33,620
403
+ 研究团队发现
404
+
405
+ 102
406
+ 00:04:33,620 --> 00:04:36,000
407
+ 仅关注高商Token的策略梯度更新
408
+
409
+ 103
410
+ 00:04:36,000 --> 00:04:37,459
411
+ 不仅没有降低性能
412
+
413
+ 104
414
+ 00:04:37,459 --> 00:04:40,639
415
+ 反而在Koen3模型上显著提升了推理效果
416
+
417
+ 105
418
+ 00:04:40,639 --> 00:04:44,120
419
+ 这一发现对于优化LM的推理能力具有重要意义
420
+
421
+ 106
422
+ 00:04:44,120 --> 00:04:46,480
423
+ 尤其是在处理复杂推理任务时
424
+
425
+ 107
426
+ 00:04:46,480 --> 00:04:50,399
427
+ 高商Token的聚焦策略能够平衡探索与训练稳定性
428
+
429
+ 108
430
+ 00:04:50,399 --> 00:04:52,560
431
+ 为模型带来更大的性能提升
432
+
433
+ 109
434
+ 00:04:52,560 --> 00:04:57,100
435
+ 总的来说这篇论文通过深入分析Token商对推理性能的影响
436
+
437
+ 110
438
+ 00:04:57,100 --> 00:05:01,019
439
+ 揭示了高商少数Token在驱动LM推理中的关键作用
440
+
441
+ 111
442
+ 00:05:01,019 --> 00:05:04,720
443
+ 为未来的LMU化提供了新的思路和方法
444
+
445
+ 112
446
+ 00:05:04,720 --> 00:05:08,220
447
+ 这是本期节目的第三篇论文
448
+
449
+ 113
450
+ 00:05:08,220 --> 00:05:09,180
451
+ 题目是Po
452
+
453
+ 114
454
+ 00:05:09,180 --> 00:05:12,760
455
+ 延长的强化学习拓展大型语言模型的推理边界
456
+
457
+ 115
458
+ 00:05:12,760 --> 00:05:16,600
459
+ 这篇论文目前在Hugging Face社区获得了115个点赞
460
+
461
+ 116
462
+ 00:05:16,600 --> 00:05:19,680
463
+ 显示出它在研究社区中引起了广泛关注
464
+
465
+ 117
466
+ 00:05:19,680 --> 00:05:21,920
467
+ 这篇论文的核心研究问题是
468
+
469
+ 118
470
+ 00:05:21,920 --> 00:05:26,820
471
+ 延长的强化学习训练能否在大型语言模型中揭示出新的推理策略
472
+
473
+ 119
474
+ 00:05:26,819 --> 00:05:30,779
475
+ 这些策略是基础模型在广泛采样下也无法获得的
476
+
477
+ 120
478
+ 00:05:30,779 --> 00:05:32,639
479
+ 研究团队提出了一个假设
480
+
481
+ 121
482
+ 00:05:32,639 --> 00:05:34,779
483
+ 通过延长的强化学习训练
484
+
485
+ 122
486
+ 00:05:34,779 --> 00:05:38,279
487
+ 模型可以在其基础模型的基础上拓展推理能力
488
+
489
+ 123
490
+ 00:05:38,279 --> 00:05:40,079
491
+ 发现新的解决方案路径
492
+
493
+ 124
494
+ 00:05:40,079 --> 00:05:42,079
495
+ 并在各种任务中表现更好
496
+
497
+ 125
498
+ 00:05:42,079 --> 00:05:43,519
499
+ 为了验证这一假设
500
+
501
+ 126
502
+ 00:05:43,519 --> 00:05:46,719
503
+ 研究团队设计了一种名为Pro的新训练方法
504
+
505
+ 127
506
+ 00:05:46,719 --> 00:05:49,360
507
+ 这种方法结合��KL散度控制
508
+
509
+ 128
510
+ 00:05:49,360 --> 00:05:52,259
511
+ 参考策略重置以及一系列多样化的任务
512
+
513
+ 129
514
+ 00:05:52,259 --> 00:05:54,579
515
+ 他们使用了三个模型进行实验
516
+
517
+ 130
518
+ 00:05:54,579 --> 00:05:55,939
519
+ DeepSea Car 1-1
520
+
521
+ 131
522
+ 00:05:55,939 --> 00:05:57,560
523
+ 5B作为基础模型
524
+
525
+ 132
526
+ 00:05:57,560 --> 00:05:59,779
527
+ Demitra Research Reasoning宽1.5B
528
+
529
+ 133
530
+ 00:05:59,779 --> 00:06:01,660
531
+ 作为经过Pro训练的模型
532
+
533
+ 134
534
+ 00:06:01,660 --> 00:06:04,519
535
+ 以及DeepSea Car 1-7B用于比较
536
+
537
+ 135
538
+ 00:06:04,519 --> 00:06:05,600
539
+ 在实验过程中
540
+
541
+ 136
542
+ 00:06:05,600 --> 00:06:09,100
543
+ Pro训练包括了超过2000步的强化学习训练
544
+
545
+ 137
546
+ 00:06:09,100 --> 00:06:11,819
547
+ 同时引入了KL散度惩罚来保持伤
548
+
549
+ 138
550
+ 00:06:11,819 --> 00:06:13,220
551
+ 并防止策略漂移
552
+
553
+ 139
554
+ 00:06:13,220 --> 00:06:14,980
555
+ 参考策略会定期重置
556
+
557
+ 140
558
+ 00:06:14,980 --> 00:06:16,279
559
+ 以允许持续改进
560
+
561
+ 141
562
+ 00:06:16,279 --> 00:06:18,060
563
+ 训练数据涵盖了数学
564
+
565
+ 142
566
+ 00:06:18,060 --> 00:06:18,759
567
+ 代码
568
+
569
+ 143
570
+ 00:06:18,759 --> 00:06:19,120
571
+ STEM
572
+
573
+ 144
574
+ 00:06:19,120 --> 00:06:21,560
575
+ 逻辑谜题和指令跟随等多种任务
576
+
577
+ 145
578
+ 00:06:21,560 --> 00:06:24,480
579
+ 共构建了一个包含136000个视力的
580
+
581
+ 146
582
+ 00:06:24,480 --> 00:06:25,800
583
+ 多样化训练数据集
584
+
585
+ 147
586
+ 00:06:25,800 --> 00:06:27,160
587
+ 研究结果显示
588
+
589
+ 148
590
+ 00:06:27,160 --> 00:06:29,259
591
+ 经过强化学习训练的模型
592
+
593
+ 149
594
+ 00:06:29,259 --> 00:06:30,620
595
+ 在各种任务中的表现
596
+
597
+ 150
598
+ 00:06:30,620 --> 00:06:32,100
599
+ 显著优于基础模型
600
+
601
+ 151
602
+ 00:06:32,100 --> 00:06:32,700
603
+ 例如
604
+
605
+ 152
606
+ 00:06:32,700 --> 00:06:33,900
607
+ 在数学任务中
608
+
609
+ 153
610
+ 00:06:33,900 --> 00:06:36,900
611
+ PiSide1的提升达到了14.7%
612
+
613
+ 154
614
+ 00:06:36,900 --> 00:06:39,700
615
+ 在编码任务中提升了13.9%
616
+
617
+ 155
618
+ 00:06:39,700 --> 00:06:42,640
619
+ 在逻辑谜题中提升了54.8%
620
+
621
+ 156
622
+ 00:06:42,640 --> 00:06:45,860
623
+ 在STEM推理任务中提升了25.1%
624
+
625
+ 157
626
+ 00:06:45,860 --> 00:06:49,080
627
+ 在指令跟随任务中提升了18.1%
628
+
629
+ 158
630
+ 00:06:49,080 --> 00:06:49,439
631
+ 此外
632
+
633
+ 159
634
+ 00:06:49,439 --> 00:06:50,540
635
+ 研究还发现
636
+
637
+ 160
638
+ 00:06:50,540 --> 00:06:52,540
639
+ Pro训练在超过2000步
640
+
641
+ 161
642
+ 00:06:52,540 --> 00:06:54,860
643
+ 后仍能持续提升模型性能
644
+
645
+ 162
646
+ 00:06:54,860 --> 00:06:57,220
647
+ 论文还引入了创造力指数
648
+
649
+ 163
650
+ 00:06:57,220 --> 00:06:59,160
651
+ 来量化推理路径的吸引性
652
+
653
+ 164
654
+ 00:06:59,160 --> 00:07:00,180
655
+ 结果表明
656
+
657
+ 165
658
+ 00:07:00,180 --> 00:07:01,879
659
+ 延长的强化学习训练
660
+
661
+ 166
662
+ 00:07:01,879 --> 00:07:04,560
663
+ 确实能够产生更具创新性的解决方案
664
+
665
+ 167
666
+ 00:07:04,560 --> 00:07:05,360
667
+ 这一发现
668
+
669
+ 168
670
+ 00:07:05,360 --> 00:07:06,379
671
+ 挑战了之前认为
672
+
673
+ 169
674
+ 00:07:06,379 --> 00:07:07,500
675
+ 强化学习模型
676
+
677
+ 170
678
+ 00:07:07,500 --> 00:07:09,620
679
+ 不会获得新推理能力的研究结论
680
+
681
+ 171
682
+ 00:07:09,620 --> 00:07:10,420
683
+ 总的来说
684
+
685
+ 172
686
+ 00:07:10,420 --> 00:07:12,520
687
+ 这篇论文提供了新的见解
688
+
689
+ 173
690
+ 00:07:12,520 --> 00:07:14,259
691
+ 展示了在什么条件下
692
+
693
+ 174
694
+ 00:07:14,259 --> 00:07:17,560
695
+ 强化学习能够有效拓展语言模型的推理边界
696
+
697
+ 175
698
+ 00:07:17,560 --> 00:07:18,920
699
+ 研究结果表明
700
+
701
+ 176
702
+ 00:07:18,920 --> 00:07:21,500
703
+ 通过稳定且延长的强化学习训练
704
+
705
+ 177
706
+ 00:07:22,540 --> 00:07:24,080
707
+ 开发出超越基础模型
708
+
709
+ 178
710
+ 00:07:24,080 --> 00:07:25,800
711
+ 初始能力的新的推理模式
712
+
713
+ 179
714
+ 00:07:25,800 --> 00:07:29,080
715
+ 本期节目的第四篇论文
716
+
717
+ 180
718
+ 00:07:29,080 --> 00:07:30,220
719
+ 我们来关注一篇
720
+
721
+ 181
722
+ 00:07:30,220 --> 00:07:31,480
723
+ 名为Alpha 1
724
+
725
+ 182
726
+ 00:07:31,480 --> 00:07:33,120
727
+ 测试时驱动大模型
728
+
729
+ 183
730
+ 00:07:33,120 --> 00:07:35,340
731
+ 进行快慢思考的推理框架的研究
732
+
733
+ 184
734
+ 00:07:35,340 --> 00:07:37,740
735
+ 这篇论文目前在Hugging Face社区
736
+
737
+ 185
738
+ 00:07:37,740 --> 00:07:39,180
739
+ 获得了89个点赞
740
+
741
+ 186
742
+ 00:07:39,180 --> 00:07:42,660
743
+ 显示出它在学术界和开发者社区中的广泛关注
744
+
745
+ 187
746
+ 00:07:42,660 --> 00:07:46,200
747
+ 这篇论文的核心目标是解决大型推理模型
748
+
749
+ 188
750
+ 00:07:46,200 --> 00:07:47,860
751
+ LRMS在测试时
752
+
753
+ 189
754
+ 00:07:47,860 --> 00:07:50,140
755
+ 如何动态调节推理过程的挑战
756
+
757
+ 190
758
+ 00:07:50,139 --> 00:07:52,539
759
+ 研究人员提出了一个名为Alpha 1
760
+
761
+ 191
762
+ 00:07:52,539 --> 00:07:53,919
763
+ Alpha 1的框架
764
+
765
+ 192
766
+ 00:07:53,919 --> 00:07:56,879
767
+ 旨在提升LRMS的推理能力和效率
768
+
769
+ 193
770
+ 00:07:56,879 --> 00:07:57,839
771
+ 简单来说
772
+
773
+ 194
774
+ 00:07:57,839 --> 00:07:59,560
775
+ Alpha 1通过在测试时
776
+
777
+ 195
778
+ 00:07:59,560 --> 00:08:02,099
779
+ 动态调度慢思考和快思考的转换
780
+
781
+ 196
782
+ 00:08:02,099 --> 00:08:06,680
783
+ 帮助模型在深度分析和计算效率之间找到平衡
784
+
785
+ 197
786
+ 00:08:06,680 --> 00:08:07,379
787
+ 具体来看
788
+
789
+ 198
790
+ 00:08:07,379 --> 00:08:11,180
791
+ 研究团队使用了三个开源的LRMS作为基础模型
792
+
793
+ 199
794
+ 00:08:11,180 --> 00:08:12,719
795
+ 分别是DeepSeq R1
796
+
797
+ 200
798
+ 00:08:12,719 --> 00:08:14,180
799
+ Distil QN1.5B
800
+
801
+ 201
802
+ 00:08:14,180 --> 00:08:15,079
803
+ DeepSeq R1
804
+
805
+ 202
806
+ 00:08:15,079 --> 00:08:17,379
807
+ Distil QN7B和QNQXRB
808
+
809
+ 203
810
+ 00:08:17,379 --> 00:08:18,899
811
+ 他们在一系列涵盖数学
812
+
813
+ 204
814
+ 00:08:18,899 --> 00:08:22,279
815
+ 编程和科学领域的六个基准测试上进行了实验
816
+
817
+ 205
818
+ 00:08:22,279 --> 00:08:23,699
819
+ 包括M2024
820
+
821
+ 206
822
+ 00:08:23,699 --> 00:08:24,779
823
+ AMCR3
824
+
825
+ 207
826
+ 00:08:24,779 --> 00:08:25,759
827
+ Minerva Math等
828
+
829
+ 208
830
+ 00:08:25,759 --> 00:08:29,339
831
+ 实验在NVIDIA L40S和A100GPU上进行
832
+
833
+ 209
834
+ 00:08:29,339 --> 00:08:32,480
835
+ 确保了计算资源的充足和实验的可靠性
836
+
837
+ 210
838
+ 00:08:32,480 --> 00:08:37,120
839
+ 论文的主要创新点在于引入了Alpha时刻AlphaMoment这一概念
840
+
841
+ 211
842
+ 00:08:37,120 --> 00:08:39,659
843
+ 通过于Alpha和后Alpha时刻的调节
844
+
845
+ 212
846
+ 00:08:39,659 --> 00:08:43,340
847
+ Alpha1能够有效地在测试时对LRMS进行缩放
848
+
849
+ 213
850
+ 00:08:43,340 --> 00:08:45,320
851
+ 研究人员还通过对比实验
852
+
853
+ 214
854
+ 00:08:45,320 --> 00:08:47,899
855
+ 验证了Alpha1在问题解决准确性
856
+
857
+ 215
858
+ 00:08:47,899 --> 00:08:49,680
859
+ PiCity和推理效率
860
+
861
+ 216
862
+ 00:08:49,680 --> 00:08:51,700
863
+ FAP指标上的显著提升
864
+
865
+ 217
866
+ 00:08:51,700 --> 00:08:53,759
867
+ 例如1.5B的模型
868
+
869
+ 218
870
+ 00:08:53,759 --> 00:08:54,920
871
+ 在使用Alpha1后
872
+
873
+ 219
874
+ 00:08:54,920 --> 00:08:58,039
875
+ 问题解决准确性提高了6.15%
876
+
877
+ 220
878
+ 00:08:58,039 --> 00:09:00,480
879
+ 同时令牌长度减少了14%
880
+
881
+ 221
882
+ 00:09:00,480 --> 00:09:02,220
883
+ 研究结果显示
884
+
885
+ 222
886
+ 00:09:02,220 --> 00:09:06,379
887
+ Alpha1不仅在准确性上超越了传统的测试时缩放方法
888
+
889
+ 223
890
+ 00:09:06,379 --> 00:09:07,899
891
+ 如SE和Chain of Draft
892
+
893
+ 224
894
+ 00:09:07,899 --> 00:09:10,220
895
+ 而且在推理效率上也表现出色
896
+
897
+ 225
898
+ 00:09:10,220 --> 00:09:11,060
899
+ 特别是
900
+
901
+ 226
902
+ 00:09:11,060 --> 00:09:14,300
903
+ 论文发现慢思考到快思考的线性调度方式
904
+
905
+ 227
906
+ 00:09:14,300 --> 00:09:16,440
907
+ 能够带来最高的推理准确性
908
+
909
+ 228
910
+ 00:09:16,440 --> 00:09:20,279
911
+ 这表明慢思考在提升推理效率方面起到了关键作用
912
+
913
+ 229
914
+ 00:09:20,279 --> 00:09:21,180
915
+ 总体而言
916
+
917
+ 230
918
+ 00:09:21,180 --> 00:09:25,860
919
+ Alpha1为大型推理模型提供了一个通用的推理过程调节框架
920
+
921
+ 231
922
+ 00:09:25,860 --> 00:09:28,620
923
+ 展示了慢思考和快思考的动态转换
924
+
925
+ 232
926
+ 00:09:28,620 --> 00:09:30,800
927
+ 如何有效提升模型的推理能力
928
+
929
+ 233
930
+ 00:09:30,799 --> 00:09:34,839
931
+ 这一研究不仅为LRMS的实际应用提供了新的思路
932
+
933
+ 234
934
+ 00:09:34,839 --> 00:09:38,719
935
+ 也为未来在测试时优化模型推理提供了宝贵的经验
936
+
937
+ 235
938
+ 00:09:38,719 --> 00:09:44,899
939
+ 这就是本期节目关于Alpha1测试时驱动大模型进行快慢思考的推理框架的介绍
940
+
941
+ 236
942
+ 00:09:44,899 --> 00:09:48,439
943
+ 这是本期节目的第五篇论文
944
+
945
+ 237
946
+ 00:09:48,439 --> 00:09:48,939
947
+ 题目是Small Flux
948
+
949
+ 238
950
+ 00:09:48,939 --> 00:09:52,439
951
+ 一种用于经济高效型机器人的视觉
952
+
953
+ 239
954
+ 00:09:52,439 --> 00:09:53,079
955
+ 语言
956
+
957
+ 240
958
+ 00:09:53,079 --> 00:09:54,059
959
+ 动作模型
960
+
961
+ 241
962
+ 00:09:54,059 --> 00:09:58,000
963
+ 这篇论文目前在Hugging Face社区获得了75个点赞
964
+
965
+ 242
966
+ 00:09:58,000 --> 00:10:00,980
967
+ 论文的核心目标是解决现有大规模视觉
968
+
969
+ 243
970
+ 00:10:00,980 --> 00:10:01,600
971
+ 语言
972
+
973
+ 244
974
+ 00:10:01,600 --> 00:10:02,299
975
+ 动作
976
+
977
+ 245
978
+ 00:10:02,299 --> 00:10:02,779
979
+ Flux
980
+
981
+ 246
982
+ 00:10:02,779 --> 00:10:07,379
983
+ 模型在机器人领域中面临的高训练成本和实际部署困难的问题
984
+
985
+ 247
986
+ 00:10:07,379 --> 00:10:09,879
987
+ 研究团队提出了一个关键问题
988
+
989
+ 248
990
+ 00:10:09,879 --> 00:10:11,679
991
+ 是否可以开发一种小型
992
+
993
+ 249
994
+ 00:10:11,679 --> 00:10:13,980
995
+ 高效且由社区驱动的伐模型
996
+
997
+ 250
998
+ 00:10:13,980 --> 00:10:16,360
999
+ 既能大幅降低训练和推理成本
1000
+
1001
+ 251
1002
+ 00:10:16,360 --> 00:10:19,319
1003
+ 同时还能在机器人任务中保持竞争力
1004
+
1005
+ 252
1006
+ 00:10:19,319 --> 00:10:20,720
1007
+ 论文的答案是Small Flux
1008
+
1009
+ 253
1010
+ 00:10:20,720 --> 00:10:22,579
1011
+ 这是一种紧凑的伐模型
1012
+
1013
+ 254
1014
+ 00:10:22,579 --> 00:10:26,179
1015
+ 专门设计用于单GPU训练和消费级设备的部署
1016
+
1017
+ 255
1018
+ 00:10:26,179 --> 00:10:29,740
1019
+ Small Flux通过利用社区收集的数据和一部推理技术
1020
+
1021
+ 256
1022
+ 00:10:29,740 --> 00:10:33,539
1023
+ 实现了与更大规模模型相媲美的性能
1024
+
1025
+ 257
1026
+ 00:10:33,539 --> 00:10:34,419
1027
+ 在方法论上
1028
+
1029
+ 258
1030
+ 00:10:34,419 --> 00:10:37,019
1031
+ Small Flux有一个紧凑的与训练视觉
1032
+
1033
+ 259
1034
+ 00:10:37,019 --> 00:10:40,259
1035
+ 以N模型VLM和一个动作专家组成
1036
+
1037
+ 260
1038
+ 00:10:40,259 --> 00:10:42,240
1039
+ VLM负责处理语言指令
1040
+
1041
+ 261
1042
+ 00:10:42,240 --> 00:10:44,620
1043
+ RGB图像和机器人传感器状态
1044
+
1045
+ 262
1046
+ 00:10:44,620 --> 00:10:48,919
1047
+ 而动作专家则通过交替的交叉注意力和自注意力快进行训练
1048
+
1049
+ 263
1050
+ 00:10:48,919 --> 00:10:50,299
1051
+ 输出低级别动作
1052
+
1053
+ 264
1054
+ 00:10:50,299 --> 00:10:51,259
1055
+ 数据集方面
1056
+
1057
+ 265
1058
+ 00:10:51,259 --> 00:10:55,980
1059
+ 研究团队使用了来自Hugging Face的481个社区数据集的子集
1060
+
1061
+ 266
1062
+ 00:10:55,980 --> 00:10:57,879
1063
+ 以及新的MetaWorld数据集
1064
+
1065
+ 267
1066
+ 00:10:57,879 --> 00:11:00,679
1067
+ 和几个真实世界的机器人操作任务数据集
1068
+
1069
+ 268
1070
+ 00:11:00,679 --> 00:11:01,820
1071
+ 训练过程中
1072
+
1073
+ 269
1074
+ 00:11:01,820 --> 00:11:03,639
1075
+ Small Flux通过模仿学习
1076
+
1077
+ 270
1078
+ 00:11:03,639 --> 00:11:05,639
1079
+ 在社区数据集上进行运训练
1080
+
1081
+ 271
1082
+ 00:11:05,639 --> 00:11:07,299
1083
+ 并使用现成的VLM
1084
+
1085
+ 272
1086
+ 00:11:07,299 --> 00:11:08,419
1087
+ 如Kun 2.5
1088
+
1089
+ 273
1090
+ 00:11:08,419 --> 00:11:09,860
1091
+ VL3B Instruct
1092
+
1093
+ 274
1094
+ 00:11:09,860 --> 00:11:11,220
1095
+ 自动生成任务描述
1096
+
1097
+ 275
1098
+ 00:11:11,220 --> 00:11:12,639
1099
+ 以改进任务注视
1100
+
1101
+ 276
1102
+ 00:11:12,639 --> 00:11:13,559
1103
+ 推理阶段
1104
+
1105
+ 277
1106
+ 00:11:13,559 --> 00:11:14,700
1107
+ 一部推理技术
1108
+
1109
+ 278
1110
+ 00:11:14,700 --> 00:11:17,340
1111
+ 将动作执行与观察处理和动作预测机
1112
+
1113
+ 279
1114
+ 00:11:17,340 --> 00:11:19,320
1115
+ 从而提高了控制频率
1116
+
1117
+ 280
1118
+ 00:11:19,320 --> 00:11:21,080
1119
+ 并减少了任务完成时间
1120
+
1121
+ 281
1122
+ 00:11:21,080 --> 00:11:22,059
1123
+ 在评估中
1124
+
1125
+ 282
1126
+ 00:11:22,059 --> 00:11:26,279
1127
+ Small Flux在模拟和真实世界的机器人基准测试中表现出色
1128
+
1129
+ 283
1130
+ 00:11:26,279 --> 00:11:29,740
1131
+ 特别是在识取、放置、堆叠和分类任务中
1132
+
1133
+ 284
1134
+ 00:11:29,740 --> 00:11:31,299
1135
+ 优于其他Fla模型
1136
+
1137
+ 285
1138
+ 00:11:31,299 --> 00:11:32,259
1139
+ 一部推理
1140
+
1141
+ 286
1142
+ 00:11:32,259 --> 00:11:35,839
1143
+ 还使任务完成时间减少了约30%
1144
+
1145
+ 287
1146
+ 00:11:35,839 --> 00:11:36,959
1147
+ 论文的结论表明
1148
+
1149
+ 288
1150
+ 00:11:36,959 --> 00:11:39,000
1151
+ 通过利用社区驱动数据集
1152
+
1153
+ 289
1154
+ 00:11:39,000 --> 00:11:41,600
1155
+ 优化模型架构和一部推理技术
1156
+
1157
+ 290
1158
+ 00:11:41,600 --> 00:11:43,240
1159
+ 紧凑高效的Fla模型
1160
+
1161
+ 291
1162
+ 00:11:43,240 --> 00:11:45,720
1163
+ 可以在机器人任务中取得竞争性表现
1164
+
1165
+ 292
1166
+ 00:11:45,720 --> 00:11:47,299
1167
+ Small Flux成功展示了
1168
+
1169
+ 293
1170
+ 00:11:47,299 --> 00:11:49,720
1171
+ 开发经济高效型Fla模型的可行性
1172
+
1173
+ 294
1174
+ 00:11:49,720 --> 00:11:52,240
1175
+ 为机器人研究提供了新的可能性
1176
+
1177
+ 295
1178
+ 00:11:52,240 --> 00:11:55,419
1179
+ 并使更多资源有限的实际应用成为可能
1180
+
1181
+ 296
1182
+ 00:11:55,419 --> 00:11:59,139
1183
+ 以上就是本期节目的全部内容
1184
+
1185
+ 297
1186
+ 00:11:59,139 --> 00:12:00,459
1187
+ 感谢大家的收听
1188
+
1189
+ 298
1190
+ 00:12:00,459 --> 00:12:02,059
1191
+ 如果你喜欢本期内容
1192
+
1193
+ 299
1194
+ 00:12:02,059 --> 00:12:03,539
1195
+ 欢迎在评论区留言
1196
+
1197
+ 300
1198
+ 00:12:03,539 --> 00:12:04,159
1199
+ 点赞
1200
+
1201
+ 301
1202
+ 00:12:04,159 --> 00:12:04,740
1203
+ 转发
1204
+
1205
+ 302
1206
+ 00:12:04,740 --> 00:12:05,979
1207
+ 并订阅我们的节目
1208
+
1209
+ 303
1210
+ 00:12:05,979 --> 00:12:06,559
1211
+ 同时
1212
+
1213
+ 304
1214
+ 00:12:06,559 --> 00:12:08,659
1215
+ 别忘了关注我们在小红书的账号
1216
+
1217
+ 305
1218
+ 00:12:08,659 --> 00:12:09,199
1219
+ ISOD
1220
+
1221
+ 306
1222
+ 00:12:09,199 --> 00:12:10,539
1223
+ 我们下期节目再见
1224
+
1225
+ 307
1226
+ 00:12:10,539 --> 00:12:12,179
1227
+ Hayae
1228
+
1229
+ 308
1230
+ 00:12:12,179 --> 00:12:28,179
1231
+ ��
tests/cache/xyz_podcast_episode.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 各位听众朋友大家好欢迎收听Hugging Face每日爱论文速递周末特辑每周日准时为您带来一周内Hugging Face向最受欢迎的论文汇总本期节目涵盖的时间段是2025年6月2日至6月8日在本期节目中我们将为您精选五篇备受关注的论文内容涵盖了通过强化学习RL提升大型语言模型LLM的自我改进高商仇恳在推理中的应用延长的强化学习对LM推理的拓展测试时驱动的大模型快慢思考框架以及一种经济高效的视觉语言动作模型接下来让我们一起深入这些前沿研究探索AI技术的最新进展节目正式开始本期节目的第一篇论文是反思重视奖励通过强化学习实现LM的自我提升这篇论文在Hugging Face社区获得了169个点赞显示出其研究价值和社区的关注度这篇论文的核心目标是提升大型语言模型LMS的性能通过一种名为反思重视奖励的新框架来实现这个框架的关键在于让模型在任务失败后进行自我反思分析失败原因并在再次尝试时利用这些反思来改进表现具体来说模型在失败后会生成一段自我反思的评论解释哪里出了问题并提出改进建议然后模型会根据这些反思再次尝试任务如果第二次尝试成功模型在反思阶段生成的内容会通过一种名为Group Relative Policy OptimizationGruple的算法获得奖励从而进一步优化其自我反思的能力论文中使用了多个模型进行实验包括CornarLama 3.1Fi 3.5Mini Instruct等并基于两个主要数据集Epojin和CountdownEpojin数据集包含6万个高质量的函数调用要求模型生成正确的工具调用Countdown数据集则包含45万个数字列表和目标数字要求模型通过这些数字生成正确的方程来达到目标研究结果显示这种反思重视奖励的方法在提升模型性能方面非常有效特别是在Epojin数据集上经过Gurple训练的Quin27B模型甚至超过了未经过训练的Quin272B模型此外自我反思显著提升了模型在Countdown数据集上的表现尤其是对于那些初始表现较差的模型论文还指出这种自我反思的方法不仅增强了模型解决复杂任务的能力还使得较小的模型能够超越较大的未训练模型显示出其在效率和通用性上的优势此外研究中几乎没有观察到灾难性遗忘的现象表明这种方法在模型乳棒性方面也有显著提升总的来说这篇论文提出了一种创新的方法通过强化学习的方式让LLMS进行自我反思和改进从而在复杂任务上取得更好的表现这是本期节目的第二篇论文题目是超越8020法则高商少数Token驱动LLM推理的有效强化学习这篇论文目前在Hugging Face社区获得了130个点赞显示出它在学术界引起了广泛关注这篇论文的核心研究问题是在大型语言模型LLMS的验证奖励强化学习RLVR中不同类型的Token如何影响推理性能以及是否可以通过专注于特定类型的Token来提升RLVR的效果研究团队提出了一个假设高商的少数Token作为推理路径中的关键分支点比低商的多数Token更能有效驱动RLVR他们进一步假设通过限制策略梯度更新到这些高商Token可以在保持或提升性能的同时提供计算上的优势为了验证这一假设研究团队进行了详细的实验设计他们选择了捆3LLM家族的8B 14B和32B基础模型作为研究对象通过链式思维COT推理中的Token商模式分析结合控制实验来调节这根商并在RLVR训练中选择性的更新策略梯度数据收集方面他们使用了M24 M25等数据集并在多个评估数据集上进行了验证实验结果显示高商Token在推理过程中起到了关键作用他们不仅连接了逻辑推理的各个环节还能通过调节节码温度来显著影响模型的性能具体来说降低高商Token的温度会降低性能而增加其温度则能提升性能此外RLVR在训练过程中保留了基础模型的商模式并且主要改变了高商Token的商值最令人振奋的是研究团队发现仅关注高商Token的策略梯度更新不仅没有降低性能反而在Koen3模型上显著提升了推理效果这一发现对于优化LM的推理能力具有重要意义尤其是在处理复杂推理任务时高商Token的聚焦策略能够平衡探索与训练稳定性为模型带来更大的性能提升总的来说这篇论文通过深入分析Token商对推理性能的影响揭示了高商少数Token在驱动LM推理中的关键作用为未来的LMU化提供了新的思路和方法这是本期节目的第三篇论文题目是Po延长的强化学习拓展大型语言模型的推理边界这篇论文目前在Hugging Face社区获得了115个点赞显示出它在研究社区中引起了广泛关注这篇论文的核心研究问题是延长的强化学习训练能否在大型语言模型中揭示出新的推理策略这些策略是基础模型在广泛采样下也无法获得的研究团队提出了一个假设通过延长的强化学习训练模型可以在其基础模型的基础上拓展推理能力发现新的��决方案路径并在各种任务中表现更好为了验证这一假设研究团队设计了一种名为Pro的新训练方法这种方法结合了KL散度控制参考策略重置以及一系列多样化的任务他们使用了三个模型进行实验DeepSea Car 1-15B作为基础模型Demitra Research Reasoning宽1.5B作为经过Pro训练的模型以及DeepSea Car 1-7B用于比较在实验过程中Pro训练包括了超过2000步的强化学习训练同时引入了KL散度惩罚来保持伤并防止策略漂移参考策略会定期重置以允许持续改进训练数据涵盖了数学代码STEM逻辑谜题和指令跟随等多种任务共构建了一个包含136000个视力的多样化训练数据集研究结果显示经过强化学习训练的模型在各种任务中的表现显著优于基础模型例如在数学任务中PiSide1的提升达到了14.7%在编码任务中提升了13.9%在逻辑谜题中提升了54.8%在STEM推理任务中提升了25.1%在指令跟随任务中提升了18.1%此外研究还发现Pro训练在超过2000步后仍能持续提升模型性能论文还引入了创造力指数来量化推理路径的吸引性结果表明延长的强化学习训练确实能够产生更具创新性的解决方案这一发现挑战了之前认为强化学习模型不会获得新推理能力的研究结论总的来说这篇论文提供了新的见解展示了在什么条件下强化学习能够有效拓展语言模型的推理边界研究结果表明通过稳定且延长的强化学习训练开发出超越基础模型初始能力的新的推理模式本期节目的第四篇论文我们来关注一篇名为Alpha 1测试时驱动大模型进行快慢思考的推理框架的研究这篇论文目前在Hugging Face社区获得了89个点赞显示出它在学术界和开发者社区中的广泛关注这篇论文的核心目标是解决大型推理模型LRMS在测试时如何动态调节推理过程的挑战研究人员提出了一个名为Alpha 1Alpha 1的框架旨在提升LRMS的推理能力和效率简单来说Alpha 1通过在测试时动态调度慢思考和快思考的转换帮助模型在深度分析和计算效率之间找到平衡具体来看研究团队使用了三个开源的LRMS作为基础模型分别是DeepSeq R1Distil QN1.5BDeepSeq R1Distil QN7B和QNQXRB他们在一系列涵盖数学编程和科学领域的六个基准测试上进行了实验包括M2024AMCR3Minerva Math等实验在NVIDIA L40S和A100GPU上进行确保了计算资源的充足和实验的可靠性论文的主要创新点在于引入了Alpha时刻AlphaMoment这一概念通过于Alpha和后Alpha时刻的调节Alpha1能够有效地在测试时对LRMS进行缩放研究人员还通过对比实验验证了Alpha1在问题解决准确性PiCity和推理效率FAP指标上的显著提升例如1.5B的模型在使用Alpha1后问题解决准确性提高了6.15%同时令牌长度减少了14%研究结果显示Alpha1不仅在准确性上超越了传统的测试时缩放方法如SE和Chain of Draft而且在推理效率上也表现出色特别是论文发现慢思考到快思考的线性调度方式能够带来最高的推理准确性这表明慢思考在提升推理效率方面起到了关键作用总体而言Alpha1为大型推理模型提供了一个通用的推理过程调节框架展示了慢思考和快思考的动态转换如何有效提升模型的推理能力这一研究不仅为LRMS的实际应用提供了新的思路也为未来在测试时优化模型推理提供了宝贵的经验这就是本期节目关于Alpha1测试时驱动大模型进行快慢思考的推理框架的介绍这是本期节目的第五篇论文题目是Small Flux一种用于经济高效型机器人的视觉语言动作模型这篇论文目前在Hugging 
Face社区获得了75个点赞论文的核心目标是解决现有大规模视觉语言动作Flux模型在机器人领域中面临的高训练成本和实际部署困难的问题研究团队提出了一个关键问题是否可以开发一种小型高效且由社区驱动的伐模型既能大幅降低训练和推理成本同时还能在机器人任务中保持竞争力论文的答案是Small Flux这是一种紧凑的伐模型专门设计用于单GPU训练和消费级设备的部署Small Flux通过利用社区收集的数据和一部推理技术实现了与更大规模模型相媲美的性能在方法论上Small Flux有一个紧凑的与训练视觉以N模型VLM和一个动作专家组成VLM负责处理语言指令RGB图像和机器人传感器状态而动作专家则通过交替的交叉注意力和自注意力快进行训练输出低级别动作数据集方面研究团队使用了来自Hugging Face的481个社区数据集的子集以及新的MetaWorld数据集和几个真实世界的机器人操作任务数据集训练过程中Small Flux通过模仿学习在社区数据集上进行运训练并使用现成的VLM如Kun 2.5VL3B Instruct自动生成任务描述以改进任务注视推理阶段一部推理技术将动作执行与观察处理和动作预测机从而提高了控制频率并减少了任务完成时间在评估中Small Flux在模拟和���实世界的机器人基准测试中表现出色特别是在识取、放置、堆叠和分类任务中优于其他Fla模型一部推理还使任务完成时间减少了约30%论文的结论表明通过利用社区驱动数据集优化模型架构和一部推理技术紧凑高效的Fla模型可以在机器人任务中取得竞争性表现Small Flux成功展示了开发经济高效型Fla模型的可行性为机器人研究提供了新的可能性并使更多资源有限的实际应用成为可能以上就是本期节目的全部内容感谢大家的收听如果你喜欢本期内容欢迎在评论区留言点赞转发并订阅我们的节目同时别忘了关注我们在小红书的账号ISOD我们下期节目再见 Hayae��
tests/conftest.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test configuration and shared fixtures
3
+ """
4
+
5
+ import pytest
6
+ import asyncio
7
+ import tempfile
8
+ import shutil
9
+ import os
10
+ from pathlib import Path
11
+ from typing import Generator, Dict, Any
12
+
13
+ from src.services.audio_processing_service import AudioProcessingService
14
+ from src.services.podcast_download_service import PodcastDownloadService
15
+ from src.services.file_management_service import FileManagementService
16
+ from src.services.speaker_embedding_service import SpeakerEmbeddingService
17
+
18
+
19
+ @pytest.fixture(scope="session")
20
+ def event_loop():
21
+ """Create an instance of the default event loop for the test session."""
22
+ loop = asyncio.get_event_loop_policy().new_event_loop()
23
+ yield loop
24
+ loop.close()
25
+
26
+
27
+ @pytest.fixture
28
+ def temp_dir() -> Generator[str, None, None]:
29
+ """Create a temporary directory for tests"""
30
+ temp_path = tempfile.mkdtemp(prefix="podcast_test_")
31
+ yield temp_path
32
+ shutil.rmtree(temp_path, ignore_errors=True)
33
+
34
+
35
+ @pytest.fixture
36
+ def sample_mp3_files(temp_dir: str) -> Dict[str, str]:
37
+ """Create sample MP3 files for testing"""
38
+ import ffmpeg
39
+
40
+ files = {}
41
+ for i, name in enumerate(["test1.mp3", "test2.mp3"]):
42
+ file_path = os.path.join(temp_dir, name)
43
+ # Create a short silent audio file for testing
44
+ (
45
+ ffmpeg
46
+ .input('anullsrc=channel_layout=mono:sample_rate=16000', f='lavfi', t=5)
47
+ .output(file_path, acodec='mp3')
48
+ .overwrite_output()
49
+ .run(quiet=True)
50
+ )
51
+ files[name] = file_path
52
+
53
+ return files
54
+
55
+
56
+ @pytest.fixture
57
+ def podcast_download_service() -> PodcastDownloadService:
58
+ """Create podcast download service instance"""
59
+ return PodcastDownloadService()
60
+
61
+
62
+ @pytest.fixture
63
+ def file_management_service() -> FileManagementService:
64
+ """Create file management service instance"""
65
+ return FileManagementService()
66
+
67
+
68
+ @pytest.fixture
69
+ def apple_podcast_url() -> str:
70
+ """Sample Apple Podcast URL for testing"""
71
+ return "https://podcasts.apple.com/us/podcast/the-tim-ferriss-show/id863897795?i=1000640901376"
72
+
73
+
74
+ @pytest.fixture
75
+ def xiaoyuzhou_podcast_url() -> str:
76
+ """Sample XiaoYuZhou Podcast URL for testing"""
77
+ return "https://www.xiaoyuzhoufm.com/episode/654321"
78
+
79
+
80
+ @pytest.fixture
81
+ def test_config() -> Dict[str, Any]:
82
+ """Test configuration"""
83
+ return {
84
+ "audio_processing": {
85
+ "min_segment_length": 10.0,
86
+ "min_silence_length": 0.5,
87
+ "max_concurrent_segments": 2
88
+ },
89
+ "download": {
90
+ "timeout": 30,
91
+ "max_retries": 2
92
+ },
93
+ "transcription": {
94
+ "model_name": "base",
95
+ "language": "auto"
96
+ }
97
+ }
tests/playwright_mcp_testing_guide.md ADDED
@@ -0,0 +1,439 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Playwright MCP Testing Guide for Gradio UI
2
+
3
+ 本文档详细说明如何使用Playwright MCP工具测试`src/ui/gradio_ui.py`中的所有功能。
4
+
5
+ ## 📋 目录
6
+
7
+ 1. [测试环境设置](#测试环境设置)
8
+ 2. [基本MCP工具使用](#基本mcp工具使用)
9
+ 3. [Tab 1: Podcast Download 测试](#tab-1-podcast-download-测试)
10
+ 4. [Tab 2: Audio Transcription 测试](#tab-2-audio-transcription-测试)
11
+ 5. [Tab 3: MP3 File Management 测试](#tab-3-mp3-file-management-测试)
12
+ 6. [Tab 4: Transcription Text Management 测试](#tab-4-transcription-text-management-测试)
13
+ 7. [完整测试流程示例](#完整测试流程示例)
14
+ 8. [故障排除](#故障排除)
15
+
16
+ ## 测试环境设置
17
+
18
+ ### 1. 启动应用
19
+ ```bash
20
+ # 确保应用在localhost:8000运行
21
+ python app.py
22
+ ```
23
+
24
+ ### 2. 初始化浏览器
25
+ ```python
26
+ # 导航到应用
27
+ mcp_playwright_browser_navigate("http://localhost:8000")
28
+
29
+ # 等待页面加载
30
+ mcp_playwright_browser_wait_for(time=3)
31
+
32
+ # 获取页面快照以查看当前状态
33
+ mcp_playwright_browser_snapshot()
34
+ ```
35
+
36
+ ## 基本MCP工具使用
37
+
38
+ ### 核心工具列表
39
+ - `mcp_playwright_browser_navigate(url)` - 导航到URL
40
+ - `mcp_playwright_browser_snapshot()` - 获取页面快照
41
+ - `mcp_playwright_browser_click(element, ref)` - 点击元素
42
+ - `mcp_playwright_browser_type(element, ref, text)` - 输入文本
43
+ - `mcp_playwright_browser_select_option(element, ref, values)` - 选择下拉选项
44
+ - `mcp_playwright_browser_wait_for(time=seconds)` - 等待指定时间
45
+ - `mcp_playwright_browser_wait_for(text="显示文本")` - 等待文本出现
46
+
47
+ ### 基本测试模式
48
+ 1. 获取快照 → 找到元素ref → 执行操作 → 验证结果
49
+
50
+ ## Tab 1: Podcast Download 测试
51
+
52
+ ### 界面元素映射
53
+ - **播客链接输入框**: `role='textbox', name='Podcast Link'`
54
+ - **平台选择**: `role='radio', name='Apple Podcast'` / `role='radio', name='XiaoYuZhou'`
55
+ - **自动转录复选框**: `role='checkbox', name='Auto-transcribe after download'`
56
+ - **说话人识别复选框**: `role='checkbox', name='Enable speaker diarization'`
57
+ - **下载按钮**: `role='button', name='📥 Start Download'`
58
+
59
+ ### 测试用例 1: Apple Podcast 下载 + 转录 + 说话人识别
60
+
61
+ ```python
62
+ # 1. 导航到Podcast Download tab (默认已选中)
63
+ mcp_playwright_browser_snapshot()
64
+
65
+ # 2. 输入Apple Podcast URL
66
+ mcp_playwright_browser_type(
67
+ element="播客链接输入框",
68
+ ref="[从snapshot获取的ref]",
69
+ text="https://podcasts.apple.com/cn/podcast/all-ears-english-podcast/id751574016?i=1000712048662"
70
+ )
71
+
72
+ # 3. 确保Apple Podcast已选中(默认)
73
+ # 如果需要选择:
74
+ # mcp_playwright_browser_click(element="Apple Podcast选项", ref="[ref]")
75
+
76
+ # 4. 确保自动转录已启用(默认启用)
77
+ # 如果需要切换:
78
+ # mcp_playwright_browser_click(element="自动转录复选框", ref="[ref]")
79
+
80
+ # 5. 启用说话人识别
81
+ mcp_playwright_browser_click(element="说话人识别复选框", ref="[ref]")
82
+
83
+ # 6. 开始下载
84
+ mcp_playwright_browser_click(element="开始下载按钮", ref="[ref]")
85
+
86
+ # 7. 等待处理完成(可能需要2-5分钟)
87
+ mcp_playwright_browser_wait_for(time=180) # 等待3分钟
88
+
89
+ # 8. 检查结果
90
+ mcp_playwright_browser_snapshot()
91
+ # 查看result_output区域是否显示成功结果
92
+ ```
93
+
94
+ ### 测试用例 2: XiaoYuZhou 下载 + 仅下载
95
+
96
+ ```python
97
+ # 1. 切换到XiaoYuZhou平台
98
+ mcp_playwright_browser_click(element="XiaoYuZhou选项", ref="[ref]")
99
+
100
+ # 2. 输入XiaoYuZhou URL
101
+ mcp_playwright_browser_type(
102
+ element="播客链接输入框",
103
+ ref="[ref]",
104
+ text="https://www.xiaoyuzhoufm.com/episode/your-episode-id"
105
+ )
106
+
107
+ # 3. 禁用自动转录
108
+ mcp_playwright_browser_click(element="自动转录复选框", ref="[ref]")
109
+
110
+ # 4. 开始下载
111
+ mcp_playwright_browser_click(element="开始下载按钮", ref="[ref]")
112
+
113
+ # 5. 等待下载完成
114
+ mcp_playwright_browser_wait_for(time=60)
115
+
116
+ # 6. 验证结果
117
+ mcp_playwright_browser_snapshot()
118
+ ```
119
+
120
+ ## Tab 2: Audio Transcription 测试
121
+
122
+ ### 界面元素映射
123
+ - **Tab切换**: `role='tab', name='Audio Transcription'`
124
+ - **文件路径输入**: `role='textbox', name='Audio File Path'`
125
+ - **模型选择**: `role='combobox', name='Model Size'`
126
+ - **语言选择**: `role='combobox', name='Language'`
127
+ - **输出格式**: `role='radio', name='srt'/'txt'/'json'`
128
+ - **说话人识别**: `role='checkbox', name='Enable speaker diarization'`
129
+ - **转录按钮**: `role='button', name='🎤 Start Transcription'`
130
+
131
+ ### 测试用例 1: 转录下载的音频文件
132
+
133
+ ```python
134
+ # 1. 切换到Audio Transcription tab
135
+ mcp_playwright_browser_click(element="Audio Transcription tab", ref="[ref]")
136
+
137
+ # 2. 输入音频文件路径(使用之前下载的文件)
138
+ mcp_playwright_browser_type(
139
+ element="音频文件路径输入框",
140
+ ref="[ref]",
141
+ text="downloads/1000712048662_episode_audio.mp3"
142
+ )
143
+
144
+ # 3. 选择模型大小
145
+ mcp_playwright_browser_select_option(
146
+ element="模型大小下拉框",
147
+ ref="[ref]",
148
+ values=["turbo"]
149
+ )
150
+
151
+ # 4. 选择语言
152
+ mcp_playwright_browser_select_option(
153
+ element="语言下拉框",
154
+ ref="[ref]",
155
+ values=["auto"]
156
+ )
157
+
158
+ # 5. 选择输出格式为SRT
159
+ mcp_playwright_browser_click(element="SRT格式选项", ref="[ref]")
160
+
161
+ # 6. 启用说话人识别
162
+ mcp_playwright_browser_click(element="说话人识别复选框", ref="[ref]")
163
+
164
+ # 7. 开始转录
165
+ mcp_playwright_browser_click(element="开始转录按钮", ref="[ref]")
166
+
167
+ # 8. 等待转录完成
168
+ mcp_playwright_browser_wait_for(time=120)
169
+
170
+ # 9. 检查结果
171
+ mcp_playwright_browser_snapshot()
172
+ ```
173
+
174
+ ### 测试用例 2: 不同参数组合测试
175
+
176
+ ```python
177
+ # 测试不同模型大小
178
+ for model in ["small", "medium", "large"]:
179
+ mcp_playwright_browser_select_option(
180
+ element="模型大小下拉框",
181
+ ref="[ref]",
182
+ values=[model]
183
+ )
184
+ # 执行转录并验证结果
185
+
186
+ # 测试不同输出格式
187
+ for format in ["txt", "json"]:
188
+ mcp_playwright_browser_click(element=f"{format}格式选项", ref="[ref]")
189
+ # 执行转录并验证结果
190
+ ```
191
+
192
+ ## Tab 3: MP3 File Management 测试
193
+
194
+ ### 界面元素映射
195
+ - **Tab切换**: `role='tab', name='MP3 File Management'`
196
+ - **目录选择**: `role='combobox', name='Directory Path'`
197
+ - **文件列表**: `role='textbox', name='MP3 File List'`
198
+
199
+ ### 测试用例: 浏览MP3文件
200
+
201
+ ```python
202
+ # 1. 切换到MP3 File Management tab
203
+ mcp_playwright_browser_click(element="MP3 File Management tab", ref="[ref]")
204
+
205
+ # 2. 选择目录
206
+ mcp_playwright_browser_select_option(
207
+ element="目录路径下拉框",
208
+ ref="[ref]",
209
+ values=["/root/cache/apple_podcasts"]
210
+ )
211
+
212
+ # 3. 等待文件列表更新
213
+ mcp_playwright_browser_wait_for(time=2)
214
+
215
+ # 4. 检查文件列表
216
+ mcp_playwright_browser_snapshot()
217
+
218
+ # 5. 切换到另一个目录
219
+ mcp_playwright_browser_select_option(
220
+ element="目录路径下拉框",
221
+ ref="[ref]",
222
+ values=["/root/cache/xyz_podcasts"]
223
+ )
224
+
225
+ # 6. 验证文件列表更新
226
+ mcp_playwright_browser_wait_for(time=2)
227
+ mcp_playwright_browser_snapshot()
228
+ ```
229
+
230
+ ## Tab 4: Transcription Text Management 测试
231
+
232
+ ### 界面元素映射
233
+ - **Tab切换**: `role='tab', name='Transcription Text Management'`
234
+ - **文件路径输入**: `role='textbox', name='File Path'`
235
+ - **加载文件按钮**: `role='button', name='📂 Load File'`
236
+ - **保存文件按钮**: `role='button', name='💾 Save File'`
237
+ - **刷新按钮**: `role='button', name='🔄 Refresh'`
238
+ - **内容编辑器**: `role='textbox', name='File Content'`
239
+ - **上一个按钮**: `role='button', name='⬅️ Previous'`
240
+ - **下一个按钮**: `role='button', name='➡️ Next'`
241
+
242
+ ### 测试用例 1: 加载和编辑转录文件
243
+
244
+ ```python
245
+ # 1. 切换到Text Management tab
246
+ mcp_playwright_browser_click(element="Transcription Text Management tab", ref="[ref]")
247
+
248
+ # 2. 输入转录文件路径
249
+ mcp_playwright_browser_type(
250
+ element="文件路径输入框",
251
+ ref="[ref]",
252
+ text="downloads/1000712048662_episode_audio.srt"
253
+ )
254
+
255
+ # 3. 加载文件
256
+ mcp_playwright_browser_click(element="加载文件按钮", ref="[ref]")
257
+
258
+ # 4. 等待文件加载
259
+ mcp_playwright_browser_wait_for(time=3)
260
+
261
+ # 5. 检查文件内容
262
+ mcp_playwright_browser_snapshot()
263
+
264
+ # 6. 编辑内容
265
+ mcp_playwright_browser_type(
266
+ element="内容编辑器",
267
+ ref="[ref]",
268
+ text="编辑后的内容..."
269
+ )
270
+
271
+ # 7. 保存文件
272
+ mcp_playwright_browser_click(element="保存文件按钮", ref="[ref]")
273
+
274
+ # 8. 验证保存状态
275
+ mcp_playwright_browser_wait_for(time=2)
276
+ mcp_playwright_browser_snapshot()
277
+ ```
278
+
279
+ ### 测试用例 2: 分段阅读大文件
280
+
281
+ ```python
282
+ # 1. 使用下一个按钮浏览文件
283
+ mcp_playwright_browser_click(element="下一个按钮", ref="[ref]")
284
+ mcp_playwright_browser_wait_for(time=2)
285
+ mcp_playwright_browser_snapshot()
286
+
287
+ # 2. 使用上一个按钮返回
288
+ mcp_playwright_browser_click(element="上一个按钮", ref="[ref]")
289
+ mcp_playwright_browser_wait_for(time=2)
290
+ mcp_playwright_browser_snapshot()
291
+
292
+ # 3. 刷新文件内容
293
+ mcp_playwright_browser_click(element="刷新按钮", ref="[ref]")
294
+ mcp_playwright_browser_wait_for(time=2)
295
+ mcp_playwright_browser_snapshot()
296
+ ```
297
+
298
+ ## 完整测试流程示例
299
+
300
+ ### 端到端测试流程
301
+
302
+ ```python
303
+ # 完整的端到端测试流程
304
+ def complete_e2e_test():
305
+ # Phase 1: 下载播客
306
+ print("=== Phase 1: Podcast Download ===")
307
+ mcp_playwright_browser_navigate("http://localhost:8000")
308
+ mcp_playwright_browser_snapshot()
309
+
310
+ # 输入URL并配置选项
311
+ mcp_playwright_browser_type(
312
+ element="播客链接输入框",
313
+ ref="[ref]",
314
+ text="https://podcasts.apple.com/cn/podcast/all-ears-english-podcast/id751574016?i=1000712048662"
315
+ )
316
+
317
+ # 启用说话人识别
318
+ mcp_playwright_browser_click(element="说话人识别复选框", ref="[ref]")
319
+
320
+ # 开始下载
321
+ mcp_playwright_browser_click(element="开始下载按钮", ref="[ref]")
322
+
323
+ # 等待完成
324
+ mcp_playwright_browser_wait_for(time=180)
325
+
326
+ # Phase 2: 验证下载结果并管理文件
327
+ print("=== Phase 2: File Management ===")
328
+ mcp_playwright_browser_click(element="MP3 File Management tab", ref="[ref]")
329
+ mcp_playwright_browser_snapshot()
330
+
331
+ # Phase 3: 手动转录测试
332
+ print("=== Phase 3: Manual Transcription ===")
333
+ mcp_playwright_browser_click(element="Audio Transcription tab", ref="[ref]")
334
+
335
+ # 使用不同参数进行转录
336
+ mcp_playwright_browser_type(
337
+ element="音频文件路径输入框",
338
+ ref="[ref]",
339
+ text="downloads/1000712048662_episode_audio.mp3"
340
+ )
341
+
342
+ # 测试不同模型
343
+ mcp_playwright_browser_select_option(
344
+ element="模型大小下拉框",
345
+ ref="[ref]",
346
+ values=["medium"]
347
+ )
348
+
349
+ mcp_playwright_browser_click(element="开始转录按钮", ref="[ref]")
350
+ mcp_playwright_browser_wait_for(time=120)
351
+
352
+ # Phase 4: 文本管理和编辑
353
+ print("=== Phase 4: Text Management ===")
354
+ mcp_playwright_browser_click(element="Transcription Text Management tab", ref="[ref]")
355
+
356
+ # 加载和编辑转录文件
357
+ mcp_playwright_browser_type(
358
+ element="文件路径输入框",
359
+ ref="[ref]",
360
+ text="downloads/1000712048662_episode_audio.srt"
361
+ )
362
+
363
+ mcp_playwright_browser_click(element="加载文件按钮", ref="[ref]")
364
+ mcp_playwright_browser_wait_for(time=3)
365
+ mcp_playwright_browser_snapshot()
366
+
367
+ print("=== 测试完成 ===")
368
+
369
+ # 执行完整测试
370
+ complete_e2e_test()
371
+ ```
372
+
373
+ ## 故障排除
374
+
375
+ ### 常见问题和解决方案
376
+
377
+ 1. **元素未找到**
378
+ - 先使用`mcp_playwright_browser_snapshot()`获取当前页面状态
379
+ - 确认元素的正确ref和描述
380
+ - 检查页面是否完全加载
381
+
382
+ 2. **操作超时**
383
+ - 增加等待时间:`mcp_playwright_browser_wait_for(time=更长时间)`
384
+ - 检查网络连接和服务状态
385
+ - 验证Modal endpoints是否正常工作
386
+
387
+ 3. **文件路径错误**
388
+ - 确认文件实际存在于指定路径
389
+ - 使用绝对路径而非相对路径
390
+ - 检查文件权限
391
+
392
+ 4. **表单提交失败**
393
+ - 确认所有必填字段已填写
394
+ - 检查输入格式是否正确
395
+ - 验证服务器端错误日志
396
+
397
+ ### 调试技巧
398
+
399
+ 1. **逐步执行**
400
+ ```python
401
+ # 在每个关键步骤后添加快照
402
+ mcp_playwright_browser_snapshot()
403
+ ```
404
+
405
+ 2. **等待策略**
406
+ ```python
407
+ # 等待特定文本出现
408
+ mcp_playwright_browser_wait_for(text="Processing completed")
409
+
410
+ # 等待特定文本消失
411
+ mcp_playwright_browser_wait_for(textGone="Loading...")
412
+ ```
413
+
414
+ 3. **错误恢复**
415
+ ```python
416
+ # 如果操作失败,刷新页面重试
417
+ mcp_playwright_browser_navigate("http://localhost:8000")
418
+ ```
419
+
420
+ ## 测试数据
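+ 4. **稳健等待与重试(示意)**
+ 下面的片段把上文"操作超时"和"错误恢复"两条建议结合起来:先等待结果文本出现,若长时间无响应,则刷新页面、重新获取快照后再重试原操作。其中的文本和时长均为示例值,需按实际任务调整。
+ ```python
+ # 等待结果文本出现(文本为示例值)
+ mcp_playwright_browser_wait_for(text="Processing completed")
+
+ # 若长时间无响应:刷新页面,等待加载,再获取快照后重试原操作
+ mcp_playwright_browser_navigate("http://localhost:8000")
+ mcp_playwright_browser_wait_for(time=3)
+ mcp_playwright_browser_snapshot()
+ ```
+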
421
+
422
+ ### 推荐的测试URL
423
+
424
+ **Apple Podcast URLs:**
425
+ - 短音频: `https://podcasts.apple.com/cn/podcast/short-episode-id`
426
+ - 中等音频: `https://podcasts.apple.com/cn/podcast/all-ears-english-podcast/id751574016?i=1000712048662`
427
+ - 长音频: `https://podcasts.apple.com/cn/podcast/long-episode-id`
428
+
429
+ **XiaoYuZhou URLs:**
430
+ - 测试URL: `https://www.xiaoyuzhoufm.com/episode/test-episode-id`
431
+
432
+ ### 测试文件路径
433
+ - 音频文件: `downloads/*.mp3`
434
+ - 转录文件: `downloads/*.srt`, `downloads/*.txt`
435
+ - JSON文件: `downloads/*.json`
436
+
437
+ ---
438
+
439
+ **注意**: 在使用此指南时,需要根据实际的页面快照结果替换`[ref]`占位符为真实的元素引用。每次测试前建议先获取快照以确认当前页面状态。
tests/run_all_tests.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main test runner for all integration tests
3
+ 主测试运行器,用于执行所有集成测试
4
+ """
5
+
6
+ import pytest
7
+ import sys
8
+ import os
9
+ from pathlib import Path
10
+
11
+
12
+ def main():
13
+ """Run all integration tests in sequence"""
14
+
15
+ print("🚀 Starting Podcast MCP Gradio Integration Tests")
16
+ print("=" * 60)
17
+
18
+ # Get the tests directory
19
+ tests_dir = Path(__file__).parent
20
+
21
+ # Define test files in execution order
22
+ test_files = [
23
+ "test_01_podcast_download.py",
24
+ "test_02_remote_transcription.py",
25
+ "test_03_transcription_file_management.py",
26
+ "test_04_mp3_file_management.py",
27
+ "test_05_real_world_integration.py"
28
+ ]
29
+
30
+ # Test results tracking
31
+ results = {}
32
+ overall_success = True
33
+
34
+ for test_file in test_files:
35
+ test_path = tests_dir / test_file
36
+
37
+ print(f"\n📋 Running: {test_file}")
38
+ print("-" * 40)
39
+
40
+ if not test_path.exists():
41
+ print(f"❌ Test file not found: {test_path}")
42
+ results[test_file] = "NOT_FOUND"
43
+ overall_success = False
44
+ continue
45
+
46
+ # Run the test file
47
+ try:
48
+ exit_code = pytest.main([
49
+ str(test_path),
50
+ "-v", # verbose
51
+ "-s", # no capture (show print statements)
52
+ "--tb=short", # shorter traceback format
53
+ "--disable-warnings" # reduce noise
54
+ ])
55
+
56
+ if exit_code == 0:
57
+ results[test_file] = "PASSED"
58
+ print(f"✅ {test_file}: PASSED")
59
+ else:
60
+ results[test_file] = "FAILED"
61
+ overall_success = False
62
+ print(f"❌ {test_file}: FAILED (exit code: {exit_code})")
63
+
64
+ except Exception as e:
65
+ results[test_file] = f"EXCEPTION: {str(e)}"
66
+ overall_success = False
67
+ print(f"💥 {test_file}: EXCEPTION - {str(e)}")
68
+
69
+ # Print summary
70
+ print("\n" + "=" * 60)
71
+ print("📊 TEST EXECUTION SUMMARY")
72
+ print("=" * 60)
73
+
74
+ for test_file, result in results.items():
75
+ status_icon = "✅" if result == "PASSED" else "❌"
76
+ print(f"{status_icon} {test_file}: {result}")
77
+
78
+ print(f"\n🏁 Overall Result: {'✅ SUCCESS' if overall_success else '❌ FAILURES DETECTED'}")
79
+
80
+ if overall_success:
81
+ print("🎉 All integration tests completed successfully!")
82
+ print("✨ Your Podcast MCP Gradio application is ready for deployment!")
83
+ else:
84
+ print("⚠️ Some tests failed. Please review the output above.")
85
+ print("🔧 Check the specific test failures and fix any issues before deployment.")
86
+
87
+ return 0 if overall_success else 1
88
+
89
+
90
+ if __name__ == "__main__":
91
+ exit_code = main()
92
+ sys.exit(exit_code)
tests/test_01_podcast_download.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test podcast download functionality
3
+ 测试播客下载功能
4
+ """
5
+
6
+ import pytest
7
+ import asyncio
8
+ import os
9
+ from pathlib import Path
10
+ from typing import Dict, Any
11
+
12
+ from src.tools.download_tools import download_apple_podcast_tool, download_xyz_podcast_tool
13
+ from src.services.podcast_download_service import PodcastDownloadService
14
+ from src.interfaces.podcast_downloader import PodcastPlatform
15
+
16
+
17
+ class TestPodcastDownload:
18
+ """Test podcast download integration"""
19
+
20
+ @pytest.mark.asyncio
21
+ async def test_apple_podcast_info_extraction(self, podcast_download_service: PodcastDownloadService):
22
+ """Test Apple podcast information extraction"""
23
+ print("\n🍎 Testing Apple Podcast info extraction...")
24
+
25
+ # Use a known working Apple Podcast URL
26
+ test_url = "https://podcasts.apple.com/us/podcast/the-tim-ferriss-show/id863897795"
27
+
28
+ try:
29
+ # Test platform detection
30
+ can_handle = podcast_download_service.can_handle_url(test_url)
31
+ assert can_handle, "Should be able to handle Apple Podcast URL"
32
+
33
+ # Test podcast info extraction
34
+ podcast_info = await podcast_download_service.extract_podcast_info(test_url)
35
+
36
+ assert podcast_info is not None
37
+ assert podcast_info.platform == PodcastPlatform.APPLE
38
+ assert podcast_info.title is not None
39
+ assert len(podcast_info.title) > 0
40
+
41
+ print(f"✅ Successfully extracted Apple Podcast info:")
42
+ print(f" Title: {podcast_info.title}")
43
+ print(f" Platform: {podcast_info.platform}")
44
+ print(f" Episode ID: {podcast_info.episode_id}")
45
+
46
+ except Exception as e:
47
+ print(f"❌ Apple Podcast info extraction failed: {str(e)}")
48
+ pytest.skip(f"Apple Podcast info extraction failed: {str(e)}")
49
+
50
+ @pytest.mark.asyncio
51
+ async def test_xiaoyuzhou_podcast_info_extraction(self, podcast_download_service: PodcastDownloadService):
52
+ """Test XiaoYuZhou podcast information extraction"""
53
+ print("\n🎵 Testing XiaoYuZhou Podcast info extraction...")
54
+
55
+ # Use a test XYZ URL pattern
56
+ test_url = "https://www.xiaoyuzhoufm.com/episode/example123"
57
+
58
+ try:
59
+ # Test platform detection
60
+ can_handle = podcast_download_service.can_handle_url(test_url)
61
+ assert can_handle, "Should be able to handle XiaoYuZhou Podcast URL"
62
+
63
+ # Test podcast info extraction (might fail due to network/content)
64
+ try:
65
+ podcast_info = await podcast_download_service.extract_podcast_info(test_url)
66
+
67
+ assert podcast_info is not None
68
+ assert podcast_info.platform == PodcastPlatform.XIAOYUZHOU
69
+
70
+ print(f"✅ Successfully extracted XiaoYuZhou Podcast info:")
71
+ print(f" Title: {podcast_info.title}")
72
+ print(f" Platform: {podcast_info.platform}")
73
+ print(f" Episode ID: {podcast_info.episode_id}")
74
+
75
+ except Exception as e:
76
+ print(f"⚠️ XiaoYuZhou info extraction failed (expected for test URL): {str(e)}")
77
+
78
+ except Exception as e:
79
+ print(f"❌ XiaoYuZhou platform detection failed: {str(e)}")
80
+
81
+ @pytest.mark.asyncio
82
+ async def test_apple_podcast_download_simulation(self, temp_dir: str):
83
+ """Test Apple podcast download simulation (without actual download)"""
84
+ print("\n🍎 Testing Apple Podcast download simulation...")
85
+
86
+ # Use a known Apple Podcast URL for testing the download flow
87
+ test_url = "https://podcasts.apple.com/us/podcast/the-tim-ferriss-show/id863897795"
88
+
89
+ try:
90
+ # Test the download tool interface
91
+ result = await download_apple_podcast_tool(test_url)
92
+
93
+ print(f"📋 Download tool result:")
94
+ print(f" Status: {result.get('status', 'unknown')}")
95
+ print(f" Original URL: {result.get('original_url', 'N/A')}")
96
+
97
+ if result.get("status") == "success":
98
+ print(f" Audio file path: {result.get('audio_file_path', 'N/A')}")
99
+ print("✅ Apple Podcast download simulation successful")
100
+ else:
101
+ print(f" Error: {result.get('error_message', 'Unknown error')}")
102
+ print("⚠️ Apple Podcast download simulation failed (might be network-related)")
103
+
104
+ except Exception as e:
105
+ print(f"❌ Apple Podcast download test failed: {str(e)}")
106
+ pytest.skip(f"Apple Podcast download test failed: {str(e)}")
107
+
108
+ @pytest.mark.asyncio
109
+ async def test_xiaoyuzhou_podcast_download_simulation(self, temp_dir: str):
110
+ """Test XiaoYuZhou podcast download simulation"""
111
+ print("\n🎵 Testing XiaoYuZhou Podcast download simulation...")
112
+
113
+ # Use a test XYZ URL
114
+ test_url = "https://www.xiaoyuzhoufm.com/episode/example123"
115
+
116
+ try:
117
+ # Test the download tool interface
118
+ result = await download_xyz_podcast_tool(test_url)
119
+
120
+ print(f"📋 Download tool result:")
121
+ print(f" Status: {result.get('status', 'unknown')}")
122
+ print(f" Original URL: {result.get('original_url', 'N/A')}")
123
+
124
+ if result.get("status") == "success":
125
+ print(f" Audio file path: {result.get('audio_file_path', 'N/A')}")
126
+ print("✅ XiaoYuZhou Podcast download simulation successful")
127
+ else:
128
+ print(f" Error: {result.get('error_message', 'Unknown error')}")
129
+ print("⚠️ XiaoYuZhou Podcast download simulation failed (expected for test URL)")
130
+
131
+ except Exception as e:
132
+ print(f"❌ XiaoYuZhou Podcast download test failed: {str(e)}")
133
+ # This is expected for test URLs, so we don't fail the test
134
+
135
+ @pytest.mark.asyncio
136
+ async def test_supported_platforms(self, podcast_download_service: PodcastDownloadService):
137
+ """Test supported platforms detection"""
138
+ print("\n🌐 Testing supported platforms...")
139
+
140
+ platforms = podcast_download_service.get_supported_platforms()
141
+
142
+ assert PodcastPlatform.APPLE in platforms
143
+ assert PodcastPlatform.XIAOYUZHOU in platforms
144
+
145
+ print(f"✅ Supported platforms: {[p.value for p in platforms]}")
146
+
147
+ @pytest.mark.asyncio
148
+ async def test_url_validation(self, podcast_download_service: PodcastDownloadService):
149
+ """Test URL validation"""
150
+ print("\n🔗 Testing URL validation...")
151
+
152
+ test_cases = [
153
+ ("https://podcasts.apple.com/us/podcast/test", True, "Apple Podcast URL"),
154
+ ("https://www.xiaoyuzhoufm.com/episode/test", True, "XiaoYuZhou URL"),
155
+ ("https://example.com/podcast", False, "Generic URL"),
156
+ ("invalid-url", False, "Invalid URL"),
157
+ ]
158
+
159
+ for url, expected, description in test_cases:
160
+ result = podcast_download_service.can_handle_url(url)
161
+ assert result == expected, f"URL validation failed for {description}: {url}"
162
+ print(f"✅ {description}: {'✓' if result else '✗'}")
163
+
164
+ def test_download_tools_initialization(self):
165
+ """Test download tools initialization"""
166
+ print("\n🔧 Testing download tools initialization...")
167
+
168
+ # Test that the tools can be imported
169
+ assert download_apple_podcast_tool is not None
170
+ assert download_xyz_podcast_tool is not None
171
+
172
+ print("✅ Download tools initialized successfully")
173
+
174
+
175
+ if __name__ == "__main__":
176
+ # Run tests with verbose output
177
+ pytest.main([__file__, "-v", "-s"])
tests/test_02_remote_transcription.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test remote GPU transcription functionality
3
+ 测试远程GPU转录功能
4
+ """
5
+
6
+ import pytest
7
+ import asyncio
8
+ import os
9
+ import tempfile
10
+ from pathlib import Path
11
+ from typing import Dict, Any
12
+
13
+ from src.tools.transcription_tools import transcribe_audio_file_tool
14
+ from src.services.audio_processing_service import AudioProcessingService
15
+
16
+
17
+ class TestRemoteTranscription:
18
+ """Test remote GPU transcription integration"""
19
+
20
+ def test_transcription_tools_initialization(self):
21
+ """Test transcription tools initialization"""
22
+ print("\n🔧 Testing transcription tools initialization...")
23
+
24
+ # Test that the tool can be imported
25
+ assert transcribe_audio_file_tool is not None
26
+
27
+ print("✅ Transcription tools initialized successfully")
28
+
29
+ @pytest.mark.asyncio
30
+ async def test_create_sample_audio_file(self, temp_dir: str):
31
+ """Test creating a sample audio file for transcription testing"""
32
+ print("\n🎵 Creating sample audio file for testing...")
33
+
34
+ import ffmpeg
35
+
36
+ # Create a short sample audio file
37
+ sample_file = os.path.join(temp_dir, "sample_audio.mp3")
38
+
39
+ try:
40
+ # Generate a short sine wave audio for testing
41
+ (
42
+ ffmpeg
43
+ .input('sine=frequency=440:duration=5', f='lavfi')
44
+ .output(sample_file, acodec='mp3', ar=16000)
45
+ .overwrite_output()
46
+ .run(quiet=True)
47
+ )
48
+
49
+ assert os.path.exists(sample_file)
50
+ assert os.path.getsize(sample_file) > 0
51
+
52
+ print(f"✅ Sample audio file created: {sample_file}")
53
+ print(f" File size: {os.path.getsize(sample_file)} bytes")
54
+
55
+ return sample_file
56
+
57
+ except Exception as e:
58
+ print(f"❌ Failed to create sample audio file: {str(e)}")
59
+ pytest.skip(f"Failed to create sample audio file: {str(e)}")
60
+
61
+ @pytest.mark.asyncio
62
+ async def test_remote_transcription_endpoint_connectivity(self):
63
+ """Test connectivity to remote transcription endpoint"""
64
+ print("\n🌐 Testing remote transcription endpoint connectivity...")
65
+
66
+ import aiohttp
67
+ import json
68
+
69
+ # Read endpoint config
70
+ try:
71
+ with open("endpoint_config.json", "r") as f:
72
+ endpoint_config = json.load(f)
73
+
74
+ endpoint_url = endpoint_config["transcribe_audio"]
75
+
76
+ async with aiohttp.ClientSession() as session:
77
+ # Test with a simple HEAD request to check if endpoint is reachable
78
+ async with session.head(endpoint_url, timeout=10) as response:
79
+ print(f"✅ Endpoint connectivity test:")
80
+ print(f" URL: {endpoint_url}")
81
+ print(f" Status: {response.status}")
82
+ print(f" Headers: {dict(response.headers)}")
83
+
84
+ # We expect either 200 (OK) or 405 (Method Not Allowed) for HEAD requests
85
+ assert response.status in [200, 405, 404], f"Unexpected status: {response.status}"
86
+
87
+ except asyncio.TimeoutError:
88
+ print(f"⚠️ Endpoint connectivity timeout (expected if Modal is sleeping)")
89
+ pytest.skip("Endpoint connectivity timeout")
90
+ except Exception as e:
91
+ print(f"⚠️ Endpoint connectivity test failed: {str(e)}")
92
+ print(" This might be expected if Modal endpoint is not running")
93
+
94
+ @pytest.mark.asyncio
95
+ async def test_transcription_tool_interface(self, temp_dir: str):
96
+ """Test transcription tool interface with sample audio"""
97
+ print("\n🎤 Testing transcription tool interface...")
98
+
99
+ # Create a sample audio file first
100
+ sample_file = await self.test_create_sample_audio_file(temp_dir)
101
+
102
+ try:
103
+ # Test the transcription tool
104
+ result = await transcribe_audio_file_tool(
105
+ audio_file_path=sample_file,
106
+ model_size="base",
107
+ language="en",
108
+ output_format="srt",
109
+ enable_speaker_diarization=False
110
+ )
111
+
112
+ print(f"📋 Transcription tool result:")
113
+ print(f" Status: {result.get('processing_status', 'unknown')}")
114
+ print(f" Audio file: {result.get('audio_file', 'N/A')}")
115
+ print(f" Model used: {result.get('model_used', 'N/A')}")
116
+ print(f" Duration: {result.get('audio_duration', 0):.2f}s")
117
+
118
+ if result.get("processing_status") == "success":
119
+ print(f" TXT file: {result.get('txt_file_path', 'N/A')}")
120
+ print(f" SRT file: {result.get('srt_file_path', 'N/A')}")
121
+ print(f" Segments: {result.get('segment_count', 0)}")
122
+ print("✅ Transcription tool interface test successful")
123
+
124
+ # Verify output files exist
125
+ if result.get('txt_file_path'):
126
+ assert os.path.exists(result['txt_file_path'])
127
+ if result.get('srt_file_path'):
128
+ assert os.path.exists(result['srt_file_path'])
129
+
130
+ else:
131
+ print(f" Error: {result.get('error_message', 'Unknown error')}")
132
+ print("⚠️ Transcription failed (might be due to remote endpoint)")
133
+
134
+ except Exception as e:
135
+ print(f"❌ Transcription tool test failed: {str(e)}")
136
+ print(" This might be expected if remote endpoint is not available")
137
+
138
+ @pytest.mark.asyncio
139
+ async def test_transcription_with_speaker_diarization(self, temp_dir: str):
140
+ """Test transcription with speaker diarization enabled"""
141
+ print("\n👥 Testing transcription with speaker diarization...")
142
+
143
+ # Create a sample audio file
144
+ sample_file = await self.test_create_sample_audio_file(temp_dir)
145
+
146
+ try:
147
+ # Test transcription with speaker diarization
148
+ result = await transcribe_audio_file_tool(
149
+ audio_file_path=sample_file,
150
+ model_size="base",
151
+ language="auto",
152
+ output_format="srt",
153
+ enable_speaker_diarization=True
154
+ )
155
+
156
+ print(f"📋 Speaker diarization result:")
157
+ print(f" Status: {result.get('processing_status', 'unknown')}")
158
+ print(f" Speaker diarization enabled: {result.get('speaker_diarization_enabled', False)}")
159
+ print(f" Global speaker count: {result.get('global_speaker_count', 0)}")
160
+
161
+ if result.get("processing_status") == "success":
162
+ speaker_summary = result.get('speaker_summary', {})
163
+ print(f" Speaker summary: {speaker_summary}")
164
+ print("✅ Speaker diarization test successful")
165
+ else:
166
+ print(f" Error: {result.get('error_message', 'Unknown error')}")
167
+ print("⚠️ Speaker diarization failed (might be due to remote endpoint or HF token)")
168
+
169
+ except Exception as e:
170
+ print(f"❌ Speaker diarization test failed: {str(e)}")
171
+ print(" This might be expected if HF token is not configured or endpoint unavailable")
172
+
173
+ @pytest.mark.asyncio
174
+ async def test_different_transcription_models(self, temp_dir: str):
175
+ """Test transcription with different models"""
176
+ print("\n🧠 Testing different transcription models...")
177
+
178
+ sample_file = await self.test_create_sample_audio_file(temp_dir)
179
+
180
+ models_to_test = ["tiny", "base", "small"]
181
+
182
+ for model in models_to_test:
183
+ print(f"\n Testing model: {model}")
184
+ try:
185
+ result = await transcribe_audio_file_tool(
186
+ audio_file_path=sample_file,
187
+ model_size=model,
188
+ language="auto",
189
+ output_format="txt",
190
+ enable_speaker_diarization=False
191
+ )
192
+
193
+ if result.get("processing_status") == "success":
194
+ print(f" ✅ {model} model: Success")
195
+ print(f" Segments: {result.get('segment_count', 0)}")
196
+ print(f" Duration: {result.get('audio_duration', 0):.2f}s")
197
+ else:
198
+ print(f" ⚠️ {model} model: Failed - {result.get('error_message', 'Unknown')}")
199
+
200
+ except Exception as e:
201
+ print(f" ❌ {model} model: Exception - {str(e)}")
202
+
203
+ @pytest.mark.asyncio
204
+ async def test_transcription_output_formats(self, temp_dir: str):
205
+ """Test different transcription output formats"""
206
+ print("\n📄 Testing different output formats...")
207
+
208
+ sample_file = await self.test_create_sample_audio_file(temp_dir)
209
+
210
+ formats_to_test = ["txt", "srt", "json"]
211
+
212
+ for format_type in formats_to_test:
213
+ print(f"\n Testing format: {format_type}")
214
+ try:
215
+ result = await transcribe_audio_file_tool(
216
+ audio_file_path=sample_file,
217
+ model_size="base",
218
+ language="auto",
219
+ output_format=format_type,
220
+ enable_speaker_diarization=False
221
+ )
222
+
223
+ if result.get("processing_status") == "success":
224
+ print(f" ✅ {format_type} format: Success")
225
+
226
+ # Check for format-specific outputs
227
+ if format_type == "txt" and result.get('txt_file_path'):
228
+ assert os.path.exists(result['txt_file_path'])
229
+ elif format_type == "srt" and result.get('srt_file_path'):
230
+ assert os.path.exists(result['srt_file_path'])
231
+
232
+ else:
233
+ print(f" ⚠️ {format_type} format: Failed - {result.get('error_message', 'Unknown')}")
234
+
235
+ except Exception as e:
236
+ print(f" ❌ {format_type} format: Exception - {str(e)}")
237
+
238
+
239
+ if __name__ == "__main__":
240
+ # Run tests with verbose output
241
+ pytest.main([__file__, "-v", "-s"])