Spaces:
Build error
Build error
Adds files 3
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- annotations_3/-1gCG8m1SHU_filtered.json +1 -0
- annotations_3/-3mo5CqjvWs_filtered.json +1 -0
- annotations_3/-3ywc_7_IE8_filtered.json +1 -0
- annotations_3/-4GsCEopbd4_filtered.json +1 -0
- annotations_3/-5798-VRVYA_filtered.json +1 -0
- annotations_3/-5Pku48YPFo_filtered.json +1 -0
- annotations_3/-5Rohhkg-7k_filtered.json +1 -0
- annotations_3/-6fuDrAmhNc_filtered.json +1 -0
- annotations_3/-7-C6lSAfOs_filtered.json +1 -0
- annotations_3/-85ubSkzSWg_filtered.json +1 -0
- annotations_3/-98BSUhcZtY_filtered.json +1 -0
- annotations_3/-9IgLueodZA_filtered.json +1 -0
- annotations_3/-A-fBbIXbPo_filtered.json +1 -0
- annotations_3/-A9rFt7ITy4_filtered.json +1 -0
- annotations_3/-AlTccRsRsk_filtered.json +1 -0
- annotations_3/-ArVBL8EgKU_filtered.json +1 -0
- annotations_3/-BUI1BdZz94_filtered.json +1 -0
- annotations_3/-BiLCJxpqi4_filtered.json +1 -0
- annotations_3/-C7Fcg58rZU_filtered.json +1 -0
- annotations_3/-CgUGjRFukQ_filtered.json +1 -0
- annotations_3/-CzO7z1dZ1A_filtered.json +1 -0
- annotations_3/-EZ9f-GgWVQ_filtered.json +1 -0
- annotations_3/-EuO6OFypLo_filtered.json +1 -0
- annotations_3/-F1-sTyGvwA_filtered.json +1 -0
- annotations_3/-IZv4Jfl6ZQ_filtered.json +1 -0
- annotations_3/-Ixi48TxkaA_filtered.json +1 -0
- annotations_3/-J_tiDK1tEA_filtered.json +1 -0
- annotations_3/-JbSkxI2DrY_filtered.json +1 -0
- annotations_3/-KOG8edoC00_filtered.json +1 -0
- annotations_3/-L3D0BL9ieA_filtered.json +1 -0
- annotations_3/-L9EZRMgmXM_filtered.json +1 -0
- annotations_3/-LjxKR0q7Yo_filtered.json +1 -0
- annotations_3/-MEOfLvOuas_filtered.json +1 -0
- annotations_3/-MQNNzaEt2s_filtered.json +1 -0
- annotations_3/-Mmq6Kmd75I_filtered.json +1 -0
- annotations_3/-NW-w5Z_vpk_filtered.json +1 -0
- annotations_3/-NgmhVRFApQ_filtered.json +1 -0
- annotations_3/-NtpPdMGluE_filtered.json +1 -0
- annotations_3/-QB2gXiOAKc_filtered.json +1 -0
- annotations_3/-RBjiJto4hc_filtered.json +1 -0
- annotations_3/-RJ6USD2nEU_filtered.json +1 -0
- annotations_3/-RhmDUj_GJs_filtered.json +1 -0
- annotations_3/-SKfkvvtqN0_filtered.json +1 -0
- annotations_3/-T16rxR-nCo_filtered.json +1 -0
- annotations_3/-TPRG6Yqzf4_filtered.json +1 -0
- annotations_3/-TWsZukTS4Q_filtered.json +1 -0
- annotations_3/-TogGxzlfhM_filtered.json +1 -0
- annotations_3/-UAElWXbk3I_filtered.json +1 -0
- annotations_3/-UAV4O9oZy0_filtered.json +1 -0
- annotations_3/-UJ9K8lMxPA_filtered.json +1 -0
annotations_3/-1gCG8m1SHU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 21.51], [23.0, 29.44], [31.0, 32.14], [38.0, 40.2], [43.0, 44.31], [47.0, 46.79], [48.0, 50.19], [54.0, 53.77], [59.0, 59.85], [61.0, 61.05], [62.0, 63.46], [64.0, 64.57], [66.0, 66.75], [70.0, 70.95], [72.0, 73.08], [74.0, 74.8], [77.0, 77.3], [78.0, 79.47], [83.0, 84.65], [85.0, 96.15], [99.0, 113.75], [115.0, 123.8], [125.0, 126.12]], "keep_status": [true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false], "silence_prob": [40.95, 98.73, 0.0, 99.73, 0.0, 0.0, 99.97, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 30.99, 31.55, 54.63, 0.0], "audiomae_on_audioset": [[["music", 35.95], ["hum", 13.7], ["noise", 5.28]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 42.38], ["theremin", 8.86], ["hum", 4.46]], [["music", 47.45], ["hum", 18.65], ["throbbing", 9.19]], null, null], "duration": [17.51, 6.44, 1.14, 2.2, 1.31, -0.21, 2.19, -0.23, 0.85, 0.05, 1.46, 0.57, 0.75, 0.95, 1.08, 0.8, 0.3, 1.47, 1.65, 11.15, 14.75, 8.8, 1.12]}
|
annotations_3/-3mo5CqjvWs_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.22], [7.0, 6.89], [9.0, 8.85], [25.0, 25.24], [28.0, 27.68], [31.0, 31.48]], "keep_status": [false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null], "duration": [0.22, -0.11, -0.15, 0.24, -0.32, 0.48]}
|
annotations_3/-3ywc_7_IE8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 12.68], [19.0, 37.59], [41.0, 41.98], [49.0, 63.66], [67.0, 114.02], [115.0, 123.55], [126.0, 126.84], [134.0, 134.45], [136.0, 137.25], [139.0, 138.96], [144.0, 144.83], [145.0, 145.47], [156.0, 156.44], [157.0, 157.18], [158.0, 158.57], [160.0, 161.0], [168.0, 168.49], [171.0, 173.43], [174.0, 176.01]], "keep_status": [true, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [34.31, 31.05, 0.0, 31.15, 0.0, 32.53, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 96.04, 99.9], "audiomae_on_audioset": [[["cacophony", 9.36], ["fly, housefly", 5.02], ["cheering", 4.37]], [["fly, housefly", 54.14], ["insect", 15.95], ["bee, wasp, etc.", 11.82]], null, [["bee, wasp, etc.", 42.33], ["fly, housefly", 26.81], ["insect", 13.73]], null, [["speech", 21.02], ["whack, thwack", 12.44], ["livestock, farm animals, working animals", 5.97]], null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [2.68, 18.59, 0.98, 14.66, 47.02, 8.55, 0.84, 0.45, 1.25, -0.04, 0.83, 0.47, 0.44, 0.18, 0.57, 1.0, 0.49, 2.43, 2.01]}
|
annotations_3/-4GsCEopbd4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[32.0, 52.84], [53.0, 63.14], [64.0, 81.58], [83.0, 94.9], [97.0, 100.3], [102.0, 103.0], [104.0, 106.0], [107.0, 108.97], [110.0, 120.66], [123.0, 130.13], [132.0, 133.24], [137.0, 137.56], [139.0, 139.11], [139.0, 143.53], [144.0, 171.59]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, true, true], "silence_prob": [69.47, 98.51, 99.96, 100.0, 99.16, 0.0, 99.87, 0.0, 37.81, 35.33, 0.0, 0.0, 0.0, 32.88, 30.79], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, [["speech", 57.7], ["music", 12.4], ["theremin", 8.34]], [["speech", 61.66], ["explosion", 7.3], ["firecracker", 6.96]], null, null, null, [["speech", 42.44], ["fart", 13.31], ["explosion", 10.21]], [["speech", 52.19], ["fly, housefly", 9.28], ["music", 8.03]]], "duration": [20.84, 10.14, 17.58, 11.9, 3.3, 1.0, 2.0, 1.97, 10.66, 7.13, 1.24, 0.56, 0.11, 4.53, 27.59]}
|
annotations_3/-5798-VRVYA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.92], [5.0, 5.26], [6.0, 6.29], [9.0, 11.03], [11.0, 11.84], [14.0, 14.05], [17.0, 18.25], [19.0, 20.36], [21.0, 21.34], [23.0, 24.06], [27.0, 27.14], [28.0, 28.86], [30.0, 30.65], [32.0, 33.98], [37.0, 37.86], [39.0, 39.02], [40.0, 40.34], [44.0, 44.91], [46.0, 47.51], [48.0, 48.02], [49.0, 50.72], [52.0, 52.76], [54.0, 54.77], [56.0, 57.01], [58.0, 59.48], [60.0, 60.54], [62.0, 66.66], [68.0, 72.22], [73.0, 74.19], [75.0, 76.18], [77.0, 79.98], [83.0, 84.57], [87.0, 90.64], [92.0, 97.43], [98.0, 101.38], [102.0, 106.74], [107.0, 110.03], [112.0, 146.74]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 84.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 68.41, 36.03, 0.0, 0.0, 73.97, 0.0, 83.52, 48.23, 85.54, 51.5, 43.98, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["burping, eructation", 16.08], ["domestic animals, pets", 10.18], ["hum", 10.12]], null, null, null, null, null, [["hum", 47.0], ["mains hum", 28.83], ["speech", 6.35]], null, null, [["speech", 62.57], ["hum", 6.04], ["mains hum", 3.86]], null], "duration": [0.92, 0.26, 0.29, 2.03, 0.84, 0.05, 1.25, 1.36, 0.34, 1.06, 0.14, 0.86, 0.65, 1.98, 0.86, 0.02, 0.34, 0.91, 1.51, 0.02, 1.72, 0.76, 0.77, 1.01, 1.48, 0.54, 4.66, 4.22, 1.19, 1.18, 2.98, 1.57, 3.64, 5.43, 3.38, 4.74, 3.03, 34.74]}
|
annotations_3/-5Pku48YPFo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 8.4], [13.0, 17.25], [28.0, 31.53], [35.0, 40.85], [43.0, 93.6], [97.0, 102.05], [102.0, 102.78], [103.0, 104.14], [106.0, 109.41], [110.0, 118.64]], "keep_status": [false, true, true, false, false, true, false, false, false, false], "silence_prob": [0.0, 34.91, 38.11, 30.81, 0.0, 32.26, 0.0, 0.0, 35.07, 75.72], "audiomae_on_audioset": [null, [["speech", 22.11], ["screaming", 17.24], ["thunk", 10.79]], [["whale vocalization", 19.7], ["dog", 10.82], ["music", 9.72]], [["music", 69.01], ["musical instrument", 4.65], ["domestic animals, pets", 2.95]], null, [["speech", 37.14], ["vehicle", 17.56], ["radio", 8.43]], null, null, [["speech", 51.87], ["vehicle", 15.02], ["hum", 3.86]], null], "duration": [0.4, 4.25, 3.53, 5.85, 50.6, 5.05, 0.78, 1.14, 3.41, 8.64]}
|
annotations_3/-5Rohhkg-7k_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 7.94], [8.0, 9.31], [10.0, 11.06], [12.0, 12.31], [14.0, 14.39], [15.0, 15.69], [16.0, 16.92], [19.0, 19.33], [20.0, 20.76], [24.0, 25.17], [27.0, 29.67], [33.0, 33.0], [37.0, 37.84], [39.0, 39.34], [40.0, 41.3], [42.0, 44.2], [46.0, 46.41], [49.0, 48.78], [51.0, 52.12], [53.0, 53.59], [56.0, 57.35], [59.0, 60.12], [61.0, 61.94], [62.0, 62.77], [64.0, 65.53], [67.0, 68.99], [72.0, 72.82], [75.0, 75.69], [77.0, 77.3], [79.0, 80.42], [84.0, 84.3], [85.0, 85.9], [87.0, 87.59], [89.0, 93.24], [95.0, 96.13], [97.0, 97.95], [100.0, 103.76]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 83.7, 0.0, 0.0, 0.0, 0.0, 69.47, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 90.25, 0.0, 0.0, 98.36], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.94, 1.31, 1.06, 0.31, 0.39, 0.69, 0.92, 0.33, 0.76, 1.17, 2.67, 0.0, 0.84, 0.34, 1.3, 2.2, 0.41, -0.22, 1.12, 0.59, 1.35, 1.12, 0.94, 0.77, 1.53, 1.99, 0.82, 0.69, 0.3, 1.42, 0.3, 0.9, 0.59, 4.24, 1.13, 0.95, 3.76]}
|
annotations_3/-6fuDrAmhNc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[24.0, 26.13], [27.0, 28.88], [32.0, 32.17], [33.0, 38.03], [38.0, 40.19], [41.0, 44.02], [45.0, 46.45], [47.0, 47.63], [49.0, 49.91], [54.0, 54.67], [55.0, 56.32], [58.0, 58.46], [61.0, 61.74], [62.0, 63.98], [65.0, 65.4], [69.0, 69.99], [73.0, 73.89], [75.0, 76.54], [78.0, 84.0], [88.0, 97.07], [98.0, 98.78], [103.0, 155.43], [159.0, 179.0]], "keep_status": [false, false, false, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "silence_prob": [37.01, 0.0, 0.0, 43.45, 32.83, 35.91, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 33.32, 31.62, 0.0, 0.0, 34.35], "audiomae_on_audioset": [[["hum", 40.54], ["mains hum", 26.25], ["speech", 5.13]], null, null, [["hum", 22.65], ["mains hum", 17.99], ["music", 17.63]], [["music", 31.89], ["speech", 15.71], ["cattle, bovinae", 7.73]], [["music", 29.62], ["foghorn", 13.93], ["speech", 6.49]], null, null, null, null, null, null, null, null, null, null, null, null, [["music", 60.31], ["theremin", 11.38], ["hum", 2.41]], [["music", 53.67], ["brass instrument", 12.72], ["musical instrument", 8.37]], null, null, [["music", 40.6], ["didgeridoo", 12.31], ["gong", 10.97]]], "duration": [2.13, 1.88, 0.17, 5.03, 2.19, 3.02, 1.45, 0.63, 0.91, 0.67, 1.32, 0.46, 0.74, 1.98, 0.4, 0.99, 0.89, 1.54, 6.0, 9.07, 0.78, 52.43, 20.0]}
|
annotations_3/-7-C6lSAfOs_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[30.0, 76.38], [77.0, 76.99], [79.0, 79.69], [80.0, 81.9], [83.0, 83.78], [85.0, 103.96], [107.0, 107.38], [112.0, 113.68]], "keep_status": [false, false, false, false, false, true, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 34.04, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, [["music", 58.1], ["synthesizer", 6.42], ["musical instrument", 3.74]], null, null], "duration": [46.38, -0.01, 0.69, 1.9, 0.78, 18.96, 0.38, 1.68]}
|
annotations_3/-85ubSkzSWg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 7.99], [15.0, 14.64], [16.0, 20.71], [22.0, 22.96], [23.0, 23.62], [24.0, 24.85], [25.0, 27.18], [28.0, 30.72], [32.0, 33.2], [33.0, 34.4], [36.0, 36.54], [38.0, 40.76], [42.0, 42.84], [44.0, 45.91], [48.0, 50.75], [56.0, 66.06]], "keep_status": [false, false, true, false, false, false, false, true, false, false, false, true, false, false, true, true], "silence_prob": [0.0, 0.0, 38.46, 0.0, 0.0, 0.0, 35.38, 34.84, 0.0, 0.0, 0.0, 33.42, 0.0, 0.0, 42.11, 33.01], "audiomae_on_audioset": [null, null, [["music", 56.86], ["musical instrument", 5.27], ["synthesizer", 3.65]], null, null, null, [["speech", 53.41], ["music", 20.83], ["radio", 4.67]], [["music", 21.99], ["mains hum", 19.4], ["hum", 13.14]], null, null, null, [["music", 32.01], ["speech", 25.74], ["radio", 9.71]], null, null, [["hum", 30.98], ["speech", 20.05], ["music", 8.82]], [["hum", 22.59], ["sidetone", 20.95], ["mains hum", 18.14]]], "duration": [1.99, -0.36, 4.71, 0.96, 0.62, 0.85, 2.18, 2.72, 1.2, 1.4, 0.54, 2.76, 0.84, 1.91, 2.75, 10.06]}
|
annotations_3/-98BSUhcZtY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[17.0, 16.95], [23.0, 24.24], [33.0, 34.2], [39.0, 41.27], [43.0, 44.42], [47.0, 47.7]], "keep_status": [false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 42.26, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, [["hum", 47.69], ["mains hum", 18.88], ["throbbing", 10.53]], null, null], "duration": [-0.05, 1.24, 1.2, 2.27, 1.42, 0.7]}
|
annotations_3/-9IgLueodZA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.73], [7.0, 9.44], [10.0, 11.7], [15.0, 15.26], [24.0, 24.09], [25.0, 27.26], [28.0, 33.27], [37.0, 37.32], [38.0, 39.36], [41.0, 47.78], [50.0, 53.38]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 68.28, 0.0, 0.0, 0.0, 65.44, 69.74, 0.0, 0.0, 100.0, 92.15], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null], "duration": [0.73, 2.44, 1.7, 0.26, 0.09, 2.26, 5.27, 0.32, 1.36, 6.78, 3.38]}
|
annotations_3/-A-fBbIXbPo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 9.78], [13.0, 14.96], [16.0, 20.09], [22.0, 24.02], [25.0, 26.28], [28.0, 33.52], [36.0, 40.32], [41.0, 52.84], [54.0, 59.63], [61.0, 71.81], [73.0, 72.79]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [92.48, 0.0, 97.73, 100.0, 0.0, 99.99, 99.93, 96.42, 94.07, 74.92, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null], "duration": [5.78, 1.96, 4.09, 2.02, 1.28, 5.52, 4.32, 11.84, 5.63, 10.81, -0.21]}
|
annotations_3/-A9rFt7ITy4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[34.0, 33.86], [36.0, 36.41], [38.0, 39.02], [42.0, 43.14], [45.0, 45.82], [46.0, 47.26], [48.0, 48.44], [50.0, 50.4], [51.0, 51.24], [53.0, 52.89], [58.0, 58.35], [62.0, 62.51]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null], "duration": [-0.14, 0.41, 1.02, 1.14, 0.82, 1.26, 0.44, 0.4, 0.24, -0.11, 0.35, 0.51]}
|
annotations_3/-AlTccRsRsk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.83], [8.0, 8.26], [10.0, 10.29], [12.0, 13.53], [14.0, 16.06], [19.0, 28.41], [30.0, 32.31], [40.0, 46.38], [51.0, 52.98], [56.0, 57.01], [60.0, 59.75], [67.0, 67.98], [74.0, 74.83], [76.0, 77.72], [81.0, 81.9], [84.0, 86.1], [89.0, 89.18], [90.0, 90.98], [91.0, 91.2]], "keep_status": [false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 30.32, 30.62, 46.02, 55.89, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 86.27, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, [["music", 57.87], ["didgeridoo", 10.98], ["speech", 8.96]], [["music", 65.97], ["synthesizer", 13.99], ["musical instrument", 5.17]], [["music", 18.28], ["didgeridoo", 6.8], ["hum", 6.12]], null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.83, 0.26, 0.29, 1.53, 2.06, 9.41, 2.31, 6.38, 1.98, 1.01, -0.25, 0.98, 0.83, 1.72, 0.9, 2.1, 0.18, 0.98, 0.2]}
|
annotations_3/-ArVBL8EgKU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[28.0, 53.81], [59.0, 81.4], [85.0, 107.87], [111.0, 110.78], [112.0, 114.15], [116.0, 116.73]], "keep_status": [false, true, true, false, true, false], "silence_prob": [31.32, 30.02, 30.8, 0.0, 31.94, 0.0], "audiomae_on_audioset": [[["music", 67.23], ["throbbing", 9.77], ["hum", 3.68]], [["music", 37.68], ["buzz", 4.27], ["speech", 3.73]], [["music", 56.65], ["theremin", 5.83], ["didgeridoo", 3.89]], null, [["music", 20.95], ["mains hum", 11.66], ["hum", 11.38]], null], "duration": [25.81, 22.4, 22.87, -0.22, 2.15, 0.73]}
|
annotations_3/-BUI1BdZz94_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.51], [5.0, 4.78], [7.0, 9.26], [11.0, 14.44], [16.0, 18.77], [19.0, 22.42], [23.0, 27.73], [29.0, 31.16], [32.0, 43.9], [45.0, 46.33], [47.0, 49.96], [50.0, 50.03], [50.0, 50.23], [51.0, 52.25], [61.0, 61.86], [66.0, 67.61], [70.0, 72.55], [74.0, 74.48], [77.0, 78.19], [78.0, 78.22], [78.0, 80.06], [95.0, 96.23], [98.0, 98.0], [106.0, 109.49], [110.0, 113.54], [116.0, 123.89]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, true, true, true], "silence_prob": [0.0, 0.0, 52.74, 50.31, 42.62, 57.01, 68.15, 79.76, 61.08, 0.0, 53.34, 0.0, 0.0, 0.0, 0.0, 0.0, 46.33, 0.0, 0.0, 0.0, 33.8, 0.0, 0.0, 39.88, 43.82, 42.02], "audiomae_on_audioset": [null, null, null, null, [["sidetone", 77.86], ["speech", 10.47], ["hum", 2.52]], null, null, null, null, null, null, null, null, null, null, null, [["speech", 30.93], ["music", 21.64], ["didgeridoo", 9.88]], null, null, null, [["speech", 81.71], ["inside, small room", 2.28], ["hum", 1.83]], null, null, [["music", 43.33], ["didgeridoo", 5.27], ["mains hum", 5.1]], [["music", 35.91], ["didgeridoo", 14.97], ["whale vocalization", 6.16]], [["music", 34.09], ["hum", 13.27], ["gong", 12.95]]], "duration": [0.51, -0.22, 2.26, 3.44, 2.77, 3.42, 4.73, 2.16, 11.9, 1.33, 2.96, 0.03, 0.23, 1.25, 0.86, 1.61, 2.55, 0.48, 1.19, 0.22, 2.06, 1.23, 0.0, 3.49, 3.54, 7.89]}
|
annotations_3/-BiLCJxpqi4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[54.0, 60.37], [62.0, 67.19], [68.0, 69.58], [70.0, 71.46], [73.0, 73.52], [74.0, 75.63]], "keep_status": [true, false, false, false, false, false], "silence_prob": [38.54, 43.9, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["music", 25.44], ["hum", 22.22], ["throbbing", 14.46]], [["hum", 36.73], ["music", 22.02], ["throbbing", 14.96]], null, null, null, null], "duration": [6.37, 5.19, 1.58, 1.46, 0.52, 1.63]}
|
annotations_3/-C7Fcg58rZU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 3.86], [5.0, 19.09], [33.0, 35.87], [46.0, 46.79], [48.0, 48.3], [55.0, 58.13]], "keep_status": [false, false, true, false, false, false], "silence_prob": [0.0, 30.24, 40.01, 0.0, 0.0, 34.16], "audiomae_on_audioset": [null, [["music", 48.55], ["throbbing", 17.16], ["hum", 16.5]], [["music", 42.11], ["musical instrument", 6.9], ["synthesizer", 5.57]], null, null, [["speech", 59.17], ["explosion", 8.36], ["busy signal", 4.23]]], "duration": [1.86, 14.09, 2.87, 0.79, 0.3, 3.13]}
|
annotations_3/-CgUGjRFukQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 9.46], [17.0, 17.29], [23.0, 24.27], [26.0, 27.35], [29.0, 29.3], [31.0, 31.95], [33.0, 34.57], [35.0, 35.95], [38.0, 38.8], [41.0, 41.1], [47.0, 47.19], [49.0, 51.26], [52.0, 55.46], [60.0, 62.02], [65.0, 66.07], [71.0, 71.86], [77.0, 77.65]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 43.18, 48.06, 37.19, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, [["sidetone", 87.34], ["speech", 6.73], ["whale vocalization", 1.9]], [["didgeridoo", 50.89], ["music", 26.01], ["musical instrument", 2.61]], [["music", 12.37], ["hum", 8.58], ["vehicle", 5.67]], null, null, null], "duration": [1.46, 0.29, 1.27, 1.35, 0.3, 0.95, 1.57, 0.95, 0.8, 0.1, 0.19, 2.26, 3.46, 2.02, 1.07, 0.86, 0.65]}
|
annotations_3/-CzO7z1dZ1A_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[38.0, 45.0], [53.0, 53.6], [58.0, 60.51], [63.0, 63.98], [69.0, 87.47]], "keep_status": [false, false, false, false, false], "silence_prob": [88.46, 0.0, 92.48, 0.0, 99.82], "audiomae_on_audioset": [null, null, null, null, null], "duration": [7.0, 0.6, 2.51, 0.98, 18.47]}
|
annotations_3/-EZ9f-GgWVQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 7.84], [9.0, 10.12], [12.0, 13.24], [16.0, 17.39], [19.0, 21.52], [23.0, 25.2], [33.0, 33.98], [35.0, 35.16], [38.0, 42.33], [43.0, 44.42], [46.0, 47.92], [48.0, 48.09], [49.0, 49.08], [52.0, 52.69], [54.0, 55.34], [57.0, 57.18], [60.0, 61.21], [62.0, 63.12], [65.0, 65.64], [66.0, 66.82], [68.0, 68.77], [70.0, 75.0], [78.0, 79.03], [81.0, 82.51], [84.0, 85.5], [88.0, 88.13], [90.0, 91.29], [96.0, 97.53], [99.0, 99.17], [101.0, 101.12], [102.0, 104.55], [112.0, 116.61], [117.0, 118.42], [120.0, 133.79], [134.0, 137.03], [138.0, 144.29], [147.0, 148.29], [150.0, 150.89], [156.0, 157.11], [159.0, 158.97], [160.0, 160.79], [163.0, 166.85]], "keep_status": [false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, true, false, false, false, false, false, false, false, false], "silence_prob": [60.14, 0.0, 0.0, 0.0, 37.93, 41.87, 0.0, 0.0, 97.33, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 99.26, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 96.89, 38.59, 0.0, 33.98, 79.59, 77.36, 0.0, 0.0, 0.0, 0.0, 0.0, 78.38], "audiomae_on_audioset": [null, null, null, null, [["water", 19.27], ["speech", 13.46], ["liquid", 13.21]], [["sidetone", 74.54], ["speech", 17.17], ["radio", 1.19]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["thunk", 32.92], ["speech", 13.75], ["chirp tone", 6.37]], null, [["speech", 42.61], ["thunk", 6.13], ["goat", 3.46]], null, null, null, null, null, null, null, null], "duration": [2.84, 1.12, 1.24, 1.39, 2.52, 2.2, 0.98, 0.16, 4.33, 1.42, 1.92, 0.09, 0.08, 0.69, 1.34, 0.18, 1.21, 1.12, 0.64, 0.82, 0.77, 5.0, 1.03, 1.51, 1.5, 0.13, 1.29, 1.53, 0.17, 0.12, 2.55, 4.61, 1.42, 13.79, 3.03, 6.29, 1.29, 0.89, 1.11, -0.03, 0.79, 3.85]}
|
annotations_3/-EuO6OFypLo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[9.0, 9.07], [11.0, 11.15], [15.0, 23.67], [25.0, 25.49], [27.0, 27.65], [29.0, 29.76], [34.0, 35.68], [42.0, 42.43], [45.0, 45.66], [47.0, 48.09], [54.0, 55.29], [60.0, 61.33], [66.0, 68.93], [70.0, 71.46], [72.0, 72.93], [85.0, 85.43], [87.0, 89.18], [95.0, 98.56], [102.0, 105.31], [109.0, 111.15], [114.0, 121.85], [122.0, 126.89], [128.0, 130.74], [132.0, 145.89]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, true, false, true], "silence_prob": [0.0, 0.0, 31.95, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 59.15, 0.0, 0.0, 0.0, 59.15, 56.33, 54.43, 50.76, 39.69, 48.23, 53.72, 36.71], "audiomae_on_audioset": [null, null, [["didgeridoo", 54.15], ["music", 22.74], ["speech", 14.44]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["hum", 17.55], ["sidetone", 13.26], ["sine wave", 9.82]], [["sidetone", 36.27], ["speech", 7.18], ["hum", 6.21]], null, [["electric shaver, electric razor", 43.36], ["hum", 7.11], ["music", 4.27]]], "duration": [0.07, 0.15, 8.67, 0.49, 0.65, 0.76, 1.68, 0.43, 0.66, 1.09, 1.29, 1.33, 2.93, 1.46, 0.93, 0.43, 2.18, 3.56, 3.31, 2.15, 7.85, 4.89, 2.74, 13.89]}
|
annotations_3/-F1-sTyGvwA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 11.67], [12.0, 14.07], [15.0, 16.65], [18.0, 31.94], [35.0, 36.56], [38.0, 40.61], [41.0, 42.6], [43.0, 57.03], [58.0, 60.25], [61.0, 66.48], [68.0, 69.15], [69.0, 71.79], [73.0, 76.81], [77.0, 79.84], [81.0, 85.78], [86.0, 88.13], [90.0, 92.79], [94.0, 96.55], [97.0, 96.69], [97.0, 100.08], [100.0, 114.72], [116.0, 118.49], [119.0, 120.24], [121.0, 120.8], [121.0, 120.92]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 99.44, 0.0, 99.56, 0.0, 99.78, 0.0, 96.77, 98.66, 99.26, 0.0, 89.9, 93.6, 97.73, 88.83, 91.3, 97.33, 94.22, 0.0, 91.13, 78.55, 65.79, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.67, 2.07, 1.65, 13.94, 1.56, 2.61, 1.6, 14.03, 2.25, 5.48, 1.15, 2.79, 3.81, 2.84, 4.78, 2.13, 2.79, 2.55, -0.31, 3.08, 14.72, 2.49, 1.24, -0.2, -0.08]}
|
annotations_3/-IZv4Jfl6ZQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[9.0, 9.48], [11.0, 17.63], [23.0, 24.68], [28.0, 29.08], [34.0, 35.72], [38.0, 40.26], [46.0, 51.06], [58.0, 60.24], [62.0, 65.47], [67.0, 70.7], [73.0, 74.11], [80.0, 81.73], [94.0, 96.13], [98.0, 99.57], [100.0, 100.63], [102.0, 104.08]], "keep_status": [false, true, false, false, false, true, false, false, false, false, false, false, true, false, false, false], "silence_prob": [0.0, 36.3, 0.0, 0.0, 0.0, 30.35, 30.95, 30.75, 30.32, 30.31, 0.0, 0.0, 49.64, 0.0, 0.0, 50.11], "audiomae_on_audioset": [null, [["music", 43.89], ["guitar", 13.73], ["musical instrument", 10.76]], null, null, null, [["music", 40.42], ["fart", 11.53], ["fly, housefly", 7.98]], [["music", 64.97], ["didgeridoo", 9.36], ["guitar", 3.49]], [["music", 65.27], ["musical instrument", 6.16], ["guitar", 4.76]], [["music", 74.18], ["musical instrument", 3.03], ["psychedelic rock", 2.24]], [["music", 79.02], ["theremin", 10.26], ["musical instrument", 1.99]], null, null, [["hum", 34.48], ["eruption", 10.34], ["throbbing", 10.06]], null, null, null], "duration": [0.48, 6.63, 1.68, 1.08, 1.72, 2.26, 5.06, 2.24, 3.47, 3.7, 1.11, 1.73, 2.13, 1.57, 0.63, 2.08]}
|
annotations_3/-Ixi48TxkaA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.91], [4.0, 5.59], [13.0, 13.83], [17.0, 16.88], [20.0, 21.09], [23.0, 23.65], [25.0, 25.74], [27.0, 28.02], [28.0, 28.92], [29.0, 39.31], [40.0, 46.18], [48.0, 49.89], [50.0, 50.62], [52.0, 55.26], [58.0, 60.71], [62.0, 62.65], [67.0, 70.12], [76.0, 76.67], [80.0, 80.13], [83.0, 83.61], [85.0, 95.0], [96.0, 97.06], [100.0, 101.01], [103.0, 107.92], [108.0, 111.57], [113.0, 113.32], [115.0, 115.3], [117.0, 118.5], [119.0, 120.53], [123.0, 123.08], [124.0, 125.61], [127.0, 128.95]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 75.23, 80.64, 0.0, 0.0, 98.44, 65.32, 0.0, 100.0, 0.0, 0.0, 0.0, 72.16, 0.0, 0.0, 100.0, 99.76, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.91, 1.59, 0.83, -0.12, 1.09, 0.65, 0.74, 1.02, 0.92, 10.31, 6.18, 1.89, 0.62, 3.26, 2.71, 0.65, 3.12, 0.67, 0.13, 0.61, 10.0, 1.06, 1.01, 4.92, 3.57, 0.32, 0.3, 1.5, 1.53, 0.08, 1.61, 1.95]}
|
annotations_3/-J_tiDK1tEA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 13.96], [15.0, 15.3], [17.0, 17.69], [23.0, 23.55], [24.0, 25.24], [29.0, 43.02], [47.0, 46.94], [51.0, 51.53], [53.0, 53.92], [55.0, 54.78], [55.0, 54.92], [59.0, 60.49], [65.0, 65.04], [69.0, 69.97], [71.0, 71.95], [80.0, 81.5], [85.0, 85.8], [90.0, 91.18], [99.0, 99.06], [104.0, 104.5], [107.0, 108.36], [111.0, 111.89]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [70.3, 0.0, 0.0, 0.0, 0.0, 95.64, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [5.96, 0.3, 0.69, 0.55, 1.24, 14.02, -0.06, 0.53, 0.92, -0.22, -0.08, 1.49, 0.04, 0.97, 0.95, 1.5, 0.8, 1.18, 0.06, 0.5, 1.36, 0.89]}
|
annotations_3/-JbSkxI2DrY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[17.0, 18.03], [18.0, 18.1], [18.0, 19.09], [48.0, 48.42], [52.0, 53.38], [56.0, 56.96], [59.0, 59.91], [60.0, 61.86], [82.0, 82.59], [83.0, 88.16], [118.0, 118.99], [137.0, 140.41], [143.0, 148.85], [152.0, 153.25], [159.0, 159.76], [162.0, 162.13]], "keep_status": [false, false, false, false, false, false, false, false, false, true, false, false, true, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 34.6, 0.0, 30.31, 34.28, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, [["speech", 47.49], ["sidetone", 8.16], ["stomach rumble", 4.92]], null, [["music", 50.4], ["boing", 11.41], ["fart", 9.1]], [["music", 39.37], ["speech", 21.8], ["sidetone", 3.68]], null, null, null], "duration": [1.03, 0.1, 1.09, 0.42, 1.38, 0.96, 0.91, 1.86, 0.59, 5.16, 0.99, 3.41, 5.85, 1.25, 0.76, 0.13]}
|
annotations_3/-KOG8edoC00_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[23.0, 26.15], [29.0, 31.62], [33.0, 32.86], [35.0, 50.65], [57.0, 58.72], [60.0, 63.93], [66.0, 66.95], [67.0, 67.83], [68.0, 69.96], [70.0, 70.61], [72.0, 74.58], [75.0, 87.12], [88.0, 89.9], [94.0, 94.8], [98.0, 101.04], [102.0, 102.84], [111.0, 111.4], [115.0, 115.45], [116.0, 116.65], [118.0, 120.39]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "silence_prob": [99.99, 100.0, 0.0, 99.21, 0.0, 56.03, 0.0, 0.0, 0.0, 0.0, 99.26, 68.54, 0.0, 0.0, 95.91, 0.0, 0.0, 0.0, 0.0, 31.48], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 34.13], ["foghorn", 13.24], ["musical instrument", 7.35]]], "duration": [3.15, 2.62, -0.14, 15.65, 1.72, 3.93, 0.95, 0.83, 1.96, 0.61, 2.58, 12.12, 1.9, 0.8, 3.04, 0.84, 0.4, 0.45, 0.65, 2.39]}
|
annotations_3/-L3D0BL9ieA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 22.15], [23.0, 24.81], [27.0, 33.07], [36.0, 36.37], [38.0, 40.42], [42.0, 46.63], [50.0, 64.93], [70.0, 70.21], [76.0, 94.61], [95.0, 95.2], [95.0, 120.33], [122.0, 122.59]], "keep_status": [false, false, true, false, false, true, true, false, true, false, true, false], "silence_prob": [30.99, 0.0, 46.54, 0.0, 51.88, 38.17, 29.6, 0.0, 30.21, 0.0, 29.99, 0.0], "audiomae_on_audioset": [[["speech", 58.47], ["hum", 8.76], ["music", 4.16]], null, [["music", 59.63], ["synthesizer", 5.1], ["electronic music", 3.78]], null, null, [["speech", 20.54], ["cattle, bovinae", 18.53], ["livestock, farm animals, working animals", 8.28]], [["speech", 30.74], ["music", 11.83], ["buzz", 8.28]], null, [["music", 30.74], ["animal", 17.42], ["speech", 7.46]], null, [["music", 34.72], ["whip", 10.58], ["whack, thwack", 10.3]], null], "duration": [15.15, 1.81, 6.07, 0.37, 2.42, 4.63, 14.93, 0.21, 18.61, 0.2, 25.33, 0.59]}
|
annotations_3/-L9EZRMgmXM_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[42.0, 45.27], [54.0, 64.81], [68.0, 68.98], [73.0, 73.9], [74.0, 76.77], [80.0, 82.16], [83.0, 85.77], [89.0, 89.38], [91.0, 91.59], [94.0, 95.0], [97.0, 97.31], [100.0, 101.7], [104.0, 104.58], [107.0, 108.09], [110.0, 110.91], [112.0, 113.34], [117.0, 117.37], [119.0, 119.72]], "keep_status": [true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [42.58, 39.82, 0.0, 0.0, 69.07, 82.97, 66.76, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["music", 35.7], ["speech", 26.83], ["throbbing", 5.76]], [["insect", 17.09], ["hum", 15.07], ["fly, housefly", 11.83]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [3.27, 10.81, 0.98, 0.9, 2.77, 2.16, 2.77, 0.38, 0.59, 1.0, 0.31, 1.7, 0.58, 1.09, 0.91, 1.34, 0.37, 0.72]}
|
annotations_3/-LjxKR0q7Yo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[19.0, 19.89], [23.0, 29.73], [32.0, 38.91], [41.0, 42.5], [43.0, 59.48], [60.0, 63.26], [64.0, 67.22], [69.0, 83.34], [87.0, 91.47], [97.0, 109.29], [113.0, 114.61]], "keep_status": [false, true, true, false, true, true, false, true, true, true, false], "silence_prob": [0.0, 36.16, 31.91, 0.0, 34.94, 39.4, 30.5, 32.06, 44.57, 32.75, 0.0], "audiomae_on_audioset": [null, [["speech", 31.97], ["animal", 22.74], ["livestock, farm animals, working animals", 5.04]], [["speech", 34.84], ["fly, housefly", 11.78], ["insect", 8.35]], null, [["animal", 41.98], ["livestock, farm animals, working animals", 6.87], ["goose", 6.03]], [["speech", 25.44], ["music", 15.09], ["animal", 8.5]], [["speech", 61.0], ["music", 19.53], ["hum", 1.19]], [["speech", 45.54], ["mosquito", 5.83], ["explosion", 4.28]], [["mosquito", 29.76], ["whale vocalization", 17.4], ["fly, housefly", 16.54]], [["speech", 30.9], ["animal", 27.47], ["bow-wow", 3.91]], null], "duration": [0.89, 6.73, 6.91, 1.5, 16.48, 3.26, 3.22, 14.34, 4.47, 12.29, 1.61]}
|
annotations_3/-MEOfLvOuas_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[16.0, 17.54], [19.0, 20.06], [21.0, 21.56], [22.0, 25.19], [30.0, 29.98], [33.0, 33.34], [37.0, 38.06], [42.0, 42.42], [47.0, 48.37], [50.0, 51.14], [52.0, 53.22], [67.0, 67.91], [69.0, 82.17], [87.0, 87.34], [90.0, 91.18]], "keep_status": [false, false, false, true, false, false, false, false, false, false, false, false, true, false, false], "silence_prob": [0.0, 0.0, 0.0, 39.46, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 38.21, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, [["speech", 20.6], ["hum", 16.94], ["music", 10.94]], null, null, null, null, null, null, null, null, [["music", 47.94], ["theremin", 15.21], ["hum", 6.29]], null, null], "duration": [1.54, 1.06, 0.56, 3.19, -0.02, 0.34, 1.06, 0.42, 1.37, 1.14, 1.22, 0.91, 13.17, 0.34, 1.18]}
|
annotations_3/-MQNNzaEt2s_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[16.0, 16.77], [28.0, 28.26], [29.0, 30.54], [33.0, 33.91]], "keep_status": [false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null], "duration": [0.77, 0.26, 1.54, 0.91]}
|
annotations_3/-Mmq6Kmd75I_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 9.46], [19.0, 19.16], [21.0, 21.29], [24.0, 24.81], [27.0, 29.86], [41.0, 43.6], [44.0, 45.33], [47.0, 49.3], [51.0, 53.65], [56.0, 56.27], [66.0, 67.2], [69.0, 70.85], [73.0, 74.56], [76.0, 77.06]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 42.06, 62.78, 0.0, 95.23, 83.88, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, [["speech", 63.04], ["music", 8.18], ["hum", 2.86]], null, null, null, null, null, null, null, null, null], "duration": [1.46, 0.16, 0.29, 0.81, 2.86, 2.6, 1.33, 2.3, 2.65, 0.27, 1.2, 1.85, 1.56, 1.06]}
|
annotations_3/-NW-w5Z_vpk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 7.64], [12.0, 12.09], [13.0, 15.82], [19.0, 20.21], [22.0, 28.75], [31.0, 34.01], [35.0, 35.26], [36.0, 36.46], [40.0, 43.39], [45.0, 53.7], [55.0, 80.23], [83.0, 83.4], [88.0, 113.09], [119.0, 120.56], [121.0, 122.3]], "keep_status": [false, false, false, false, false, false, false, false, true, false, true, false, true, false, false], "silence_prob": [0.0, 0.0, 99.92, 0.0, 81.53, 99.96, 0.0, 0.0, 42.28, 50.11, 46.33, 0.0, 40.11, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, [["music", 24.14], ["hum", 23.5], ["throbbing", 15.63]], null, [["music", 44.83], ["synthesizer", 17.53], ["ambient music", 6.2]], null, [["speech", 26.87], ["music", 17.49], ["hum", 10.92]], null, null], "duration": [0.64, 0.09, 2.82, 1.21, 6.75, 3.01, 0.26, 0.46, 3.39, 8.7, 25.23, 0.4, 25.09, 1.56, 1.3]}
|
annotations_3/-NgmhVRFApQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 4.31], [5.0, 5.71], [10.0, 12.53], [14.0, 14.77], [16.0, 19.43], [21.0, 21.83], [22.0, 28.83], [31.0, 31.9], [33.0, 33.17], [33.0, 35.9], [36.0, 44.36], [46.0, 46.85], [48.0, 49.13], [52.0, 54.5], [56.0, 57.69], [59.0, 60.45], [62.0, 62.83], [69.0, 70.38], [71.0, 73.55], [75.0, 76.5], [81.0, 81.55], [84.0, 85.4], [86.0, 87.24]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 100.0, 0.0, 99.05, 0.0, 70.02, 0.0, 0.0, 99.68, 57.89, 0.0, 0.0, 99.71, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.31, 0.71, 2.53, 0.77, 3.43, 0.83, 6.83, 0.9, 0.17, 2.9, 8.36, 0.85, 1.13, 2.5, 1.69, 1.45, 0.83, 1.38, 2.55, 1.5, 0.55, 1.4, 1.24]}
|
annotations_3/-NtpPdMGluE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 1.98], [4.0, 5.85], [6.0, 7.08], [8.0, 8.63], [10.0, 17.52], [20.0, 31.73], [33.0, 34.79], [36.0, 59.04], [60.0, 62.6], [64.0, 64.74], [66.0, 66.56], [68.0, 76.98], [77.0, 86.95], [88.0, 91.39], [93.0, 95.61], [97.0, 114.03], [116.0, 119.45], [121.0, 135.67], [140.0, 142.15], [146.0, 147.72], [152.0, 152.54], [157.0, 158.03]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 71.43, 59.51, 0.0, 88.46, 98.27, 0.0, 0.0, 92.15, 99.1, 48.48, 51.6, 60.32, 67.51, 73.97, 60.51, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, [["radio", 21.87], ["sidetone", 21.61], ["speech", 17.95]], null, null, null, null, null, null, null, null], "duration": [-0.02, 1.85, 1.08, 0.63, 7.52, 11.73, 1.79, 23.04, 2.6, 0.74, 0.56, 8.98, 9.95, 3.39, 2.61, 17.03, 3.45, 14.67, 2.15, 1.72, 0.54, 1.03]}
|
annotations_3/-QB2gXiOAKc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 7.13], [11.0, 12.26], [13.0, 14.57], [17.0, 22.94], [29.0, 32.91], [35.0, 47.33]], "keep_status": [false, false, false, false, true, true], "silence_prob": [0.0, 0.0, 0.0, 59.59, 47.98, 45.3], "audiomae_on_audioset": [null, null, null, null, [["singing bowl", 58.17], ["music", 4.91], ["tuning fork", 4.51]], [["hum", 25.52], ["speech", 16.82], ["didgeridoo", 13.0]]], "duration": [1.13, 1.26, 1.57, 5.94, 3.91, 12.33]}
|
annotations_3/-RBjiJto4hc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 6.34], [19.0, 20.38], [36.0, 41.17], [42.0, 42.63], [43.0, 42.67], [43.0, 43.65], [44.0, 44.31], [44.0, 44.34], [44.0, 45.1], [49.0, 49.45], [50.0, 52.78], [54.0, 55.65], [56.0, 61.65], [63.0, 66.11], [68.0, 80.84]], "keep_status": [true, false, true, false, false, false, false, false, false, false, false, false, false, true, true], "silence_prob": [30.34, 0.0, 30.63, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 64.75, 0.0, 84.98, 46.64, 35.07], "audiomae_on_audioset": [[["vehicle", 17.86], ["moo", 14.13], ["speech", 10.38]], null, [["foghorn", 34.48], ["speech", 20.86], ["music", 12.71]], null, null, null, null, null, null, null, null, null, null, [["music", 40.1], ["ambient music", 7.64], ["foghorn", 6.24]], [["music", 53.31], ["theremin", 10.71], ["musical instrument", 5.24]]], "duration": [3.34, 1.38, 5.17, 0.63, -0.33, 0.65, 0.31, 0.34, 1.1, 0.45, 2.78, 1.65, 5.65, 3.11, 12.84]}
|
annotations_3/-RJ6USD2nEU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 7.91], [8.0, 15.75], [30.0, 31.4], [33.0, 33.1], [34.0, 34.64], [38.0, 38.38], [43.0, 43.6], [45.0, 45.54], [56.0, 63.9], [76.0, 80.3], [83.0, 83.96], [84.0, 89.95], [90.0, 90.44], [90.0, 91.13], [93.0, 100.65], [102.0, 103.64], [107.0, 109.39], [115.0, 120.77], [127.0, 127.68], [131.0, 133.57], [137.0, 138.64], [139.0, 139.29], [149.0, 149.86], [152.0, 154.84]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, true, true, false, false, false, false, false, false], "silence_prob": [0.0, 51.66, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 60.79, 62.37, 0.0, 33.04, 0.0, 0.0, 47.54, 0.0, 45.69, 46.05, 0.0, 46.61, 0.0, 0.0, 0.0, 58.81], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, [["music", 39.21], ["animal", 10.42], ["breaking", 5.16]], null, null, [["fly, housefly", 38.31], ["bee, wasp, etc.", 35.73], ["insect", 14.6]], null, [["music", 49.28], ["throbbing", 5.06], ["hum", 3.83]], [["fly, housefly", 17.39], ["noise", 15.69], ["hum", 7.02]], null, [["hum", 43.42], ["mains hum", 18.79], ["music", 9.52]], null, null, null, null], "duration": [-0.09, 7.75, 1.4, 0.1, 0.64, 0.38, 0.6, 0.54, 7.9, 4.3, 0.96, 5.95, 0.44, 1.13, 7.65, 1.64, 2.39, 5.77, 0.68, 2.57, 1.64, 0.29, 0.86, 2.84]}
|
annotations_3/-RhmDUj_GJs_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[28.0, 29.02], [31.0, 32.09], [36.0, 37.83], [51.0, 52.07], [54.0, 55.29], [58.0, 60.89], [77.0, 77.67], [80.0, 80.86], [88.0, 89.55]], "keep_status": [false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 52.62, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null], "duration": [1.02, 1.09, 1.83, 1.07, 1.29, 2.89, 0.67, 0.86, 1.55]}
|
annotations_3/-SKfkvvtqN0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 21.68], [35.0, 35.43], [39.0, 39.92], [42.0, 42.45], [47.0, 49.1], [49.0, 49.15], [49.0, 49.92], [55.0, 57.03], [70.0, 70.28], [74.0, 74.6], [76.0, 76.52], [81.0, 104.11], [106.0, 105.8], [108.0, 108.43], [135.0, 135.43], [145.0, 145.49], [147.0, 149.57], [152.0, 154.5], [161.0, 161.89], [163.0, 163.65]], "keep_status": [true, false, false, false, false, false, false, true, false, false, false, true, false, false, false, false, true, true, false, false], "silence_prob": [32.24, 0.0, 0.0, 0.0, 34.15, 0.0, 0.0, 35.0, 0.0, 0.0, 0.0, 37.07, 0.0, 0.0, 0.0, 0.0, 35.74, 37.56, 0.0, 0.0], "audiomae_on_audioset": [[["music", 34.06], ["brass instrument", 17.19], ["saxophone", 9.1]], null, null, null, [["mosquito", 35.35], ["fly, housefly", 31.42], ["insect", 13.07]], null, null, [["hum", 36.85], ["throbbing", 19.46], ["music", 12.17]], null, null, null, [["music", 53.1], ["buzz", 12.2], ["bleat", 3.12]], null, null, null, null, [["music", 25.42], ["carnatic music", 24.13], ["speech", 7.78]], [["music", 42.27], ["chant", 6.75], ["musical instrument", 4.14]], null, null], "duration": [15.68, 0.43, 0.92, 0.45, 2.1, 0.15, 0.92, 2.03, 0.28, 0.6, 0.52, 23.11, -0.2, 0.43, 0.43, 0.49, 2.57, 2.5, 0.89, 0.65]}
|
annotations_3/-T16rxR-nCo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 6.39], [15.0, 15.09], [17.0, 17.2], [19.0, 20.14], [27.0, 26.97], [34.0, 35.04], [40.0, 42.89], [46.0, 46.11], [49.0, 49.94], [53.0, 53.5], [56.0, 56.83], [59.0, 59.95], [61.0, 63.02], [69.0, 70.11], [76.0, 76.42], [78.0, 78.61], [86.0, 86.26]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 66.27, 0.0, 0.0, 0.0, 0.0, 0.0, 73.82, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.39, 0.09, 0.2, 1.14, -0.03, 1.04, 2.89, 0.11, 0.94, 0.5, 0.83, 0.95, 2.02, 1.11, 0.42, 0.61, 0.26]}
|
annotations_3/-TPRG6Yqzf4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 0.67], [3.0, 9.21], [16.0, 19.08], [20.0, 21.47], [23.0, 23.75], [36.0, 37.0], [40.0, 40.76], [44.0, 45.01], [47.0, 48.54], [50.0, 53.23], [54.0, 67.8], [73.0, 83.81], [84.0, 83.84], [85.0, 96.15]], "keep_status": [false, false, false, false, false, false, false, false, false, false, true, true, false, false], "silence_prob": [0.0, 87.19, 97.43, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 46.02, 35.46, 0.0, 34.03], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, [["speech", 37.04], ["music", 23.48], ["hum", 4.3]], [["music", 43.05], ["musical instrument", 14.6], ["speech", 6.81]], null, [["livestock, farm animals, working animals", 43.97], ["cattle, bovinae", 38.13], ["moo", 17.4]]], "duration": [0.67, 6.21, 3.08, 1.47, 0.75, 1.0, 0.76, 1.01, 1.54, 3.23, 13.8, 10.81, -0.16, 11.15]}
|
annotations_3/-TWsZukTS4Q_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [], "keep_status": [], "silence_prob": [], "audiomae_on_audioset": [], "duration": []}
|
annotations_3/-TogGxzlfhM_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 0.5], [14.0, 15.04], [16.0, 16.5], [19.0, 18.99], [27.0, 27.19], [30.0, 30.23], [32.0, 32.88], [35.0, 35.33], [37.0, 37.3], [42.0, 43.48], [53.0, 53.74], [78.0, 78.38], [109.0, 109.7], [111.0, 111.4], [118.0, 117.78]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.5, 1.04, 0.5, -0.01, 0.19, 0.23, 0.88, 0.33, 0.3, 1.48, 0.74, 0.38, 0.7, 0.4, -0.22]}
|
annotations_3/-UAElWXbk3I_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[11.0, 14.39], [16.0, 33.0], [35.0, 34.94], [37.0, 37.44], [39.0, 38.97], [40.0, 41.45], [44.0, 47.7], [48.0, 49.54], [50.0, 50.79], [58.0, 59.21], [61.0, 61.28], [65.0, 66.12], [67.0, 67.59], [69.0, 74.24], [76.0, 84.28], [87.0, 86.9], [90.0, 90.42], [93.0, 98.51], [99.0, 101.39], [102.0, 107.89], [109.0, 112.82], [114.0, 115.13], [116.0, 116.72], [118.0, 118.89], [121.0, 121.59], [124.0, 123.99], [125.0, 125.71], [126.0, 126.35], [128.0, 127.6], [128.0, 127.75], [129.0, 128.88], [131.0, 131.08], [139.0, 139.97], [142.0, 142.87], [143.0, 143.48], [147.0, 146.99], [148.0, 149.54], [152.0, 152.46], [161.0, 160.74], [166.0, 166.28], [170.0, 170.21], [172.0, 173.58], [175.0, 176.54], [177.0, 178.8]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [31.88, 29.83, 0.0, 0.0, 0.0, 0.0, 69.74, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 52.86, 31.21, 0.0, 0.0, 62.58, 74.76, 75.07, 71.29, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["music", 55.08], ["speech", 14.77], ["crack", 4.28]], [["music", 69.76], ["speech", 11.07], ["sampler", 1.87]], null, null, null, null, null, null, null, null, null, null, null, null, [["music", 59.63], ["speech", 6.21], ["livestock, farm animals, working animals", 4.36]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [3.39, 17.0, -0.06, 0.44, -0.03, 1.45, 3.7, 1.54, 0.79, 1.21, 0.28, 1.12, 0.59, 5.24, 8.28, -0.1, 0.42, 5.51, 2.39, 5.89, 3.82, 1.13, 0.72, 0.89, 0.59, -0.01, 0.71, 0.35, -0.4, -0.25, -0.12, 0.08, 0.97, 0.87, 0.48, -0.01, 1.54, 0.46, -0.26, 0.28, 0.21, 1.58, 1.54, 1.8]}
|
annotations_3/-UAV4O9oZy0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[27.0, 33.24], [37.0, 37.1]], "keep_status": [true, false], "silence_prob": [34.61, 0.0], "audiomae_on_audioset": [[["music", 34.97], ["guitar", 16.89], ["effects unit", 6.65]], null], "duration": [6.24, 0.1]}
|
annotations_3/-UJ9K8lMxPA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 11.99], [15.0, 15.82], [17.0, 22.67], [25.0, 25.93], [29.0, 123.5], [124.0, 128.92], [132.0, 170.87], [171.0, 172.81]], "keep_status": [false, false, true, false, false, true, false, false], "silence_prob": [0.0, 0.0, 32.65, 0.0, 0.0, 30.62, 0.0, 0.0], "audiomae_on_audioset": [null, null, [["music", 46.87], ["effects unit", 8.81], ["synthesizer", 6.87]], null, null, [["music", 47.12], ["speech", 10.17], ["cacophony", 3.97]], null, null], "duration": [1.99, 0.82, 5.67, 0.93, 94.5, 4.92, 38.87, 1.81]}
|