Spaces:
Build error
Build error
WIP
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- annotations_filtered/b-2p52a82UM_filtered.json +1 -0
- annotations_filtered/b-QlCUByMcE_filtered.json +1 -0
- annotations_filtered/b-_C0lWgga0_filtered.json +1 -0
- annotations_filtered/b-f5iMDXvcA_filtered.json +1 -0
- annotations_filtered/b-w1bY8qhnc_filtered.json +1 -0
- annotations_filtered/b02H0dW2xf8_filtered.json +1 -0
- annotations_filtered/b0KSEziycmw_filtered.json +1 -0
- annotations_filtered/b0SfZ4LMV98_filtered.json +1 -0
- annotations_filtered/b0p7_jQ8HiE_filtered.json +1 -0
- annotations_filtered/b0xYU8jHaH4_filtered.json +1 -0
- annotations_filtered/b10LyOeq5Hs_filtered.json +1 -0
- annotations_filtered/b1MxW8nf_lU_filtered.json +1 -0
- annotations_filtered/b1Qxbu777zo_filtered.json +1 -0
- annotations_filtered/b1eMAFWXZ4Q_filtered.json +1 -0
- annotations_filtered/b1jqSRnqLMw_filtered.json +1 -0
- annotations_filtered/b1vFQilhgrY_filtered.json +1 -0
- annotations_filtered/b2MEP246DxY_filtered.json +1 -0
- annotations_filtered/b2P-oU216V4_filtered.json +1 -0
- annotations_filtered/b2WuWXRVdfk_filtered.json +1 -0
- annotations_filtered/b2f2Kqt_KcE_filtered.json +1 -0
- annotations_filtered/b2gz0vSh0J4_filtered.json +1 -0
- annotations_filtered/b2hhdMiOTOE_filtered.json +1 -0
- annotations_filtered/b2zQmmYEDY4_filtered.json +1 -0
- annotations_filtered/b3Aq5Vc0Ics_filtered.json +1 -0
- annotations_filtered/b3EWsHg08x4_filtered.json +1 -0
- annotations_filtered/b3OlGLDk4pY_filtered.json +1 -0
- annotations_filtered/b3lLWO2d7b0_filtered.json +1 -0
- annotations_filtered/b3lOpSXhT0c_filtered.json +1 -0
- annotations_filtered/b4kKWa_hjCk_filtered.json +1 -0
- annotations_filtered/b4kRHpvisxE_filtered.json +1 -0
- annotations_filtered/b4vpGhO2LwA_filtered.json +1 -0
- annotations_filtered/b56RExAdg7s_filtered.json +1 -0
- annotations_filtered/b5I94bT23cQ_filtered.json +1 -0
- annotations_filtered/b5Q6A_1YyHg_filtered.json +1 -0
- annotations_filtered/b60DLSEemEY_filtered.json +1 -0
- annotations_filtered/b65C_muXajk_filtered.json +1 -0
- annotations_filtered/b6X5bVMoCJc_filtered.json +1 -0
- annotations_filtered/b6vOp7_rI6Q_filtered.json +1 -0
- annotations_filtered/b6xbga06ApQ_filtered.json +1 -0
- annotations_filtered/b74611maYgQ_filtered.json +1 -0
- annotations_filtered/b7AjNXAF-7Y_filtered.json +1 -0
- annotations_filtered/b7C69HqnV8s_filtered.json +1 -0
- annotations_filtered/b7Dxy34dFyY_filtered.json +1 -0
- annotations_filtered/b7lV6-iKiwQ_filtered.json +1 -0
- annotations_filtered/b7wurDomuVs_filtered.json +1 -0
- annotations_filtered/b8Dv782UIb4_filtered.json +1 -0
- annotations_filtered/b8U1na74Bcc_filtered.json +1 -0
- annotations_filtered/b8oFKKPfgi0_filtered.json +1 -0
- annotations_filtered/b8t5kX7k0vQ_filtered.json +1 -0
- annotations_filtered/b95SzqTrjRo_filtered.json +1 -0
annotations_filtered/b-2p52a82UM_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 17.81], [18.0, 18.28], [19.0, 19.9], [30.0, 30.75], [31.0, 31.4], [31.0, 37.13], [49.0, 50.77], [52.0, 53.96], [56.0, 62.34], [63.0, 79.57], [83.0, 83.4], [87.0, 87.79], [125.0, 127.9], [129.0, 130.1], [138.0, 138.96], [141.0, 141.17], [145.0, 146.79], [150.0, 151.83], [154.0, 156.59], [158.0, 163.19]], "keep_status": [false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "silence_prob": [31.0, 0.0, 0.0, 0.0, 0.0, 29.45, 0.0, 0.0, 62.37, 48.91, 0.0, 0.0, 91.47, 0.0, 0.0, 0.0, 0.0, 0.0, 55.53, 45.05], "audiomae_on_audioset": [[["music", 77.25], ["speech", 4.17], ["thunk", 1.69]], null, null, null, null, [["music", 23.79], ["sidetone", 12.86], ["speech", 10.59]], null, null, null, [["hum", 42.64], ["mains hum", 27.66], ["music", 9.86]], null, null, null, null, null, null, null, null, null, [["speech", 36.16], ["music", 13.68], ["animal", 5.74]]], "duration": [16.81, 0.28, 0.9, 0.75, 0.4, 6.13, 1.77, 1.96, 6.34, 16.57, 0.4, 0.79, 2.9, 1.1, 0.96, 0.17, 1.79, 1.83, 2.59, 5.19]}
|
annotations_filtered/b-QlCUByMcE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 12.6], [15.0, 14.94], [17.0, 18.32], [21.0, 22.72], [29.0, 30.59], [32.0, 32.36], [33.0, 34.16], [42.0, 43.87], [47.0, 48.25], [49.0, 51.8], [56.0, 56.78], [60.0, 62.97], [68.0, 70.38], [70.0, 70.92], [76.0, 76.64], [81.0, 81.65], [87.0, 87.84], [94.0, 96.2], [98.0, 99.69], [100.0, 101.82], [102.0, 103.81], [108.0, 110.46], [111.0, 110.69], [114.0, 119.18]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 99.99, 0.0, 78.55, 62.27, 0.0, 0.0, 0.0, 0.0, 99.97, 0.0, 0.0, 0.0, 99.76, 0.0, 100.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [6.6, -0.06, 1.32, 1.72, 1.59, 0.36, 1.16, 1.87, 1.25, 2.8, 0.78, 2.97, 2.38, 0.92, 0.64, 0.65, 0.84, 2.2, 1.69, 1.82, 1.81, 2.46, -0.31, 5.18]}
|
annotations_filtered/b-_C0lWgga0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 47.11], [51.0, 52.44], [56.0, 58.04]], "keep_status": [false, false, true], "silence_prob": [0.0, 0.0, 30.43], "audiomae_on_audioset": [null, null, [["music", 61.13], ["musical instrument", 3.04], ["bass guitar", 2.72]]], "duration": [42.11, 1.44, 2.04]}
|
annotations_filtered/b-f5iMDXvcA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 74.28]], "keep_status": [false], "silence_prob": [0.0], "audiomae_on_audioset": [null], "duration": [66.28]}
|
annotations_filtered/b-w1bY8qhnc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[15.0, 28.49], [29.0, 28.54], [32.0, 33.79], [35.0, 37.3], [40.0, 42.57], [44.0, 45.32], [47.0, 50.06], [54.0, 56.08], [58.0, 57.67], [58.0, 60.93], [62.0, 64.15], [66.0, 69.67], [71.0, 71.09], [71.0, 71.79], [79.0, 80.27], [81.0, 82.44], [84.0, 84.69], [85.0, 87.24], [87.0, 87.27], [88.0, 89.77], [90.0, 90.56], [99.0, 98.63], [100.0, 100.48], [106.0, 112.01], [114.0, 114.15], [125.0, 129.78], [138.0, 141.4], [141.0, 149.82], [151.0, 157.05], [158.0, 159.9], [161.0, 162.35], [162.0, 167.86], [168.0, 170.24], [171.0, 173.33]], "keep_status": [false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, false, false, false, true, true, false], "silence_prob": [55.04, 0.0, 0.0, 91.47, 82.07, 0.0, 88.64, 84.8, 0.0, 52.74, 44.69, 53.22, 0.0, 0.0, 0.0, 0.0, 0.0, 65.67, 0.0, 0.0, 0.0, 0.0, 0.0, 32.46, 0.0, 33.19, 46.9, 39.5, 36.11, 0.0, 0.0, 42.44, 41.5, 58.3], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, [["music", 41.12], ["speech", 12.58], ["hum", 7.46]], null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 54.63], ["music", 31.33], ["synthetic singing", 1.27]], null, [["speech", 42.12], ["music", 21.83], ["sidetone", 5.78]], [["music", 17.62], ["speech", 8.85], ["animal", 4.75]], [["music", 30.68], ["singing bowl", 10.17], ["synthesizer", 8.13]], [["speech", 58.06], ["music", 22.48], ["synthesizer", 3.44]], null, null, [["music", 36.11], ["speech", 22.14], ["mains hum", 4.69]], [["speech", 40.2], ["music", 23.74], ["musical instrument", 5.86]], null], "duration": [13.49, -0.46, 1.79, 2.3, 2.57, 1.32, 3.06, 2.08, -0.33, 2.93, 2.15, 3.67, 0.09, 0.79, 1.27, 1.44, 0.69, 2.24, 0.27, 1.77, 0.56, -0.37, 0.48, 6.01, 0.15, 4.78, 3.4, 8.82, 6.05, 1.9, 1.35, 5.86, 2.24, 2.33]}
|
annotations_filtered/b02H0dW2xf8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 11.57], [25.0, 25.57]], "keep_status": [true, false], "silence_prob": [31.25, 0.0], "audiomae_on_audioset": [[["speech", 31.63], ["fart", 20.99], ["music", 5.61]], null], "duration": [3.57, 0.57]}
|
annotations_filtered/b0KSEziycmw_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 6.1], [10.0, 10.59], [18.0, 18.87], [22.0, 23.43], [27.0, 27.18], [28.0, 28.34], [35.0, 36.56], [38.0, 38.38], [40.0, 40.56], [42.0, 42.94], [49.0, 49.81], [51.0, 52.08], [53.0, 53.87], [55.0, 55.73], [57.0, 58.18], [59.0, 59.8], [67.0, 67.71], [69.0, 70.7], [71.0, 71.98], [74.0, 75.24], [76.0, 77.31], [78.0, 78.31], [80.0, 80.27], [82.0, 82.76], [84.0, 84.45], [86.0, 86.17], [87.0, 87.32], [92.0, 93.26], [95.0, 101.76], [102.0, 104.45], [112.0, 113.27], [116.0, 117.98], [122.0, 123.84], [126.0, 128.36], [129.0, 131.55], [134.0, 135.55], [139.0, 140.71], [142.0, 144.46], [145.0, 152.59]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, true, false, false, false, true, true, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 33.32, 42.96, 0.0, 0.0, 0.0, 38.02, 41.2, 0.0, 0.0, 55.53, 29.72], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["fly, housefly", 22.54], ["speech", 18.4], ["fart", 14.78]], [["speech", 36.55], ["music", 28.91], ["echo", 3.09]], null, null, null, [["sine wave", 43.99], ["speech", 12.33], ["chirp tone", 9.39]], [["insect", 20.62], ["fly, housefly", 19.44], ["speech", 10.63]], null, null, null, [["music", 61.0], ["speech", 22.54], ["explosion", 2.26]]], "duration": [1.1, 0.59, 0.87, 1.43, 0.18, 0.34, 1.56, 0.38, 0.56, 0.94, 0.81, 1.08, 0.87, 0.73, 1.18, 0.8, 0.71, 1.7, 0.98, 1.24, 1.31, 0.31, 0.27, 0.76, 0.45, 0.17, 0.32, 1.26, 6.76, 2.45, 1.27, 1.98, 1.84, 2.36, 2.55, 1.55, 1.71, 2.46, 7.59]}
|
annotations_filtered/b0SfZ4LMV98_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 11.74], [13.0, 13.61], [15.0, 20.76], [23.0, 23.33], [25.0, 25.79], [27.0, 26.86], [48.0, 48.56], [59.0, 60.22], [61.0, 61.74], [63.0, 64.05]], "keep_status": [false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null], "duration": [1.74, 0.61, 5.76, 0.33, 0.79, -0.14, 0.56, 1.22, 0.74, 1.05]}
|
annotations_filtered/b0p7_jQ8HiE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 7.89], [17.0, 19.16], [24.0, 24.39], [27.0, 29.66], [38.0, 38.97], [43.0, 44.25], [49.0, 49.35], [52.0, 61.84], [63.0, 64.34], [67.0, 68.03], [70.0, 71.66], [74.0, 75.73], [79.0, 84.45], [85.0, 90.36]], "keep_status": [true, true, false, true, false, false, false, false, false, false, false, false, true, false], "silence_prob": [43.18, 41.1, 0.0, 35.82, 0.0, 0.0, 0.0, 32.84, 0.0, 0.0, 0.0, 0.0, 37.01, 33.97], "audiomae_on_audioset": [[["music", 46.53], ["musical instrument", 7.08], ["echo", 4.34]], [["gong", 16.8], ["music", 16.11], ["singing bowl", 11.65]], null, [["music", 46.7], ["didgeridoo", 7.44], ["mantra", 6.44]], null, null, null, [["music", 60.66], ["theremin", 8.51], ["synthesizer", 7.73]], null, null, null, null, [["music", 59.51], ["effects unit", 6.27], ["didgeridoo", 4.15]], [["music", 63.54], ["didgeridoo", 12.0], ["musical instrument", 3.78]]], "duration": [5.89, 2.16, 0.39, 2.66, 0.97, 1.25, 0.35, 9.84, 1.34, 1.03, 1.66, 1.73, 5.45, 5.36]}
|
annotations_filtered/b0xYU8jHaH4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.41], [5.0, 5.71], [7.0, 7.74], [8.0, 9.05], [11.0, 13.37], [16.0, 17.68], [23.0, 23.14], [24.0, 25.03], [28.0, 28.29], [30.0, 31.02], [37.0, 38.15], [41.0, 43.92], [47.0, 47.55], [50.0, 50.25], [57.0, 56.86], [57.0, 57.2], [57.0, 57.3], [57.0, 57.48], [58.0, 58.7], [70.0, 71.05], [74.0, 74.48]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 78.72, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 44.75, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, [["speech", 41.87], ["music", 7.53], ["synthesizer", 4.96]], null, null, null, null, null, null, null, null, null], "duration": [0.41, 0.71, 0.74, 1.05, 2.37, 1.68, 0.14, 1.03, 0.29, 1.02, 1.15, 2.92, 0.55, 0.25, -0.14, 0.2, 0.3, 0.48, 0.7, 1.05, 0.48]}
|
annotations_filtered/b10LyOeq5Hs_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 5.14], [5.0, 6.25], [9.0, 12.34], [14.0, 15.48], [21.0, 23.25], [26.0, 31.04], [33.0, 40.24], [42.0, 49.72], [51.0, 51.71], [53.0, 54.45], [55.0, 57.94], [60.0, 61.18], [62.0, 64.25], [65.0, 66.7], [68.0, 70.07], [77.0, 78.58], [81.0, 84.91], [86.0, 92.87], [94.0, 95.81], [98.0, 98.68], [103.0, 104.21], [105.0, 107.81], [109.0, 110.84], [113.0, 114.83], [118.0, 135.35], [136.0, 145.32], [146.0, 152.83]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [99.97, 0.0, 99.93, 0.0, 98.36, 100.0, 100.0, 100.0, 0.0, 0.0, 62.89, 0.0, 67.13, 0.0, 99.52, 0.0, 99.99, 98.8, 0.0, 0.0, 0.0, 87.19, 0.0, 0.0, 96.54, 98.73, 97.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [2.14, 1.25, 3.34, 1.48, 2.25, 5.04, 7.24, 7.72, 0.71, 1.45, 2.94, 1.18, 2.25, 1.7, 2.07, 1.58, 3.91, 6.87, 1.81, 0.68, 1.21, 2.81, 1.84, 1.83, 17.35, 9.32, 6.83]}
|
annotations_filtered/b1MxW8nf_lU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[24.0, 41.5], [42.0, 50.4], [55.0, 73.16], [76.0, 76.92], [78.0, 78.97], [84.0, 84.48]], "keep_status": [true, true, true, false, false, false], "silence_prob": [32.28, 32.08, 33.73, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["music", 60.41], ["crowd", 4.75], ["throbbing", 4.61]], [["music", 47.17], ["throbbing", 10.45], ["speech", 4.94]], [["speech", 35.09], ["livestock, farm animals, working animals", 13.32], ["cattle, bovinae", 10.74]], null, null, null], "duration": [17.5, 8.4, 18.16, 0.92, 0.97, 0.48]}
|
annotations_filtered/b1Qxbu777zo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 6.29], [8.0, 8.45], [11.0, 12.72], [19.0, 19.48], [20.0, 20.56], [23.0, 23.48], [31.0, 32.34], [36.0, 37.05], [38.0, 39.24], [40.0, 41.57], [43.0, 42.9], [48.0, 50.6], [51.0, 51.07], [52.0, 53.08], [54.0, 54.3], [57.0, 59.83], [61.0, 62.94]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, true, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 41.52, 0.0, 0.0, 0.0, 39.49, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, [["speech", 19.48], ["sidetone", 19.03], ["rumble", 12.31]], null, null, null, [["music", 33.64], ["speech", 21.25], ["hum", 6.69]], null], "duration": [0.29, 0.45, 1.72, 0.48, 0.56, 0.48, 1.34, 1.05, 1.24, 1.57, -0.1, 2.6, 0.07, 1.08, 0.3, 2.83, 1.94]}
|
annotations_filtered/b1eMAFWXZ4Q_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[15.0, 23.62], [32.0, 34.08], [38.0, 37.76], [42.0, 44.25], [56.0, 56.62], [58.0, 65.15], [66.0, 66.02], [69.0, 69.58], [74.0, 73.9], [80.0, 80.57], [96.0, 96.99], [104.0, 104.13], [106.0, 109.32], [113.0, 114.57], [116.0, 117.91]], "keep_status": [false, true, false, true, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [28.77, 45.08, 0.0, 34.45, 0.0, 54.36, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 80.29, 0.0, 0.0], "audiomae_on_audioset": [[["music", 33.4], ["throbbing", 20.38], ["didgeridoo", 20.31]], [["music", 20.99], ["hum", 13.28], ["throbbing", 9.11]], null, [["creak", 35.86], ["mains hum", 12.46], ["hum", 12.26]], null, null, null, null, null, null, null, null, null, null, null], "duration": [8.62, 2.08, -0.24, 2.25, 0.62, 7.15, 0.02, 0.58, -0.1, 0.57, 0.99, 0.13, 3.32, 1.57, 1.91]}
|
annotations_filtered/b1jqSRnqLMw_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[21.0, 45.6], [47.0, 101.44], [104.0, 104.18], [106.0, 105.71], [112.0, 112.72], [119.0, 121.42], [122.0, 123.43], [130.0, 131.6], [137.0, 149.84]], "keep_status": [false, false, false, false, false, false, false, false, true], "silence_prob": [98.51, 0.0, 0.0, 0.0, 0.0, 45.21, 0.0, 0.0, 43.3], "audiomae_on_audioset": [null, null, null, null, null, [["music", 57.61], ["carnatic music", 17.95], ["musical instrument", 11.06]], null, null, [["music", 39.38], ["hum", 13.41], ["synthesizer", 9.73]]], "duration": [24.6, 54.44, 0.18, -0.29, 0.72, 2.42, 1.43, 1.6, 12.84]}
|
annotations_filtered/b1vFQilhgrY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 0.9], [1.0, 0.94], [1.0, 6.22], [8.0, 8.46], [13.0, 13.93], [16.0, 16.46], [28.0, 28.48], [29.0, 30.15], [34.0, 34.52], [35.0, 39.83], [42.0, 42.62]], "keep_status": [false, false, true, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 32.85, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 36.72, 0.0], "audiomae_on_audioset": [null, null, [["speech", 40.1], ["sidetone", 13.42], ["dishes, pots, and pans", 12.51]], null, null, null, null, null, null, [["speech", 70.8], ["radio", 3.09], ["mains hum", 3.02]], null], "duration": [-0.1, -0.06, 5.22, 0.46, 0.93, 0.46, 0.48, 1.15, 0.52, 4.83, 0.62]}
|
annotations_filtered/b2MEP246DxY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[14.0, 24.16], [27.0, 70.68], [79.0, 79.07], [80.0, 94.64], [98.0, 113.95]], "keep_status": [false, false, false, false, false], "silence_prob": [28.63, 0.0, 0.0, 30.4, 29.34], "audiomae_on_audioset": [[["speech", 44.9], ["music", 26.54], ["outside, rural or natural", 4.2]], null, null, [["speech", 48.08], ["brass instrument", 13.46], ["trombone", 11.75]], [["music", 45.46], ["speech", 20.87], ["electronic music", 7.7]]], "duration": [10.16, 43.68, 0.07, 14.64, 15.95]}
|
annotations_filtered/b2P-oU216V4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 10.79], [16.0, 26.43], [27.0, 29.71], [35.0, 42.36], [45.0, 45.33], [47.0, 55.53], [65.0, 67.96], [73.0, 78.12], [81.0, 88.72], [110.0, 111.72]], "keep_status": [false, true, false, false, false, false, true, false, false, false], "silence_prob": [32.09, 34.04, 54.83, 33.39, 0.0, 32.77, 34.57, 31.79, 32.01, 0.0], "audiomae_on_audioset": [[["speech", 43.85], ["music", 25.81], ["sidetone", 2.33]], [["music", 52.1], ["speech", 11.51], ["whack, thwack", 5.09]], null, [["music", 44.01], ["speech", 22.52], ["throbbing", 5.12]], null, [["cattle, bovinae", 37.54], ["livestock, farm animals, working animals", 35.4], ["moo", 23.38]], [["music", 43.32], ["speech", 18.38], ["moo", 6.35]], [["music", 82.11], ["theremin", 4.17], ["synthesizer", 3.43]], [["music", 72.52], ["throbbing", 4.7], ["musical instrument", 3.23]], null], "duration": [3.79, 10.43, 2.71, 7.36, 0.33, 8.53, 2.96, 5.12, 7.72, 1.72]}
|
annotations_filtered/b2WuWXRVdfk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[12.0, 12.26], [16.0, 48.46], [51.0, 52.56], [57.0, 98.56]], "keep_status": [false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null], "duration": [0.26, 32.46, 1.56, 41.56]}
|
annotations_filtered/b2f2Kqt_KcE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 8.09], [13.0, 13.54], [15.0, 15.23], [15.0, 16.48], [17.0, 18.03], [22.0, 24.97], [33.0, 34.8], [37.0, 37.02], [38.0, 40.73], [44.0, 46.58], [50.0, 51.27], [52.0, 54.16], [56.0, 57.6], [58.0, 60.74], [61.0, 62.41], [67.0, 66.95], [71.0, 71.73], [73.0, 82.0], [84.0, 83.81], [92.0, 92.8], [104.0, 105.27], [107.0, 108.8], [111.0, 112.04]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 99.1, 100.0, 0.0, 98.73, 0.0, 97.83, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.09, 0.54, 0.23, 1.48, 1.03, 2.97, 1.8, 0.02, 2.73, 2.58, 1.27, 2.16, 1.6, 2.74, 1.41, -0.05, 0.73, 9.0, -0.19, 0.8, 1.27, 1.8, 1.04]}
|
annotations_filtered/b2gz0vSh0J4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 8.88], [11.0, 25.54], [26.0, 25.93], [26.0, 28.0], [30.0, 31.83], [32.0, 33.29], [34.0, 35.55], [37.0, 39.36], [40.0, 42.38], [43.0, 49.87], [52.0, 58.67]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [99.98, 99.99, 0.0, 99.99, 0.0, 0.0, 0.0, 99.98, 99.94, 99.99, 66.88], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null], "duration": [6.88, 14.54, -0.07, 2.0, 1.83, 1.29, 1.55, 2.36, 2.38, 6.87, 6.67]}
|
annotations_filtered/b2hhdMiOTOE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[59.0, 58.6], [59.0, 96.97], [100.0, 126.98], [128.0, 138.67]], "keep_status": [false, false, true, true], "silence_prob": [0.0, 0.0, 31.65, 34.72], "audiomae_on_audioset": [null, null, [["music", 18.87], ["hum", 6.79], ["noise", 6.63]], [["music", 38.42], ["effects unit", 14.55], ["guitar", 7.35]]], "duration": [-0.4, 37.97, 26.98, 10.67]}
|
annotations_filtered/b2zQmmYEDY4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 7.43], [10.0, 12.63], [15.0, 16.02], [17.0, 17.9], [21.0, 21.07], [39.0, 39.34], [48.0, 54.73], [57.0, 64.94], [67.0, 67.22], [71.0, 101.29], [104.0, 116.33], [119.0, 120.78]], "keep_status": [false, true, false, false, false, false, false, true, false, false, false, false], "silence_prob": [0.0, 35.61, 0.0, 0.0, 0.0, 0.0, 30.95, 31.38, 0.0, 0.0, 30.32, 0.0], "audiomae_on_audioset": [null, [["music", 45.32], ["theremin", 9.4], ["didgeridoo", 3.36]], null, null, null, null, [["music", 36.56], ["speech", 32.39], ["theremin", 5.76]], [["music", 59.35], ["didgeridoo", 7.97], ["musical instrument", 2.27]], null, null, [["music", 47.92], ["hum", 13.69], ["speech", 9.75]], null], "duration": [1.43, 2.63, 1.02, 0.9, 0.07, 0.34, 6.73, 7.94, 0.22, 30.29, 12.33, 1.78]}
|
annotations_filtered/b3Aq5Vc0Ics_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 9.22], [9.0, 11.45], [12.0, 15.8], [26.0, 33.74], [35.0, 37.99], [41.0, 43.38], [48.0, 49.76], [50.0, 51.81], [55.0, 58.18], [59.0, 60.86], [62.0, 63.53], [73.0, 73.85], [76.0, 80.74], [82.0, 83.22], [85.0, 87.18], [89.0, 91.93]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 96.29, 77.2, 99.94, 99.97, 92.15, 0.0, 0.0, 99.95, 0.0, 0.0, 0.0, 99.8, 0.0, 100.0, 67.89], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.22, 2.45, 3.8, 7.74, 2.99, 2.38, 1.76, 1.81, 3.18, 1.86, 1.53, 0.85, 4.74, 1.22, 2.18, 2.93]}
|
annotations_filtered/b3EWsHg08x4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.17], [6.0, 8.68], [9.0, 9.86], [18.0, 18.1], [20.0, 21.19], [29.0, 29.37], [37.0, 37.59], [40.0, 41.05], [42.0, 42.18], [47.0, 47.63], [56.0, 56.34], [58.0, 58.46], [60.0, 60.4], [61.0, 61.8], [62.0, 63.41], [66.0, 68.05], [68.0, 68.49], [70.0, 70.82]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 33.08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 53.47, 0.0, 0.0], "audiomae_on_audioset": [null, [["speech", 68.57], ["music", 8.62], ["fireworks", 1.44]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.17, 2.68, 0.86, 0.1, 1.19, 0.37, 0.59, 1.05, 0.18, 0.63, 0.34, 0.46, 0.4, 0.8, 1.41, 2.05, 0.49, 0.82]}
|
annotations_filtered/b3OlGLDk4pY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 57.25], [60.0, 76.69], [79.0, 85.46], [87.0, 104.58], [107.0, 115.26], [119.0, 128.36], [131.0, 131.23], [133.0, 150.36], [154.0, 158.19], [159.0, 167.24], [172.0, 173.15], [175.0, 175.86], [177.0, 234.86]], "keep_status": [false, true, true, true, true, true, false, true, true, false, false, false, false], "silence_prob": [0.0, 29.7, 29.0, 28.73, 29.4, 29.54, 0.0, 28.7, 29.4, 28.83, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 22.08], ["hum", 10.79], ["mains hum", 10.28]], [["music", 26.03], ["static", 6.54], ["hum", 5.97]], [["music", 23.84], ["speech", 8.63], ["vehicle", 8.05]], [["music", 17.62], ["fly, housefly", 13.73], ["mosquito", 12.18]], [["music", 59.03], ["sound effect", 5.28], ["buzz", 3.97]], null, [["livestock, farm animals, working animals", 25.77], ["cattle, bovinae", 21.67], ["moo", 12.71]], [["music", 26.81], ["hum", 13.23], ["explosion", 12.48]], [["hum", 29.1], ["music", 25.12], ["mains hum", 16.37]], null, null, null], "duration": [52.25, 16.69, 6.46, 17.58, 8.26, 9.36, 0.23, 17.36, 4.19, 8.24, 1.15, 0.86, 57.86]}
|
annotations_filtered/b3lLWO2d7b0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[129.0, 133.14], [136.0, 168.4], [169.0, 170.67], [171.0, 171.42], [173.0, 195.22], [195.0, 195.25], [196.0, 200.18], [209.0, 212.14], [213.0, 212.94]], "keep_status": [false, false, false, false, false, false, false, false, false], "silence_prob": [38.82, 0.0, 0.0, 0.0, 32.47, 0.0, 33.23, 44.6, 0.0], "audiomae_on_audioset": [[["fly, housefly", 40.41], ["insect", 34.7], ["mosquito", 6.91]], null, null, null, [["music", 58.9], ["speech", 11.35], ["electronic music", 9.52]], null, [["throbbing", 26.42], ["music", 24.87], ["hum", 21.78]], [["music", 48.08], ["throbbing", 16.9], ["hum", 8.34]], null], "duration": [4.14, 32.4, 1.67, 0.42, 22.22, 0.25, 4.18, 3.14, -0.06]}
|
annotations_filtered/b3lOpSXhT0c_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[70.0, 103.67], [104.0, 104.14], [105.0, 108.63], [109.0, 111.2], [112.0, 113.29], [114.0, 137.71], [138.0, 138.38]], "keep_status": [false, false, true, true, false, true, false], "silence_prob": [0.0, 0.0, 41.2, 40.92, 0.0, 31.0, 0.0], "audiomae_on_audioset": [null, null, [["music", 30.38], ["hum", 10.08], ["throbbing", 5.84]], [["hum", 36.11], ["throbbing", 19.8], ["mains hum", 13.37]], null, [["whack, thwack", 19.5], ["music", 11.83], ["smash, crash", 9.09]], null], "duration": [33.67, 0.14, 3.63, 2.2, 1.29, 23.71, 0.38]}
|
annotations_filtered/b4kKWa_hjCk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 7.13], [9.0, 9.24], [10.0, 10.35], [11.0, 13.93], [16.0, 17.71], [18.0, 18.86], [19.0, 22.99], [26.0, 26.67], [38.0, 37.89], [54.0, 56.34], [58.0, 60.4], [63.0, 64.13], [87.0, 87.54], [95.0, 95.76], [111.0, 111.92], [117.0, 118.18], [120.0, 120.66]], "keep_status": [false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 99.95, 0.0, 0.0, 33.63, 0.0, 0.0, 58.47, 40.17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, [["moo", 42.83], ["cattle, bovinae", 27.71], ["livestock, farm animals, working animals", 18.63]], null, null, null, [["music", 53.95], ["foghorn", 5.7], ["theremin", 4.12]], null, null, null, null, null, null], "duration": [1.13, 0.24, 0.35, 2.93, 1.71, 0.86, 3.99, 0.67, -0.11, 2.34, 2.4, 1.13, 0.54, 0.76, 0.92, 1.18, 0.66]}
|
annotations_filtered/b4kRHpvisxE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 7.89], [10.0, 11.72], [13.0, 14.28], [18.0, 18.0], [18.0, 20.31], [25.0, 26.64], [31.0, 34.47], [35.0, 37.39], [39.0, 41.57], [43.0, 45.81], [50.0, 52.47], [54.0, 57.32], [60.0, 62.94], [64.0, 66.82], [69.0, 72.5], [75.0, 76.27], [79.0, 81.11], [84.0, 86.12], [88.0, 90.54], [92.0, 95.01], [96.0, 97.78], [101.0, 101.51], [103.0, 104.21], [107.0, 107.08], [112.0, 113.0], [117.0, 122.0], [129.0, 130.28], [131.0, 135.89], [137.0, 140.34], [143.0, 145.27]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 98.44, 0.0, 99.92, 56.86, 100.0, 100.0, 100.0, 73.97, 99.98, 99.1, 98.86, 0.0, 100.0, 98.99, 99.05, 94.52, 0.0, 0.0, 0.0, 0.0, 0.0, 60.32, 0.0, 71.29, 82.25, 76.37], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [-0.11, 1.72, 1.28, 0.0, 2.31, 1.64, 3.47, 2.39, 2.57, 2.81, 2.47, 3.32, 2.94, 2.82, 3.5, 1.27, 2.11, 2.12, 2.54, 3.01, 1.78, 0.51, 1.21, 0.08, 1.0, 5.0, 1.28, 4.89, 3.34, 2.27]}
|
annotations_filtered/b4vpGhO2LwA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[11.0, 11.38], [18.0, 18.18], [19.0, 19.62], [41.0, 41.35], [47.0, 47.82], [50.0, 51.02], [52.0, 54.04], [55.0, 56.54], [61.0, 61.11], [67.0, 67.1], [73.0, 72.82], [73.0, 76.0], [77.0, 77.48], [84.0, 84.08], [85.0, 86.07], [96.0, 96.9]], "keep_status": [false, false, false, false, false, false, true, false, false, false, false, true, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 42.58, 0.0, 0.0, 0.0, 0.0, 34.56, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, [["speech", 12.97], ["honk", 11.75], ["goose", 10.95]], null, null, null, null, [["speech", 35.66], ["hum", 9.83], ["sidetone", 6.71]], null, null, null, null], "duration": [0.38, 0.18, 0.62, 0.35, 0.82, 1.02, 2.04, 1.54, 0.11, 0.1, -0.18, 3.0, 0.48, 0.08, 1.07, 0.9]}
|
annotations_filtered/b56RExAdg7s_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[12.0, 13.12], [14.0, 35.45], [36.0, 37.54], [39.0, 42.25], [42.0, 44.59], [46.0, 50.68]], "keep_status": [false, false, false, false, false, true], "silence_prob": [0.0, 55.67, 0.0, 61.67, 57.32, 44.52], "audiomae_on_audioset": [null, null, null, null, null, [["hum", 34.88], ["mains hum", 22.01], ["music", 8.14]]], "duration": [1.12, 21.45, 1.54, 3.25, 2.59, 4.68]}
|
annotations_filtered/b5I94bT23cQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 2.79], [19.0, 19.79], [23.0, 25.08], [26.0, 26.43], [32.0, 33.74], [34.0, 35.7], [36.0, 37.4], [39.0, 39.9], [41.0, 41.28], [43.0, 43.09], [43.0, 48.32], [51.0, 50.67], [51.0, 52.74], [58.0, 58.16], [62.0, 64.81], [65.0, 72.05], [74.0, 74.36], [80.0, 80.97], [83.0, 84.13], [85.0, 85.87], [87.0, 87.3], [101.0, 101.36], [112.0, 112.62], [116.0, 118.37], [121.0, 121.54], [122.0, 123.52], [124.0, 126.03], [137.0, 139.16], [148.0, 155.73], [159.0, 160.64], [169.0, 171.73], [173.0, 174.7]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, true, true, false, false, false, false], "silence_prob": [0.0, 0.0, 84.62, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 42.55, 0.0, 0.0, 0.0, 68.8, 37.93, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 92.31, 0.0, 0.0, 36.18, 49.13, 84.25, 0.0, 86.45, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, [["speech", 64.42], ["fly, housefly", 5.84], ["throbbing", 3.26]], null, null, null, null, [["speech", 26.42], ["insect", 15.41], ["bee, wasp, etc.", 12.9]], null, null, null, null, null, null, null, null, null, null, [["speech", 19.41], ["boing", 11.47], ["hum", 8.13]], [["hum", 17.73], ["throbbing", 6.3], ["noise", 4.26]], null, null, null, null], "duration": [-0.21, 0.79, 2.08, 0.43, 1.74, 1.7, 1.4, 0.9, 0.28, 0.09, 5.32, -0.33, 1.74, 0.16, 2.81, 7.05, 0.36, 0.97, 1.13, 0.87, 0.3, 0.36, 0.62, 2.37, 0.54, 1.52, 2.03, 2.16, 7.73, 1.64, 2.73, 1.7]}
|
annotations_filtered/b5Q6A_1YyHg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[22.0, 27.01], [28.0, 32.68], [36.0, 40.76], [48.0, 68.15], [71.0, 80.57], [91.0, 102.69], [103.0, 103.79], [106.0, 106.89], [117.0, 124.38]], "keep_status": [false, false, false, false, false, true, false, false, true], "silence_prob": [31.13, 29.7, 30.1, 29.47, 29.78, 29.92, 0.0, 0.0, 32.68], "audiomae_on_audioset": [[["speech", 41.65], ["music", 33.4], ["theremin", 3.25]], [["music", 82.12], ["mosquito", 2.19], ["theremin", 2.05]], [["speech", 50.87], ["music", 33.41], ["boing", 2.7]], [["music", 57.71], ["speech", 10.03], ["fart", 3.61]], [["music", 54.8], ["speech", 27.43], ["electronic music", 1.92]], [["whack, thwack", 37.8], ["speech", 12.4], ["fart", 11.62]], null, null, [["speech", 45.09], ["whack, thwack", 16.11], ["thunk", 8.59]]], "duration": [5.01, 4.68, 4.76, 20.15, 9.57, 11.69, 0.79, 0.89, 7.38]}
|
annotations_filtered/b60DLSEemEY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 4.53], [6.0, 12.38], [13.0, 13.22], [14.0, 18.71], [19.0, 23.41], [24.0, 24.34], [25.0, 27.08], [27.0, 50.84], [52.0, 54.01], [54.0, 55.43], [60.0, 60.13], [62.0, 64.07], [66.0, 65.97], [66.0, 68.23], [69.0, 70.02], [79.0, 79.42], [83.0, 83.34], [85.0, 84.94], [92.0, 92.91], [98.0, 98.17], [103.0, 104.75], [106.0, 119.11], [125.0, 129.98], [130.0, 131.38]], "keep_status": [true, false, false, false, false, false, true, false, true, false, false, false, false, true, false, false, false, false, false, false, false, true, false, false], "silence_prob": [41.72, 63.1, 0.0, 53.84, 37.34, 0.0, 30.69, 30.7, 32.14, 0.0, 0.0, 43.69, 0.0, 40.59, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 44.52, 96.89, 0.0], "audiomae_on_audioset": [[["hum", 17.11], ["mains hum", 16.89], ["music", 12.81]], null, null, null, [["sidetone", 62.83], ["hum", 12.77], ["speech", 9.22]], null, [["hum", 17.6], ["throbbing", 16.32], ["mains hum", 13.33]], [["music", 46.06], ["hum", 17.2], ["throbbing", 11.46]], [["speech", 34.67], ["music", 11.77], ["hum", 6.01]], null, null, [["speech", 75.45], ["sidetone", 6.15], ["music", 2.94]], null, [["music", 26.08], ["speech", 18.32], ["mains hum", 7.25]], null, null, null, null, null, null, null, [["crow", 40.8], ["caw", 20.29], ["hum", 7.68]], null, null], "duration": [3.53, 6.38, 0.22, 4.71, 4.41, 0.34, 2.08, 23.84, 2.01, 1.43, 0.13, 2.07, -0.03, 2.23, 1.02, 0.42, 0.34, -0.06, 0.91, 0.17, 1.75, 13.11, 4.98, 1.38]}
|
annotations_filtered/b65C_muXajk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[26.0, 26.25], [28.0, 28.26], [29.0, 29.57], [38.0, 38.64], [41.0, 41.2], [42.0, 48.39], [51.0, 50.94], [52.0, 52.32], [53.0, 53.65], [55.0, 56.98], [85.0, 85.33], [86.0, 87.84], [92.0, 94.54], [97.0, 98.49], [100.0, 100.52], [102.0, 102.46], [110.0, 110.54], [114.0, 114.76], [118.0, 122.01], [123.0, 125.9], [126.0, 126.74], [128.0, 130.76], [132.0, 135.4], [136.0, 137.98], [143.0, 144.58], [146.0, 147.26], [149.0, 151.24], [152.0, 153.94], [155.0, 157.67]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 99.59, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 99.99, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0, 0.0, 100.0, 99.92, 0.0, 0.0, 0.0, 35.46, 0.0, 31.9], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 21.33], ["sound effect", 6.28], ["cacophony", 6.08]], null, [["hum", 24.13], ["mains hum", 24.11], ["boat, water vehicle", 5.58]]], "duration": [0.25, 0.26, 0.57, 0.64, 0.2, 6.39, -0.06, 0.32, 0.65, 1.98, 0.33, 1.84, 2.54, 1.49, 0.52, 0.46, 0.54, 0.76, 4.01, 2.9, 0.74, 2.76, 3.4, 1.98, 1.58, 1.26, 2.24, 1.94, 2.67]}
|
annotations_filtered/b6X5bVMoCJc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.47], [6.0, 7.08], [13.0, 13.61], [18.0, 19.09], [23.0, 24.51], [27.0, 28.0], [30.0, 31.7], [33.0, 39.92], [42.0, 42.48], [43.0, 45.79], [47.0, 51.0], [53.0, 59.88], [61.0, 63.26], [68.0, 73.99], [77.0, 79.47], [81.0, 81.7], [84.0, 84.5], [87.0, 92.8], [98.0, 99.0], [105.0, 112.99], [115.0, 117.63]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, true, true, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 82.43, 0.0, 46.36, 45.36, 31.77, 41.44, 34.67, 32.08, 0.0, 0.0, 32.94, 0.0, 33.62, 56.7], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, [["music", 36.82], ["speech", 25.38], ["throbbing", 13.1]], [["speech", 75.11], ["music", 9.51], ["sidetone", 1.09]], [["music", 50.88], ["boing", 16.25], ["speech", 7.89]], [["music", 40.1], ["throbbing", 12.3], ["hum", 4.48]], [["music", 27.09], ["speech", 22.91], ["throbbing", 17.71]], [["music", 40.44], ["speech", 39.39], ["sidetone", 2.98]], null, null, [["music", 41.38], ["breaking", 23.23], ["whack, thwack", 6.37]], null, [["music", 41.32], ["speech", 27.92], ["throbbing", 9.01]], null], "duration": [0.47, 1.08, 0.61, 1.09, 1.51, 1.0, 1.7, 6.92, 0.48, 2.79, 4.0, 6.88, 2.26, 5.99, 2.47, 0.7, 0.5, 5.8, 1.0, 7.99, 2.63]}
|
annotations_filtered/b6vOp7_rI6Q_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[16.0, 23.87], [24.0, 27.84], [28.0, 31.43], [32.0, 34.05], [38.0, 40.98], [42.0, 44.93], [45.0, 50.7], [52.0, 60.93], [63.0, 64.57], [66.0, 66.73], [73.0, 76.17], [77.0, 77.38], [78.0, 81.13], [104.0, 104.53], [112.0, 112.75], [118.0, 119.03], [127.0, 128.28], [131.0, 132.51], [135.0, 135.85], [138.0, 138.0], [142.0, 142.91], [144.0, 145.0], [147.0, 147.31], [152.0, 153.59], [156.0, 156.32], [162.0, 162.87]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [97.83, 93.13, 80.64, 99.48, 100.0, 100.0, 83.7, 99.99, 0.0, 0.0, 100.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [7.87, 3.84, 3.43, 2.05, 2.98, 2.93, 5.7, 8.93, 1.57, 0.73, 3.17, 0.38, 3.13, 0.53, 0.75, 1.03, 1.28, 1.51, 0.85, 0.0, 0.91, 1.0, 0.31, 1.59, 0.32, 0.87]}
|
annotations_filtered/b6xbga06ApQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 5.68], [9.0, 13.59], [18.0, 19.26], [20.0, 22.92], [25.0, 26.18], [34.0, 34.33], [36.0, 36.85], [40.0, 40.46], [42.0, 42.16], [45.0, 45.2], [48.0, 48.14], [49.0, 51.46], [56.0, 56.13], [59.0, 59.61], [67.0, 70.82], [80.0, 81.36], [94.0, 94.39]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [100.0, 100.0, 0.0, 99.44, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 99.76, 0.0, 0.0, 99.62, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [3.68, 4.59, 1.26, 2.92, 1.18, 0.33, 0.85, 0.46, 0.16, 0.2, 0.14, 2.46, 0.13, 0.61, 3.82, 1.36, 0.39]}
|
annotations_filtered/b74611maYgQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[22.0, 24.58], [25.0, 28.58], [31.0, 39.23], [42.0, 50.16], [52.0, 55.05], [55.0, 55.14], [60.0, 60.78], [63.0, 69.77], [70.0, 70.88], [74.0, 74.56], [77.0, 87.86], [93.0, 96.03], [97.0, 103.33]], "keep_status": [false, true, true, true, true, false, false, false, false, false, false, false, true], "silence_prob": [31.54, 39.74, 40.28, 29.36, 32.42, 0.0, 0.0, 33.27, 0.0, 0.0, 32.87, 31.06, 30.4], "audiomae_on_audioset": [[["speech", 55.79], ["music", 14.47], ["explosion", 4.99]], [["speech", 23.46], ["hum", 20.26], ["mains hum", 10.18]], [["speech", 43.18], ["music", 12.54], ["mains hum", 6.29]], [["electric shaver, electric razor", 15.19], ["buzz", 13.88], ["speech", 13.19]], [["music", 37.65], ["hum", 13.47], ["throbbing", 8.88]], null, null, [["music", 59.23], ["speech", 21.2], ["electronic music", 2.7]], null, null, [["music", 43.08], ["speech", 23.8], ["hum", 14.51]], [["whale vocalization", 55.31], ["speech", 7.79], ["wild animals", 7.25]], [["whale vocalization", 28.49], ["music", 12.85], ["buzz", 10.51]]], "duration": [2.58, 3.58, 8.23, 8.16, 3.05, 0.14, 0.78, 6.77, 0.88, 0.56, 10.86, 3.03, 6.33]}
|
annotations_filtered/b7AjNXAF-7Y_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [], "keep_status": [], "silence_prob": [], "audiomae_on_audioset": [], "duration": []}
|
annotations_filtered/b7C69HqnV8s_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[13.0, 17.49], [18.0, 21.1], [22.0, 24.27], [25.0, 25.46], [28.0, 29.08], [32.0, 34.84], [35.0, 37.23], [39.0, 39.95], [40.0, 43.43], [46.0, 47.14], [49.0, 51.51], [53.0, 64.79], [68.0, 69.04], [71.0, 72.45], [73.0, 76.98], [81.0, 81.97], [84.0, 85.11], [87.0, 87.35]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [67.76, 80.11, 79.76, 0.0, 0.0, 59.59, 72.16, 0.0, 90.08, 0.0, 98.19, 36.72, 0.0, 0.0, 37.14, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, [["music", 82.78], ["funk", 2.09], ["synthesizer", 1.44]], null, null, [["music", 82.77], ["didgeridoo", 1.67], ["sampler", 1.49]], null, null, null], "duration": [4.49, 3.1, 2.27, 0.46, 1.08, 2.84, 2.23, 0.95, 3.43, 1.14, 2.51, 11.79, 1.04, 1.45, 3.98, 0.97, 1.11, 0.35]}
|
annotations_filtered/b7Dxy34dFyY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.49], [5.0, 6.59], [10.0, 10.69], [12.0, 12.78], [13.0, 14.55], [25.0, 26.0], [30.0, 31.43], [32.0, 35.97], [38.0, 39.9], [45.0, 45.96], [47.0, 48.24], [53.0, 54.53], [57.0, 58.01], [60.0, 60.05], [64.0, 64.5], [66.0, 67.04], [72.0, 72.32], [73.0, 73.45], [78.0, 78.22], [80.0, 81.84], [85.0, 85.46], [90.0, 96.42], [98.0, 98.76]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 94.37, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 99.99, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.49, 1.59, 0.69, 0.78, 1.55, 1.0, 1.43, 3.97, 1.9, 0.96, 1.24, 1.53, 1.01, 0.05, 0.5, 1.04, 0.32, 0.45, 0.22, 1.84, 0.46, 6.42, 0.76]}
|
annotations_filtered/b7lV6-iKiwQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[70.0, 81.94], [83.0, 87.13], [88.0, 90.44], [91.0, 93.92], [97.0, 98.66], [100.0, 101.78], [104.0, 108.9], [111.0, 111.92], [112.0, 113.8], [116.0, 116.87]], "keep_status": [false, false, false, false, false, false, false, false, false, false], "silence_prob": [98.66, 99.99, 100.0, 77.36, 0.0, 0.0, 99.36, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null], "duration": [11.94, 4.13, 2.44, 2.92, 1.66, 1.78, 4.9, 0.92, 1.8, 0.87]}
|
annotations_filtered/b7wurDomuVs_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 7.99], [8.0, 10.1], [13.0, 26.99], [27.0, 48.52], [49.0, 50.58], [52.0, 52.95], [53.0, 54.7], [57.0, 60.29], [62.0, 71.19], [74.0, 75.69], [78.0, 98.64], [106.0, 106.98]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 98.51, 96.04, 87.19, 0.0, 0.0, 0.0, 99.44, 99.56, 0.0, 58.64, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.99, 2.1, 13.99, 21.52, 1.58, 0.95, 1.7, 3.29, 9.19, 1.69, 20.64, 0.98]}
|
annotations_filtered/b8Dv782UIb4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 5.14], [6.0, 9.78], [10.0, 10.69], [13.0, 15.25], [16.0, 17.1], [19.0, 19.77], [22.0, 22.62], [24.0, 25.93], [27.0, 27.13], [30.0, 30.42], [31.0, 37.34], [39.0, 41.86], [43.0, 44.24], [45.0, 46.63], [47.0, 58.26], [68.0, 69.79], [72.0, 72.84], [74.0, 76.71], [78.0, 78.85], [81.0, 82.68], [85.0, 85.99], [95.0, 95.05], [96.0, 96.7], [98.0, 99.93], [101.0, 103.08], [105.0, 106.19], [108.0, 108.9]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, true, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 92.48, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 92.48, 52.33, 0.0, 0.0, 29.41, 0.0, 0.0, 40.38, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 94.37, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["animal", 29.09], ["grunt", 15.37], ["roaring cats (lions, tigers)", 13.47]], null, null, [["cattle, bovinae", 20.15], ["livestock, farm animals, working animals", 15.91], ["speech", 14.13]], null, null, null, null, null, null, null, null, null], "duration": [1.14, 3.78, 0.69, 2.25, 1.1, 0.77, 0.62, 1.93, 0.13, 0.42, 6.34, 2.86, 1.24, 1.63, 11.26, 1.79, 0.84, 2.71, 0.85, 1.68, 0.99, 0.05, 0.7, 1.93, 2.08, 1.19, 0.9]}
|
annotations_filtered/b8U1na74Bcc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 55.9], [59.0, 73.67], [74.0, 75.47], [76.0, 76.64], [77.0, 77.67], [82.0, 82.39], [86.0, 86.49], [87.0, 87.83], [91.0, 95.17], [96.0, 98.93], [99.0, 99.99], [104.0, 107.06], [109.0, 110.08], [112.0, 113.22], [114.0, 116.21], [117.0, 117.32], [118.0, 118.29], [119.0, 119.69], [121.0, 127.38], [132.0, 134.2]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "silence_prob": [0.0, 36.18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 92.31, 84.8, 0.0, 95.91, 0.0, 0.0, 92.64, 0.0, 0.0, 0.0, 32.85, 34.02], "audiomae_on_audioset": [null, [["telephone bell ringing", 68.72], ["music", 17.45], ["telephone", 7.14]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 42.45], ["synthesizer", 16.91], ["sine wave", 10.65]], [["music", 34.1], ["speech", 18.57], ["foghorn", 9.94]]], "duration": [45.9, 14.67, 1.47, 0.64, 0.67, 0.39, 0.49, 0.83, 4.17, 2.93, 0.99, 3.06, 1.08, 1.22, 2.21, 0.32, 0.29, 0.69, 6.38, 2.2]}
|
annotations_filtered/b8oFKKPfgi0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 2.1], [2.0, 4.36], [6.0, 6.0], [6.0, 8.33], [9.0, 10.71], [12.0, 12.66], [13.0, 14.37], [15.0, 16.26], [17.0, 18.79], [20.0, 21.88], [22.0, 25.83], [30.0, 30.74], [31.0, 80.59], [81.0, 82.07], [84.0, 91.71], [92.0, 93.46], [95.0, 96.72], [98.0, 103.45], [103.0, 103.49], [104.0, 103.54], [104.0, 104.92], [108.0, 109.51], [111.0, 112.46], [113.0, 114.88], [116.0, 118.35], [118.0, 118.42], [120.0, 126.55], [129.0, 129.96], [131.0, 136.12]], "keep_status": [false, true, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false], "silence_prob": [0.0, 43.82, 0.0, 38.35, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 35.84, 0.0, 0.0, 0.0, 33.71, 0.0, 0.0, 31.75, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 39.94, 0.0, 32.33, 0.0, 61.97], "audiomae_on_audioset": [null, [["music", 51.88], ["speech", 13.2], ["techno", 2.47]], null, [["music", 39.01], ["frog", 14.98], ["croak", 8.25]], null, null, null, null, null, null, [["speech", 71.04], ["radio", 5.41], ["busy signal", 3.1]], null, null, null, [["music", 61.14], ["speech", 10.94], ["didgeridoo", 2.87]], null, null, [["music", 78.91], ["electronic music", 1.5], ["house music", 1.49]], null, null, null, null, null, null, [["music", 45.76], ["throbbing", 6.7], ["sampler", 4.22]], null, [["music", 60.05], ["speech", 10.28], ["drum machine", 6.81]], null, null], "duration": [1.1, 2.36, 0.0, 2.33, 1.71, 0.66, 1.37, 1.26, 1.79, 1.88, 3.83, 0.74, 49.59, 1.07, 7.71, 1.46, 1.72, 5.45, 0.49, -0.46, 0.92, 1.51, 1.46, 1.88, 2.35, 0.42, 6.55, 0.96, 5.12]}
|
annotations_filtered/b8t5kX7k0vQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 5.16], [5.0, 5.8]], "keep_status": [false, false], "silence_prob": [0.0, 0.0], "audiomae_on_audioset": [null, null], "duration": [0.16, 0.8]}
|
annotations_filtered/b95SzqTrjRo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 4.29], [5.0, 5.63], [8.0, 8.13], [9.0, 9.32], [12.0, 12.46], [13.0, 13.39], [16.0, 16.97], [22.0, 26.1], [29.0, 30.91], [31.0, 35.34], [42.0, 42.96], [44.0, 46.4], [49.0, 53.0]], "keep_status": [false, false, false, false, false, false, false, true, false, false, false, true, false], "silence_prob": [63.85, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 40.36, 0.0, 52.1, 0.0, 49.97, 53.47], "audiomae_on_audioset": [null, null, null, null, null, null, null, [["speech", 38.84], ["whale vocalization", 9.15], ["livestock, farm animals, working animals", 8.26]], null, null, null, [["glass", 16.08], ["mains hum", 15.22], ["hum", 13.34]], null], "duration": [3.29, 0.63, 0.13, 0.32, 0.46, 0.39, 0.97, 4.1, 1.91, 4.34, 0.96, 2.4, 4.0]}
|