Spaces:
Build error
Build error
WIP
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- annotations_filtered/h-Ss_FvzZcs_filtered.json +1 -0
- annotations_filtered/h-UqMU-MIig_filtered.json +1 -0
- annotations_filtered/h0i2KfT2SB0_filtered.json +1 -0
- annotations_filtered/h0qWin97VyI_filtered.json +1 -0
- annotations_filtered/h0qniQTX3r8_filtered.json +1 -0
- annotations_filtered/h0z4HetQWME_filtered.json +1 -0
- annotations_filtered/h1-T9LYq1hI_filtered.json +1 -0
- annotations_filtered/h1F9-NKqDDk_filtered.json +1 -0
- annotations_filtered/h1aJoWg4vGo_filtered.json +1 -0
- annotations_filtered/h36wtoBcAS8_filtered.json +1 -0
- annotations_filtered/h3AqOR2Ru1s_filtered.json +1 -0
- annotations_filtered/h3VZ6IRrVlI_filtered.json +1 -0
- annotations_filtered/h3g5B5JhFcY_filtered.json +1 -0
- annotations_filtered/h41ylpWhV1I_filtered.json +1 -0
- annotations_filtered/h44egWnbrrg_filtered.json +1 -0
- annotations_filtered/h4SMndWj5To_filtered.json +1 -0
- annotations_filtered/h4eOGlJpLYg_filtered.json +1 -0
- annotations_filtered/h4lbn5nDXwY_filtered.json +1 -0
- annotations_filtered/h4u9pO-98ZM_filtered.json +1 -0
- annotations_filtered/h52UYfkLhXg_filtered.json +1 -0
- annotations_filtered/h55rTtbCy7o_filtered.json +1 -0
- annotations_filtered/h5KBS20Ke6U_filtered.json +1 -0
- annotations_filtered/h5OjSHDUn8c_filtered.json +1 -0
- annotations_filtered/h5RMM02YE3U_filtered.json +1 -0
- annotations_filtered/h5UGcMYOaaU_filtered.json +1 -0
- annotations_filtered/h5dCFGJp__0_filtered.json +1 -0
- annotations_filtered/h5f5GgqVWes_filtered.json +1 -0
- annotations_filtered/h5jZBcDev1s_filtered.json +1 -0
- annotations_filtered/h5nhyFFSweU_filtered.json +1 -0
- annotations_filtered/h6iHbAju1cI_filtered.json +1 -0
- annotations_filtered/h7CCnLwD2MY_filtered.json +1 -0
- annotations_filtered/h7NG9ZEfyKo_filtered.json +1 -0
- annotations_filtered/h7ZUKB_zYQ0_filtered.json +1 -0
- annotations_filtered/h7wEE6Yx7IQ_filtered.json +1 -0
- annotations_filtered/h8E3sSTc11E_filtered.json +1 -0
- annotations_filtered/h8Rxb-9snJQ_filtered.json +1 -0
- annotations_filtered/h8c0Q6aqZG8_filtered.json +1 -0
- annotations_filtered/h8m69o_1PoQ_filtered.json +1 -0
- annotations_filtered/h8wzJimC5Zc_filtered.json +1 -0
- annotations_filtered/h9GHe5K0kOI_filtered.json +1 -0
- annotations_filtered/h9Rb7mT3juI_filtered.json +1 -0
- annotations_filtered/h9WDm1k4Hz4_filtered.json +1 -0
- annotations_filtered/h9jsnAD4aNw_filtered.json +1 -0
- annotations_filtered/hA063IaOHyQ_filtered.json +1 -0
- annotations_filtered/hA0OlCQLC0Q_filtered.json +1 -0
- annotations_filtered/hA1BikUzBKc_filtered.json +1 -0
- annotations_filtered/hAQ2xTr4U64_filtered.json +1 -0
- annotations_filtered/hAU8AQ6xlw8_filtered.json +1 -0
- annotations_filtered/hAUbdHw8QG4_filtered.json +1 -0
- annotations_filtered/hAbVFxYi_q0_filtered.json +1 -0
annotations_filtered/h-Ss_FvzZcs_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 10.1], [15.0, 28.04], [32.0, 46.94], [48.0, 67.27], [68.0, 74.14], [86.0, 102.95], [105.0, 124.92], [128.0, 133.89], [137.0, 144.56], [145.0, 150.8], [154.0, 155.21], [159.0, 159.24], [170.0, 171.31]], "keep_status": [false, false, true, false, true, false, false, true, false, false, false, false, false], "silence_prob": [35.42, 29.9, 30.24, 30.33, 30.32, 30.28, 30.09, 31.38, 33.35, 33.63, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["speech", 42.25], ["music", 35.44], ["quack", 7.12]], [["music", 76.63], ["synthetic singing", 1.57], ["funny music", 1.07]], [["livestock, farm animals, working animals", 36.16], ["music", 17.28], ["speech", 10.9]], [["moo", 31.23], ["cattle, bovinae", 20.38], ["music", 19.61]], [["speech", 22.65], ["music", 22.19], ["cattle, bovinae", 10.22]], [["music", 79.47], ["speech", 7.75], ["boing", 2.26]], [["cattle, bovinae", 52.03], ["moo", 34.41], ["livestock, farm animals, working animals", 11.98]], [["mosquito", 19.12], ["speech", 13.22], ["music", 11.87]], [["music", 31.5], ["speech", 30.62], ["fly, housefly", 10.57]], [["music", 59.16], ["carnatic music", 14.59], ["theremin", 3.22]], null, null, null], "duration": [4.1, 13.04, 14.94, 19.27, 6.14, 16.95, 19.92, 5.89, 7.56, 5.8, 1.21, 0.24, 1.31]}
|
annotations_filtered/h-UqMU-MIig_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.46], [3.0, 3.94], [5.0, 7.8], [10.0, 11.28], [13.0, 14.54], [17.0, 78.66], [85.0, 95.08], [99.0, 100.11], [100.0, 108.9], [110.0, 111.74]], "keep_status": [false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 60.89, 0.0, 0.0, 0.0, 66.15, 0.0, 37.07, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, [["speech", 66.74], ["explosion", 7.79], ["echo", 3.13]], null], "duration": [0.46, 0.94, 2.8, 1.28, 1.54, 61.66, 10.08, 1.11, 8.9, 1.74]}
|
annotations_filtered/h0i2KfT2SB0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[11.0, 12.9], [16.0, 16.39], [16.0, 21.0], [24.0, 25.3], [27.0, 28.07], [30.0, 31.18], [32.0, 34.1], [42.0, 43.5], [45.0, 45.37], [48.0, 51.04], [54.0, 56.91], [58.0, 61.16], [61.0, 72.93], [76.0, 76.44], [77.0, 94.15], [95.0, 109.54], [110.0, 109.58], [110.0, 109.61], [113.0, 129.66]], "keep_status": [false, false, false, false, false, false, true, false, false, true, true, false, false, false, true, true, false, false, true], "silence_prob": [0.0, 0.0, 52.16, 0.0, 0.0, 0.0, 41.85, 0.0, 0.0, 47.62, 41.52, 50.26, 42.72, 0.0, 33.14, 32.33, 0.0, 0.0, 32.15], "audiomae_on_audioset": [null, null, null, null, null, null, [["speech", 24.04], ["music", 23.09], ["throbbing", 10.22]], null, null, [["music", 15.65], ["hum", 11.85], ["speech", 11.17]], [["music", 38.01], ["electronic music", 11.39], ["hum", 11.1]], null, [["hum", 47.09], ["mains hum", 15.08], ["music", 15.05]], null, [["music", 27.1], ["speech", 17.89], ["didgeridoo", 7.98]], [["music", 22.3], ["throbbing", 19.26], ["whack, thwack", 17.18]], null, null, [["music", 58.59], ["didgeridoo", 4.55], ["speech", 3.73]]], "duration": [1.9, 0.39, 5.0, 1.3, 1.07, 1.18, 2.1, 1.5, 0.37, 3.04, 2.91, 3.16, 11.93, 0.44, 17.15, 14.54, -0.42, -0.39, 16.66]}
|
annotations_filtered/h0qWin97VyI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[14.0, 14.66], [15.0, 17.58], [18.0, 18.84], [20.0, 21.32], [26.0, 26.43], [35.0, 34.92], [36.0, 39.83], [41.0, 42.33], [44.0, 45.37], [46.0, 48.39], [50.0, 58.45], [59.0, 61.01], [62.0, 65.3], [66.0, 68.82], [69.0, 72.77], [74.0, 78.92], [80.0, 80.84], [81.0, 81.26], [97.0, 97.58], [98.0, 103.18], [106.0, 106.34], [112.0, 113.75], [114.0, 117.34], [118.0, 120.23]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 95.91, 0.0, 0.0, 0.0, 0.0, 91.98, 0.0, 0.0, 99.68, 79.41, 96.17, 94.37, 97.22, 94.81, 98.86, 0.0, 0.0, 0.0, 80.11, 0.0, 0.0, 70.44, 99.62], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.66, 2.58, 0.84, 1.32, 0.43, -0.08, 3.83, 1.33, 1.37, 2.39, 8.45, 2.01, 3.3, 2.82, 3.77, 4.92, 0.84, 0.26, 0.58, 5.18, 0.34, 1.75, 3.34, 2.23]}
|
annotations_filtered/h0qniQTX3r8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[45.0, 45.45], [46.0, 45.99], [47.0, 73.04], [73.0, 78.19], [81.0, 80.65], [82.0, 86.59], [88.0, 91.18], [93.0, 96.01], [98.0, 98.76], [102.0, 104.57], [105.0, 108.08], [109.0, 123.26], [127.0, 128.83], [130.0, 134.96], [137.0, 142.65], [145.0, 148.88]], "keep_status": [false, false, true, true, false, true, false, false, false, true, false, true, false, true, true, false], "silence_prob": [0.0, 0.0, 35.25, 34.54, 0.0, 38.74, 42.11, 54.56, 0.0, 47.16, 92.31, 40.59, 0.0, 47.98, 33.88, 35.38], "audiomae_on_audioset": [null, null, [["hum", 22.25], ["buzz", 14.08], ["music", 12.33]], [["music", 12.52], ["mains hum", 12.33], ["hum", 9.4]], null, [["grunt", 32.61], ["music", 17.54], ["throbbing", 8.97]], [["livestock, farm animals, working animals", 39.8], ["moo", 19.39], ["cattle, bovinae", 18.7]], null, null, [["mains hum", 17.22], ["hum", 12.92], ["bow-wow", 10.71]], null, [["speech", 33.11], ["music", 32.6], ["didgeridoo", 4.01]], null, [["music", 31.24], ["didgeridoo", 12.39], ["speech", 7.23]], [["music", 25.94], ["hum", 19.4], ["mains hum", 11.57]], [["throbbing", 46.64], ["music", 24.96], ["hum", 14.1]]], "duration": [0.45, -0.01, 26.04, 5.19, -0.35, 4.59, 3.18, 3.01, 0.76, 2.57, 3.08, 14.26, 1.83, 4.96, 5.65, 3.88]}
|
annotations_filtered/h0z4HetQWME_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 5.68], [7.0, 8.61], [13.0, 13.12], [15.0, 16.7], [17.0, 17.46], [18.0, 21.25], [22.0, 25.51], [28.0, 27.72], [30.0, 31.4], [33.0, 35.23], [38.0, 40.19], [40.0, 40.27], [40.0, 40.51], [41.0, 41.37], [46.0, 62.53], [68.0, 71.95], [79.0, 79.59], [80.0, 81.09], [84.0, 84.99], [86.0, 86.98], [90.0, 90.32], [91.0, 93.72], [96.0, 96.57], [101.0, 102.39], [103.0, 103.42], [114.0, 114.13], [115.0, 115.52], [118.0, 118.74], [120.0, 120.65], [123.0, 124.09], [125.0, 126.27]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [100.0, 0.0, 0.0, 0.0, 0.0, 76.2, 39.09, 0.0, 0.0, 37.77, 38.67, 0.0, 0.0, 0.0, 33.26, 92.15, 0.0, 0.0, 0.0, 0.0, 0.0, 98.66, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, [["music", 59.09], ["effects unit", 14.05], ["distortion", 4.8]], null, null, [["music", 75.79], ["guitar", 8.21], ["musical instrument", 4.77]], [["music", 53.29], ["boing", 10.42], ["sidetone", 7.19]], null, null, null, [["music", 64.77], ["synthesizer", 18.08], ["electronic music", 2.31]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [2.68, 1.61, 0.12, 1.7, 0.46, 3.25, 3.51, -0.28, 1.4, 2.23, 2.19, 0.27, 0.51, 0.37, 16.53, 3.95, 0.59, 1.09, 0.99, 0.98, 0.32, 2.72, 0.57, 1.39, 0.42, 0.13, 0.52, 0.74, 0.65, 1.09, 1.27]}
|
annotations_filtered/h1-T9LYq1hI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 65.82]], "keep_status": [false], "silence_prob": [0.0], "audiomae_on_audioset": [null], "duration": [62.82]}
|
annotations_filtered/h1F9-NKqDDk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 3.48], [4.0, 21.15], [23.0, 24.39], [25.0, 29.13], [32.0, 32.91], [37.0, 37.5], [38.0, 40.34], [45.0, 45.4], [47.0, 49.05], [52.0, 52.54], [54.0, 56.4], [59.0, 60.74], [68.0, 68.91], [70.0, 70.61], [72.0, 72.5], [74.0, 75.78], [76.0, 76.35], [77.0, 77.82], [80.0, 81.85], [85.0, 85.38], [86.0, 85.78], [88.0, 88.89], [89.0, 93.93], [97.0, 103.23], [105.0, 112.45]], "keep_status": [false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "silence_prob": [0.0, 36.3, 0.0, 72.16, 0.0, 0.0, 43.9, 0.0, 40.29, 0.0, 89.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 42.35, 45.11, 39.25], "audiomae_on_audioset": [null, [["beatboxing", 29.69], ["throbbing", 12.32], ["synthesizer", 12.18]], null, null, null, null, [["sidetone", 45.11], ["speech", 30.51], ["chirp tone", 9.62]], null, [["sine wave", 43.21], ["chirp tone", 39.57], ["speech", 3.79]], null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 53.28], ["theremin", 19.75], ["didgeridoo", 8.21]], [["music", 65.92], ["theremin", 10.77], ["musical instrument", 3.31]], [["music", 46.3], ["musical instrument", 8.35], ["synthesizer", 7.11]]], "duration": [1.48, 17.15, 1.39, 4.13, 0.91, 0.5, 2.34, 0.4, 2.05, 0.54, 2.4, 1.74, 0.91, 0.61, 0.5, 1.78, 0.35, 0.82, 1.85, 0.38, -0.22, 0.89, 4.93, 6.23, 7.45]}
|
annotations_filtered/h1aJoWg4vGo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 7.2], [7.0, 7.79], [11.0, 11.2], [13.0, 13.56], [16.0, 16.56], [18.0, 19.89], [20.0, 21.2], [22.0, 25.03], [26.0, 26.38], [27.0, 28.88], [32.0, 34.4], [36.0, 36.58], [37.0, 37.72], [38.0, 38.79], [39.0, 40.07], [42.0, 42.06], [43.0, 43.88], [49.0, 54.08], [56.0, 57.03], [59.0, 59.64], [60.0, 72.71], [74.0, 75.03], [76.0, 77.79], [80.0, 81.5], [82.0, 83.91], [88.0, 88.35], [89.0, 90.17], [92.0, 92.47], [93.0, 93.55], [94.0, 95.2], [96.0, 96.45], [97.0, 101.34], [102.0, 109.14], [114.0, 114.98], [116.0, 123.13], [125.0, 126.17], [128.0, 127.8], [129.0, 129.64]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 96.17, 0.0, 0.0, 99.48, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 90.78, 0.0, 0.0, 93.91, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 95.91, 99.95, 0.0, 68.67, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.2, 0.79, 0.2, 0.56, 0.56, 1.89, 1.2, 3.03, 0.38, 1.88, 2.4, 0.58, 0.72, 0.79, 1.07, 0.06, 0.88, 5.08, 1.03, 0.64, 12.71, 1.03, 1.79, 1.5, 1.91, 0.35, 1.17, 0.47, 0.55, 1.2, 0.45, 4.34, 7.14, 0.98, 7.13, 1.17, -0.2, 0.64]}
|
annotations_filtered/h36wtoBcAS8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.76], [4.0, 7.13], [14.0, 14.82], [15.0, 15.58], [21.0, 22.5], [31.0, 30.86], [32.0, 32.41], [33.0, 35.51], [36.0, 37.76], [42.0, 42.09], [44.0, 44.25], [54.0, 54.55], [56.0, 56.67], [58.0, 59.75], [60.0, 62.04], [64.0, 64.18], [66.0, 66.9], [68.0, 68.57], [70.0, 71.22], [72.0, 72.86], [74.0, 75.17], [76.0, 77.5], [80.0, 81.28], [83.0, 83.67], [85.0, 85.08], [86.0, 86.66], [87.0, 88.05], [90.0, 91.57], [92.0, 92.91], [98.0, 99.35], [100.0, 101.19], [101.0, 101.97], [103.0, 103.45], [105.0, 105.09], [107.0, 109.11], [117.0, 116.9]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false], "silence_prob": [0.0, 68.67, 0.0, 0.0, 0.0, 0.0, 0.0, 43.18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 54.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 49.92, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, [["speech", 49.99], ["sidetone", 19.34], ["telephone", 6.11]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["hum", 34.69], ["throbbing", 19.02], ["speech", 14.88]], null], "duration": [0.76, 3.13, 0.82, 0.58, 1.5, -0.14, 0.41, 2.51, 1.76, 0.09, 0.25, 0.55, 0.67, 1.75, 2.04, 0.18, 0.9, 0.57, 1.22, 0.86, 1.17, 1.5, 1.28, 0.67, 0.08, 0.66, 1.05, 1.57, 0.91, 1.35, 1.19, 0.97, 0.45, 0.09, 2.11, -0.1]}
|
annotations_filtered/h3AqOR2Ru1s_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[11.0, 12.61], [13.0, 13.17], [13.0, 13.22], [14.0, 14.42], [16.0, 17.02], [29.0, 30.3], [31.0, 31.75], [35.0, 38.53], [39.0, 38.58], [41.0, 43.73], [54.0, 54.45], [60.0, 60.61], [63.0, 65.7], [73.0, 82.17], [85.0, 94.46], [98.0, 108.01], [116.0, 124.31], [126.0, 127.89], [131.0, 134.57], [138.0, 137.94], [149.0, 149.3], [153.0, 153.79], [156.0, 155.55], [180.0, 184.55], [193.0, 194.83], [196.0, 200.21], [207.0, 209.22], [213.0, 214.62]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, true, false, false, true, false, false, false, false, false, false, true, true, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 45.02, 0.0, 68.54, 0.0, 0.0, 61.08, 55.89, 45.49, 40.54, 33.53, 0.0, 39.57, 0.0, 0.0, 0.0, 0.0, 35.45, 0.0, 39.38, 36.65, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, [["speech", 36.74], ["music", 31.03], ["thunk", 11.34]], null, null, null, null, null, null, [["music", 37.88], ["speech", 6.8], ["musical instrument", 3.92]], [["music", 43.81], ["hum", 13.97], ["throbbing", 7.65]], [["whack, thwack", 39.53], ["groan", 24.1], ["music", 15.48]], null, [["music", 45.74], ["synthesizer", 8.64], ["hum", 7.36]], null, null, null, null, [["music", 46.93], ["boing", 14.76], ["speech", 13.02]], null, [["music", 57.5], ["theremin", 6.44], ["synthesizer", 5.54]], [["music", 53.27], ["speech", 7.32], ["quack", 3.24]], null], "duration": [1.61, 0.17, 0.22, 0.42, 1.02, 1.3, 0.75, 3.53, -0.42, 2.73, 0.45, 0.61, 2.7, 9.17, 9.46, 10.01, 8.31, 1.89, 3.57, -0.06, 0.3, 0.79, -0.45, 4.55, 1.83, 4.21, 2.22, 1.62]}
|
annotations_filtered/h3VZ6IRrVlI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 2.64], [3.0, 8.01], [9.0, 9.58], [14.0, 15.43], [16.0, 17.66], [21.0, 22.11], [23.0, 40.09], [41.0, 44.24], [45.0, 48.68], [50.0, 50.25], [51.0, 51.97], [54.0, 54.94], [56.0, 63.22], [64.0, 64.67], [65.0, 65.42], [66.0, 69.92], [72.0, 72.25], [74.0, 75.66], [76.0, 75.73], [76.0, 75.79], [76.0, 81.9], [82.0, 83.62], [84.0, 87.49], [88.0, 89.55], [90.0, 91.96], [92.0, 92.01], [92.0, 92.31]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 36.92, 0.0, 0.0, 0.0, 0.0, 97.92, 94.52, 67.25, 0.0, 0.0, 0.0, 39.72, 0.0, 0.0, 37.4, 0.0, 0.0, 0.0, 0.0, 49.82, 0.0, 67.25, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["speech", 77.68], ["fart", 1.65], ["noise", 1.44]], null, null, null, null, null, null, null, null, null, null, [["speech", 64.51], ["hum", 4.66], ["whistling", 4.22]], null, null, [["speech", 58.84], ["fart", 26.91], ["inside, small room", 1.14]], null, null, null, null, [["speech", 71.78], ["noise", 12.41], ["radio", 4.88]], null, null, null, null, null, null], "duration": [-0.36, 5.01, 0.58, 1.43, 1.66, 1.11, 17.09, 3.24, 3.68, 0.25, 0.97, 0.94, 7.22, 0.67, 0.42, 3.92, 0.25, 1.66, -0.27, -0.21, 5.9, 1.62, 3.49, 1.55, 1.96, 0.01, 0.31]}
|
annotations_filtered/h3g5B5JhFcY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[50.0, 51.21], [66.0, 66.39], [68.0, 67.93], [79.0, 79.69], [83.0, 83.56], [85.0, 86.97], [89.0, 112.78], [113.0, 114.79], [116.0, 133.12], [133.0, 133.24]], "keep_status": [false, false, false, false, false, false, true, false, true, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 39.55, 0.0, 39.05, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, [["whale vocalization", 31.38], ["stomach rumble", 12.99], ["animal", 11.51]], null, [["hum", 40.7], ["music", 13.71], ["mains hum", 12.94]], null], "duration": [1.21, 0.39, -0.07, 0.69, 0.56, 1.97, 23.78, 1.79, 17.12, 0.24]}
|
annotations_filtered/h41ylpWhV1I_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[24.0, 25.71], [33.0, 34.06], [37.0, 89.95], [95.0, 105.92]], "keep_status": [false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 51.55], "audiomae_on_audioset": [null, null, null, null], "duration": [1.71, 1.06, 52.95, 10.92]}
|
annotations_filtered/h44egWnbrrg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 7.25], [9.0, 9.41], [10.0, 12.55], [14.0, 15.62], [16.0, 17.83], [22.0, 24.02], [26.0, 26.52], [35.0, 37.37], [39.0, 39.65], [41.0, 42.15], [44.0, 46.89], [50.0, 51.98], [55.0, 55.61], [57.0, 57.97], [59.0, 60.93], [64.0, 64.37]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 67.89, 0.0, 0.0, 58.89, 0.0, 50.76, 0.0, 0.0, 66.27, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.25, 0.41, 2.55, 1.62, 1.83, 2.02, 0.52, 2.37, 0.65, 1.15, 2.89, 1.98, 0.61, 0.97, 1.93, 0.37]}
|
annotations_filtered/h4SMndWj5To_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[18.0, 18.87], [19.0, 20.41], [21.0, 60.44], [61.0, 73.11], [91.0, 91.77], [96.0, 111.2]], "keep_status": [false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 97.73, 0.0, 30.74], "audiomae_on_audioset": [null, null, null, null, null, [["music", 70.19], ["swing music", 7.36], ["brass instrument", 1.7]]], "duration": [0.87, 1.41, 39.44, 12.11, 0.77, 15.2]}
|
annotations_filtered/h4eOGlJpLYg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 0.3], [9.0, 10.77], [11.0, 11.74], [13.0, 18.79], [20.0, 22.17], [23.0, 23.45], [25.0, 25.03], [26.0, 26.45], [27.0, 27.41], [28.0, 28.34], [29.0, 28.81], [31.0, 31.4], [32.0, 33.83], [36.0, 36.98], [48.0, 49.35], [51.0, 52.3], [53.0, 53.11], [61.0, 61.21], [63.0, 63.76], [67.0, 68.18], [72.0, 77.72], [83.0, 89.83], [91.0, 93.99]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "silence_prob": [0.0, 0.0, 0.0, 58.47, 97.33, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 51.82, 38.84, 34.18], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 69.0], ["radio", 6.41], ["explosion", 1.86]], [["animal", 34.25], ["speech", 13.49], ["clip-clop", 13.09]]], "duration": [0.3, 1.77, 0.74, 5.79, 2.17, 0.45, 0.03, 0.45, 0.41, 0.34, -0.19, 0.4, 1.83, 0.98, 1.35, 1.3, 0.11, 0.21, 0.76, 1.18, 5.72, 6.83, 2.99]}
|
annotations_filtered/h4lbn5nDXwY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 8.85], [10.0, 11.69], [14.0, 14.91], [15.0, 16.04], [17.0, 17.32], [35.0, 46.18], [47.0, 47.09], [53.0, 54.45], [62.0, 63.64], [66.0, 67.07], [89.0, 90.44], [99.0, 100.16], [102.0, 103.69]], "keep_status": [false, false, false, false, false, true, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 31.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, [["music", 57.01], ["whack, thwack", 3.63], ["mosquito", 2.7]], null, null, null, null, null, null, null], "duration": [0.85, 1.69, 0.91, 1.04, 0.32, 11.18, 0.09, 1.45, 1.64, 1.07, 1.44, 1.16, 1.69]}
|
annotations_filtered/h4u9pO-98ZM_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 4.31], [6.0, 6.34], [15.0, 15.45], [17.0, 18.28], [19.0, 19.99], [24.0, 24.02], [39.0, 40.54], [43.0, 43.68], [51.0, 52.84], [54.0, 57.08]], "keep_status": [false, false, false, false, false, false, false, false, false, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 43.66], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, [["music", 32.41], ["hum", 21.87], ["mains hum", 15.24]]], "duration": [1.31, 0.34, 0.45, 1.28, 0.99, 0.02, 1.54, 0.68, 1.84, 3.08]}
|
annotations_filtered/h52UYfkLhXg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.0], [1.0, 1.12], [5.0, 5.27], [7.0, 8.63], [19.0, 19.25], [22.0, 22.69], [37.0, 37.32], [45.0, 44.95], [65.0, 65.08], [67.0, 66.77], [69.0, 69.63], [70.0, 71.78], [76.0, 76.69], [78.0, 78.41], [81.0, 81.4], [86.0, 87.59], [102.0, 102.27], [112.0, 111.91], [116.0, 115.74], [117.0, 117.83], [125.0, 125.44]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.0, 0.12, 0.27, 1.63, 0.25, 0.69, 0.32, -0.05, 0.08, -0.23, 0.63, 1.78, 0.69, 0.41, 0.4, 1.59, 0.27, -0.09, -0.26, 0.83, 0.44]}
|
annotations_filtered/h55rTtbCy7o_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 2.91], [6.0, 6.3], [8.0, 8.82], [11.0, 11.65], [15.0, 16.06], [17.0, 17.02], [24.0, 23.99], [25.0, 25.78], [27.0, 28.56], [32.0, 32.36], [33.0, 34.38], [36.0, 52.25], [55.0, 56.02], [76.0, 77.19], [89.0, 89.56], [90.0, 90.95], [93.0, 92.99], [94.0, 95.93], [97.0, 97.41], [98.0, 98.41]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 86.82, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.91, 0.3, 0.82, 0.65, 1.06, 0.02, -0.01, 0.78, 1.56, 0.36, 1.38, 16.25, 1.02, 1.19, 0.56, 0.95, -0.01, 1.93, 0.41, 0.41]}
|
annotations_filtered/h5KBS20Ke6U_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[26.0, 28.02], [29.0, 30.08], [32.0, 32.81], [37.0, 37.81], [38.0, 39.55], [41.0, 41.81], [45.0, 48.39], [54.0, 55.61], [56.0, 59.1], [62.0, 62.68], [63.0, 63.54], [81.0, 81.45], [86.0, 87.03], [103.0, 103.06], [104.0, 104.08], [110.0, 110.35], [112.0, 112.28], [113.0, 113.24], [114.0, 123.11], [125.0, 139.97], [143.0, 142.89], [144.0, 145.52]], "keep_status": [false, false, false, false, false, false, true, false, true, false, false, false, false, false, false, false, false, false, true, true, false, false], "silence_prob": [67.89, 0.0, 0.0, 0.0, 0.0, 0.0, 49.13, 0.0, 48.78, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 38.94, 44.12, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, [["sine wave", 29.16], ["chirp tone", 25.45], ["noise", 8.28]], null, [["sidetone", 30.77], ["speech", 22.99], ["whale vocalization", 9.98]], null, null, null, null, null, null, null, null, null, [["animal", 15.49], ["music", 14.86], ["buzz", 7.42]], [["speech", 10.61], ["crow", 9.88], ["animal", 8.94]], null, null], "duration": [2.02, 1.08, 0.81, 0.81, 1.55, 0.81, 3.39, 1.61, 3.1, 0.68, 0.54, 0.45, 1.03, 0.06, 0.08, 0.35, 0.28, 0.24, 9.11, 14.97, -0.11, 1.52]}
|
annotations_filtered/h5OjSHDUn8c_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 0.5], [5.0, 12.92], [14.0, 15.26], [16.0, 16.53], [18.0, 18.84], [21.0, 21.37], [23.0, 24.75], [27.0, 27.92], [34.0, 37.57], [43.0, 44.46], [46.0, 47.14], [54.0, 55.97], [57.0, 58.5], [60.0, 59.93], [64.0, 63.8], [68.0, 68.67], [73.0, 72.87], [80.0, 81.03], [84.0, 84.7], [87.0, 89.01], [94.0, 95.72], [102.0, 108.89], [116.0, 115.79], [116.0, 115.96], [122.0, 122.69], [141.0, 143.24], [149.0, 150.23], [156.0, 157.27], [160.0, 161.64], [164.0, 165.11], [166.0, 168.12]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, true], "silence_prob": [0.0, 36.54, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 98.19, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 37.11, 0.0, 98.99, 0.0, 0.0, 0.0, 50.41, 0.0, 0.0, 0.0, 0.0, 40.81], "audiomae_on_audioset": [null, [["applause", 90.09], ["clapping", 1.27], ["eruption", 1.24]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 20.73], ["hum", 14.9], ["theremin", 9.43]], null, null, null, null, null, null, null, null, null, null, [["music", 48.89], ["musical instrument", 6.53], ["sidetone", 4.56]]], "duration": [0.5, 7.92, 1.26, 0.53, 0.84, 0.37, 1.75, 0.92, 3.57, 1.46, 1.14, 1.97, 1.5, -0.07, -0.2, 0.67, -0.13, 1.03, 0.7, 2.01, 1.72, 6.89, -0.21, -0.04, 0.69, 2.24, 1.23, 1.27, 1.64, 1.11, 2.12]}
|
annotations_filtered/h5RMM02YE3U_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 4.26], [5.0, 7.1], [8.0, 11.42], [12.0, 17.73], [19.0, 43.63], [44.0, 52.47], [54.0, 85.08], [86.0, 87.27], [89.0, 90.41]], "keep_status": [false, false, false, true, true, true, false, false, false], "silence_prob": [0.0, 42.37, 53.04, 32.96, 36.65, 34.18, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 48.89], ["speech", 24.43], ["effects unit", 3.46]], null, [["livestock, farm animals, working animals", 16.51], ["speech", 14.39], ["cattle, bovinae", 10.34]], [["chime", 27.82], ["music", 26.15], ["wind chime", 10.29]], [["music", 30.51], ["brass instrument", 23.0], ["trombone", 10.72]], null, null, null], "duration": [1.26, 2.1, 3.42, 5.73, 24.63, 8.47, 31.08, 1.27, 1.41]}
|
annotations_filtered/h5UGcMYOaaU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 1.78], [6.0, 13.98], [19.0, 19.45], [20.0, 20.93], [22.0, 26.0], [30.0, 30.06], [31.0, 31.23], [33.0, 34.74], [60.0, 60.29], [61.0, 62.02], [77.0, 78.06], [94.0, 97.95], [104.0, 105.02], [108.0, 122.2], [123.0, 122.82], [126.0, 126.15], [127.0, 127.31]], "keep_status": [false, false, false, false, true, false, false, false, false, false, false, true, false, false, false, false, false], "silence_prob": [0.0, 33.28, 0.0, 0.0, 37.95, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 42.11, 0.0, 34.32, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 76.82], ["smash, crash", 4.99], ["breaking", 4.03]], null, null, [["speech", 27.94], ["music", 23.04], ["didgeridoo", 13.53]], null, null, null, null, null, null, [["snicker", 30.41], ["laughter", 26.11], ["speech", 11.65]], null, [["music", 52.07], ["hum", 13.93], ["throbbing", 13.05]], null, null, null], "duration": [-0.22, 7.98, 0.45, 0.93, 4.0, 0.06, 0.23, 1.74, 0.29, 1.02, 1.06, 3.95, 1.02, 14.2, -0.18, 0.15, 0.31]}
|
annotations_filtered/h5dCFGJp__0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 4.41], [7.0, 7.05], [8.0, 8.09], [9.0, 9.95], [12.0, 12.45], [13.0, 14.82], [16.0, 17.56], [18.0, 20.88], [22.0, 27.13], [31.0, 33.07], [37.0, 52.0], [52.0, 54.21], [55.0, 59.75], [63.0, 63.42], [68.0, 69.31], [72.0, 72.71], [74.0, 74.34], [77.0, 77.21], [78.0, 79.49], [80.0, 81.43], [82.0, 86.12], [87.0, 94.17], [97.0, 99.12], [100.0, 102.07], [103.0, 102.79], [111.0, 110.96], [112.0, 112.35], [114.0, 114.37], [115.0, 116.75], [117.0, 116.82], [117.0, 117.54]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 74.6, 37.75, 98.01, 99.99, 49.64, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0, 99.99, 32.59, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, [["speech", 76.12], ["sidetone", 6.68], ["hum", 4.22]], null, null, [["speech", 32.17], ["sine wave", 16.38], ["dial tone", 8.95]], null, null, null, null, null, null, null, null, null, null, null, [["speech", 63.9], ["fart", 12.51], ["inside, small room", 3.52]], null, null, null, null, null, null, null], "duration": [1.41, 0.05, 0.09, 0.95, 0.45, 1.82, 1.56, 2.88, 5.13, 2.07, 15.0, 2.21, 4.75, 0.42, 1.31, 0.71, 0.34, 0.21, 1.49, 1.43, 4.12, 7.17, 2.12, 2.07, -0.21, -0.04, 0.35, 0.37, 1.75, -0.18, 0.54]}
|
annotations_filtered/h5f5GgqVWes_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 0.3], [3.0, 3.15], [4.0, 4.94], [6.0, 6.42], [9.0, 9.58], [11.0, 11.11], [13.0, 13.85], [15.0, 17.17], [19.0, 18.71], [24.0, 24.0], [27.0, 27.18], [29.0, 29.54], [30.0, 31.87], [32.0, 33.47], [35.0, 35.4], [38.0, 39.39], [40.0, 42.74], [46.0, 47.09], [50.0, 50.25], [51.0, 53.47], [59.0, 65.5], [66.0, 72.37], [75.0, 75.88], [77.0, 85.23], [87.0, 87.79], [90.0, 91.22], [92.0, 93.87], [98.0, 101.68], [103.0, 104.23], [105.0, 106.49], [110.0, 110.51], [119.0, 118.69], [119.0, 126.37], [128.0, 129.27], [135.0, 152.95], [154.0, 154.47], [156.0, 157.42], [158.0, 159.97], [161.0, 163.21], [165.0, 166.77], [168.0, 168.88], [172.0, 181.6], [185.0, 184.87], [186.0, 187.29]], "keep_status": [false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, true, true, false, false, false, false, false, false, false, false, false, false, false, true, false, true, false, false, false, false, false, false, true, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 48.39, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 39.43, 0.0, 0.0, 35.45, 36.25, 35.84, 0.0, 40.73, 0.0, 0.0, 0.0, 50.97, 0.0, 0.0, 0.0, 0.0, 28.73, 0.0, 29.69, 0.0, 0.0, 0.0, 91.98, 0.0, 0.0, 42.06, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, [["music", 42.9], ["theremin", 8.67], ["didgeridoo", 4.23]], null, null, null, null, null, null, null, null, [["whale vocalization", 55.35], ["hum", 7.65], ["music", 7.25]], null, null, [["speech", 40.57], ["clip-clop", 7.25], ["horse", 6.04]], [["music", 35.22], ["burping, eructation", 13.38], ["hum", 8.14]], [["music", 38.14], ["speech", 26.11], ["sidetone", 6.84]], null, [["music", 61.67], ["speech", 8.9], ["throbbing", 7.37]], null, null, null, null, null, null, null, null, [["gong", 20.22], ["music", 19.82], ["speech", 18.24]], null, [["music", 46.78], ["breaking", 13.87], ["buzz", 4.32]], null, null, null, null, null, null, [["music", 28.27], ["hum", 22.99], ["mains hum", 10.87]], null, null], "duration": [0.3, 0.15, 0.94, 0.42, 0.58, 0.11, 0.85, 2.17, -0.29, 0.0, 0.18, 0.54, 1.87, 1.47, 0.4, 1.39, 2.74, 1.09, 0.25, 2.47, 6.5, 6.37, 0.88, 8.23, 0.79, 1.22, 1.87, 3.68, 1.23, 1.49, 0.51, -0.31, 7.37, 1.27, 17.95, 0.47, 1.42, 1.97, 2.21, 1.77, 0.88, 9.6, -0.13, 1.29]}
|
annotations_filtered/h5jZBcDev1s_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.58], [6.0, 6.45], [7.0, 12.82], [14.0, 22.32], [28.0, 29.94], [40.0, 48.47], [51.0, 56.89], [58.0, 59.98], [61.0, 110.47], [113.0, 121.12], [123.0, 124.5], [126.0, 129.12], [130.0, 131.48]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 99.9, 99.97, 0.0, 90.6, 98.44, 0.0, 0.0, 72.01, 0.0, 69.61, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.58, 0.45, 5.82, 8.32, 1.94, 8.47, 5.89, 1.98, 49.47, 8.12, 1.5, 3.12, 1.48]}
|
annotations_filtered/h5nhyFFSweU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[9.0, 9.54], [10.0, 11.08], [13.0, 13.26], [14.0, 15.28], [18.0, 19.58], [21.0, 22.05], [23.0, 23.08], [25.0, 25.89], [27.0, 27.72], [28.0, 29.22], [30.0, 30.99], [31.0, 32.88], [35.0, 35.02], [35.0, 36.91], [41.0, 41.91], [43.0, 43.06], [45.0, 46.31], [47.0, 48.03], [50.0, 50.85], [54.0, 54.85], [57.0, 57.69], [60.0, 60.37], [62.0, 68.82], [70.0, 71.0], [71.0, 73.74], [77.0, 81.77], [83.0, 84.75], [87.0, 88.1], [89.0, 89.95], [91.0, 98.85], [101.0, 101.39], [105.0, 105.81], [107.0, 108.19], [112.0, 112.23], [114.0, 115.99], [119.0, 120.26], [129.0, 130.27], [131.0, 132.68], [133.0, 134.15], [135.0, 135.68], [136.0, 137.69], [138.0, 139.55], [141.0, 141.35], [142.0, 143.21], [145.0, 146.26], [147.0, 147.34], [150.0, 150.75], [151.0, 151.88], [153.0, 154.5], [156.0, 160.07], [161.0, 174.48], [176.0, 177.09], [180.0, 180.57], [181.0, 201.92]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 42.35, 0.0, 67.89, 92.48, 0.0, 0.0, 0.0, 60.79, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 90.95, 71.43, 0.0, 0.0, 80.29], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["sidetone", 45.37], ["speech", 16.63], ["radio", 12.75]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.54, 1.08, 0.26, 1.28, 1.58, 1.05, 0.08, 0.89, 0.72, 1.22, 0.99, 1.88, 0.02, 1.91, 0.91, 0.06, 1.31, 1.03, 0.85, 0.85, 0.69, 0.37, 6.82, 1.0, 2.74, 4.77, 1.75, 1.1, 0.95, 7.85, 0.39, 0.81, 1.19, 0.23, 1.99, 1.26, 1.27, 1.68, 1.15, 0.68, 1.69, 1.55, 0.35, 1.21, 1.26, 0.34, 0.75, 0.88, 1.5, 4.07, 13.48, 1.09, 0.57, 20.92]}
|
annotations_filtered/h6iHbAju1cI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 10.59], [19.0, 20.63], [22.0, 35.29], [54.0, 54.48], [55.0, 57.0], [58.0, 58.38], [79.0, 82.22], [83.0, 85.13], [86.0, 86.63]], "keep_status": [false, false, false, false, false, false, false, false, false], "silence_prob": [34.4, 0.0, 30.86, 0.0, 99.97, 0.0, 30.75, 98.51, 0.0], "audiomae_on_audioset": [[["music", 39.17], ["speech", 34.89], ["sidetone", 4.09]], null, [["music", 71.91], ["throbbing", 7.47], ["smash, crash", 3.99]], null, null, null, [["cattle, bovinae", 33.1], ["moo", 22.97], ["livestock, farm animals, working animals", 22.9]], null, null], "duration": [3.59, 1.63, 13.29, 0.48, 2.0, 0.38, 3.22, 2.13, 0.63]}
|
annotations_filtered/h7CCnLwD2MY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[23.0, 22.69], [23.0, 28.8], [30.0, 30.74], [35.0, 35.58], [42.0, 43.28], [46.0, 46.77], [49.0, 49.01], [54.0, 54.24], [59.0, 61.18], [64.0, 64.61], [80.0, 82.0], [93.0, 94.02], [104.0, 105.24], [111.0, 111.64], [119.0, 119.48], [121.0, 122.89], [124.0, 125.32]], "keep_status": [false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 37.88, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 36.28, 0.0, 95.78, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 71.36], ["speech", 6.46], ["sidetone", 4.72]], null, null, null, null, null, null, [["music", 37.04], ["hum", 16.07], ["speech", 11.15]], null, null, null, null, null, null, null, null], "duration": [-0.31, 5.8, 0.74, 0.58, 1.28, 0.77, 0.01, 0.24, 2.18, 0.61, 2.0, 1.02, 1.24, 0.64, 0.48, 1.89, 1.32]}
|
annotations_filtered/h7NG9ZEfyKo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [], "keep_status": [], "silence_prob": [], "audiomae_on_audioset": [], "duration": []}
|
annotations_filtered/h7ZUKB_zYQ0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.54], [8.0, 13.34], [15.0, 14.91], [18.0, 21.66], [22.0, 24.22], [34.0, 36.78], [37.0, 38.1], [41.0, 43.48], [45.0, 45.79], [47.0, 49.42], [55.0, 57.59], [85.0, 85.95], [90.0, 93.23], [94.0, 98.39], [105.0, 106.1], [113.0, 115.2], [117.0, 118.35], [120.0, 120.45]], "keep_status": [false, true, false, false, false, false, false, false, false, true, true, false, true, false, false, true, false, false], "silence_prob": [0.0, 36.11, 0.0, 33.37, 62.68, 58.47, 0.0, 64.63, 0.0, 49.64, 35.47, 0.0, 40.52, 69.2, 0.0, 43.93, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 29.2], ["whale vocalization", 13.55], ["speech", 10.12]], null, [["music", 60.05], ["theremin", 12.25], ["synthesizer", 5.57]], null, null, null, null, null, [["music", 49.07], ["didgeridoo", 8.61], ["theremin", 7.96]], [["speech", 31.37], ["vehicle", 11.38], ["field recording", 7.32]], null, [["music", 45.23], ["foghorn", 9.39], ["theremin", 6.1]], null, null, [["speech", 36.54], ["hum", 11.3], ["telephone", 8.46]], null, null], "duration": [0.54, 5.34, -0.09, 3.66, 2.22, 2.78, 1.1, 2.48, 0.79, 2.42, 2.59, 0.95, 3.23, 4.39, 1.1, 2.2, 1.35, 0.45]}
|
annotations_filtered/h7wEE6Yx7IQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 7.6], [9.0, 55.9], [56.0, 59.49], [61.0, 62.06], [64.0, 65.74], [67.0, 67.41], [68.0, 69.55], [72.0, 86.73], [88.0, 89.5], [92.0, 93.73], [95.0, 96.92], [99.0, 107.91], [110.0, 111.21], [113.0, 114.34], [115.0, 117.9], [119.0, 127.2]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false], "silence_prob": [99.98, 0.0, 68.93, 0.0, 0.0, 0.0, 0.0, 55.53, 0.0, 0.0, 0.0, 76.2, 0.0, 0.0, 41.4, 35.47], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 38.69], ["creak", 9.08], ["cattle, bovinae", 5.22]], [["speech", 78.65], ["whale vocalization", 4.36], ["didgeridoo", 2.3]]], "duration": [5.6, 46.9, 3.49, 1.06, 1.74, 0.41, 1.55, 14.73, 1.5, 1.73, 1.92, 8.91, 1.21, 1.34, 2.9, 8.2]}
|
annotations_filtered/h8E3sSTc11E_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[75.0, 78.78], [80.0, 91.84]], "keep_status": [false, false], "silence_prob": [80.11, 47.62], "audiomae_on_audioset": [null, [["music", 63.35], ["musical instrument", 6.06], ["synthesizer", 5.07]]], "duration": [3.78, 11.84]}
|
annotations_filtered/h8Rxb-9snJQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 4.87], [6.0, 8.48], [9.0, 10.13], [12.0, 13.86], [15.0, 19.57], [21.0, 22.74], [24.0, 26.43], [28.0, 28.75], [30.0, 31.72], [33.0, 33.37], [34.0, 36.15], [37.0, 38.65], [39.0, 40.69], [42.0, 43.26], [47.0, 47.12], [49.0, 49.5], [52.0, 52.98], [54.0, 55.36], [56.0, 56.61], [58.0, 58.45], [60.0, 60.03], [62.0, 61.75], [64.0, 66.21], [70.0, 72.17], [74.0, 74.01], [74.0, 74.34], [78.0, 82.68], [84.0, 87.94], [93.0, 94.36], [95.0, 96.89], [99.0, 99.93], [101.0, 101.65], [102.0, 105.97], [113.0, 115.5], [119.0, 122.64], [124.0, 125.69], [127.0, 128.63]], "keep_status": [true, false, false, false, true, false, true, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, true, true, false, false, false, false, true, true, true, false, false], "silence_prob": [34.1, 33.0, 0.0, 0.0, 38.23, 0.0, 37.02, 0.0, 0.0, 0.0, 35.37, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 36.72, 31.84, 0.0, 0.0, 35.68, 36.89, 0.0, 0.0, 0.0, 0.0, 33.12, 41.64, 36.11, 0.0, 0.0], "audiomae_on_audioset": [[["throbbing", 32.54], ["music", 15.58], ["hum", 9.35]], [["speech", 65.18], ["music", 12.02], ["throbbing", 7.75]], null, null, [["fly, housefly", 23.16], ["bee, wasp, etc.", 18.88], ["throbbing", 14.45]], null, [["throbbing", 24.22], ["hum", 23.67], ["music", 20.48]], null, null, null, [["speech", 14.62], ["music", 11.39], ["fly, housefly", 8.61]], null, null, null, null, null, null, null, null, null, null, null, [["music", 46.45], ["hum", 9.7], ["mains hum", 9.21]], [["fly, housefly", 53.61], ["mosquito", 16.98], ["insect", 16.62]], null, null, [["hum", 26.72], ["throbbing", 21.24], ["music", 13.32]], [["music", 33.06], ["didgeridoo", 14.58], ["throbbing", 8.44]], null, null, null, null, [["speech", 38.21], ["hum", 12.64], ["bee, wasp, etc.", 9.93]], [["fly, housefly", 23.38], ["bee, wasp, etc.", 18.43], ["insect", 12.3]], [["throbbing", 20.62], ["hum", 11.72], ["music", 7.99]], null, null], "duration": [2.87, 2.48, 1.13, 1.86, 4.57, 1.74, 2.43, 0.75, 1.72, 0.37, 2.15, 1.65, 1.69, 1.26, 0.12, 0.5, 0.98, 1.36, 0.61, 0.45, 0.03, -0.25, 2.21, 2.17, 0.01, 0.34, 4.68, 3.94, 1.36, 1.89, 0.93, 0.65, 3.97, 2.5, 3.64, 1.69, 1.63]}
|
annotations_filtered/h8c0Q6aqZG8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 7.64], [10.0, 10.29], [13.0, 14.74], [15.0, 15.74], [19.0, 19.47], [21.0, 22.0], [23.0, 23.8], [26.0, 26.81], [28.0, 29.17], [30.0, 31.09], [32.0, 31.77], [33.0, 33.83], [34.0, 35.11], [36.0, 37.71]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.64, 0.29, 1.74, 0.74, 0.47, 1.0, 0.8, 0.81, 1.17, 1.09, -0.23, 0.83, 1.11, 1.71]}
|
annotations_filtered/h8m69o_1PoQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 12.58], [13.0, 15.45], [17.0, 27.3], [29.0, 35.83], [37.0, 37.83], [39.0, 39.53], [41.0, 44.73], [45.0, 53.25], [69.0, 72.72], [74.0, 85.6], [86.0, 86.26], [89.0, 97.8], [100.0, 101.11], [103.0, 105.63], [111.0, 113.44], [114.0, 117.8], [119.0, 120.55], [126.0, 125.83]], "keep_status": [false, true, false, true, false, false, true, true, false, false, false, false, false, false, false, false, false, false], "silence_prob": [31.51, 31.88, 32.91, 36.79, 0.0, 0.0, 38.36, 49.78, 61.67, 59.77, 0.0, 73.51, 0.0, 61.18, 64.29, 66.15, 0.0, 0.0], "audiomae_on_audioset": [[["music", 50.88], ["mains hum", 11.68], ["hum", 9.9]], [["music", 37.75], ["mains hum", 18.77], ["hum", 12.07]], [["music", 67.77], ["speech", 12.72], ["hum", 2.04]], [["music", 54.03], ["speech", 6.09], ["hum", 5.06]], null, null, [["music", 52.02], ["foghorn", 6.22], ["theremin", 5.75]], [["music", 23.49], ["speech", 17.16], ["didgeridoo", 16.32]], null, null, null, null, null, null, null, null, null, null], "duration": [8.58, 2.45, 10.3, 6.83, 0.83, 0.53, 3.73, 8.25, 3.72, 11.6, 0.26, 8.8, 1.11, 2.63, 2.44, 3.8, 1.55, -0.17]}
|
annotations_filtered/h8wzJimC5Zc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[13.0, 14.37], [19.0, 55.78], [57.0, 59.48], [62.0, 75.64], [77.0, 78.54], [80.0, 81.7], [83.0, 84.99]], "keep_status": [false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 57.4, 37.95, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, [["music", 43.5], ["hum", 28.14], ["mains hum", 13.88]], null, null, null], "duration": [1.37, 36.78, 2.48, 13.64, 1.54, 1.7, 1.99]}
|
annotations_filtered/h9GHe5K0kOI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.32], [8.0, 7.89], [16.0, 16.6], [17.0, 17.64], [26.0, 29.49], [34.0, 34.92], [36.0, 36.32], [37.0, 37.47], [49.0, 50.36], [68.0, 70.77], [84.0, 83.79]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 85.54, 0.0, 0.0, 0.0, 0.0, 61.57, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null], "duration": [0.32, -0.11, 0.6, 0.64, 3.49, 0.92, 0.32, 0.47, 1.36, 2.77, -0.21]}
|
annotations_filtered/h9Rb7mT3juI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 5.43], [8.0, 8.26], [11.0, 27.08], [28.0, 28.68], [30.0, 62.24], [65.0, 93.73], [97.0, 124.06], [125.0, 136.38], [137.0, 173.43], [174.0, 173.96]], "keep_status": [false, false, false, false, false, false, false, false, false, false], "silence_prob": [38.05, 0.0, 34.77, 0.0, 0.0, 35.11, 52.33, 38.58, 0.0, 0.0], "audiomae_on_audioset": [[["music", 65.27], ["theremin", 9.11], ["didgeridoo", 3.11]], null, [["livestock, farm animals, working animals", 46.24], ["cattle, bovinae", 23.24], ["moo", 15.97]], null, null, [["music", 66.27], ["didgeridoo", 5.39], ["throbbing", 3.77]], null, [["mains hum", 49.95], ["hum", 31.62], ["buzz", 5.56]], null, null], "duration": [3.43, 0.26, 16.08, 0.68, 32.24, 28.73, 27.06, 11.38, 36.43, -0.04]}
|
annotations_filtered/h9WDm1k4Hz4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 25.57], [28.0, 29.76], [31.0, 32.9], [35.0, 36.9], [39.0, 45.25], [46.0, 51.95], [52.0, 56.61], [58.0, 60.76], [62.0, 64.94], [66.0, 66.11], [67.0, 72.67], [73.0, 77.97], [79.0, 82.85], [84.0, 87.3], [90.0, 93.04], [95.0, 97.11], [99.0, 100.89], [102.0, 111.0], [112.0, 120.12], [121.0, 122.32], [123.0, 124.93], [126.0, 127.67], [129.0, 129.71], [132.0, 134.42], [136.0, 141.72], [143.0, 164.17]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [78.89, 0.0, 0.0, 0.0, 99.99, 99.84, 55.04, 100.0, 100.0, 0.0, 99.98, 100.0, 98.66, 65.91, 55.74, 100.0, 0.0, 100.0, 99.92, 0.0, 0.0, 0.0, 0.0, 100.0, 81.53, 92.8], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [17.57, 1.76, 1.9, 1.9, 6.25, 5.95, 4.61, 2.76, 2.94, 0.11, 5.67, 4.97, 3.85, 3.3, 3.04, 2.11, 1.89, 9.0, 8.12, 1.32, 1.93, 1.67, 0.71, 2.42, 5.72, 21.17]}
|
annotations_filtered/h9jsnAD4aNw_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 11.08], [12.0, 12.95], [23.0, 27.85], [30.0, 31.89], [32.0, 32.46], [41.0, 41.84], [49.0, 48.91], [54.0, 54.01], [58.0, 59.68], [62.0, 68.12], [69.0, 72.3], [73.0, 73.75], [75.0, 75.42], [87.0, 88.62], [96.0, 96.01], [102.0, 102.07], [103.0, 103.2], [104.0, 104.36], [111.0, 110.98]], "keep_status": [true, false, true, false, false, false, false, false, false, true, true, false, false, false, false, false, false, false, false], "silence_prob": [30.5, 0.0, 34.39, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 35.0, 34.19, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["music", 21.96], ["throbbing", 13.23], ["electronic music", 7.4]], null, [["cattle, bovinae", 24.37], ["moo", 21.76], ["livestock, farm animals, working animals", 14.97]], null, null, null, null, null, null, [["throbbing", 10.51], ["music", 9.35], ["didgeridoo", 7.24]], [["fly, housefly", 26.36], ["cattle, bovinae", 11.46], ["insect", 10.06]], null, null, null, null, null, null, null, null], "duration": [8.08, 0.95, 4.85, 1.89, 0.46, 0.84, -0.09, 0.01, 1.68, 6.12, 3.3, 0.75, 0.42, 1.62, 0.01, 0.07, 0.2, 0.36, -0.02]}
|
annotations_filtered/hA063IaOHyQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.75], [5.0, 7.69], [10.0, 10.52], [15.0, 14.99], [29.0, 29.93], [33.0, 38.48], [49.0, 49.4], [50.0, 50.48], [59.0, 62.31], [67.0, 68.34], [77.0, 81.92], [86.0, 87.39], [94.0, 94.41], [94.0, 96.01], [100.0, 100.55], [105.0, 105.14], [110.0, 113.39], [121.0, 121.29], [129.0, 129.64], [133.0, 134.1]], "keep_status": [false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 60.14, 0.0, 0.0, 0.0, 39.25, 0.0, 0.0, 99.65, 0.0, 90.78, 0.0, 0.0, 70.86, 0.0, 0.0, 54.7, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, [["music", 45.17], ["timpani", 8.42], ["foghorn", 7.26]], null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.75, 2.69, 0.52, -0.01, 0.93, 5.48, 0.4, 0.48, 3.31, 1.34, 4.92, 1.39, 0.41, 2.01, 0.55, 0.14, 3.39, 0.29, 0.64, 1.1]}
|
annotations_filtered/hA0OlCQLC0Q_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[26.0, 75.3]], "keep_status": [false], "silence_prob": [0.0], "audiomae_on_audioset": [null], "duration": [49.3]}
|
annotations_filtered/hA1BikUzBKc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 4.87], [11.0, 10.61], [13.0, 13.02], [19.0, 19.4], [21.0, 21.14], [23.0, 23.09], [25.0, 26.22], [32.0, 32.34], [38.0, 38.91], [40.0, 40.0], [45.0, 60.69], [65.0, 73.6], [80.0, 98.81], [101.0, 105.93], [109.0, 109.8], [111.0, 112.43], [113.0, 114.45]], "keep_status": [false, false, false, false, false, false, false, false, false, false, true, true, true, true, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.65, 34.86, 33.82, 33.38, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, [["music", 25.43], ["speech", 9.35], ["groan", 8.66]], [["livestock, farm animals, working animals", 23.73], ["moo", 15.15], ["music", 14.69]], [["speech", 25.57], ["music", 20.57], ["didgeridoo", 6.75]], [["speech", 39.81], ["fart", 10.36], ["music", 6.69]], null, null, null], "duration": [-0.13, -0.39, 0.02, 0.4, 0.14, 0.09, 1.22, 0.34, 0.91, 0.0, 15.69, 8.6, 18.81, 4.93, 0.8, 1.43, 1.45]}
|
annotations_filtered/hAQ2xTr4U64_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 3.43], [5.0, 5.37], [7.0, 8.65], [14.0, 14.07], [15.0, 16.39], [18.0, 17.85], [19.0, 33.17], [37.0, 37.2], [39.0, 44.83], [47.0, 48.02], [50.0, 52.71], [61.0, 62.99], [64.0, 64.39], [65.0, 68.25], [69.0, 70.71], [73.0, 73.84], [75.0, 75.36], [78.0, 79.39], [82.0, 82.56], [85.0, 88.6], [90.0, 90.0], [90.0, 90.24], [92.0, 92.67], [95.0, 95.52], [97.0, 98.22], [99.0, 100.13], [101.0, 102.51], [103.0, 105.36], [107.0, 108.38], [109.0, 114.45], [118.0, 127.82], [128.0, 130.57], [131.0, 133.95], [137.0, 138.82], [139.0, 146.77], [149.0, 149.2], [152.0, 153.37], [158.0, 160.96], [162.0, 163.17], [169.0, 169.7]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 82.79, 0.0, 98.86, 0.0, 100.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 52.92, 52.39, 73.67, 54.1, 0.0, 98.51, 0.0, 0.0, 99.85, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.43, 0.37, 1.65, 0.07, 1.39, -0.15, 14.17, 0.2, 5.83, 1.02, 2.71, 1.99, 0.39, 3.25, 1.71, 0.84, 0.36, 1.39, 0.56, 3.6, 0.0, 0.24, 0.67, 0.52, 1.22, 1.13, 1.51, 2.36, 1.38, 5.45, 9.82, 2.57, 2.95, 1.82, 7.77, 0.2, 1.37, 2.96, 1.17, 0.7]}
|
annotations_filtered/hAU8AQ6xlw8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 6.66], [11.0, 11.87], [13.0, 14.15], [15.0, 15.43], [18.0, 18.33], [19.0, 21.44], [22.0, 23.04], [31.0, 30.92], [32.0, 34.53], [35.0, 36.05], [37.0, 39.83], [47.0, 48.42], [55.0, 55.17], [60.0, 60.49], [65.0, 67.49], [70.0, 70.93], [76.0, 76.35], [107.0, 108.06], [114.0, 115.89], [117.0, 118.15], [121.0, 121.44], [123.0, 123.43], [124.0, 123.84], [140.0, 140.34], [144.0, 145.99], [148.0, 148.51], [149.0, 148.59], [149.0, 149.13], [151.0, 151.26], [154.0, 154.85], [158.0, 158.19]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [96.29, 0.0, 0.0, 0.0, 0.0, 60.51, 0.0, 0.0, 100.0, 0.0, 99.99, 0.0, 0.0, 0.0, 99.97, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [2.66, 0.87, 1.15, 0.43, 0.33, 2.44, 1.04, -0.08, 2.53, 1.05, 2.83, 1.42, 0.17, 0.49, 2.49, 0.93, 0.35, 1.06, 1.89, 1.15, 0.44, 0.43, -0.16, 0.34, 1.99, 0.51, -0.41, 0.13, 0.26, 0.85, 0.19]}
|
annotations_filtered/hAUbdHw8QG4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 10.76], [17.0, 20.88], [24.0, 24.27], [30.0, 37.81], [41.0, 44.74], [46.0, 48.84], [53.0, 54.16], [55.0, 73.14], [80.0, 82.8], [86.0, 98.83], [102.0, 104.38], [106.0, 108.18]], "keep_status": [false, false, false, false, true, false, false, false, false, true, true, false], "silence_prob": [0.0, 30.22, 0.0, 32.19, 32.26, 32.0, 0.0, 30.65, 30.38, 29.89, 33.37, 61.97], "audiomae_on_audioset": [null, [["speech", 35.04], ["music", 28.96], ["hum", 6.16]], null, [["music", 57.02], ["throbbing", 18.65], ["hum", 4.61]], [["music", 53.22], ["throbbing", 6.54], ["synthesizer", 5.15]], [["music", 61.36], ["speech", 7.9], ["synthesizer", 6.19]], null, [["music", 71.79], ["throbbing", 7.61], ["vehicle", 2.77]], [["music", 60.98], ["throbbing", 16.6], ["hum", 6.98]], [["music", 36.87], ["speech", 28.26], ["vehicle", 3.8]], [["music", 34.06], ["vehicle", 9.81], ["car", 7.26]], null], "duration": [0.76, 3.88, 0.27, 7.81, 3.74, 2.84, 1.16, 18.14, 2.8, 12.83, 2.38, 2.18]}
|
annotations_filtered/hAbVFxYi_q0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 7.79], [9.0, 14.13], [27.0, 27.28], [49.0, 49.0], [59.0, 59.76], [61.0, 61.35], [63.0, 63.27], [65.0, 65.99], [68.0, 68.13], [79.0, 79.02], [80.0, 80.11], [83.0, 84.37], [85.0, 85.14], [96.0, 96.99], [108.0, 108.21], [116.0, 118.18], [120.0, 120.97], [121.0, 124.66], [127.0, 127.82], [132.0, 132.6], [143.0, 143.4], [144.0, 145.1], [147.0, 146.67], [158.0, 159.76], [162.0, 162.41], [167.0, 167.46], [172.0, 172.05], [173.0, 173.18], [177.0, 179.83], [183.0, 185.23]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 37.88, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 94.37, 0.0, 82.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 67.0, 92.8], "audiomae_on_audioset": [null, [["speech", 69.16], ["tap", 5.81], ["music", 4.97]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.79, 5.13, 0.28, 0.0, 0.76, 0.35, 0.27, 0.99, 0.13, 0.02, 0.11, 1.37, 0.14, 0.99, 0.21, 2.18, 0.97, 3.66, 0.82, 0.6, 0.4, 1.1, -0.33, 1.76, 0.41, 0.46, 0.05, 0.18, 2.83, 2.23]}
|