Spaces:
Build error
Build error
WIP
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- annotations_filtered/l-GbvgBXi18_filtered.json +1 -0
- annotations_filtered/l-PmluGC2wk_filtered.json +1 -0
- annotations_filtered/l-uqZJwOieA_filtered.json +1 -0
- annotations_filtered/l-vk0fgFGd4_filtered.json +1 -0
- annotations_filtered/l081UdHizvg_filtered.json +1 -0
- annotations_filtered/l0Io_aXWgkQ_filtered.json +1 -0
- annotations_filtered/l0RkrxY9mH8_filtered.json +1 -0
- annotations_filtered/l0VX6h8lP08_filtered.json +1 -0
- annotations_filtered/l0zmCUVB0Yw_filtered.json +1 -0
- annotations_filtered/l17e0M4TTBA_filtered.json +1 -0
- annotations_filtered/l1B1_jQnlFk_filtered.json +1 -0
- annotations_filtered/l1NB8NQc7wU_filtered.json +1 -0
- annotations_filtered/l1OgTkhFJn8_filtered.json +1 -0
- annotations_filtered/l1SZ4ccagFQ_filtered.json +1 -0
- annotations_filtered/l1jCg_FmQmQ_filtered.json +1 -0
- annotations_filtered/l24yOwR9saU_filtered.json +1 -0
- annotations_filtered/l2IJxv1lbAc_filtered.json +1 -0
- annotations_filtered/l2K4Fw-pmLw_filtered.json +1 -0
- annotations_filtered/l2g7v4DYYik_filtered.json +1 -0
- annotations_filtered/l2zrJ_LZrhg_filtered.json +1 -0
- annotations_filtered/l38Qliee6VE_filtered.json +1 -0
- annotations_filtered/l3t1ZSuwLzg_filtered.json +1 -0
- annotations_filtered/l46yjkR0SqU_filtered.json +1 -0
- annotations_filtered/l4AmSVb6Hew_filtered.json +1 -0
- annotations_filtered/l4L9Yi-lXbo_filtered.json +1 -0
- annotations_filtered/l4S4IBACQCM_filtered.json +1 -0
- annotations_filtered/l53Q1UXk2DE_filtered.json +1 -0
- annotations_filtered/l59t24vh3QI_filtered.json +1 -0
- annotations_filtered/l5s3_XV1rkA_filtered.json +1 -0
- annotations_filtered/l65KNW2ZGV8_filtered.json +1 -0
- annotations_filtered/l6NIVn6_m1c_filtered.json +1 -0
- annotations_filtered/l6StIaMaRsg_filtered.json +1 -0
- annotations_filtered/l6TGERgrXmA_filtered.json +1 -0
- annotations_filtered/l6cFM5Ubilw_filtered.json +1 -0
- annotations_filtered/l6e1M2d4BJ0_filtered.json +1 -0
- annotations_filtered/l6uaxfye2Ig_filtered.json +1 -0
- annotations_filtered/l6zm1uCb30w_filtered.json +1 -0
- annotations_filtered/l7FkN4ooYvA_filtered.json +1 -0
- annotations_filtered/l83CcqhP-kY_filtered.json +1 -0
- annotations_filtered/l8MFxT9ILKY_filtered.json +1 -0
- annotations_filtered/l8aozWddbPA_filtered.json +1 -0
- annotations_filtered/l94geYuwNJg_filtered.json +1 -0
- annotations_filtered/l97NtEMUx0M_filtered.json +1 -0
- annotations_filtered/l9LOKUiY0Dg_filtered.json +1 -0
- annotations_filtered/l9c1k_m6POA_filtered.json +1 -0
- annotations_filtered/l9k9_K8Tea0_filtered.json +1 -0
- annotations_filtered/lAIJ6Twk8aQ_filtered.json +1 -0
- annotations_filtered/lAcZxn1DeHs_filtered.json +1 -0
- annotations_filtered/lAgPsmTxBfc_filtered.json +1 -0
- annotations_filtered/lAhQbCN-Zvg_filtered.json +1 -0
annotations_filtered/l-GbvgBXi18_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 8.6], [10.0, 13.85], [16.0, 16.63], [18.0, 23.73], [27.0, 26.89], [27.0, 28.32], [29.0, 30.75], [34.0, 37.71], [41.0, 41.62], [44.0, 46.57], [70.0, 71.1], [72.0, 76.99], [78.0, 87.88], [96.0, 96.42], [97.0, 97.51], [98.0, 99.82], [108.0, 133.25], [140.0, 145.94]], "keep_status": [false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 55.04, 0.0, 35.0, 0.0, 0.0, 0.0, 42.44, 0.0, 99.92, 0.0, 87.55, 60.05, 0.0, 0.0, 0.0, 37.59, 99.65], "audiomae_on_audioset": [null, null, null, [["speech", 64.85], ["whack, thwack", 5.66], ["arrow", 2.1]], null, null, null, [["hum", 36.27], ["whale vocalization", 16.44], ["throbbing", 11.94]], null, null, null, null, null, null, null, null, [["whale vocalization", 81.95], ["rumble", 5.63], ["stomach rumble", 2.14]], null], "duration": [0.6, 3.85, 0.63, 5.73, -0.11, 1.32, 1.75, 3.71, 0.62, 2.57, 1.1, 4.99, 9.88, 0.42, 0.51, 1.82, 25.25, 5.94]}
|
annotations_filtered/l-PmluGC2wk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 4.06], [5.0, 5.76], [8.0, 9.76], [13.0, 13.46], [20.0, 20.95], [22.0, 22.52], [24.0, 25.05], [26.0, 27.09], [29.0, 28.83], [31.0, 32.8], [34.0, 35.5], [37.0, 37.56], [38.0, 39.06], [41.0, 41.47], [42.0, 42.33], [43.0, 43.87], [46.0, 46.23], [48.0, 49.47], [50.0, 50.63]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [52.05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [2.06, 0.76, 1.76, 0.46, 0.95, 0.52, 1.05, 1.09, -0.17, 1.8, 1.5, 0.56, 1.06, 0.47, 0.33, 0.87, 0.23, 1.47, 0.63]}
|
annotations_filtered/l-uqZJwOieA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [], "keep_status": [], "silence_prob": [], "audiomae_on_audioset": [], "duration": []}
|
annotations_filtered/l-vk0fgFGd4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[9.0, 15.9], [21.0, 22.49], [25.0, 25.15], [26.0, 31.36], [35.0, 35.58], [38.0, 37.84], [39.0, 39.19], [42.0, 42.72], [47.0, 47.24], [50.0, 49.87], [52.0, 54.36], [57.0, 57.97], [58.0, 58.78], [62.0, 62.73], [65.0, 64.74], [66.0, 66.58], [72.0, 73.52], [75.0, 76.74], [80.0, 79.95], [81.0, 81.48], [87.0, 87.66]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [99.26, 0.0, 0.0, 98.93, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [6.9, 1.49, 0.15, 5.36, 0.58, -0.16, 0.19, 0.72, 0.24, -0.13, 2.36, 0.97, 0.78, 0.73, -0.26, 0.58, 1.52, 1.74, -0.05, 0.48, 0.66]}
|
annotations_filtered/l081UdHizvg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 8.7], [10.0, 13.47], [15.0, 15.4], [20.0, 20.39], [23.0, 23.77], [25.0, 25.22], [27.0, 27.99], [29.0, 30.05], [31.0, 31.7], [35.0, 35.82], [39.0, 38.99], [51.0, 57.72], [62.0, 62.19], [64.0, 64.56], [65.0, 66.09], [69.0, 71.64], [72.0, 73.57], [74.0, 75.15], [76.0, 76.86], [79.0, 80.76], [86.0, 88.43], [89.0, 89.5], [91.0, 91.49], [100.0, 101.8], [103.0, 102.95], [104.0, 105.81], [106.0, 107.18], [108.0, 108.8], [110.0, 112.02], [113.0, 114.02], [117.0, 116.99], [122.0, 122.67], [125.0, 125.54], [129.0, 129.83], [133.0, 133.03], [135.0, 154.74], [173.0, 174.51]], "keep_status": [false, true, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 46.72, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 36.81, 0.0, 0.0, 0.0, 84.25, 0.0, 0.0, 0.0, 0.0, 75.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 87.55, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 37.47, 0.0], "audiomae_on_audioset": [null, [["music", 24.02], ["speech", 22.13], ["animal", 4.54]], null, null, null, null, null, null, null, null, null, [["hum", 22.94], ["livestock, farm animals, working animals", 12.7], ["cattle, bovinae", 11.34]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 67.94], ["foghorn", 7.79], ["musical instrument", 4.8]], null], "duration": [1.7, 3.47, 0.4, 0.39, 0.77, 0.22, 0.99, 1.05, 0.7, 0.82, -0.01, 6.72, 0.19, 0.56, 1.09, 2.64, 1.57, 1.15, 0.86, 1.76, 2.43, 0.5, 0.49, 1.8, -0.05, 1.81, 1.18, 0.8, 2.02, 1.02, -0.01, 0.67, 0.54, 0.83, 0.03, 19.74, 1.51]}
|
annotations_filtered/l0Io_aXWgkQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 1.17], [18.0, 18.0], [21.0, 33.61], [38.0, 48.63], [55.0, 55.66], [57.0, 79.15], [81.0, 81.5], [86.0, 86.48], [94.0, 94.09], [94.0, 95.08], [101.0, 101.01], [106.0, 106.61], [111.0, 111.16], [114.0, 113.95], [120.0, 120.38]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 29.94, 29.26, 0.0, 33.28, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, [["cattle, bovinae", 27.52], ["livestock, farm animals, working animals", 23.66], ["moo", 22.53]], [["cattle, bovinae", 40.19], ["livestock, farm animals, working animals", 37.6], ["moo", 19.68]], null, [["livestock, farm animals, working animals", 33.52], ["cattle, bovinae", 29.27], ["moo", 24.99]], null, null, null, null, null, null, null, null, null], "duration": [1.17, 0.0, 12.61, 10.63, 0.66, 22.15, 0.5, 0.48, 0.09, 1.08, 0.01, 0.61, 0.16, -0.05, 0.38]}
|
annotations_filtered/l0RkrxY9mH8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[17.0, 29.73], [31.0, 48.68], [49.0, 52.52], [53.0, 55.11], [57.0, 59.27], [61.0, 63.31], [65.0, 66.95], [69.0, 75.57], [77.0, 78.24], [82.0, 82.68], [85.0, 87.67], [89.0, 91.23], [93.0, 95.13], [96.0, 98.31], [99.0, 108.03], [112.0, 112.95], [114.0, 116.21], [119.0, 119.55], [121.0, 127.4]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [99.99, 100.0, 98.19, 98.59, 99.95, 100.0, 0.0, 100.0, 0.0, 0.0, 100.0, 100.0, 100.0, 100.0, 100.0, 0.0, 100.0, 0.0, 100.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [12.73, 17.68, 3.52, 2.11, 2.27, 2.31, 1.95, 6.57, 1.24, 0.68, 2.67, 2.23, 2.13, 2.31, 9.03, 0.95, 2.21, 0.55, 6.4]}
|
annotations_filtered/l0VX6h8lP08_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 43.92], [45.0, 45.76], [47.0, 47.83], [72.0, 84.45], [88.0, 89.56], [92.0, 92.99], [93.0, 94.2], [102.0, 104.35], [109.0, 111.4], [114.0, 114.89], [115.0, 118.99], [122.0, 122.54]], "keep_status": [false, false, false, false, false, false, false, false, true, false, true, false], "silence_prob": [0.0, 0.0, 0.0, 33.22, 0.0, 0.0, 0.0, 38.31, 39.94, 0.0, 37.15, 0.0], "audiomae_on_audioset": [null, null, null, [["music", 51.78], ["speech", 16.91], ["throbbing", 2.96]], null, null, null, [["music", 61.89], ["didgeridoo", 5.62], ["musical instrument", 3.64]], [["frog", 33.3], ["music", 16.99], ["croak", 10.56]], null, [["music", 28.84], ["speech", 8.34], ["hum", 6.57]], null], "duration": [42.92, 0.76, 0.83, 12.45, 1.56, 0.99, 1.2, 2.35, 2.4, 0.89, 3.99, 0.54]}
|
annotations_filtered/l0zmCUVB0Yw_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 0.4], [1.0, 5.49], [12.0, 12.66], [26.0, 25.68], [27.0, 27.46], [41.0, 42.52], [54.0, 53.82], [66.0, 66.01], [72.0, 72.57], [86.0, 86.7], [91.0, 91.45], [93.0, 94.9], [97.0, 97.38], [98.0, 99.45], [100.0, 100.8], [116.0, 116.51], [120.0, 120.8], [122.0, 122.44], [124.0, 124.61], [128.0, 127.89], [132.0, 132.21], [143.0, 143.43], [144.0, 143.95], [145.0, 144.76], [145.0, 145.66], [152.0, 161.81], [172.0, 172.57], [174.0, 174.8]], "keep_status": [false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false], "silence_prob": [0.0, 32.31, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 49.18, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 49.72], ["musical instrument", 5.08], ["speech", 3.57]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 34.24], ["synthesizer", 16.42], ["speech", 15.84]], null, null], "duration": [0.4, 4.49, 0.66, -0.32, 0.46, 1.52, -0.18, 0.01, 0.57, 0.7, 0.45, 1.9, 0.38, 1.45, 0.8, 0.51, 0.8, 0.44, 0.61, -0.11, 0.21, 0.43, -0.05, -0.24, 0.66, 9.81, 0.57, 0.8]}
|
annotations_filtered/l17e0M4TTBA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.29], [17.0, 17.44], [19.0, 20.68], [22.0, 22.79], [24.0, 28.0], [30.0, 32.7], [33.0, 36.27], [38.0, 43.34], [44.0, 45.54], [50.0, 51.41], [52.0, 54.48], [57.0, 57.3], [69.0, 70.6], [71.0, 88.65], [91.0, 95.96], [99.0, 104.23], [109.0, 111.72], [116.0, 117.9], [121.0, 122.81], [124.0, 125.85], [130.0, 133.51], [134.0, 135.68], [137.0, 138.23], [144.0, 147.04], [150.0, 151.31], [153.0, 158.25]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 96.54, 53.59, 97.92, 75.88, 0.0, 0.0, 72.01, 0.0, 0.0, 59.15, 88.46, 99.21, 54.97, 0.0, 0.0, 0.0, 83.34, 0.0, 0.0, 59.33, 0.0, 99.65], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.29, 0.44, 1.68, 0.79, 4.0, 2.7, 3.27, 5.34, 1.54, 1.41, 2.48, 0.3, 1.6, 17.65, 4.96, 5.23, 2.72, 1.9, 1.81, 1.85, 3.51, 1.68, 1.23, 3.04, 1.31, 5.25]}
|
annotations_filtered/l1B1_jQnlFk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 5.19], [8.0, 9.71], [11.0, 11.37], [13.0, 14.2], [22.0, 24.14], [25.0, 26.86], [27.0, 30.01], [31.0, 31.7], [34.0, 34.26], [47.0, 49.72], [50.0, 51.19], [52.0, 52.76], [54.0, 56.46], [59.0, 59.64], [70.0, 98.29], [101.0, 103.42], [105.0, 107.35], [112.0, 111.94], [114.0, 115.35], [116.0, 116.75], [119.0, 120.65], [122.0, 122.66], [124.0, 125.31], [126.0, 126.25]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 72.16, 0.0, 76.37, 0.0, 0.0, 30.91, 0.0, 0.0, 99.76, 0.0, 49.82, 49.18, 98.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, [["speech", 54.3], ["beatboxing", 13.17], ["fart", 7.43]], null, null, null, null, [["speech", 48.32], ["frog", 12.16], ["boing", 12.08]], [["speech", 77.95], ["boing", 2.87], ["laughter", 2.56]], null, null, null, null, null, null, null, null], "duration": [1.19, 1.71, 0.37, 1.2, 2.14, 1.86, 3.01, 0.7, 0.26, 2.72, 1.19, 0.76, 2.46, 0.64, 28.29, 2.42, 2.35, -0.06, 1.35, 0.75, 1.65, 0.66, 1.31, 0.25]}
|
annotations_filtered/l1NB8NQc7wU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[39.0, 69.7], [73.0, 73.55]], "keep_status": [false, false], "silence_prob": [0.0, 0.0], "audiomae_on_audioset": [null, null], "duration": [30.7, 0.55]}
|
annotations_filtered/l1OgTkhFJn8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[11.0, 11.35], [15.0, 31.35], [45.0, 45.47], [48.0, 49.28], [50.0, 51.29], [54.0, 56.81], [57.0, 57.94], [59.0, 59.46], [63.0, 63.48], [66.0, 67.27], [71.0, 71.96], [81.0, 81.13], [85.0, 85.92], [87.0, 87.74], [89.0, 90.37], [92.0, 92.8], [97.0, 98.14], [101.0, 101.95], [102.0, 105.09], [108.0, 110.93], [115.0, 117.14], [120.0, 124.83], [126.0, 128.36], [136.0, 136.44], [141.0, 145.42], [147.0, 148.44], [152.0, 152.34], [153.0, 154.52], [155.0, 156.09], [157.0, 158.3]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 55.18, 0.0, 0.0, 0.0, 74.76, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 62.17, 68.02, 100.0, 78.04, 39.54, 0.0, 59.07, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 71.82], ["thunk", 2.3], ["music", 2.01]], null, null, null, null, null, null, null], "duration": [0.35, 16.35, 0.47, 1.28, 1.29, 2.81, 0.94, 0.46, 0.48, 1.27, 0.96, 0.13, 0.92, 0.74, 1.37, 0.8, 1.14, 0.95, 3.09, 2.93, 2.14, 4.83, 2.36, 0.44, 4.42, 1.44, 0.34, 1.52, 1.09, 1.3]}
|
annotations_filtered/l1SZ4ccagFQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 5.66], [7.0, 22.3], [22.0, 38.26], [40.0, 40.44], [40.0, 52.02], [52.0, 52.05], [59.0, 95.88], [102.0, 109.46], [112.0, 112.29], [116.0, 119.77]], "keep_status": [false, true, true, false, true, false, false, true, false, false], "silence_prob": [0.0, 34.61, 31.06, 0.0, 31.34, 0.0, 0.0, 33.26, 0.0, 98.01], "audiomae_on_audioset": [null, [["music", 22.16], ["hum", 13.46], ["speech", 12.07]], [["speech", 21.07], ["throbbing", 15.61], ["music", 12.57]], null, [["music", 39.76], ["speech", 7.23], ["whack, thwack", 4.67]], null, null, [["whip", 20.46], ["whack, thwack", 16.94], ["hum", 16.13]], null, null], "duration": [0.66, 15.3, 16.26, 0.44, 12.02, 0.05, 36.88, 7.46, 0.29, 3.77]}
|
annotations_filtered/l1jCg_FmQmQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[35.0, 35.78], [37.0, 40.32], [41.0, 56.37], [57.0, 62.97], [65.0, 66.93], [68.0, 73.01], [74.0, 75.15], [81.0, 96.36], [96.0, 96.45], [98.0, 98.46]], "keep_status": [false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 100.0, 99.73, 99.73, 0.0, 100.0, 0.0, 89.01, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null], "duration": [0.78, 3.32, 15.37, 5.97, 1.93, 5.01, 1.15, 15.36, 0.45, 0.46]}
|
annotations_filtered/l24yOwR9saU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 11.62], [12.0, 16.56], [19.0, 22.49], [23.0, 24.33], [28.0, 28.24], [36.0, 36.1], [39.0, 39.5], [40.0, 45.72], [67.0, 70.24], [72.0, 73.2], [76.0, 85.99], [90.0, 95.76]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 50.86, 52.1, 0.0, 0.0, 0.0, 0.0, 59.15, 65.09, 0.0, 52.56, 47.9], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, [["mains hum", 30.8], ["speech", 22.93], ["hum", 22.33]]], "duration": [1.62, 4.56, 3.49, 1.33, 0.24, 0.1, 0.5, 5.72, 3.24, 1.2, 9.99, 5.76]}
|
annotations_filtered/l2IJxv1lbAc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.87], [6.0, 6.99], [11.0, 12.14], [13.0, 14.17], [25.0, 25.46], [26.0, 25.86], [28.0, 30.4], [35.0, 35.26], [36.0, 36.53], [39.0, 42.87], [45.0, 45.81], [46.0, 47.22], [48.0, 48.95], [50.0, 50.63], [52.0, 52.62], [55.0, 92.21], [101.0, 101.95], [106.0, 107.43], [108.0, 108.3], [112.0, 112.43], [113.0, 113.07], [119.0, 120.77], [124.0, 126.62]], "keep_status": [false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 35.78, 0.0, 0.0, 37.66, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 29.99], "audiomae_on_audioset": [null, null, null, null, null, null, [["quack", 17.71], ["music", 9.24], ["speech", 6.88]], null, null, [["mains hum", 32.6], ["hum", 27.43], ["music", 20.32]], null, null, null, null, null, null, null, null, null, null, null, null, [["music", 24.73], ["hum", 24.04], ["throbbing", 11.77]]], "duration": [0.87, 0.99, 1.14, 1.17, 0.46, -0.14, 2.4, 0.26, 0.53, 3.87, 0.81, 1.22, 0.95, 0.63, 0.62, 37.21, 0.95, 1.43, 0.3, 0.43, 0.07, 1.77, 2.62]}
|
annotations_filtered/l2K4Fw-pmLw_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[9.0, 10.29], [14.0, 15.2], [17.0, 17.98], [18.0, 18.87], [28.0, 32.56], [34.0, 35.11], [40.0, 40.49], [45.0, 45.39], [55.0, 57.67], [61.0, 63.91], [64.0, 67.86], [69.0, 69.15], [73.0, 73.43], [87.0, 87.62], [92.0, 93.51], [101.0, 101.87], [103.0, 103.94], [105.0, 105.6], [107.0, 107.89], [110.0, 111.67], [113.0, 113.43], [119.0, 119.26], [122.0, 123.45], [125.0, 125.39]], "keep_status": [false, false, false, false, false, false, false, false, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 30.43, 36.47, 50.81, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, [["speech", 43.51], ["music", 18.43], ["boing", 4.59]], [["speech", 57.27], ["sneeze", 7.77], ["inside, small room", 3.53]], null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.29, 1.2, 0.98, 0.87, 4.56, 1.11, 0.49, 0.39, 2.67, 2.91, 3.86, 0.15, 0.43, 0.62, 1.51, 0.87, 0.94, 0.6, 0.89, 1.67, 0.43, 0.26, 1.45, 0.39]}
|
annotations_filtered/l2g7v4DYYik_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[66.0, 107.99], [114.0, 114.84], [132.0, 132.61], [140.0, 140.78]], "keep_status": [false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null], "duration": [41.99, 0.84, 0.61, 0.78]}
|
annotations_filtered/l2zrJ_LZrhg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 6.25], [22.0, 26.7], [30.0, 31.02], [32.0, 33.84], [41.0, 41.71], [46.0, 48.88], [59.0, 59.75], [60.0, 65.45], [68.0, 78.95], [86.0, 87.83], [88.0, 89.95], [92.0, 94.49], [98.0, 98.02], [99.0, 103.5], [113.0, 113.46], [117.0, 120.11], [123.0, 131.89], [146.0, 147.12], [151.0, 152.14], [155.0, 157.84], [158.0, 162.06], [165.0, 167.04]], "keep_status": [false, true, false, false, false, true, false, true, false, false, false, true, false, true, false, false, false, false, false, true, false, true], "silence_prob": [0.0, 33.61, 0.0, 0.0, 0.0, 33.9, 0.0, 30.07, 33.22, 0.0, 0.0, 42.74, 0.0, 31.58, 0.0, 90.6, 51.18, 0.0, 0.0, 39.46, 34.79, 39.3], "audiomae_on_audioset": [null, [["speech", 34.27], ["thunk", 17.83], ["whack, thwack", 10.15]], null, null, null, [["hum", 32.05], ["speech", 29.31], ["mains hum", 8.42]], null, [["speech", 36.88], ["sidetone", 14.05], ["music", 13.06]], [["speech", 56.79], ["music", 22.23], ["fart", 4.71]], null, null, [["hum", 17.38], ["throbbing", 16.39], ["music", 14.13]], null, [["livestock, farm animals, working animals", 24.58], ["cattle, bovinae", 20.05], ["moo", 18.87]], null, null, null, null, null, [["music", 53.96], ["musical instrument", 7.06], ["didgeridoo", 4.37]], [["music", 65.83], ["musical instrument", 3.32], ["guitar", 2.0]], [["music", 47.04], ["cello", 10.83], ["double bass", 8.04]]], "duration": [1.25, 4.7, 1.02, 1.84, 0.71, 2.88, 0.75, 5.45, 10.95, 1.83, 1.95, 2.49, 0.02, 4.5, 0.46, 3.11, 8.89, 1.12, 1.14, 2.84, 4.06, 2.04]}
|
annotations_filtered/l38Qliee6VE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[9.0, 8.6], [10.0, 15.33], [18.0, 19.43], [20.0, 20.38], [25.0, 27.9], [49.0, 51.92], [61.0, 61.3], [62.0, 62.94], [64.0, 65.42], [67.0, 69.18], [70.0, 71.02], [71.0, 87.2], [87.0, 87.37], [87.0, 87.71], [88.0, 87.74], [88.0, 87.98], [88.0, 95.86], [107.0, 108.51], [112.0, 112.11], [113.0, 113.46], [119.0, 127.38], [129.0, 132.38], [134.0, 136.65], [143.0, 151.14], [152.0, 156.39]], "keep_status": [false, false, false, false, true, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, true, false, false, true], "silence_prob": [0.0, 55.25, 0.0, 0.0, 38.11, 59.96, 0.0, 0.0, 0.0, 40.99, 0.0, 31.57, 0.0, 0.0, 0.0, 0.0, 31.75, 0.0, 0.0, 0.0, 40.19, 39.22, 75.23, 78.38, 49.18], "audiomae_on_audioset": [null, null, null, null, [["speech", 29.2], ["wail, moan", 19.16], ["fart", 11.28]], null, null, null, null, [["music", 33.31], ["hum", 6.27], ["musical instrument", 3.69]], null, [["music", 56.52], ["speech", 10.06], ["synthesizer", 9.18]], null, null, null, null, [["music", 56.53], ["sidetone", 14.12], ["speech", 5.12]], null, null, null, [["music", 82.44], ["speech", 4.41], ["musical instrument", 2.06]], [["music", 56.16], ["musical instrument", 4.13], ["hum", 2.77]], null, null, [["music", 28.34], ["speech", 17.1], ["sidetone", 11.26]]], "duration": [-0.4, 5.33, 1.43, 0.38, 2.9, 2.92, 0.3, 0.94, 1.42, 2.18, 1.02, 16.2, 0.37, 0.71, -0.26, -0.02, 7.86, 1.51, 0.11, 0.46, 8.38, 3.38, 2.65, 8.14, 4.39]}
|
annotations_filtered/l3t1ZSuwLzg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[16.0, 18.4]], "keep_status": [false], "silence_prob": [71.14], "audiomae_on_audioset": [null], "duration": [2.4]}
|
annotations_filtered/l46yjkR0SqU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 14.69], [19.0, 20.33], [21.0, 21.47], [23.0, 22.98], [24.0, 36.44], [38.0, 84.97], [85.0, 86.39], [87.0, 100.45], [104.0, 115.13], [120.0, 122.1], [124.0, 125.63], [126.0, 126.79], [130.0, 130.27], [131.0, 134.03], [137.0, 137.24], [138.0, 140.02], [141.0, 158.82], [160.0, 160.37], [163.0, 167.86]], "keep_status": [true, false, false, false, false, false, false, false, false, false, false, false, false, true, false, true, true, false, true], "silence_prob": [35.42, 0.0, 0.0, 0.0, 53.53, 0.0, 0.0, 29.03, 30.52, 62.58, 0.0, 0.0, 0.0, 40.45, 0.0, 43.35, 39.89, 0.0, 37.35], "audiomae_on_audioset": [[["hum", 28.04], ["throbbing", 17.13], ["whale vocalization", 15.07]], null, null, null, null, null, null, [["music", 68.86], ["marimba, xylophone", 4.87], ["musical instrument", 2.74]], [["music", 69.46], ["theremin", 12.56], ["soundtrack music", 1.42]], null, null, null, null, [["cattle, bovinae", 9.82], ["fly, housefly", 9.7], ["speech", 9.15]], null, [["speech", 35.97], ["music", 5.78], ["hum", 4.73]], [["hum", 37.13], ["mains hum", 16.82], ["throbbing", 14.55]], null, [["speech", 32.24], ["music", 8.79], ["mains hum", 7.83]]], "duration": [10.69, 1.33, 0.47, -0.02, 12.44, 46.97, 1.39, 13.45, 11.13, 2.1, 1.63, 0.79, 0.27, 3.03, 0.24, 2.02, 17.82, 0.37, 4.86]}
|
annotations_filtered/l4AmSVb6Hew_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[9.0, 10.25], [11.0, 11.52], [16.0, 16.28], [30.0, 30.54], [42.0, 42.6], [45.0, 47.36], [48.0, 48.57], [52.0, 52.69], [54.0, 54.72], [61.0, 61.4], [67.0, 67.0], [70.0, 72.42], [96.0, 97.71], [99.0, 112.45]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 29.59], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 53.24], ["thunk", 3.96], ["livestock, farm animals, working animals", 3.31]]], "duration": [1.25, 0.52, 0.28, 0.54, 0.6, 2.36, 0.57, 0.69, 0.72, 0.4, 0.0, 2.42, 1.71, 13.45]}
|
annotations_filtered/l4L9Yi-lXbo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 11.99], [13.0, 14.22], [17.0, 19.89], [22.0, 27.97], [29.0, 30.54], [33.0, 35.31], [37.0, 41.34], [46.0, 48.81], [52.0, 53.79], [56.0, 57.48], [60.0, 62.04], [67.0, 69.28], [73.0, 82.63], [87.0, 89.46], [92.0, 97.75], [99.0, 107.2], [108.0, 116.92], [118.0, 122.35]], "keep_status": [true, false, false, true, false, false, true, false, false, false, false, false, false, false, true, true, false, true], "silence_prob": [39.12, 0.0, 78.04, 31.18, 0.0, 56.48, 36.44, 70.3, 0.0, 0.0, 48.82, 95.51, 57.97, 51.99, 36.71, 37.52, 33.17, 37.28], "audiomae_on_audioset": [[["whale vocalization", 39.52], ["burping, eructation", 15.88], ["buzz", 7.06]], null, null, [["speech", 45.25], ["telephone", 7.51], ["fart", 6.38]], null, null, [["fart", 21.09], ["music", 15.31], ["speech", 14.87]], null, null, null, [["hum", 38.02], ["speech", 24.89], ["throbbing", 8.72]], null, null, null, [["hum", 28.91], ["speech", 21.43], ["mains hum", 13.65]], [["music", 30.29], ["theremin", 20.95], ["hum", 5.5]], [["hum", 45.83], ["mains hum", 21.13], ["music", 8.79]], [["hum", 32.82], ["music", 24.23], ["theremin", 11.33]]], "duration": [6.99, 1.22, 2.89, 5.97, 1.54, 2.31, 4.34, 2.81, 1.79, 1.48, 2.04, 2.28, 9.63, 2.46, 5.75, 8.2, 8.92, 4.35]}
|
annotations_filtered/l4S4IBACQCM_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.93], [3.0, 7.15], [8.0, 8.94], [9.0, 10.83], [22.0, 22.69], [25.0, 25.02], [26.0, 26.47], [30.0, 31.56], [32.0, 32.66], [34.0, 34.13], [40.0, 41.39], [51.0, 51.46], [71.0, 108.7], [113.0, 134.6]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 31.64, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.85], "audiomae_on_audioset": [null, [["music", 53.16], ["brass instrument", 11.65], ["trombone", 9.66]], null, null, null, null, null, null, null, null, null, null, null, [["music", 62.04], ["buzz", 6.35], ["musical instrument", 4.06]]], "duration": [0.93, 4.15, 0.94, 1.83, 0.69, 0.02, 0.47, 1.56, 0.66, 0.13, 1.39, 0.46, 37.7, 21.6]}
|
annotations_filtered/l53Q1UXk2DE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 37.69], [43.0, 46.4], [50.0, 54.51], [55.0, 54.89], [58.0, 80.86], [89.0, 173.99]], "keep_status": [false, true, true, false, true, false], "silence_prob": [0.0, 34.23, 33.9, 0.0, 35.08, 0.0], "audiomae_on_audioset": [null, [["music", 46.21], ["sampler", 8.77], ["boing", 6.26]], [["music", 53.37], ["throbbing", 8.39], ["synthesizer", 4.07]], null, [["music", 56.27], ["sampler", 4.85], ["synthesizer", 4.5]], null], "duration": [30.69, 3.4, 4.51, -0.11, 22.86, 84.99]}
|
annotations_filtered/l59t24vh3QI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.82], [8.0, 7.86], [15.0, 16.07], [20.0, 21.41], [29.0, 29.71], [47.0, 47.68], [49.0, 50.13], [52.0, 53.92], [55.0, 64.52], [67.0, 67.31], [70.0, 80.15], [81.0, 85.53], [86.0, 88.21], [92.0, 92.84], [93.0, 109.58], [110.0, 116.87], [119.0, 119.25], [120.0, 126.42], [127.0, 129.19]], "keep_status": [false, false, false, false, false, false, false, false, false, false, true, true, true, false, true, true, false, false, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 33.11, 0.0, 32.24, 32.55, 30.09, 0.0, 32.85, 31.76, 0.0, 30.8, 31.04], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, [["hum", 31.24], ["mains hum", 30.3], ["music", 16.7]], null, [["music", 42.67], ["theremin", 11.75], ["hum", 9.61]], [["music", 33.04], ["mains hum", 9.78], ["hum", 8.58]], [["music", 15.87], ["didgeridoo", 11.19], ["fart", 4.53]], null, [["music", 34.11], ["synthesizer", 16.37], ["noise", 10.09]], [["music", 24.44], ["theremin", 16.95], ["mains hum", 14.25]], null, [["theremin", 42.21], ["music", 41.41], ["musical instrument", 2.89]], [["theremin", 30.74], ["music", 26.66], ["siren", 3.81]]], "duration": [0.82, -0.14, 1.07, 1.41, 0.71, 0.68, 1.13, 1.92, 9.52, 0.31, 10.15, 4.53, 2.21, 0.84, 16.58, 6.87, 0.25, 6.42, 2.19]}
|
annotations_filtered/l5s3_XV1rkA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 9.1], [14.0, 16.9], [20.0, 27.31], [30.0, 43.66], [45.0, 45.32], [46.0, 46.74], [52.0, 64.35], [67.0, 69.35], [80.0, 80.79], [83.0, 84.05], [84.0, 85.94], [87.0, 86.85], [89.0, 96.58], [98.0, 98.58], [100.0, 100.72]], "keep_status": [true, false, true, false, false, false, false, true, false, false, false, false, true, false, false], "silence_prob": [31.38, 31.12, 30.95, 30.95, 0.0, 0.0, 30.13, 31.77, 0.0, 0.0, 0.0, 0.0, 30.84, 0.0, 0.0], "audiomae_on_audioset": [[["music", 55.54], ["synthesizer", 6.75], ["ambient music", 6.19]], [["music", 79.54], ["synthesizer", 2.82], ["musical instrument", 2.42]], [["music", 45.58], ["didgeridoo", 11.6], ["synthesizer", 6.57]], [["brass instrument", 34.83], ["music", 34.34], ["trombone", 10.29]], null, null, [["music", 39.35], ["trombone", 27.12], ["brass instrument", 15.12]], [["hum", 31.78], ["mains hum", 20.32], ["throbbing", 6.21]], null, null, null, null, [["music", 44.77], ["musical instrument", 7.11], ["hum", 6.93]], null, null], "duration": [4.1, 2.9, 7.31, 13.66, 0.32, 0.74, 12.35, 2.35, 0.79, 1.05, 1.94, -0.15, 7.58, 0.58, 0.72]}
|
annotations_filtered/l65KNW2ZGV8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.78], [4.0, 4.78], [6.0, 6.96], [7.0, 8.92], [11.0, 11.13], [12.0, 12.43], [12.0, 13.91], [16.0, 17.02], [18.0, 17.95], [19.0, 19.94], [31.0, 32.07], [34.0, 34.32], [38.0, 38.82], [42.0, 43.09], [45.0, 45.2], [46.0, 50.03], [52.0, 52.73], [54.0, 53.59], [54.0, 55.68], [56.0, 67.44], [72.0, 72.49], [77.0, 77.16], [79.0, 79.68], [80.0, 80.84], [82.0, 83.3], [87.0, 88.23], [91.0, 91.2], [94.0, 94.36], [95.0, 96.26], [106.0, 106.62], [114.0, 115.86], [117.0, 119.1], [119.0, 120.41]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 93.6, 0.0, 0.0, 0.0, 99.93, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.78, 0.78, 0.96, 1.92, 0.13, 0.43, 1.91, 1.02, -0.05, 0.94, 1.07, 0.32, 0.82, 1.09, 0.2, 4.03, 0.73, -0.41, 1.68, 11.44, 0.49, 0.16, 0.68, 0.84, 1.3, 1.23, 0.2, 0.36, 1.26, 0.62, 1.86, 2.1, 1.41]}
|
annotations_filtered/l6NIVn6_m1c_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 6.47], [9.0, 9.48], [11.0, 11.74], [12.0, 14.32], [16.0, 20.87], [21.0, 24.76], [34.0, 36.07], [45.0, 55.8], [59.0, 59.91], [61.0, 64.02], [65.0, 78.07], [85.0, 84.99], [87.0, 87.24], [103.0, 113.75], [116.0, 121.12], [123.0, 123.57], [125.0, 131.41], [134.0, 144.68], [147.0, 147.78], [149.0, 149.88], [151.0, 154.94], [156.0, 157.59], [161.0, 162.21], [164.0, 164.98], [167.0, 167.26], [170.0, 170.7], [171.0, 172.29], [173.0, 173.16]], "keep_status": [false, false, false, false, true, true, false, true, false, true, false, false, false, false, false, false, false, true, false, false, true, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 51.18, 35.93, 31.36, 31.73, 45.11, 0.0, 34.16, 31.58, 0.0, 0.0, 39.93, 39.35, 0.0, 33.81, 37.23, 0.0, 0.0, 32.56, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, [["music", 49.32], ["theremin", 6.9], ["hum", 4.51]], [["music", 40.76], ["livestock, farm animals, working animals", 4.18], ["trombone", 3.16]], [["livestock, farm animals, working animals", 40.38], ["moo", 19.43], ["cattle, bovinae", 13.4]], [["music", 23.42], ["hum", 21.11], ["throbbing", 10.95]], null, [["speech", 27.41], ["music", 21.88], ["radio", 17.97]], [["music", 39.8], ["theremin", 33.28], ["synthesizer", 8.36]], null, null, [["hum", 50.56], ["mains hum", 20.53], ["throbbing", 11.8]], [["music", 60.11], ["theremin", 12.69], ["musical instrument", 1.7]], null, [["music", 75.75], ["musical instrument", 8.78], ["synthesizer", 1.24]], [["music", 37.06], ["musical instrument", 18.03], ["brass instrument", 8.7]], null, null, [["music", 26.4], ["theremin", 9.04], ["noise", 8.98]], null, null, null, null, null, null, null], "duration": [0.47, 0.48, 0.74, 2.32, 4.87, 3.76, 2.07, 10.8, 0.91, 3.02, 13.07, -0.01, 0.24, 10.75, 5.12, 0.57, 6.41, 10.68, 0.78, 0.88, 3.94, 1.59, 1.21, 0.98, 0.26, 0.7, 1.29, 0.16]}
|
annotations_filtered/l6StIaMaRsg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[28.0, 30.25], [33.0, 37.1], [38.0, 40.74], [42.0, 47.93], [50.0, 50.28], [50.0, 50.77], [52.0, 53.65], [58.0, 64.42], [66.0, 71.41], [72.0, 72.44], [73.0, 73.31], [76.0, 76.44], [79.0, 79.68], [91.0, 96.99], [97.0, 102.32], [104.0, 107.69], [110.0, 111.67]], "keep_status": [false, true, true, true, false, false, false, false, false, false, false, false, false, false, true, false, false], "silence_prob": [61.97, 37.81, 37.3, 36.98, 0.0, 0.0, 0.0, 35.39, 90.78, 0.0, 0.0, 0.0, 0.0, 62.99, 47.94, 82.43, 0.0], "audiomae_on_audioset": [null, [["hum", 16.67], ["fly, housefly", 11.46], ["music", 10.78]], [["mains hum", 31.58], ["hum", 23.01], ["music", 14.88]], [["hum", 24.06], ["mains hum", 18.55], ["music", 9.55]], null, null, null, [["hum", 53.18], ["mains hum", 22.64], ["music", 9.57]], null, null, null, null, null, null, [["music", 36.9], ["speech", 26.42], ["musical instrument", 6.03]], null, null], "duration": [2.25, 4.1, 2.74, 5.93, 0.28, 0.77, 1.65, 6.42, 5.41, 0.44, 0.31, 0.44, 0.68, 5.99, 5.32, 3.69, 1.67]}
|
annotations_filtered/l6TGERgrXmA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 1.76], [2.0, 5.68], [19.0, 19.84], [25.0, 26.74], [35.0, 36.02], [38.0, 38.2], [43.0, 45.15], [46.0, 46.57], [54.0, 61.33], [64.0, 64.56], [65.0, 67.07], [68.0, 68.5], [71.0, 71.22], [74.0, 75.03], [82.0, 84.2], [95.0, 98.25], [102.0, 103.13], [115.0, 124.48], [127.0, 130.47], [131.0, 132.68], [133.0, 144.15]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 90.95, 0.0, 0.0, 0.0, 0.0, 62.58, 0.0, 61.97, 0.0, 53.84, 0.0, 0.0, 0.0, 56.25, 99.92, 0.0, 90.95, 92.8, 0.0, 96.42], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [-0.24, 3.68, 0.84, 1.74, 1.02, 0.2, 2.15, 0.57, 7.33, 0.56, 2.07, 0.5, 0.22, 1.03, 2.2, 3.25, 1.13, 9.48, 3.47, 1.68, 11.15]}
|
annotations_filtered/l6cFM5Ubilw_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 5.91], [12.0, 16.92], [19.0, 20.43], [24.0, 25.32], [28.0, 29.02], [30.0, 36.22], [37.0, 43.92], [44.0, 43.95], [44.0, 47.01], [47.0, 47.04], [56.0, 57.2], [61.0, 64.81], [68.0, 69.26], [73.0, 80.45], [86.0, 107.87], [110.0, 130.01]], "keep_status": [false, false, false, false, false, false, true, false, true, false, false, false, false, false, false, true], "silence_prob": [0.0, 49.73, 0.0, 0.0, 0.0, 58.47, 39.91, 0.0, 32.6, 0.0, 0.0, 41.07, 0.0, 31.61, 29.03, 44.04], "audiomae_on_audioset": [null, [["speech", 37.02], ["music", 34.49], ["foghorn", 3.37]], null, null, null, null, [["music", 37.81], ["speech", 14.96], ["singing bowl", 6.91]], null, [["speech", 28.81], ["music", 20.96], ["whale vocalization", 19.75]], null, null, [["whale vocalization", 43.99], ["music", 38.61], ["speech", 9.2]], null, [["music", 60.17], ["speech", 13.45], ["hum", 2.59]], [["speech", 45.77], ["music", 23.68], ["buzz", 4.08]], [["music", 40.34], ["hum", 15.37], ["mains hum", 8.31]]], "duration": [-0.09, 4.92, 1.43, 1.32, 1.02, 6.22, 6.92, -0.05, 3.01, 0.04, 1.2, 3.81, 1.26, 7.45, 21.87, 20.01]}
|
annotations_filtered/l6e1M2d4BJ0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.94], [5.0, 7.5], [10.0, 10.5], [11.0, 12.39], [16.0, 16.98], [18.0, 21.34], [25.0, 25.81], [27.0, 28.73], [30.0, 31.75], [33.0, 35.04], [37.0, 39.85], [42.0, 42.52], [45.0, 46.68], [47.0, 51.12], [54.0, 55.65], [57.0, 57.65], [62.0, 64.61], [68.0, 68.89], [71.0, 72.17], [76.0, 77.82], [82.0, 84.1], [85.0, 86.22], [88.0, 88.72], [90.0, 90.78], [92.0, 94.63], [95.0, 95.81], [98.0, 99.25], [101.0, 102.3], [104.0, 104.87], [107.0, 108.4], [111.0, 111.27], [112.0, 112.8], [117.0, 117.22], [119.0, 120.51], [124.0, 127.14], [127.0, 128.9], [131.0, 132.9], [135.0, 135.78], [138.0, 138.72], [140.0, 147.95], [150.0, 151.68], [154.0, 154.41], [156.0, 157.69], [159.0, 162.75], [163.0, 164.91], [165.0, 166.08], [168.0, 170.13], [172.0, 175.83]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 87.55, 0.0, 0.0, 0.0, 99.98, 0.0, 0.0, 0.0, 100.0, 98.01, 0.0, 0.0, 84.43, 0.0, 0.0, 98.1, 0.0, 0.0, 0.0, 92.48, 0.0, 0.0, 0.0, 99.21, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 94.52, 0.0, 0.0, 0.0, 0.0, 86.09, 0.0, 0.0, 0.0, 87.74, 0.0, 0.0, 84.8, 73.67], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.94, 2.5, 0.5, 1.39, 0.98, 3.34, 0.81, 1.73, 1.75, 2.04, 2.85, 0.52, 1.68, 4.12, 1.65, 0.65, 2.61, 0.89, 1.17, 1.82, 2.1, 1.22, 0.72, 0.78, 2.63, 0.81, 1.25, 1.3, 0.87, 1.4, 0.27, 0.8, 0.22, 1.51, 3.14, 1.9, 1.9, 0.78, 0.72, 7.95, 1.68, 0.41, 1.69, 3.75, 1.91, 1.08, 2.13, 3.83]}
|
annotations_filtered/l6uaxfye2Ig_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.72], [7.0, 7.47], [12.0, 13.0], [14.0, 16.44], [23.0, 22.99], [44.0, 44.58], [48.0, 48.2], [51.0, 51.68], [60.0, 60.69], [62.0, 62.63], [65.0, 65.94], [69.0, 70.34], [74.0, 75.68], [86.0, 87.76], [89.0, 90.49]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 37.82, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, [["speech", 69.16], ["beatboxing", 3.82], ["fart", 3.41]], null, null, null, null, null, null, null, null, null, null, null], "duration": [0.72, 0.47, 1.0, 2.44, -0.01, 0.58, 0.2, 0.68, 0.69, 0.63, 0.94, 1.34, 1.68, 1.76, 1.49]}
|
annotations_filtered/l6zm1uCb30w_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.73], [3.0, 3.7], [6.0, 8.14], [10.0, 10.4], [11.0, 10.98], [11.0, 11.97], [13.0, 14.37], [18.0, 19.16], [21.0, 22.25], [22.0, 22.94], [24.0, 24.34], [27.0, 27.87]], "keep_status": [false, false, true, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 44.96, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, [["hum", 24.58], ["mains hum", 8.74], ["speech", 6.41]], null, null, null, null, null, null, null, null, null], "duration": [0.73, 0.7, 2.14, 0.4, -0.02, 0.97, 1.37, 1.16, 1.25, 0.94, 0.34, 0.87]}
|
annotations_filtered/l7FkN4ooYvA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[9.0, 11.3], [13.0, 13.07], [28.0, 27.82], [28.0, 28.24], [28.0, 28.29], [35.0, 38.47], [43.0, 59.8], [61.0, 69.09], [69.0, 69.16], [69.0, 70.78], [77.0, 79.84], [84.0, 95.12], [102.0, 101.88], [108.0, 107.94], [108.0, 108.26], [116.0, 116.51], [119.0, 118.94], [124.0, 129.41], [130.0, 139.38], [142.0, 144.83], [166.0, 169.23], [172.0, 174.31]], "keep_status": [true, false, false, false, false, true, false, false, false, false, true, true, false, false, false, false, false, false, false, false, true, true], "silence_prob": [43.08, 0.0, 0.0, 0.0, 0.0, 33.77, 32.13, 31.81, 0.0, 0.0, 29.05, 32.61, 0.0, 0.0, 0.0, 0.0, 0.0, 37.37, 32.2, 35.53, 29.39, 29.62], "audiomae_on_audioset": [[["music", 41.04], ["bow-wow", 11.26], ["speech", 8.64]], null, null, null, null, [["vehicle", 12.32], ["motorcycle", 12.29], ["siren", 11.47]], [["music", 53.36], ["boing", 32.63], ["speech", 4.03]], [["music", 67.0], ["boing", 10.35], ["scratching (performance technique)", 3.59]], null, null, [["speech", 26.34], ["sidetone", 11.01], ["music", 10.72]], [["music", 55.98], ["theremin", 10.15], ["buzz", 3.46]], null, null, null, null, null, [["music", 71.55], ["speech", 11.47], ["sidetone", 3.01]], [["music", 67.5], ["speech", 6.65], ["groan", 4.84]], [["groan", 34.29], ["music", 23.55], ["boing", 18.51]], [["music", 53.34], ["electronic music", 3.58], ["effects unit", 3.02]], [["music", 23.69], ["speech", 4.54], ["musical instrument", 2.28]]], "duration": [2.3, 0.07, -0.18, 0.24, 0.29, 3.47, 16.8, 8.09, 0.16, 1.78, 2.84, 11.12, -0.12, -0.06, 0.26, 0.51, -0.06, 5.41, 9.38, 2.83, 3.23, 2.31]}
|
annotations_filtered/l83CcqhP-kY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.2], [15.0, 15.63], [19.0, 19.31], [19.0, 19.45], [47.0, 47.83], [62.0, 63.04], [64.0, 69.01], [69.0, 69.96], [72.0, 77.46], [79.0, 79.32], [80.0, 80.28], [81.0, 82.8], [96.0, 96.35], [97.0, 97.63], [100.0, 102.44], [103.0, 103.55], [108.0, 107.79], [117.0, 117.86], [119.0, 119.85]], "keep_status": [false, false, false, false, false, false, false, false, true, false, false, false, false, false, true, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 36.96, 0.0, 29.63, 0.0, 0.0, 0.0, 0.0, 0.0, 34.16, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, [["speech", 70.34], ["sidetone", 8.07], ["noise", 4.93]], null, [["explosion", 23.21], ["whack, thwack", 16.38], ["speech", 9.62]], null, null, null, null, null, [["speech", 29.3], ["coin (dropping)", 23.43], ["dishes, pots, and pans", 16.08]], null, null, null, null], "duration": [0.2, 0.63, 0.31, 0.45, 0.83, 1.04, 5.01, 0.96, 5.46, 0.32, 0.28, 1.8, 0.35, 0.63, 2.44, 0.55, -0.21, 0.86, 0.85]}
|
annotations_filtered/l8MFxT9ILKY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[13.0, 13.26], [15.0, 26.89], [31.0, 34.96], [37.0, 38.3], [39.0, 46.01], [47.0, 53.87], [60.0, 60.45], [63.0, 74.66], [78.0, 79.2], [80.0, 81.51], [83.0, 89.67], [93.0, 104.14], [106.0, 107.42], [108.0, 144.15], [146.0, 146.11], [148.0, 147.75], [149.0, 160.91], [162.0, 163.76], [165.0, 166.13]], "keep_status": [false, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 30.45, 30.18, 0.0, 33.63, 44.32, 0.0, 41.36, 0.0, 0.0, 34.3, 30.33, 0.0, 0.0, 0.0, 0.0, 30.44, 0.0, 0.0], "audiomae_on_audioset": [null, [["speech", 36.47], ["vehicle", 8.92], ["hum", 8.59]], [["speech", 45.64], ["hubbub, speech noise, speech babble", 11.15], ["cattle, bovinae", 3.68]], null, [["speech", 64.71], ["theremin", 5.02], ["music", 4.38]], [["speech", 37.49], ["music", 30.69], ["hum", 3.35]], null, [["mains hum", 55.52], ["hum", 38.39], ["speech", 1.05]], null, null, [["music", 67.67], ["hum", 2.68], ["scary music", 2.54]], [["music", 56.25], ["hum", 9.0], ["buzz", 8.47]], null, null, null, null, [["music", 53.96], ["hum", 20.39], ["mains hum", 12.97]], null, null], "duration": [0.26, 11.89, 3.96, 1.3, 7.01, 6.87, 0.45, 11.66, 1.2, 1.51, 6.67, 11.14, 1.42, 36.15, 0.11, -0.25, 11.91, 1.76, 1.13]}
|
annotations_filtered/l8aozWddbPA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 11.77], [13.0, 13.29], [14.0, 16.19], [20.0, 20.38], [22.0, 24.09], [28.0, 28.39], [36.0, 37.4], [40.0, 40.51], [43.0, 45.64], [48.0, 48.22], [51.0, 50.87], [53.0, 53.33], [55.0, 57.43], [60.0, 60.12], [61.0, 61.62], [63.0, 66.06], [68.0, 68.98], [72.0, 72.89], [76.0, 77.08], [80.0, 84.43], [85.0, 88.94], [113.0, 113.02], [113.0, 113.24], [114.0, 116.77]], "keep_status": [true, false, false, false, true, false, false, false, true, false, false, false, false, false, false, true, false, false, false, false, false, false, false, true], "silence_prob": [32.76, 0.0, 38.63, 0.0, 45.36, 0.0, 0.0, 0.0, 34.01, 0.0, 0.0, 0.0, 40.64, 0.0, 0.0, 39.5, 0.0, 0.0, 0.0, 39.63, 34.56, 0.0, 0.0, 33.52], "audiomae_on_audioset": [[["music", 25.36], ["animal", 6.72], ["speech", 6.36]], null, [["speech", 61.69], ["baby laughter", 6.46], ["laughter", 3.63]], null, [["speech", 24.83], ["sidetone", 14.98], ["frog", 10.32]], null, null, null, [["noise", 15.62], ["speech", 11.84], ["whale vocalization", 11.6]], null, null, null, [["mosquito", 35.81], ["frog", 30.79], ["croak", 9.93]], null, null, [["music", 33.54], ["speech", 14.09], ["animal", 10.0]], null, null, null, [["music", 51.21], ["speech", 17.89], ["didgeridoo", 5.08]], [["speech", 47.15], ["music", 18.45], ["radio", 5.41]], null, null, [["frog", 39.4], ["music", 13.37], ["boing", 6.73]]], "duration": [3.77, 0.29, 2.19, 0.38, 2.09, 0.39, 1.4, 0.51, 2.64, 0.22, -0.13, 0.33, 2.43, 0.12, 0.62, 3.06, 0.98, 0.89, 1.08, 4.43, 3.94, 0.02, 0.24, 2.77]}
|
annotations_filtered/l94geYuwNJg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[18.0, 18.33]], "keep_status": [false], "silence_prob": [0.0], "audiomae_on_audioset": [null], "duration": [0.33]}
|
annotations_filtered/l97NtEMUx0M_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[19.0, 31.62], [32.0, 40.46], [42.0, 43.09], [50.0, 51.07], [53.0, 57.6], [61.0, 64.42], [67.0, 67.74], [69.0, 71.61], [72.0, 72.57], [73.0, 74.24], [80.0, 82.76], [88.0, 89.83]], "keep_status": [false, false, false, false, true, true, false, false, false, false, false, false], "silence_prob": [29.42, 29.83, 0.0, 0.0, 28.78, 49.22, 0.0, 75.07, 0.0, 0.0, 74.44, 0.0], "audiomae_on_audioset": [[["buzz", 44.5], ["music", 24.7], ["speech", 7.74]], [["music", 37.74], ["speech", 23.11], ["buzz", 18.34]], null, null, [["music", 38.5], ["hum", 18.33], ["throbbing", 11.99]], [["hum", 20.6], ["music", 17.27], ["mains hum", 8.07]], null, null, null, null, null, null], "duration": [12.62, 8.46, 1.09, 1.07, 4.6, 3.42, 0.74, 2.61, 0.57, 1.24, 2.76, 1.83]}
|
annotations_filtered/l9LOKUiY0Dg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[11.0, 13.12], [32.0, 33.59], [36.0, 45.67], [46.0, 45.77], [47.0, 47.21], [57.0, 59.31], [72.0, 73.85], [84.0, 107.38], [109.0, 110.84], [112.0, 113.24], [123.0, 124.43], [125.0, 126.15], [126.0, 127.63], [128.0, 129.41], [134.0, 134.92], [137.0, 138.11], [147.0, 148.15], [149.0, 151.48], [153.0, 153.52], [155.0, 158.8]], "keep_status": [true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [31.85, 0.0, 32.5, 0.0, 0.0, 44.84, 0.0, 34.06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 62.78, 0.0, 50.11], "audiomae_on_audioset": [[["music", 16.07], ["speech", 14.51], ["hum", 6.83]], null, [["music", 35.37], ["throbbing", 29.57], ["speech", 9.58]], null, null, [["throbbing", 31.82], ["hum", 29.31], ["music", 20.48]], null, [["music", 49.62], ["throbbing", 18.95], ["hum", 10.41]], null, null, null, null, null, null, null, null, null, null, null, null], "duration": [2.12, 1.59, 9.67, -0.23, 0.21, 2.31, 1.85, 23.38, 1.84, 1.24, 1.43, 1.15, 1.63, 1.41, 0.92, 1.11, 1.15, 2.48, 0.52, 3.8]}
|
annotations_filtered/l9c1k_m6POA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.47], [4.0, 4.53], [7.0, 8.34], [12.0, 15.5], [16.0, 17.83], [18.0, 25.64], [27.0, 29.02], [30.0, 30.4], [31.0, 31.62], [32.0, 35.97], [37.0, 40.74], [42.0, 46.36], [52.0, 53.74], [54.0, 55.29], [58.0, 58.95], [69.0, 69.48], [72.0, 73.52], [75.0, 76.2], [79.0, 80.57], [81.0, 83.51], [85.0, 86.31], [89.0, 89.14], [92.0, 92.62], [96.0, 98.32], [101.0, 101.97], [103.0, 109.07], [110.0, 110.46], [115.0, 115.5], [118.0, 121.78], [126.0, 130.54], [132.0, 133.19], [134.0, 147.19], [149.0, 149.61], [155.0, 155.83], [157.0, 163.44], [164.0, 165.01], [166.0, 167.46]], "keep_status": [false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 49.0, 0.0, 35.47, 92.64, 0.0, 0.0, 81.71, 99.95, 85.35, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 72.9, 0.0, 0.0, 0.0, 65.91, 0.0, 83.34, 0.0, 0.0, 99.94, 63.74, 0.0, 96.17, 0.0, 0.0, 89.9, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, [["music", 41.68], ["synthesizer", 14.41], ["musical instrument", 10.82]], null, [["music", 58.13], ["speech", 26.66], ["thunk", 3.56]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.47, 0.53, 1.34, 3.5, 1.83, 7.64, 2.02, 0.4, 0.62, 3.97, 3.74, 4.36, 1.74, 1.29, 0.95, 0.48, 1.52, 1.2, 1.57, 2.51, 1.31, 0.14, 0.62, 2.32, 0.97, 6.07, 0.46, 0.5, 3.78, 4.54, 1.19, 13.19, 0.61, 0.83, 6.44, 1.01, 1.46]}
|
annotations_filtered/l9k9_K8Tea0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[27.0, 46.72], [49.0, 49.76], [51.0, 52.56], [60.0, 60.34], [65.0, 66.33], [67.0, 67.93], [69.0, 75.69]], "keep_status": [false, false, false, false, false, false, false], "silence_prob": [35.33, 0.0, 0.0, 0.0, 0.0, 0.0, 99.97], "audiomae_on_audioset": [[["speech", 72.77], ["music", 12.71], ["sidetone", 2.7]], null, null, null, null, null, null], "duration": [19.72, 0.76, 1.56, 0.34, 1.33, 0.93, 6.69]}
|
annotations_filtered/lAIJ6Twk8aQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.19], [8.0, 8.48], [11.0, 11.91], [16.0, 36.22], [37.0, 52.08], [53.0, 54.16], [56.0, 59.09], [61.0, 61.96], [63.0, 64.81], [67.0, 76.27], [77.0, 78.11], [81.0, 81.84], [83.0, 84.77], [88.0, 88.7], [91.0, 100.2], [102.0, 102.93], [106.0, 112.5], [113.0, 113.88], [117.0, 117.93], [121.0, 121.61], [123.0, 123.13], [125.0, 128.76]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 65.91, 80.82, 0.0, 99.26, 0.0, 0.0, 99.52, 0.0, 0.0, 0.0, 0.0, 97.92, 0.0, 89.01, 0.0, 0.0, 0.0, 0.0, 98.36], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.19, 0.48, 0.91, 20.22, 15.08, 1.16, 3.09, 0.96, 1.81, 9.27, 1.11, 0.84, 1.77, 0.7, 9.2, 0.93, 6.5, 0.88, 0.93, 0.61, 0.13, 3.76]}
|
annotations_filtered/lAcZxn1DeHs_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 12.53], [14.0, 57.25], [59.0, 66.51], [70.0, 84.43]], "keep_status": [true, false, true, true], "silence_prob": [31.8, 0.0, 32.98, 30.72], "audiomae_on_audioset": [[["music", 37.34], ["synthesizer", 22.43], ["didgeridoo", 8.34]], null, [["music", 34.77], ["synthesizer", 16.58], ["speech", 9.15]], [["music", 57.25], ["speech", 7.79], ["trance music", 4.39]]], "duration": [2.53, 43.25, 7.51, 14.43]}
|
annotations_filtered/lAgPsmTxBfc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[12.0, 12.51], [17.0, 18.66], [21.0, 23.09], [27.0, 27.68], [28.0, 29.08], [31.0, 33.29], [37.0, 37.18], [39.0, 40.86], [45.0, 45.17], [49.0, 50.5], [58.0, 61.28], [72.0, 72.5], [84.0, 84.81], [94.0, 94.44], [94.0, 94.71], [96.0, 98.73], [100.0, 99.93], [110.0, 110.47], [116.0, 116.53], [117.0, 118.0], [119.0, 119.65], [124.0, 124.88], [128.0, 129.12]], "keep_status": [false, false, true, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 33.61, 0.0, 0.0, 32.72, 0.0, 0.0, 0.0, 0.0, 54.17, 0.0, 0.0, 0.0, 0.0, 31.85, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, [["burping, eructation", 29.83], ["cough", 21.97], ["boing", 3.78]], null, null, [["boing", 31.77], ["speech", 19.78], ["didgeridoo", 8.49]], null, null, null, null, null, null, null, null, null, [["speech", 57.7], ["laughter", 14.41], ["fart", 3.73]], null, null, null, null, null, null, null], "duration": [0.51, 1.66, 2.09, 0.68, 1.08, 2.29, 0.18, 1.86, 0.17, 1.5, 3.28, 0.5, 0.81, 0.44, 0.71, 2.73, -0.07, 0.47, 0.53, 1.0, 0.65, 0.88, 1.12]}
|
annotations_filtered/lAhQbCN-Zvg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 4.02], [5.0, 5.49], [8.0, 8.95], [11.0, 13.78], [15.0, 14.96], [16.0, 16.88], [18.0, 18.47], [19.0, 19.85], [21.0, 22.15], [23.0, 24.6], [28.0, 27.99], [29.0, 32.59], [33.0, 34.5], [38.0, 39.24], [40.0, 40.91], [47.0, 47.38], [62.0, 63.17], [68.0, 68.64], [72.0, 73.35], [77.0, 78.65], [88.0, 91.84], [94.0, 97.04], [98.0, 99.2], [100.0, 101.12], [103.0, 105.48], [106.0, 107.99], [111.0, 115.2]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0, 0.0, 0.0, 100.0, 0.0, 100.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.02, 0.49, 0.95, 2.78, -0.04, 0.88, 0.47, 0.85, 1.15, 1.6, -0.01, 3.59, 1.5, 1.24, 0.91, 0.38, 1.17, 0.64, 1.35, 1.65, 3.84, 3.04, 1.2, 1.12, 2.48, 1.99, 4.2]}
|