Spaces:
Build error
Build error
WIP
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- annotations_filtered/j-47cwN0w_c_filtered.json +1 -0
- annotations_filtered/j-7pVks8avo_filtered.json +1 -0
- annotations_filtered/j-OcaLECz1k_filtered.json +1 -0
- annotations_filtered/j-TPDJFWErg_filtered.json +1 -0
- annotations_filtered/j-V12tL78Mc_filtered.json +1 -0
- annotations_filtered/j-dYZPMpoqI_filtered.json +1 -0
- annotations_filtered/j-v6XtJFNQE_filtered.json +1 -0
- annotations_filtered/j0IXQIUh3jQ_filtered.json +1 -0
- annotations_filtered/j0c_RQDfjSM_filtered.json +1 -0
- annotations_filtered/j0cqqCpIZHE_filtered.json +1 -0
- annotations_filtered/j0iplsU1qa4_filtered.json +1 -0
- annotations_filtered/j0sbjGj7ONo_filtered.json +1 -0
- annotations_filtered/j0silSyYFPM_filtered.json +1 -0
- annotations_filtered/j0z0V2JJ5II_filtered.json +1 -0
- annotations_filtered/j19-hpjJ4ok_filtered.json +1 -0
- annotations_filtered/j1C0Tw80Fgk_filtered.json +1 -0
- annotations_filtered/j1VL-y9JHuI_filtered.json +1 -0
- annotations_filtered/j1q-QWHUU0g_filtered.json +1 -0
- annotations_filtered/j1tXIl0snEk_filtered.json +1 -0
- annotations_filtered/j1tkwdfz7n4_filtered.json +1 -0
- annotations_filtered/j21idqW08wU_filtered.json +1 -0
- annotations_filtered/j2JFTz9KQhk_filtered.json +1 -0
- annotations_filtered/j2MbvFYy_8Y_filtered.json +1 -0
- annotations_filtered/j2SPawJewxA_filtered.json +1 -0
- annotations_filtered/j2ZsEQ4Fr4c_filtered.json +1 -0
- annotations_filtered/j2aGGNQW_7M_filtered.json +1 -0
- annotations_filtered/j2e41FeccuA_filtered.json +1 -0
- annotations_filtered/j32LbrHGak0_filtered.json +1 -0
- annotations_filtered/j38t2lDi4GU_filtered.json +1 -0
- annotations_filtered/j3MZdcbv-ew_filtered.json +1 -0
- annotations_filtered/j3d3mrWBTpM_filtered.json +1 -0
- annotations_filtered/j40IcG_BZuc_filtered.json +1 -0
- annotations_filtered/j42TrAVceCI_filtered.json +1 -0
- annotations_filtered/j477dAxaeck_filtered.json +1 -0
- annotations_filtered/j4onAJ-3FAM_filtered.json +1 -0
- annotations_filtered/j4ujHOSbQB0_filtered.json +1 -0
- annotations_filtered/j4yXEmQRq34_filtered.json +1 -0
- annotations_filtered/j5B70NEq_fY_filtered.json +1 -0
- annotations_filtered/j5Fd6TqePnk_filtered.json +1 -0
- annotations_filtered/j638xTM36I8_filtered.json +1 -0
- annotations_filtered/j66Fsl_q5Ig_filtered.json +1 -0
- annotations_filtered/j6_umKYN_JU_filtered.json +1 -0
- annotations_filtered/j6gLJ4_sfG8_filtered.json +1 -0
- annotations_filtered/j6oBbBfhgYE_filtered.json +1 -0
- annotations_filtered/j6oHprwdTeA_filtered.json +1 -0
- annotations_filtered/j6qjibwpEzM_filtered.json +1 -0
- annotations_filtered/j71oHN1i2pU_filtered.json +1 -0
- annotations_filtered/j7O-SUEh-54_filtered.json +1 -0
- annotations_filtered/j7PgnjEiMcA_filtered.json +1 -0
- annotations_filtered/j7m47I9BuuY_filtered.json +1 -0
annotations_filtered/j-47cwN0w_c_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 10.23], [13.0, 15.7], [17.0, 19.74], [21.0, 24.26], [25.0, 30.0], [32.0, 37.56], [39.0, 80.72], [81.0, 83.93], [85.0, 84.94]], "keep_status": [false, false, false, false, false, false, false, false, false], "silence_prob": [98.59, 99.1, 99.21, 92.31, 94.37, 97.11, 0.0, 51.39, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null], "duration": [5.23, 2.7, 2.74, 3.26, 5.0, 5.56, 41.72, 2.93, -0.06]}
|
annotations_filtered/j-7pVks8avo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 10.42], [16.0, 16.11], [38.0, 38.35], [41.0, 40.88], [47.0, 47.71], [51.0, 52.22], [57.0, 57.01], [63.0, 63.95], [66.0, 66.7], [67.0, 67.49], [77.0, 77.94], [84.0, 84.82], [132.0, 132.09], [133.0, 133.44], [144.0, 145.03], [146.0, 147.39], [148.0, 152.31]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 37.78], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 30.47], ["radio", 24.87], ["speech", 19.64]]], "duration": [0.42, 0.11, 0.35, -0.12, 0.71, 1.22, 0.01, 0.95, 0.7, 0.49, 0.94, 0.82, 0.09, 0.44, 1.03, 1.39, 4.31]}
|
annotations_filtered/j-OcaLECz1k_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 0.5], [9.0, 10.47], [12.0, 12.21], [15.0, 17.39], [24.0, 26.28], [31.0, 34.38], [39.0, 40.42], [42.0, 43.31], [49.0, 50.13], [54.0, 55.63], [56.0, 57.92], [59.0, 59.86], [63.0, 66.07], [67.0, 69.36], [71.0, 72.44]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 99.97, 96.66, 85.72, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.5, 1.47, 0.21, 2.39, 2.28, 3.38, 1.42, 1.31, 1.13, 1.63, 1.92, 0.86, 3.07, 2.36, 1.44]}
|
annotations_filtered/j-TPDJFWErg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.23], [5.0, 8.53], [12.0, 12.51], [14.0, 55.49], [57.0, 62.99], [63.0, 64.15], [65.0, 70.8], [71.0, 72.03], [73.0, 74.29], [75.0, 81.58], [84.0, 92.01], [96.0, 120.68], [123.0, 136.92], [138.0, 138.38], [141.0, 141.17], [143.0, 145.2], [146.0, 146.5], [147.0, 148.44], [149.0, 151.01], [151.0, 152.79], [153.0, 160.17], [162.0, 163.51], [164.0, 167.59], [168.0, 170.78], [171.0, 172.52], [173.0, 174.04]], "keep_status": [false, false, false, false, true, false, true, false, false, true, true, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 34.81, 0.0, 0.0, 33.3, 0.0, 34.98, 0.0, 0.0, 34.14, 32.26, 34.07, 33.98, 0.0, 0.0, 63.1, 0.0, 0.0, 74.44, 0.0, 51.6, 0.0, 63.64, 79.24, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 58.52], ["speech", 21.63], ["animal", 1.21]], null, null, [["music", 27.81], ["noise", 14.59], ["theremin", 8.23]], null, [["speech", 22.75], ["music", 19.09], ["noise", 10.49]], null, null, [["music", 17.3], ["noise", 16.54], ["speech", 13.02]], [["music", 38.99], ["smash, crash", 14.07], ["whack, thwack", 9.83]], [["music", 54.57], ["speech", 20.23], ["throbbing", 8.14]], [["music", 41.43], ["throbbing", 11.01], ["hum", 10.99]], null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.23, 3.53, 0.51, 41.49, 5.99, 1.15, 5.8, 1.03, 1.29, 6.58, 8.01, 24.68, 13.92, 0.38, 0.17, 2.2, 0.5, 1.44, 2.01, 1.79, 7.17, 1.51, 3.59, 2.78, 1.52, 1.04]}
|
annotations_filtered/j-V12tL78Mc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[22.0, 22.77], [24.0, 44.98], [46.0, 68.12], [74.0, 78.36], [80.0, 82.7], [87.0, 87.27], [88.0, 89.02], [90.0, 91.44], [93.0, 93.24], [95.0, 96.33], [97.0, 100.95], [106.0, 107.7], [108.0, 109.0], [110.0, 120.06], [120.0, 120.11], [121.0, 121.42]], "keep_status": [false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 66.63, 70.86, 36.02, 85.72, 0.0, 0.0, 0.0, 0.0, 0.0, 59.33, 0.0, 0.0, 62.17, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, [["music", 16.24], ["speech", 14.0], ["hum", 13.38]], null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.77, 20.98, 22.12, 4.36, 2.7, 0.27, 1.02, 1.44, 0.24, 1.33, 3.95, 1.7, 1.0, 10.06, 0.11, 0.42]}
|
annotations_filtered/j-dYZPMpoqI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[22.0, 26.45], [28.0, 32.97], [35.0, 43.11], [44.0, 43.95], [44.0, 177.79], [178.0, 184.27], [185.0, 186.19], [188.0, 188.67]], "keep_status": [false, true, true, false, false, false, false, false], "silence_prob": [28.49, 32.58, 28.54, 0.0, 0.0, 86.82, 0.0, 0.0], "audiomae_on_audioset": [[["music", 45.8], ["throbbing", 20.37], ["smash, crash", 8.71]], [["hum", 27.42], ["mains hum", 23.46], ["music", 16.45]], [["music", 58.54], ["cacophony", 5.39], ["smash, crash", 4.97]], null, null, null, null, null], "duration": [4.45, 4.97, 8.11, -0.05, 133.79, 6.27, 1.19, 0.67]}
|
annotations_filtered/j-v6XtJFNQE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 6.44], [13.0, 16.24], [18.0, 19.57], [21.0, 24.0], [27.0, 27.45], [31.0, 32.66], [35.0, 38.6], [40.0, 40.42], [42.0, 41.67], [46.0, 46.8], [50.0, 52.24], [55.0, 81.06], [82.0, 83.37], [85.0, 86.71], [87.0, 87.72], [90.0, 90.69], [91.0, 91.86], [93.0, 94.41], [99.0, 118.74]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false], "silence_prob": [0.0, 89.54, 0.0, 94.22, 0.0, 0.0, 99.1, 0.0, 0.0, 0.0, 60.32, 41.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 94.66], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, [["speech", 41.68], ["explosion", 14.41], ["fly, housefly", 7.36]], null, null, null, null, null, null, null], "duration": [0.44, 3.24, 1.57, 3.0, 0.45, 1.66, 3.6, 0.42, -0.33, 0.8, 2.24, 26.06, 1.37, 1.71, 0.72, 0.69, 0.86, 1.41, 19.74]}
|
annotations_filtered/j0IXQIUh3jQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [], "keep_status": [], "silence_prob": [], "audiomae_on_audioset": [], "duration": []}
|
annotations_filtered/j0c_RQDfjSM_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 4.82], [6.0, 6.1], [14.0, 13.91], [18.0, 18.57], [23.0, 23.28], [24.0, 24.95], [35.0, 35.65], [38.0, 38.97], [42.0, 42.77], [46.0, 47.56], [52.0, 52.51], [57.0, 57.18], [63.0, 63.04], [63.0, 63.91], [65.0, 66.43], [68.0, 68.96], [69.0, 81.09], [82.0, 83.37], [84.0, 84.94], [88.0, 88.96], [93.0, 100.36], [102.0, 108.38], [110.0, 121.05], [121.0, 121.17], [121.0, 136.71]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 29.63, 0.0, 0.0, 0.0, 29.35, 29.12, 29.18, 0.0, 30.58], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 88.64], ["boing", 4.79], ["speech", 1.66]], null, null, null, [["music", 82.08], ["fly, housefly", 2.34], ["sidetone", 1.7]], [["music", 50.23], ["didgeridoo", 15.0], ["hum", 3.49]], [["music", 62.88], ["electric shaver, electric razor", 9.88], ["electronic music", 3.1]], null, [["music", 88.33], ["throbbing", 1.83], ["speech", 1.33]]], "duration": [1.82, 0.1, -0.09, 0.57, 0.28, 0.95, 0.65, 0.97, 0.77, 1.56, 0.51, 0.18, 0.04, 0.91, 1.43, 0.96, 12.09, 1.37, 0.94, 0.96, 7.36, 6.38, 11.05, 0.17, 15.71]}
|
annotations_filtered/j0cqqCpIZHE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.4], [6.0, 7.32], [11.0, 11.77], [26.0, 25.89], [26.0, 26.0], [26.0, 26.18], [26.0, 26.22], [26.0, 26.45], [26.0, 27.18], [27.0, 27.23], [28.0, 28.59], [30.0, 30.18], [32.0, 33.52], [35.0, 35.34], [40.0, 44.71], [45.0, 46.23], [48.0, 48.51], [50.0, 50.23], [54.0, 54.55], [60.0, 67.51], [69.0, 70.73], [72.0, 72.93], [75.0, 77.4], [79.0, 81.04], [88.0, 88.48], [89.0, 90.41], [97.0, 97.56], [98.0, 98.36], [99.0, 99.84], [115.0, 115.32], [118.0, 118.84], [120.0, 120.51], [122.0, 123.74], [125.0, 126.1], [127.0, 128.46], [135.0, 136.81], [138.0, 140.12], [141.0, 143.63], [144.0, 147.7], [149.0, 150.04]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.35, 0.0, 0.0, 0.0, 0.0, 30.6, 0.0, 0.0, 40.55, 37.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 43.1, 57.89, 50.21, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 56.82], ["mains hum", 8.46], ["hum", 6.42]], null, null, null, null, [["speech", 67.69], ["music", 12.28], ["breaking", 2.32]], null, null, [["music", 55.71], ["didgeridoo", 8.39], ["hum", 6.08]], [["music", 35.96], ["musical instrument", 5.33], ["vehicle", 3.29]], null, null, null, null, null, null, null, null, null, null, null, null, [["music", 37.76], ["speech", 24.3], ["theremin", 16.26]], null, null, null], "duration": [0.4, 1.32, 0.77, -0.11, 0.0, 0.18, 0.22, 0.45, 1.18, 0.23, 0.59, 0.18, 1.52, 0.34, 4.71, 1.23, 0.51, 0.23, 0.55, 7.51, 1.73, 0.93, 2.4, 2.04, 0.48, 1.41, 0.56, 0.36, 0.84, 0.32, 0.84, 0.51, 1.74, 1.1, 1.46, 1.81, 2.12, 2.63, 3.7, 1.04]}
|
annotations_filtered/j0iplsU1qa4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 5.04], [7.0, 7.25], [8.0, 8.68], [13.0, 14.0], [21.0, 21.47], [24.0, 24.54], [26.0, 30.47], [32.0, 34.74], [36.0, 45.22], [50.0, 50.35], [52.0, 53.16], [56.0, 58.01], [59.0, 60.79], [61.0, 62.68], [68.0, 68.39], [73.0, 73.41], [77.0, 80.32], [81.0, 81.19], [86.0, 86.68], [88.0, 88.57], [90.0, 97.31], [98.0, 102.96], [103.0, 104.21], [108.0, 108.87], [114.0, 114.35], [115.0, 119.45], [120.0, 120.97], [123.0, 126.47], [128.0, 129.83], [132.0, 133.69], [135.0, 137.15], [138.0, 138.97], [140.0, 141.24], [145.0, 145.94], [152.0, 153.55], [155.0, 155.88], [157.0, 157.96], [159.0, 159.92], [164.0, 164.73]], "keep_status": [false, false, false, false, false, false, true, false, false, false, false, true, false, false, false, false, true, false, false, false, true, true, false, false, false, false, false, true, false, false, true, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 42.22, 53.1, 28.98, 0.0, 0.0, 33.19, 0.0, 0.0, 0.0, 0.0, 30.42, 0.0, 0.0, 0.0, 30.69, 30.38, 0.0, 0.0, 0.0, 28.81, 0.0, 34.07, 0.0, 0.0, 31.86, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, [["hum", 32.17], ["music", 16.16], ["throbbing", 13.5]], null, [["hum", 46.09], ["mains hum", 17.32], ["whale vocalization", 8.72]], null, null, [["music", 14.26], ["hum", 13.45], ["mains hum", 9.15]], null, null, null, null, [["hum", 35.76], ["mains hum", 24.92], ["throbbing", 6.9]], null, null, null, [["hum", 29.61], ["fly, housefly", 11.37], ["bee, wasp, etc.", 10.02]], [["bee, wasp, etc.", 36.47], ["insect", 16.69], ["fly, housefly", 14.32]], null, null, null, [["speech", 63.13], ["music", 5.4], ["rumble", 3.59]], null, [["music", 18.39], ["hum", 15.08], ["speech", 11.99]], null, null, [["noise", 29.61], ["white noise", 10.44], ["hum", 8.41]], null, null, null, null, null, null, null, null], "duration": [1.04, 0.25, 0.68, 1.0, 0.47, 0.54, 4.47, 2.74, 9.22, 0.35, 1.16, 2.01, 1.79, 1.68, 0.39, 0.41, 3.32, 0.19, 0.68, 0.57, 7.31, 4.96, 1.21, 0.87, 0.35, 4.45, 0.97, 3.47, 1.83, 1.69, 2.15, 0.97, 1.24, 0.94, 1.55, 0.88, 0.96, 0.92, 0.73]}
|
annotations_filtered/j0sbjGj7ONo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 11.62], [14.0, 20.04], [21.0, 21.88], [30.0, 38.55], [40.0, 40.37], [42.0, 51.48], [52.0, 61.67], [63.0, 63.85], [65.0, 74.12], [75.0, 76.33], [79.0, 80.82], [82.0, 83.78], [85.0, 87.49], [88.0, 88.99], [91.0, 95.59], [97.0, 98.64], [104.0, 112.02], [113.0, 115.99], [117.0, 119.45], [123.0, 127.04]], "keep_status": [false, false, false, true, false, true, false, false, true, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [57.72, 57.89, 0.0, 48.91, 0.0, 42.17, 39.09, 0.0, 32.96, 0.0, 0.0, 0.0, 79.76, 0.0, 69.47, 0.0, 81.17, 58.81, 70.72, 67.25], "audiomae_on_audioset": [null, null, null, [["music", 32.16], ["speech", 12.31], ["fly, housefly", 11.53]], null, [["speech", 30.92], ["music", 25.19], ["insect", 8.66]], [["music", 33.78], ["speech", 25.99], ["theremin", 11.72]], null, [["gunshot, gunfire", 12.5], ["thunk", 10.18], ["music", 8.19]], null, null, null, null, null, null, null, null, null, null, null], "duration": [8.62, 6.04, 0.88, 8.55, 0.37, 9.48, 9.67, 0.85, 9.12, 1.33, 1.82, 1.78, 2.49, 0.99, 4.59, 1.64, 8.02, 2.99, 2.45, 4.04]}
|
annotations_filtered/j0silSyYFPM_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 4.95], [11.0, 13.81], [17.0, 17.44], [22.0, 22.82], [26.0, 28.11], [31.0, 31.16], [34.0, 35.77], [39.0, 42.47], [43.0, 46.48], [50.0, 50.75], [53.0, 54.19], [58.0, 59.64], [63.0, 65.25], [66.0, 66.73], [70.0, 70.46], [74.0, 74.01], [75.0, 76.49], [78.0, 79.15], [82.0, 82.21], [85.0, 86.75], [92.0, 97.88], [99.0, 102.51], [104.0, 104.82], [106.0, 108.41]], "keep_status": [false, false, false, false, true, false, false, true, false, false, false, false, true, false, false, false, false, false, false, false, true, true, false, false], "silence_prob": [41.93, 39.27, 0.0, 0.0, 42.67, 0.0, 0.0, 44.52, 39.07, 0.0, 0.0, 0.0, 45.72, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 39.5, 43.4, 0.0, 36.77], "audiomae_on_audioset": [[["theremin", 39.84], ["music", 25.84], ["hum", 6.3]], [["music", 56.47], ["hum", 7.86], ["mains hum", 6.01]], null, null, [["hum", 14.32], ["music", 10.87], ["throbbing", 8.56]], null, null, [["music", 33.73], ["hum", 16.43], ["mains hum", 6.03]], [["hum", 44.41], ["mains hum", 28.82], ["whale vocalization", 4.04]], null, null, null, [["music", 40.12], ["hum", 10.22], ["didgeridoo", 9.89]], null, null, null, null, null, null, null, [["music", 38.09], ["singing bowl", 12.06], ["speech", 8.22]], [["theremin", 41.14], ["music", 21.26], ["hum", 7.34]], null, [["speech", 45.79], ["didgeridoo", 24.99], ["music", 8.53]]], "duration": [2.95, 2.81, 0.44, 0.82, 2.11, 0.16, 1.77, 3.47, 3.48, 0.75, 1.19, 1.64, 2.25, 0.73, 0.46, 0.01, 1.49, 1.15, 0.21, 1.75, 5.88, 3.51, 0.82, 2.41]}
|
annotations_filtered/j0z0V2JJ5II_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.32], [1.0, 1.36], [1.0, 1.39], [1.0, 1.43], [2.0, 1.53], [3.0, 13.76], [17.0, 18.12], [21.0, 21.32], [26.0, 44.07], [46.0, 54.48], [56.0, 70.04], [71.0, 73.8], [76.0, 79.56], [80.0, 94.05], [95.0, 101.24], [108.0, 110.78], [113.0, 121.59]], "keep_status": [false, false, false, false, false, true, false, false, true, false, false, false, false, false, true, false, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 33.76, 0.0, 0.0, 38.6, 31.11, 35.96, 40.9, 63.1, 38.49, 33.13, 30.14, 30.44], "audiomae_on_audioset": [null, null, null, null, null, [["music", 24.18], ["sidetone", 22.43], ["throbbing", 14.22]], null, null, [["hum", 27.49], ["throbbing", 16.24], ["mains hum", 11.34]], [["music", 49.98], ["noise", 20.77], ["grunt", 6.91]], [["grunt", 49.69], ["noise", 19.04], ["groan", 16.39]], [["sidetone", 49.17], ["speech", 26.68], ["radio", 6.35]], null, [["insect", 40.06], ["fly, housefly", 28.57], ["music", 9.94]], [["music", 49.71], ["theremin", 6.06], ["musical instrument", 4.44]], [["cattle, bovinae", 38.42], ["moo", 31.73], ["speech", 9.58]], [["speech", 46.58], ["electric shaver, electric razor", 10.67], ["hum", 10.19]]], "duration": [0.32, 0.36, 0.39, 0.43, -0.47, 10.76, 1.12, 0.32, 18.07, 8.48, 14.04, 2.8, 3.56, 14.05, 6.24, 2.78, 8.59]}
|
annotations_filtered/j19-hpjJ4ok_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 10.94], [14.0, 14.77], [16.0, 17.44], [18.0, 18.3], [20.0, 20.51], [21.0, 21.81], [26.0, 27.99], [33.0, 33.18], [36.0, 36.31], [37.0, 37.72], [39.0, 40.36], [41.0, 46.63], [48.0, 50.14], [60.0, 61.75], [62.0, 63.9], [71.0, 72.13], [74.0, 77.52], [81.0, 81.23], [82.0, 82.7], [86.0, 86.49], [89.0, 89.43], [90.0, 90.8], [94.0, 95.42], [97.0, 97.77], [98.0, 99.08], [126.0, 128.22], [132.0, 132.41]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 98.99, 84.25, 0.0, 0.0, 0.0, 98.51, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 37.75, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["whale vocalization", 65.88], ["fly, housefly", 4.66], ["insect", 4.28]], null], "duration": [0.94, 0.77, 1.44, 0.3, 0.51, 0.81, 1.99, 0.18, 0.31, 0.72, 1.36, 5.63, 2.14, 1.75, 1.9, 1.13, 3.52, 0.23, 0.7, 0.49, 0.43, 0.8, 1.42, 0.77, 1.08, 2.22, 0.41]}
|
annotations_filtered/j1C0Tw80Fgk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 2.76], [8.0, 8.34], [10.0, 10.86], [12.0, 13.39], [24.0, 25.27], [26.0, 29.61], [37.0, 39.28], [40.0, 41.96], [44.0, 44.66], [45.0, 46.35], [50.0, 50.43], [53.0, 55.73], [57.0, 58.36], [68.0, 74.83], [75.0, 75.86], [77.0, 77.89], [78.0, 83.03], [88.0, 87.98], [93.0, 92.64], [94.0, 98.49], [103.0, 104.53], [111.0, 111.4], [120.0, 120.78], [133.0, 134.87]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 85.9, 99.44, 0.0, 0.0, 0.0, 0.0, 42.96, 0.0, 61.67, 0.0, 0.0, 43.35, 0.0, 0.0, 64.75, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, [["chirp tone", 28.93], ["dial tone", 26.59], ["sine wave", 9.95]], null, null, null, null, [["speech", 83.12], ["telephone", 2.36], ["inside, small room", 2.01]], null, null, null, null, null, null, null], "duration": [-0.24, 0.34, 0.86, 1.39, 1.27, 3.61, 2.28, 1.96, 0.66, 1.35, 0.43, 2.73, 1.36, 6.83, 0.86, 0.89, 5.03, -0.02, -0.36, 4.49, 1.53, 0.4, 0.78, 1.87]}
|
annotations_filtered/j1VL-y9JHuI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[16.0, 16.43], [23.0, 29.08], [35.0, 37.56], [43.0, 46.47], [47.0, 48.2], [57.0, 56.79], [58.0, 58.48], [64.0, 63.91], [65.0, 70.09], [74.0, 74.49], [76.0, 79.79], [84.0, 96.11], [99.0, 99.77], [105.0, 105.73], [107.0, 109.59], [110.0, 110.84], [116.0, 116.13], [117.0, 121.1], [125.0, 138.23]], "keep_status": [false, true, false, false, false, false, false, false, true, false, true, true, false, false, true, false, false, true, false], "silence_prob": [0.0, 30.96, 30.46, 30.49, 0.0, 0.0, 0.0, 0.0, 31.88, 0.0, 30.36, 29.44, 0.0, 0.0, 30.4, 0.0, 0.0, 31.08, 32.92], "audiomae_on_audioset": [null, [["music", 60.55], ["musical instrument", 4.89], ["fireworks", 3.62]], [["music", 50.83], ["musical instrument", 21.31], ["drum", 9.16]], [["music", 70.29], ["musical instrument", 2.17], ["mains hum", 1.91]], null, null, null, null, [["music", 19.08], ["fly, housefly", 17.55], ["insect", 10.62]], null, [["music", 42.99], ["synthetic singing", 4.08], ["speech", 3.75]], [["explosion", 24.34], ["burst, pop", 19.28], ["speech", 16.65]], null, null, [["music", 34.11], ["mains hum", 11.84], ["hum", 9.15]], null, null, [["music", 57.71], ["didgeridoo", 5.42], ["musical instrument", 4.01]], [["music", 70.23], ["vocal music", 5.36], ["singing", 3.98]]], "duration": [0.43, 6.08, 2.56, 3.47, 1.2, -0.21, 0.48, -0.09, 5.09, 0.49, 3.79, 12.11, 0.77, 0.73, 2.59, 0.84, 0.13, 4.1, 13.23]}
|
annotations_filtered/j1q-QWHUU0g_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 0.67], [12.0, 26.25], [29.0, 33.81], [36.0, 37.39], [41.0, 44.09], [45.0, 46.13], [53.0, 52.74], [56.0, 56.59], [58.0, 58.99], [61.0, 62.72], [64.0, 64.47], [64.0, 64.57], [65.0, 64.61], [69.0, 69.6], [74.0, 74.55], [76.0, 77.57], [81.0, 80.96], [81.0, 81.75], [88.0, 88.84], [91.0, 92.58], [94.0, 95.77], [96.0, 97.26], [107.0, 107.86], [110.0, 111.15], [117.0, 118.99], [131.0, 133.05], [141.0, 141.02], [142.0, 143.33], [144.0, 146.53]], "keep_status": [false, true, true, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 34.11, 32.04, 0.0, 32.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.41, 0.0, 0.0, 35.69], "audiomae_on_audioset": [null, [["whale vocalization", 22.81], ["music", 14.93], ["theremin", 9.51]], [["music", 43.4], ["speech", 13.54], ["crowd", 3.96]], null, [["music", 12.62], ["boat, water vehicle", 10.73], ["foghorn", 7.34]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["theremin", 60.68], ["music", 22.9], ["musical instrument", 2.42]], null, null, [["music", 51.15], ["theremin", 30.79], ["musical instrument", 4.88]]], "duration": [0.67, 14.25, 4.81, 1.39, 3.09, 1.13, -0.26, 0.59, 0.99, 1.72, 0.47, 0.57, -0.39, 0.6, 0.55, 1.57, -0.04, 0.75, 0.84, 1.58, 1.77, 1.26, 0.86, 1.15, 1.99, 2.05, 0.02, 1.33, 2.53]}
|
annotations_filtered/j1tXIl0snEk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[20.0, 25.95], [27.0, 28.29], [30.0, 30.65], [33.0, 34.35], [36.0, 36.75], [39.0, 45.69], [47.0, 48.17], [56.0, 57.4], [73.0, 77.48], [83.0, 83.83], [100.0, 101.58], [105.0, 116.6], [118.0, 121.09]], "keep_status": [false, false, false, false, false, true, false, false, true, false, false, true, true], "silence_prob": [29.46, 0.0, 0.0, 0.0, 0.0, 29.41, 0.0, 0.0, 28.69, 0.0, 0.0, 30.37, 30.08], "audiomae_on_audioset": [[["hum", 55.57], ["throbbing", 19.32], ["mains hum", 13.19]], null, null, null, null, [["hum", 28.02], ["music", 20.58], ["throbbing", 17.62]], null, null, [["hum", 35.29], ["mains hum", 22.22], ["music", 9.84]], null, null, [["music", 34.68], ["electronic music", 8.95], ["speech", 6.93]], [["hum", 36.85], ["ambient music", 17.32], ["music", 12.99]]], "duration": [5.95, 1.29, 0.65, 1.35, 0.75, 6.69, 1.17, 1.4, 4.48, 0.83, 1.58, 11.6, 3.09]}
|
annotations_filtered/j1tkwdfz7n4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[16.0, 18.72], [19.0, 20.7], [25.0, 26.96], [28.0, 27.95], [29.0, 30.23], [33.0, 33.34], [34.0, 35.29], [43.0, 43.92], [44.0, 57.05], [88.0, 92.28], [95.0, 94.74], [101.0, 103.25], [114.0, 116.55], [117.0, 120.78], [121.0, 128.75]], "keep_status": [true, false, false, false, false, false, false, false, false, false, false, false, true, false, false], "silence_prob": [32.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.25, 33.58, 0.0, 38.86, 34.68, 33.83, 35.33], "audiomae_on_audioset": [[["music", 33.44], ["thunk", 16.36], ["whack, thwack", 11.28]], null, null, null, null, null, null, null, [["music", 79.11], ["speech", 7.67], ["musical instrument", 2.79]], [["music", 61.02], ["musical instrument", 8.02], ["synthesizer", 4.96]], null, [["music", 71.26], ["musical instrument", 7.49], ["didgeridoo", 3.7]], [["music", 50.37], ["didgeridoo", 5.69], ["hum", 3.72]], [["music", 73.65], ["speech", 9.2], ["musical instrument", 2.96]], [["music", 69.34], ["throbbing", 6.25], ["hum", 4.3]]], "duration": [2.72, 1.7, 1.96, -0.05, 1.23, 0.34, 1.29, 0.92, 13.05, 4.28, -0.26, 2.25, 2.55, 3.78, 7.75]}
|
annotations_filtered/j21idqW08wU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[14.0, 33.86], [35.0, 35.4], [40.0, 46.55], [50.0, 50.79], [52.0, 61.74], [64.0, 70.83], [73.0, 86.04], [101.0, 114.15], [115.0, 115.57], [117.0, 122.72], [128.0, 128.39], [129.0, 130.15], [134.0, 135.38], [136.0, 137.0], [141.0, 141.02], [142.0, 146.31], [152.0, 156.84], [161.0, 161.97], [167.0, 172.66], [173.0, 174.9]], "keep_status": [false, false, true, false, false, true, false, true, false, true, false, false, false, false, false, true, false, false, true, false], "silence_prob": [32.22, 0.0, 32.6, 0.0, 34.22, 33.96, 34.56, 33.99, 0.0, 34.91, 0.0, 0.0, 0.0, 0.0, 0.0, 36.75, 63.64, 0.0, 35.58, 0.0], "audiomae_on_audioset": [[["music", 44.84], ["speech", 19.84], ["throbbing", 10.25]], null, [["music", 37.8], ["speech", 26.58], ["hum", 4.77]], null, [["music", 51.83], ["speech", 21.56], ["hum", 5.21]], [["music", 37.02], ["hum", 18.68], ["mains hum", 12.15]], [["hum", 38.0], ["throbbing", 18.42], ["music", 18.25]], [["speech", 32.13], ["buzz", 25.72], ["vehicle", 9.6]], null, [["hum", 22.53], ["music", 22.48], ["mains hum", 21.9]], null, null, null, null, null, [["speech", 40.96], ["music", 17.52], ["hum", 6.61]], null, null, [["speech", 57.15], ["music", 7.3], ["rumble", 3.78]], null], "duration": [19.86, 0.4, 6.55, 0.79, 9.74, 6.83, 13.04, 13.15, 0.57, 5.72, 0.39, 1.15, 1.38, 1.0, 0.02, 4.31, 4.84, 0.97, 5.66, 1.9]}
|
annotations_filtered/j2JFTz9KQhk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[12.0, 12.85], [14.0, 32.04], [37.0, 38.26], [40.0, 47.98], [50.0, 51.44], [53.0, 63.09], [63.0, 72.32], [73.0, 73.28], [74.0, 74.26], [77.0, 83.13], [84.0, 85.21], [86.0, 88.89], [90.0, 93.21], [94.0, 94.24], [96.0, 98.51], [99.0, 99.13], [101.0, 101.21], [103.0, 102.76], [109.0, 109.29], [123.0, 124.5], [129.0, 130.23], [133.0, 132.68], [137.0, 137.13], [138.0, 139.68]], "keep_status": [false, true, false, true, false, true, true, false, false, false, false, true, true, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 34.72, 0.0, 34.88, 0.0, 32.09, 30.54, 0.0, 0.0, 32.85, 0.0, 30.19, 31.94, 0.0, 81.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 55.43], ["throbbing", 8.11], ["hum", 5.67]], null, [["music", 23.4], ["throbbing", 15.32], ["hum", 5.86]], null, [["fly, housefly", 29.84], ["insect", 18.46], ["mosquito", 17.36]], [["hum", 23.12], ["speech", 20.18], ["music", 19.81]], null, null, [["hum", 49.85], ["music", 19.37], ["throbbing", 17.11]], null, [["hum", 27.64], ["music", 15.93], ["throbbing", 15.17]], [["vehicle", 17.77], ["music", 16.51], ["hum", 10.52]], null, null, null, null, null, null, null, null, null, null, null], "duration": [0.85, 18.04, 1.26, 7.98, 1.44, 10.09, 9.32, 0.28, 0.26, 6.13, 1.21, 2.89, 3.21, 0.24, 2.51, 0.13, 0.21, -0.24, 0.29, 1.5, 1.23, -0.32, 0.13, 1.68]}
|
annotations_filtered/j2MbvFYy_8Y_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 6.86], [11.0, 11.23], [13.0, 13.32], [15.0, 14.91], [19.0, 22.06], [26.0, 26.81], [29.0, 29.73], [31.0, 48.03], [48.0, 48.44], [50.0, 49.92], [54.0, 58.48], [64.0, 65.43], [67.0, 67.42], [69.0, 69.74], [73.0, 75.02], [76.0, 76.79], [77.0, 78.19], [81.0, 82.22], [83.0, 83.86], [86.0, 91.67], [94.0, 98.02], [99.0, 99.57], [101.0, 101.43], [102.0, 102.41], [104.0, 105.19], [119.0, 119.31], [120.0, 121.04], [122.0, 123.08], [125.0, 141.0], [144.0, 155.76]], "keep_status": [false, false, false, false, true, false, false, true, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 38.71, 0.0, 0.0, 30.07, 0.0, 0.0, 41.62, 0.0, 0.0, 0.0, 47.12, 0.0, 0.0, 0.0, 0.0, 73.06, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 31.68, 32.52], "audiomae_on_audioset": [null, null, null, null, [["music", 51.0], ["musical instrument", 7.89], ["percussion", 4.2]], null, null, [["music", 27.78], ["noise", 15.02], ["mains hum", 9.2]], null, null, [["breaking", 22.9], ["crushing", 14.49], ["thunk", 12.59]], null, null, null, [["sidetone", 46.67], ["speech", 41.12], ["echo", 1.99]], null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 67.85], ["musical instrument", 6.8], ["effects unit", 2.0]], [["music", 62.93], ["speech", 11.34], ["guitar", 3.87]]], "duration": [0.86, 0.23, 0.32, -0.09, 3.06, 0.81, 0.73, 17.03, 0.44, -0.08, 4.48, 1.43, 0.42, 0.74, 2.02, 0.79, 1.19, 1.22, 0.86, 5.67, 4.02, 0.57, 0.43, 0.41, 1.19, 0.31, 1.04, 1.08, 16.0, 11.76]}
|
annotations_filtered/j2SPawJewxA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[53.0, 74.93], [76.0, 115.6], [117.0, 119.15], [121.0, 120.83]], "keep_status": [false, false, true, false], "silence_prob": [38.45, 0.0, 35.93, 0.0], "audiomae_on_audioset": [[["music", 52.92], ["speech", 13.5], ["fart", 11.26]], null, [["speech", 14.62], ["music", 11.49], ["crowd", 8.18]], null], "duration": [21.93, 39.6, 2.15, -0.17]}
|
annotations_filtered/j2ZsEQ4Fr4c_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 8.19], [41.0, 40.78], [73.0, 73.43], [77.0, 78.02], [79.0, 80.74], [82.0, 83.66], [87.0, 87.07], [88.0, 89.29], [90.0, 92.2], [93.0, 93.02], [94.0, 95.57], [96.0, 98.54], [99.0, 100.67]], "keep_status": [true, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [32.51, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 60.23, 0.0, 0.0, 77.36, 0.0], "audiomae_on_audioset": [[["speech", 22.34], ["music", 17.83], ["mosquito", 11.07]], null, null, null, null, null, null, null, null, null, null, null, null], "duration": [7.19, -0.22, 0.43, 1.02, 1.74, 1.66, 0.07, 1.29, 2.2, 0.02, 1.57, 2.54, 1.67]}
|
annotations_filtered/j2aGGNQW_7M_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 8.72], [9.0, 10.23], [11.0, 11.13], [12.0, 13.76], [15.0, 19.01], [23.0, 24.7], [26.0, 36.66], [37.0, 38.21], [39.0, 59.1], [59.0, 62.23], [63.0, 82.9]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 97.33, 0.0, 97.64, 0.0, 92.8, 99.73, 94.95], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null], "duration": [1.72, 1.23, 0.13, 1.76, 4.01, 1.7, 10.66, 1.21, 20.1, 3.23, 19.9]}
|
annotations_filtered/j2e41FeccuA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 4.11], [15.0, 15.58], [17.0, 17.81], [18.0, 25.27], [26.0, 26.94], [33.0, 35.48], [40.0, 45.3], [46.0, 52.41], [53.0, 54.16], [55.0, 56.57], [64.0, 65.25], [67.0, 67.86], [68.0, 69.47], [72.0, 77.06], [78.0, 79.37], [82.0, 82.56], [84.0, 85.41], [89.0, 90.66], [92.0, 93.23], [94.0, 95.27], [97.0, 97.17], [99.0, 101.8], [103.0, 106.73], [109.0, 109.7], [111.0, 114.42], [115.0, 116.53], [120.0, 127.5]], "keep_status": [false, false, false, true, false, true, true, false, false, false, false, false, false, true, false, false, false, false, false, false, false, true, false, false, true, false, false], "silence_prob": [0.0, 0.0, 0.0, 34.83, 0.0, 34.62, 31.17, 29.72, 0.0, 0.0, 0.0, 0.0, 0.0, 45.14, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 41.78, 38.82, 0.0, 37.89, 0.0, 33.55], "audiomae_on_audioset": [null, null, null, [["music", 32.12], ["speech", 24.97], ["theremin", 5.18]], null, [["music", 25.81], ["noise", 9.46], ["fart", 4.69]], [["music", 27.64], ["throbbing", 22.04], ["hum", 19.6]], [["speech", 38.79], ["music", 26.93], ["fart", 9.93]], null, null, null, null, null, [["music", 54.18], ["synthesizer", 5.8], ["musical instrument", 4.04]], null, null, null, null, null, null, null, [["music", 61.7], ["musical instrument", 4.71], ["guitar", 3.5]], [["music", 66.9], ["ambient music", 6.15], ["electronic music", 5.04]], null, [["music", 44.39], ["hum", 12.9], ["speech", 6.54]], null, [["theremin", 43.09], ["music", 36.81], ["synthesizer", 6.0]]], "duration": [1.11, 0.58, 0.81, 7.27, 0.94, 2.48, 5.3, 6.41, 1.16, 1.57, 1.25, 0.86, 1.47, 5.06, 1.37, 0.56, 1.41, 1.66, 1.23, 1.27, 0.17, 2.8, 3.73, 0.7, 3.42, 1.53, 7.5]}
|
annotations_filtered/j32LbrHGak0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[19.0, 20.53], [22.0, 26.94], [28.0, 28.11]], "keep_status": [false, false, false], "silence_prob": [0.0, 32.39, 0.0], "audiomae_on_audioset": [null, [["music", 38.57], ["hum", 17.1], ["throbbing", 16.0]], null], "duration": [1.53, 4.94, 0.11]}
|
annotations_filtered/j38t2lDi4GU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[30.0, 30.01], [31.0, 32.44], [37.0, 47.31], [48.0, 47.76], [48.0, 47.8], [49.0, 53.37], [60.0, 59.76], [67.0, 69.11], [70.0, 70.95], [73.0, 78.07], [78.0, 78.12], [79.0, 82.92], [87.0, 102.24], [103.0, 136.61]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 31.18, 0.0, 0.0, 31.57, 0.0, 35.9, 0.0, 33.86, 0.0, 34.48, 33.0, 0.0], "audiomae_on_audioset": [null, null, [["music", 61.62], ["theremin", 16.34], ["musical instrument", 4.35]], null, null, [["music", 70.21], ["theremin", 13.05], ["musical instrument", 2.06]], null, [["music", 46.78], ["theremin", 14.38], ["didgeridoo", 13.67]], null, [["music", 63.28], ["ambient music", 8.94], ["synthesizer", 3.88]], null, [["music", 69.04], ["ambient music", 4.92], ["synthesizer", 3.93]], [["music", 77.75], ["ambient music", 6.04], ["electronic music", 2.7]], null], "duration": [0.01, 1.44, 10.31, -0.24, -0.2, 4.37, -0.24, 2.11, 0.95, 5.07, 0.12, 3.92, 15.24, 33.61]}
|
annotations_filtered/j3MZdcbv-ew_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[11.0, 11.74], [42.0, 42.69], [44.0, 44.49], [49.0, 48.93], [52.0, 52.25], [76.0, 76.92], [85.0, 85.09], [88.0, 89.04], [95.0, 95.74], [98.0, 98.54], [101.0, 101.92], [118.0, 117.98], [118.0, 118.1]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.74, 0.69, 0.49, -0.07, 0.25, 0.92, 0.09, 1.04, 0.74, 0.54, 0.92, -0.02, 0.1]}
|
annotations_filtered/j3d3mrWBTpM_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.45], [6.0, 7.05], [8.0, 8.78], [11.0, 11.43], [12.0, 13.66], [14.0, 16.12], [18.0, 19.33], [21.0, 21.95], [23.0, 24.6], [25.0, 27.73], [32.0, 33.69], [34.0, 35.51], [40.0, 40.24], [42.0, 42.72], [45.0, 46.57], [49.0, 50.89], [54.0, 54.65], [60.0, 60.32], [62.0, 61.87], [62.0, 61.97], [62.0, 62.02], [62.0, 62.09], [62.0, 62.18], [63.0, 75.24], [76.0, 77.9], [79.0, 80.13], [82.0, 82.8], [88.0, 88.8], [89.0, 90.9], [93.0, 93.46], [96.0, 98.24], [101.0, 101.36], [103.0, 103.23], [105.0, 106.69], [109.0, 110.25], [111.0, 113.64], [118.0, 118.5], [120.0, 121.68], [126.0, 125.69], [128.0, 129.46]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 99.98, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 99.98, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 99.8, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.45, 1.05, 0.78, 0.43, 1.66, 2.12, 1.33, 0.95, 1.6, 2.73, 1.69, 1.51, 0.24, 0.72, 1.57, 1.89, 0.65, 0.32, -0.13, -0.03, 0.02, 0.09, 0.18, 12.24, 1.9, 1.13, 0.8, 0.8, 1.9, 0.46, 2.24, 0.36, 0.23, 1.69, 1.25, 2.64, 0.5, 1.68, -0.31, 1.46]}
|
annotations_filtered/j40IcG_BZuc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 6.17], [8.0, 9.46], [15.0, 29.34], [30.0, 41.96], [45.0, 45.59], [46.0, 46.33], [47.0, 50.41], [54.0, 56.07], [56.0, 56.81], [57.0, 69.62], [81.0, 90.19], [91.0, 91.32], [94.0, 100.58], [108.0, 118.3], [119.0, 124.78], [125.0, 125.81], [127.0, 127.52], [129.0, 137.19], [137.0, 138.0], [139.0, 141.05], [142.0, 142.06], [147.0, 148.31], [149.0, 149.84], [150.0, 151.53], [152.0, 153.06], [154.0, 155.19], [156.0, 164.64]], "keep_status": [false, false, false, true, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 40.82, 47.5, 0.0, 0.0, 36.12, 64.52, 0.0, 40.88, 48.56, 0.0, 29.67, 30.03, 31.82, 0.0, 0.0, 37.24, 0.0, 46.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.67], "audiomae_on_audioset": [null, null, [["hum", 50.78], ["throbbing", 16.57], ["mains hum", 12.27]], [["hum", 30.44], ["speech", 17.26], ["mains hum", 14.99]], null, null, [["mains hum", 31.04], ["hum", 16.8], ["music", 14.31]], null, null, [["music", 55.17], ["speech", 11.98], ["theremin", 9.81]], [["mains hum", 62.29], ["hum", 23.57], ["music", 3.57]], null, [["music", 46.67], ["brass instrument", 18.33], ["musical instrument", 9.87]], [["didgeridoo", 37.11], ["music", 32.96], ["theremin", 11.93]], [["music", 61.67], ["musical instrument", 7.05], ["didgeridoo", 6.5]], null, null, [["music", 34.49], ["theremin", 33.11], ["musical instrument", 6.17]], null, [["music", 23.71], ["civil defense siren", 14.64], ["theremin", 11.9]], null, null, null, null, null, null, [["music", 56.46], ["theremin", 15.24], ["musical instrument", 3.57]]], "duration": [1.17, 1.46, 14.34, 11.96, 0.59, 0.33, 3.41, 2.07, 0.81, 12.62, 9.19, 0.32, 6.58, 10.3, 5.78, 0.81, 0.52, 8.19, 1.0, 2.05, 0.06, 1.31, 0.84, 1.53, 1.06, 1.19, 8.64]}
|
annotations_filtered/j42TrAVceCI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [], "keep_status": [], "silence_prob": [], "audiomae_on_audioset": [], "duration": []}
|
annotations_filtered/j477dAxaeck_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[23.0, 26.05], [28.0, 30.89], [32.0, 32.85], [35.0, 35.95], [38.0, 39.01], [42.0, 41.84], [44.0, 46.89], [48.0, 48.47], [49.0, 49.06], [53.0, 54.67], [60.0, 61.2], [62.0, 62.24], [70.0, 69.94], [71.0, 72.94], [74.0, 98.59], [99.0, 102.3], [106.0, 106.35], [107.0, 107.11], [119.0, 119.3], [122.0, 122.17], [124.0, 125.85], [129.0, 130.79], [132.0, 132.53], [136.0, 137.32], [143.0, 144.81], [145.0, 145.91], [150.0, 151.04], [151.0, 152.83], [166.0, 166.55], [167.0, 168.34], [169.0, 171.54], [174.0, 177.08], [177.0, 178.34], [179.0, 179.24], [180.0, 179.98]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [87.19, 94.66, 0.0, 0.0, 0.0, 0.0, 95.37, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 31.43, 99.65, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 96.04, 99.84, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["thunk", 23.73], ["whack, thwack", 20.65], ["music", 8.51]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [3.05, 2.89, 0.85, 0.95, 1.01, -0.16, 2.89, 0.47, 0.06, 1.67, 1.2, 0.24, -0.06, 1.94, 24.59, 3.3, 0.35, 0.11, 0.3, 0.17, 1.85, 1.79, 0.53, 1.32, 1.81, 0.91, 1.04, 1.83, 0.55, 1.34, 2.54, 3.08, 1.34, 0.24, -0.02]}
|
annotations_filtered/j4onAJ-3FAM_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.44], [5.0, 4.94], [5.0, 7.65], [8.0, 7.86], [8.0, 8.01], [9.0, 8.56], [10.0, 9.63], [11.0, 24.88], [28.0, 36.85], [43.0, 52.2], [52.0, 54.19], [55.0, 60.15], [61.0, 77.97], [79.0, 83.86]], "keep_status": [false, false, false, false, false, false, false, false, true, false, false, true, true, true], "silence_prob": [0.0, 0.0, 32.68, 0.0, 0.0, 0.0, 0.0, 31.84, 32.89, 33.3, 40.38, 30.89, 29.1, 29.22], "audiomae_on_audioset": [null, null, [["music", 59.93], ["speech", 10.05], ["electronic music", 6.49]], null, null, null, null, [["throbbing", 33.85], ["hum", 27.74], ["music", 25.22]], [["music", 39.41], ["throbbing", 18.43], ["fart", 5.85]], [["music", 46.49], ["throbbing", 23.09], ["hum", 15.45]], [["music", 74.41], ["throbbing", 11.33], ["hum", 2.95]], [["music", 49.95], ["speech", 7.65], ["electronic music", 4.5]], [["music", 44.19], ["throbbing", 13.42], ["buzz", 10.64]], [["explosion", 30.12], ["whack, thwack", 10.38], ["music", 5.6]]], "duration": [0.44, -0.06, 2.65, -0.14, 0.01, -0.44, -0.37, 13.88, 8.85, 9.2, 2.19, 5.15, 16.97, 4.86]}
|
annotations_filtered/j4ujHOSbQB0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[23.0, 23.65], [43.0, 43.29], [46.0, 45.89], [47.0, 47.29], [49.0, 49.32], [54.0, 54.24], [56.0, 56.49], [58.0, 60.42], [61.0, 63.36], [71.0, 70.75], [78.0, 83.22], [84.0, 85.72], [87.0, 87.57]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 73.21, 50.61, 0.0, 72.46, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.65, 0.29, -0.11, 0.29, 0.32, 0.24, 0.49, 2.42, 2.36, -0.25, 5.22, 1.72, 0.57]}
|
annotations_filtered/j4yXEmQRq34_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.49], [3.0, 3.37], [7.0, 7.59], [9.0, 9.76], [10.0, 11.69], [15.0, 16.29], [23.0, 23.25], [26.0, 26.42], [27.0, 28.27], [29.0, 29.32], [30.0, 31.33], [39.0, 39.87], [43.0, 48.76], [51.0, 52.34], [53.0, 56.3], [61.0, 61.67], [66.0, 67.22], [68.0, 69.28], [71.0, 71.07], [71.0, 73.7], [75.0, 89.45], [92.0, 94.34], [97.0, 103.81], [105.0, 107.05], [112.0, 146.65], [147.0, 147.88], [150.0, 150.89]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 50.51, 0.0, 43.38, 0.0, 0.0, 0.0, 0.0, 52.05, 43.15, 34.83, 78.38, 90.95, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["mains hum", 51.06], ["hum", 34.65], ["speech", 4.96]], null, null, null, null, null, [["speech", 42.36], ["animal", 28.15], ["stomach rumble", 7.58]], [["music", 25.38], ["effects unit", 19.25], ["noise", 11.07]], null, null, null, null, null], "duration": [0.49, 0.37, 0.59, 0.76, 1.69, 1.29, 0.25, 0.42, 1.27, 0.32, 1.33, 0.87, 5.76, 1.34, 3.3, 0.67, 1.22, 1.28, 0.07, 2.7, 14.45, 2.34, 6.81, 2.05, 34.65, 0.88, 0.89]}
|
annotations_filtered/j5B70NEq_fY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[11.0, 10.94], [13.0, 13.0], [13.0, 14.03], [18.0, 19.94], [22.0, 22.49], [26.0, 26.28], [30.0, 31.26], [33.0, 34.72], [37.0, 38.21], [46.0, 51.24], [55.0, 56.83], [59.0, 62.56], [63.0, 64.86], [66.0, 69.28], [70.0, 71.04], [72.0, 72.84], [85.0, 89.97], [91.0, 96.25]], "keep_status": [false, false, false, false, false, false, false, false, false, true, false, true, false, true, false, false, false, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 38.1, 0.0, 30.67, 0.0, 31.79, 0.0, 0.0, 91.64, 31.3], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, [["speech", 17.69], ["hum", 12.59], ["music", 7.05]], null, [["speech", 28.15], ["crowd", 14.51], ["cheering", 13.03]], null, [["music", 57.36], ["crackle", 3.23], ["applause", 2.92]], null, null, null, [["music", 63.04], ["musical instrument", 4.46], ["echo", 2.07]]], "duration": [-0.06, 0.0, 1.03, 1.94, 0.49, 0.28, 1.26, 1.72, 1.21, 5.24, 1.83, 3.56, 1.86, 3.28, 1.04, 0.84, 4.97, 5.25]}
|
annotations_filtered/j5Fd6TqePnk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.81], [4.0, 5.09], [7.0, 8.73], [10.0, 12.88], [14.0, 14.34], [15.0, 19.18], [19.0, 22.11], [23.0, 31.65], [33.0, 34.3], [36.0, 40.41], [42.0, 43.44], [45.0, 45.0], [45.0, 48.32], [49.0, 51.97], [53.0, 56.96], [58.0, 62.4], [63.0, 63.49], [65.0, 65.43], [67.0, 69.28], [72.0, 75.61], [80.0, 81.87], [84.0, 86.61], [88.0, 93.56], [95.0, 97.75], [99.0, 101.93], [106.0, 108.73], [110.0, 114.0], [117.0, 120.58], [125.0, 128.26], [130.0, 132.88], [134.0, 135.48], [137.0, 137.89], [139.0, 142.8], [144.0, 145.29], [147.0, 148.56], [149.0, 156.68], [157.0, 156.95]], "keep_status": [false, false, false, true, false, true, true, true, false, false, false, false, true, false, true, true, false, false, false, false, false, false, false, false, true, true, true, true, true, false, false, false, true, false, false, true, false], "silence_prob": [0.0, 0.0, 0.0, 39.64, 0.0, 40.62, 35.05, 35.59, 0.0, 88.1, 0.0, 0.0, 30.89, 56.93, 44.96, 47.2, 0.0, 0.0, 71.29, 72.75, 0.0, 53.59, 68.02, 53.65, 42.81, 37.63, 48.14, 41.89, 41.44, 73.82, 0.0, 0.0, 44.93, 0.0, 0.0, 34.83, 0.0], "audiomae_on_audioset": [null, null, null, [["throbbing", 19.27], ["hum", 15.55], ["heart sounds, heartbeat", 9.61]], null, [["speech", 33.49], ["sidetone", 10.89], ["radio", 9.15]], [["fart", 26.61], ["music", 22.76], ["effects unit", 6.35]], [["speech", 23.5], ["noise", 21.12], ["radio", 16.37]], null, null, null, null, [["speech", 29.75], ["radio", 21.07], ["music", 9.38]], null, [["radio", 20.97], ["speech", 18.87], ["noise", 3.78]], [["hum", 36.02], ["mains hum", 11.44], ["burping, eructation", 9.43]], null, null, null, null, null, null, null, null, [["fly, housefly", 13.73], ["hum", 12.61], ["whale vocalization", 11.54]], [["burping, eructation", 8.53], ["hum", 6.87], ["growling", 6.83]], [["dog", 18.44], ["bow-wow", 15.65], ["animal", 14.61]], [["hum", 40.22], ["inside, small room", 7.39], ["mains hum", 7.02]], [["dog", 35.09], ["domestic animals, pets", 15.36], ["animal", 10.45]], null, null, null, [["creak", 17.58], ["hum", 9.14], ["mains hum", 5.81]], null, null, [["theremin", 20.2], ["whale vocalization", 18.58], ["hum", 13.89]], null], "duration": [0.81, 1.09, 1.73, 2.88, 0.34, 4.18, 3.11, 8.65, 1.3, 4.41, 1.44, 0.0, 3.32, 2.97, 3.96, 4.4, 0.49, 0.43, 2.28, 3.61, 1.87, 2.61, 5.56, 2.75, 2.93, 2.73, 4.0, 3.58, 3.26, 2.88, 1.48, 0.89, 3.8, 1.29, 1.56, 7.68, -0.05]}
|
annotations_filtered/j638xTM36I8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 30.5], [31.0, 88.25], [90.0, 91.79], [94.0, 95.22], [97.0, 98.96], [100.0, 101.73], [103.0, 103.25], [106.0, 105.83], [106.0, 107.64], [108.0, 108.95], [110.0, 110.49], [111.0, 111.86], [112.0, 113.43], [114.0, 114.44], [115.0, 115.84], [116.0, 117.51]], "keep_status": [true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [35.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["music", 47.44], ["speech", 15.07], ["horse", 5.92]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [29.5, 57.25, 1.79, 1.22, 1.96, 1.73, 0.25, -0.17, 1.64, 0.95, 0.49, 0.86, 1.43, 0.44, 0.84, 1.51]}
|
annotations_filtered/j66Fsl_q5Ig_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[11.0, 16.04], [19.0, 21.76], [23.0, 23.36], [25.0, 25.95], [28.0, 29.08], [32.0, 34.87], [38.0, 38.04], [39.0, 53.35], [59.0, 59.98], [63.0, 74.9], [76.0, 90.21], [92.0, 94.04], [99.0, 99.98], [102.0, 105.02], [106.0, 107.05], [111.0, 112.13], [113.0, 113.7], [115.0, 115.48], [119.0, 142.5], [146.0, 151.43], [154.0, 166.62], [168.0, 198.41]], "keep_status": [false, false, false, false, false, true, false, true, false, true, false, true, false, true, false, false, false, false, false, true, false, false], "silence_prob": [31.03, 43.58, 0.0, 0.0, 0.0, 31.17, 0.0, 31.2, 0.0, 30.56, 30.17, 32.12, 0.0, 31.42, 0.0, 0.0, 0.0, 0.0, 30.91, 31.26, 30.95, 0.0], "audiomae_on_audioset": [[["music", 41.92], ["throbbing", 19.18], ["hum", 9.42]], [["music", 68.98], ["synthesizer", 3.41], ["musical instrument", 2.98]], null, null, null, [["speech", 22.66], ["music", 21.91], ["eruption", 3.77]], null, [["music", 56.6], ["electronic music", 6.76], ["cacophony", 4.1]], null, [["music", 38.69], ["speech", 25.57], ["hum", 4.62]], [["mains hum", 34.69], ["music", 24.04], ["hum", 16.33]], [["music", 33.04], ["hum", 8.99], ["throbbing", 8.05]], null, [["music", 52.22], ["hum", 6.57], ["cacophony", 5.99]], null, null, null, null, [["music", 55.75], ["throbbing", 14.39], ["hum", 10.11]], [["music", 36.34], ["throbbing", 20.42], ["hum", 12.54]], [["music", 46.95], ["speech", 26.96], ["electric shaver, electric razor", 2.43]], null], "duration": [5.04, 2.76, 0.36, 0.95, 1.08, 2.87, 0.04, 14.35, 0.98, 11.9, 14.21, 2.04, 0.98, 3.02, 1.05, 1.13, 0.7, 0.48, 23.5, 5.43, 12.62, 30.41]}
|
annotations_filtered/j6_umKYN_JU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[15.0, 15.36], [50.0, 49.87], [50.0, 50.8], [106.0, 114.94], [119.0, 122.99], [126.0, 130.33]], "keep_status": [false, false, false, false, true, true], "silence_prob": [0.0, 0.0, 0.0, 33.2, 28.87, 29.21], "audiomae_on_audioset": [null, null, null, [["music", 37.73], ["speech", 30.94], ["livestock, farm animals, working animals", 5.76]], [["music", 22.72], ["boom", 16.14], ["smash, crash", 7.83]], [["music", 21.45], ["explosion", 18.67], ["fart", 7.73]]], "duration": [0.36, -0.13, 0.8, 8.94, 3.99, 4.33]}
|
annotations_filtered/j6gLJ4_sfG8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[33.0, 35.7], [37.0, 61.03], [63.0, 65.33], [67.0, 69.43], [72.0, 75.17], [78.0, 78.9], [80.0, 82.7], [84.0, 90.78], [92.0, 93.53], [95.0, 95.4], [97.0, 98.27], [100.0, 101.38], [102.0, 103.67], [105.0, 107.87], [111.0, 126.33]], "keep_status": [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [41.76, 48.56, 47.98, 53.53, 71.43, 0.0, 82.61, 62.17, 0.0, 0.0, 0.0, 0.0, 0.0, 70.58, 69.2], "audiomae_on_audioset": [[["mains hum", 25.83], ["hum", 25.76], ["music", 12.91]], [["music", 25.96], ["fly, housefly", 13.68], ["bee, wasp, etc.", 8.28]], [["speech", 29.94], ["music", 15.06], ["throbbing", 5.91]], null, null, null, null, null, null, null, null, null, null, null, null], "duration": [2.7, 24.03, 2.33, 2.43, 3.17, 0.9, 2.7, 6.78, 1.53, 0.4, 1.27, 1.38, 1.67, 2.87, 15.33]}
|
annotations_filtered/j6oBbBfhgYE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 1.12], [4.0, 3.89], [5.0, 7.64], [18.0, 18.39], [19.0, 21.34], [23.0, 27.38], [30.0, 30.79], [34.0, 42.79], [43.0, 44.64]], "keep_status": [false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 59.86, 0.0, 89.19, 61.18, 0.0, 36.38, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, [["speech", 89.83], ["meow", 1.43], ["speech synthesizer", 1.42]], null], "duration": [1.12, -0.11, 2.64, 0.39, 2.34, 4.38, 0.79, 8.79, 1.64]}
|
annotations_filtered/j6oHprwdTeA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 9.58], [16.0, 16.9], [17.0, 19.03], [21.0, 21.57], [31.0, 32.21], [38.0, 37.81], [38.0, 39.11], [41.0, 40.69], [44.0, 44.52], [49.0, 48.81], [49.0, 50.01], [53.0, 54.68], [56.0, 56.24], [57.0, 57.13], [65.0, 65.96], [77.0, 80.74], [82.0, 83.54], [89.0, 89.41], [96.0, 99.81], [103.0, 103.11]], "keep_status": [false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false], "silence_prob": [0.0, 0.0, 44.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 68.54, 0.0, 0.0, 41.03, 0.0], "audiomae_on_audioset": [null, null, [["speech", 47.85], ["sidetone", 10.12], ["radio", 4.12]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 24.52], ["throbbing", 17.09], ["synthesizer", 8.84]], null], "duration": [1.58, 0.9, 2.03, 0.57, 1.21, -0.19, 1.11, -0.31, 0.52, -0.19, 1.01, 1.68, 0.24, 0.13, 0.96, 3.74, 1.54, 0.41, 3.81, 0.11]}
|
annotations_filtered/j6qjibwpEzM_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.79], [5.0, 5.76], [7.0, 7.37], [13.0, 13.22], [23.0, 22.86], [31.0, 31.11], [32.0, 32.51], [34.0, 34.03], [37.0, 37.44], [38.0, 38.87], [43.0, 43.55], [51.0, 54.7], [88.0, 89.34], [119.0, 119.26], [124.0, 128.09], [130.0, 130.32], [139.0, 139.28], [176.0, 176.49], [177.0, 177.35], [192.0, 193.06]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 28.99, 0.0, 0.0, 33.99, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, [["clang", 25.92], ["speech", 19.74], ["firecracker", 10.41]], null, null, [["music", 79.67], ["boing", 10.04], ["marimba, xylophone", 2.18]], null, null, null, null, null], "duration": [0.79, 0.76, 0.37, 0.22, -0.14, 0.11, 0.51, 0.03, 0.44, 0.87, 0.55, 3.7, 1.34, 0.26, 4.09, 0.32, 0.28, 0.49, 0.35, 1.06]}
|
annotations_filtered/j71oHN1i2pU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[19.0, 18.98], [29.0, 29.34], [30.0, 30.45], [31.0, 31.48], [32.0, 35.43], [38.0, 38.5], [55.0, 66.9], [67.0, 68.82], [70.0, 82.27], [86.0, 85.95], [89.0, 108.94]], "keep_status": [false, false, false, false, true, false, false, false, false, false, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 35.08, 0.0, 28.95, 0.0, 31.53, 0.0, 29.8], "audiomae_on_audioset": [null, null, null, null, [["music", 14.46], ["grunt", 14.01], ["groan", 13.98]], null, [["music", 65.78], ["speech", 4.59], ["hum", 4.4]], null, [["music", 48.5], ["speech", 19.42], ["foghorn", 4.17]], null, [["livestock, farm animals, working animals", 41.39], ["cattle, bovinae", 12.81], ["moo", 8.54]]], "duration": [-0.02, 0.34, 0.45, 0.48, 3.43, 0.5, 11.9, 1.82, 12.27, -0.05, 19.94]}
|
annotations_filtered/j7O-SUEh-54_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 7.74], [8.0, 8.92], [10.0, 10.56], [11.0, 11.58], [13.0, 13.71], [15.0, 15.97], [17.0, 17.64], [19.0, 23.7], [25.0, 25.52], [26.0, 27.77], [28.0, 29.02], [30.0, 30.67], [31.0, 32.22], [33.0, 35.13], [36.0, 36.14], [40.0, 40.19], [41.0, 41.52], [42.0, 48.49], [49.0, 50.13], [51.0, 52.1], [54.0, 55.95], [62.0, 68.64], [70.0, 70.68], [74.0, 75.95], [79.0, 80.59], [83.0, 83.39], [85.0, 84.96], [86.0, 86.36], [96.0, 96.43], [97.0, 98.1], [101.0, 102.32], [105.0, 105.8], [109.0, 109.44], [112.0, 112.56], [122.0, 126.76], [127.0, 127.53], [132.0, 132.92], [134.0, 135.03], [137.0, 140.78], [145.0, 150.03], [154.0, 154.57], [158.0, 158.72], [166.0, 166.48], [166.0, 168.66], [170.0, 173.5], [179.0, 179.76], [180.0, 181.8], [183.0, 184.05], [185.0, 185.18], [188.0, 188.06], [192.0, 193.41], [195.0, 195.44], [198.0, 203.81], [205.0, 209.9], [221.0, 220.67], [221.0, 222.52], [224.0, 225.39], [229.0, 236.55], [237.0, 239.09], [240.0, 241.51]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, false, false, false, true, true, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 56.48, 0.0, 0.0, 0.0, 0.0, 0.0, 67.13, 0.0, 0.0, 0.0, 36.13, 0.0, 0.0, 0.0, 67.89, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 33.42, 0.0, 0.0, 0.0, 42.55, 39.93, 0.0, 0.0, 0.0, 35.77, 41.74, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.02, 31.97, 0.0, 0.0, 0.0, 36.61, 36.24, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 57.93], ["groan", 2.59], ["quack", 2.36]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 23.09], ["whale vocalization", 11.94], ["whack, thwack", 11.7]], null, null, null, [["fly, housefly", 47.17], ["insect", 13.97], ["speech", 10.92]], [["speech", 20.14], ["radio", 17.62], ["music", 12.36]], null, null, null, [["speech", 81.45], ["radio", 2.69], ["gunshot, gunfire", 0.9]], [["speech", 63.63], ["music", 4.12], ["hum", 2.92]], null, null, null, null, null, null, null, [["livestock, farm animals, working animals", 24.73], ["fly, housefly", 11.84], ["moo", 10.3]], [["whale vocalization", 24.28], ["livestock, farm animals, working animals", 19.1], ["cattle, bovinae", 17.72]], null, null, null, [["hum", 34.78], ["mains hum", 19.88], ["throbbing", 8.19]], [["music", 30.24], ["whale vocalization", 12.02], ["speech", 4.57]], null], "duration": [0.74, 0.92, 0.56, 0.58, 0.71, 0.97, 0.64, 4.7, 0.52, 1.77, 1.02, 0.67, 1.22, 2.13, 0.14, 0.19, 0.52, 6.49, 1.13, 1.1, 1.95, 6.64, 0.68, 1.95, 1.59, 0.39, -0.04, 0.36, 0.43, 1.1, 1.32, 0.8, 0.44, 0.56, 4.76, 0.53, 0.92, 1.03, 3.78, 5.03, 0.57, 0.72, 0.48, 2.66, 3.5, 0.76, 1.8, 1.05, 0.18, 0.06, 1.41, 0.44, 5.81, 4.9, -0.33, 1.52, 1.39, 7.55, 2.09, 1.51]}
|
annotations_filtered/j7PgnjEiMcA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 3.08], [7.0, 7.15], [10.0, 19.72], [25.0, 28.36], [33.0, 39.04], [42.0, 45.99], [47.0, 47.83], [53.0, 53.55], [60.0, 60.47], [65.0, 65.15], [68.0, 68.52], [70.0, 79.12], [79.0, 96.65], [98.0, 103.22], [105.0, 105.36], [106.0, 116.7], [123.0, 123.23], [132.0, 134.79], [135.0, 136.7], [140.0, 146.21], [150.0, 150.63]], "keep_status": [true, false, false, true, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, true, false], "silence_prob": [33.45, 0.0, 36.65, 37.52, 36.06, 37.99, 0.0, 0.0, 0.0, 0.0, 0.0, 35.51, 35.53, 35.65, 0.0, 31.34, 0.0, 38.46, 0.0, 33.94, 0.0], "audiomae_on_audioset": [[["music", 36.3], ["theremin", 11.42], ["musical instrument", 11.24]], null, [["music", 70.44], ["musical instrument", 5.88], ["keyboard (musical)", 2.0]], [["music", 34.98], ["didgeridoo", 11.97], ["musical instrument", 7.56]], [["music", 69.36], ["musical instrument", 6.01], ["percussion", 1.76]], [["music", 75.28], ["musical instrument", 4.33], ["theremin", 3.57]], null, null, null, null, null, [["music", 74.18], ["musical instrument", 4.34], ["theremin", 3.3]], [["music", 60.71], ["musical instrument", 7.24], ["double bass", 4.86]], [["music", 55.22], ["didgeridoo", 7.45], ["musical instrument", 5.03]], null, [["music", 69.13], ["music of latin america", 3.49], ["musical instrument", 3.41]], null, [["music", 51.95], ["didgeridoo", 17.17], ["guitar", 6.3]], null, [["music", 48.43], ["musical instrument", 5.83], ["flamenco", 5.23]], null], "duration": [2.08, 0.15, 9.72, 3.36, 6.04, 3.99, 0.83, 0.55, 0.47, 0.15, 0.52, 9.12, 17.65, 5.22, 0.36, 10.7, 0.23, 2.79, 1.7, 6.21, 0.63]}
|
annotations_filtered/j7m47I9BuuY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[209.0, 214.22]], "keep_status": [false], "silence_prob": [60.89], "audiomae_on_audioset": [null], "duration": [5.22]}
|