Spaces:
Build error
Build error
WIP
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- annotations_filtered/i-2kXcQgs_w_filtered.json +1 -0
- annotations_filtered/i-9K5-x7_so_filtered.json +1 -0
- annotations_filtered/i-VM7_DJlkQ_filtered.json +1 -0
- annotations_filtered/i-VeLFEMeko_filtered.json +1 -0
- annotations_filtered/i-bRuSNSvcU_filtered.json +1 -0
- annotations_filtered/i-f0M5TqmsY_filtered.json +1 -0
- annotations_filtered/i07yEczcujQ_filtered.json +1 -0
- annotations_filtered/i0p2X2rQ6Ag_filtered.json +1 -0
- annotations_filtered/i1Nh_3JCFj8_filtered.json +1 -0
- annotations_filtered/i1ZUVkU_XK4_filtered.json +1 -0
- annotations_filtered/i1igdJh44yU_filtered.json +1 -0
- annotations_filtered/i1lkrSFlpss_filtered.json +1 -0
- annotations_filtered/i1n8bNgTUTw_filtered.json +1 -0
- annotations_filtered/i2gVXd7FzhQ_filtered.json +1 -0
- annotations_filtered/i2xyQnF1kro_filtered.json +1 -0
- annotations_filtered/i31XFSORRfc_filtered.json +1 -0
- annotations_filtered/i3EF63p3v-I_filtered.json +1 -0
- annotations_filtered/i3JbGwGNRI8_filtered.json +1 -0
- annotations_filtered/i3VNgECX8Ko_filtered.json +1 -0
- annotations_filtered/i3xjZomB1s0_filtered.json +1 -0
- annotations_filtered/i3yO0OagpNY_filtered.json +1 -0
- annotations_filtered/i4GeD9FWdG4_filtered.json +1 -0
- annotations_filtered/i4M2tehIejI_filtered.json +1 -0
- annotations_filtered/i4NIiCSEiTg_filtered.json +1 -0
- annotations_filtered/i4NRgUeziqA_filtered.json +1 -0
- annotations_filtered/i4h9xcdtyrE_filtered.json +1 -0
- annotations_filtered/i5Y6BTlx37s_filtered.json +1 -0
- annotations_filtered/i5dTE5dgWOw_filtered.json +1 -0
- annotations_filtered/i5j1wWY-qus_filtered.json +1 -0
- annotations_filtered/i5jTH89HjTA_filtered.json +1 -0
- annotations_filtered/i5mSHPKEbas_filtered.json +1 -0
- annotations_filtered/i6OCtSqrOQ0_filtered.json +1 -0
- annotations_filtered/i6klSHVWbrk_filtered.json +1 -0
- annotations_filtered/i6n8VyqaCQ4_filtered.json +1 -0
- annotations_filtered/i6oNzS6kCR8_filtered.json +1 -0
- annotations_filtered/i6ymVjU5hno_filtered.json +1 -0
- annotations_filtered/i7AUpGXLDdk_filtered.json +1 -0
- annotations_filtered/i7Jg_6-fYF8_filtered.json +1 -0
- annotations_filtered/i7KcAEPxDwQ_filtered.json +1 -0
- annotations_filtered/i7hF7BAKV_I_filtered.json +1 -0
- annotations_filtered/i7hk-TupE5g_filtered.json +1 -0
- annotations_filtered/i7tGEEWQIhQ_filtered.json +1 -0
- annotations_filtered/i7vKbmKF9VI_filtered.json +1 -0
- annotations_filtered/i812ZsyyeLg_filtered.json +1 -0
- annotations_filtered/i94ldGNNSQ0_filtered.json +1 -0
- annotations_filtered/i9DMpMCCxuE_filtered.json +1 -0
- annotations_filtered/i9Iy9amffa4_filtered.json +1 -0
- annotations_filtered/i9KJXFbkMH0_filtered.json +1 -0
- annotations_filtered/i9NIwHKBqy0_filtered.json +1 -0
- annotations_filtered/i9_lCyG67Rc_filtered.json +1 -0
annotations_filtered/i-2kXcQgs_w_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 0.78], [5.0, 5.75], [10.0, 13.36], [14.0, 15.01], [16.0, 16.77], [18.0, 20.82], [21.0, 21.03], [21.0, 21.07], [21.0, 21.44], [22.0, 22.6], [25.0, 30.13], [33.0, 34.37], [35.0, 36.96], [38.0, 44.36], [45.0, 49.72], [53.0, 53.11], [55.0, 55.92], [60.0, 64.03], [67.0, 68.57], [70.0, 71.25], [73.0, 74.63], [76.0, 85.65], [90.0, 91.76]], "keep_status": [false, false, true, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 37.81, 0.0, 0.0, 76.37, 0.0, 0.0, 0.0, 0.0, 29.86, 0.0, 0.0, 100.0, 99.85, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 100.0, 0.0], "audiomae_on_audioset": [null, null, [["speech", 49.05], ["sidetone", 14.96], ["crackle", 3.11]], null, null, null, null, null, null, null, [["speech", 48.12], ["whack, thwack", 13.41], ["music", 5.62]], null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.78, 0.75, 3.36, 1.01, 0.77, 2.82, 0.03, 0.07, 0.44, 0.6, 5.13, 1.37, 1.96, 6.36, 4.72, 0.11, 0.92, 4.03, 1.57, 1.25, 1.63, 9.65, 1.76]}
|
annotations_filtered/i-9K5-x7_so_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[67.0, 69.01], [70.0, 70.48], [71.0, 71.52], [77.0, 80.33], [84.0, 84.99], [91.0, 92.55], [96.0, 100.8], [102.0, 121.17], [124.0, 124.82], [126.0, 126.93], [130.0, 131.21], [132.0, 132.56], [135.0, 136.07], [141.0, 141.29], [145.0, 146.23], [150.0, 150.89], [154.0, 153.87]], "keep_status": [true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [40.59, 0.0, 0.0, 37.42, 0.0, 0.0, 35.94, 32.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["music", 32.97], ["speech", 13.75], ["synthesizer", 11.25]], null, null, [["music", 62.4], ["speech", 13.3], ["musical instrument", 4.67]], null, null, [["music", 56.29], ["speech", 12.83], ["drum machine", 9.63]], [["music", 91.99], ["synthesizer", 1.46], ["musical instrument", 1.26]], null, null, null, null, null, null, null, null, null], "duration": [2.01, 0.48, 0.52, 3.33, 0.99, 1.55, 4.8, 19.17, 0.82, 0.93, 1.21, 0.56, 1.07, 0.29, 1.23, 0.89, -0.13]}
|
annotations_filtered/i-VM7_DJlkQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[51.0, 78.49], [81.0, 81.48]], "keep_status": [true, false], "silence_prob": [33.35, 0.0], "audiomae_on_audioset": [[["music", 14.52], ["moo", 11.24], ["fly, housefly", 10.63]], null], "duration": [27.49, 0.48]}
|
annotations_filtered/i-VeLFEMeko_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[26.0, 28.7], [38.0, 38.89], [40.0, 41.01], [44.0, 44.86], [46.0, 46.74], [51.0, 55.32], [57.0, 57.91], [59.0, 64.89], [77.0, 78.21], [78.0, 79.89], [90.0, 94.85], [100.0, 101.06], [102.0, 103.81], [105.0, 105.92], [108.0, 108.3], [111.0, 112.23]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [82.43, 0.0, 0.0, 0.0, 0.0, 72.75, 0.0, 98.8, 0.0, 0.0, 30.87, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, [["music", 44.12], ["crack", 16.66], ["thunk", 10.3]], null, null, null, null, null], "duration": [2.7, 0.89, 1.01, 0.86, 0.74, 4.32, 0.91, 5.89, 1.21, 1.89, 4.85, 1.06, 1.81, 0.92, 0.3, 1.23]}
|
annotations_filtered/i-bRuSNSvcU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[27.0, 57.84], [59.0, 61.23], [61.0, 61.62], [62.0, 61.86], [63.0, 67.04], [68.0, 67.88], [72.0, 73.67], [75.0, 77.68], [80.0, 80.05], [92.0, 91.89], [93.0, 93.72], [94.0, 96.18]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 79.94, 0.0, 0.0, 83.16, 0.0, 0.0, 99.94, 0.0, 0.0, 0.0, 97.73], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null], "duration": [30.84, 2.23, 0.62, -0.14, 4.04, -0.12, 1.67, 2.68, 0.05, -0.11, 0.72, 2.18]}
|
annotations_filtered/i-f0M5TqmsY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.1], [4.0, 4.92], [20.0, 19.9], [27.0, 27.67], [37.0, 37.99], [44.0, 44.59], [48.0, 47.9], [50.0, 49.91], [53.0, 52.89], [54.0, 53.94], [55.0, 55.49], [68.0, 67.9], [69.0, 68.77], [80.0, 81.55], [82.0, 83.12], [101.0, 101.31], [103.0, 103.23], [105.0, 104.92], [113.0, 114.47], [116.0, 116.72], [122.0, 122.0], [123.0, 126.76], [129.0, 131.68]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 99.97, 100.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.1, 0.92, -0.1, 0.67, 0.99, 0.59, -0.1, -0.09, -0.11, -0.06, 0.49, -0.1, -0.23, 1.55, 1.12, 0.31, 0.23, -0.08, 1.47, 0.72, 0.0, 3.76, 2.68]}
|
annotations_filtered/i07yEczcujQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[11.0, 11.65], [13.0, 15.03], [16.0, 16.73], [18.0, 18.17], [19.0, 19.8], [21.0, 21.84], [27.0, 27.11], [29.0, 30.05], [34.0, 37.25], [42.0, 42.9], [44.0, 45.25], [49.0, 50.06], [55.0, 55.12], [57.0, 57.72], [65.0, 65.06], [77.0, 78.24], [80.0, 80.76], [85.0, 86.91], [88.0, 88.69], [90.0, 90.53], [91.0, 92.33], [93.0, 93.99], [97.0, 97.46], [100.0, 100.62], [104.0, 104.33], [107.0, 107.43], [108.0, 108.99]], "keep_status": [false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 49.45, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 30.49], ["whale vocalization", 15.65], ["hum", 7.21]], null, null, null, null, null, null, [["music", 58.09], ["theremin", 13.29], ["musical instrument", 6.95]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.65, 2.03, 0.73, 0.17, 0.8, 0.84, 0.11, 1.05, 3.25, 0.9, 1.25, 1.06, 0.12, 0.72, 0.06, 1.24, 0.76, 1.91, 0.69, 0.53, 1.33, 0.99, 0.46, 0.62, 0.33, 0.43, 0.99]}
|
annotations_filtered/i0p2X2rQ6Ag_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[13.0, 18.5], [19.0, 20.11], [22.0, 23.08], [27.0, 27.21], [31.0, 32.17], [37.0, 37.59], [41.0, 41.49], [43.0, 52.29], [53.0, 53.54], [71.0, 78.58], [80.0, 81.06], [81.0, 81.14], [81.0, 81.55], [86.0, 86.78], [93.0, 94.47], [96.0, 97.31], [99.0, 101.04], [103.0, 104.43], [106.0, 108.33], [110.0, 112.11], [115.0, 116.4], [117.0, 124.33], [127.0, 127.84]], "keep_status": [true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false], "silence_prob": [44.87, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 36.0, 0.0, 60.89, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 50.71, 0.0, 49.4, 90.95, 0.0, 71.57, 0.0], "audiomae_on_audioset": [[["speech", 22.59], ["mosquito", 20.25], ["fly, housefly", 12.66]], null, null, null, null, null, null, [["music", 68.21], ["musical instrument", 10.08], ["plucked string instrument", 3.78]], null, null, null, null, null, null, null, null, null, null, [["music", 49.12], ["synthesizer", 13.21], ["effects unit", 6.37]], null, null, null, null], "duration": [5.5, 1.11, 1.08, 0.21, 1.17, 0.59, 0.49, 9.29, 0.54, 7.58, 1.06, 0.14, 0.55, 0.78, 1.47, 1.31, 2.04, 1.43, 2.33, 2.11, 1.4, 7.33, 0.84]}
|
annotations_filtered/i1Nh_3JCFj8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 5.97], [20.0, 21.05], [29.0, 30.43], [32.0, 32.54], [46.0, 47.85], [52.0, 52.59], [65.0, 66.06], [67.0, 67.81], [79.0, 79.41], [80.0, 80.91], [83.0, 82.95], [85.0, 85.9], [87.0, 87.67], [88.0, 88.48], [100.0, 101.02]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.97, 1.05, 1.43, 0.54, 1.85, 0.59, 1.06, 0.81, 0.41, 0.91, -0.05, 0.9, 0.67, 0.48, 1.02]}
|
annotations_filtered/i1ZUVkU_XK4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.37], [4.0, 4.75], [6.0, 7.42], [8.0, 9.63], [10.0, 10.79], [18.0, 18.89], [20.0, 21.0], [25.0, 25.08], [26.0, 43.21], [45.0, 45.62], [46.0, 48.51], [51.0, 57.64], [85.0, 85.55], [90.0, 89.78], [94.0, 101.95], [106.0, 113.73], [114.0, 114.32]], "keep_status": [false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 39.89, 0.0, 46.29, 50.16, 0.0, 0.0, 52.92, 58.38, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, [["speech", 30.73], ["electric shaver, electric razor", 29.48], ["hum", 17.69]], null, [["music", 31.51], ["speech", 14.19], ["fireworks", 4.47]], null, null, null, null, null, null], "duration": [0.37, 0.75, 1.42, 1.63, 0.79, 0.89, 1.0, 0.08, 17.21, 0.62, 2.51, 6.64, 0.55, -0.22, 7.95, 7.73, 0.32]}
|
annotations_filtered/i1igdJh44yU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[9.0, 34.18], [70.0, 71.42], [74.0, 96.31], [97.0, 97.78], [98.0, 99.49], [101.0, 102.41], [109.0, 111.42], [116.0, 117.12], [123.0, 124.95], [130.0, 131.48], [141.0, 143.45], [148.0, 148.09], [156.0, 158.3]], "keep_status": [false, false, false, false, false, false, true, false, false, false, true, false, true], "silence_prob": [28.51, 0.0, 28.83, 0.0, 0.0, 0.0, 32.82, 0.0, 0.0, 0.0, 37.04, 0.0, 34.88], "audiomae_on_audioset": [[["music", 62.8], ["electronic music", 11.65], ["speech", 4.74]], null, [["music", 92.84], ["techno", 1.69], ["electronic music", 1.42]], null, null, null, [["livestock, farm animals, working animals", 18.75], ["moo", 16.55], ["sheep", 15.99]], null, null, null, [["speech", 26.5], ["fly, housefly", 15.29], ["insect", 10.15]], null, [["sheep", 13.73], ["bleat", 11.43], ["fly, housefly", 10.85]]], "duration": [25.18, 1.42, 22.31, 0.78, 1.49, 1.41, 2.42, 1.12, 1.95, 1.48, 2.45, 0.09, 2.3]}
|
annotations_filtered/i1lkrSFlpss_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 3.27], [5.0, 8.55], [12.0, 14.57], [21.0, 21.71], [23.0, 26.76], [29.0, 29.15], [31.0, 36.66], [37.0, 38.65], [45.0, 50.8], [54.0, 54.11], [56.0, 56.76], [61.0, 64.54], [66.0, 67.88], [68.0, 69.67], [77.0, 80.82], [82.0, 83.93]], "keep_status": [false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 37.93, 96.66, 0.0, 94.37, 0.0, 97.33, 0.0, 82.97, 0.0, 0.0, 98.8, 0.0, 0.0, 89.54, 0.0], "audiomae_on_audioset": [null, [["hum", 35.82], ["music", 14.8], ["mains hum", 11.39]], null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.27, 3.55, 2.57, 0.71, 3.76, 0.15, 5.66, 1.65, 5.8, 0.11, 0.76, 3.54, 1.88, 1.67, 3.82, 1.93]}
|
annotations_filtered/i1n8bNgTUTw_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.99], [5.0, 6.03], [8.0, 13.39], [15.0, 15.26], [16.0, 16.46], [19.0, 20.66], [23.0, 26.99], [41.0, 42.65], [47.0, 49.69], [54.0, 55.75], [58.0, 61.28], [64.0, 65.53], [68.0, 70.34], [71.0, 72.96], [75.0, 75.91], [78.0, 79.51], [83.0, 83.98], [86.0, 87.84], [91.0, 94.76]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 61.67, 0.0, 0.0, 0.0, 76.86, 0.0, 51.07, 0.0, 56.78, 0.0, 59.33, 0.0, 0.0, 0.0, 0.0, 0.0, 77.03], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.99, 1.03, 5.39, 0.26, 0.46, 1.66, 3.99, 1.65, 2.69, 1.75, 3.28, 1.53, 2.34, 1.96, 0.91, 1.51, 0.98, 1.84, 3.76]}
|
annotations_filtered/i2gVXd7FzhQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[12.0, 12.82], [14.0, 15.21], [18.0, 20.06], [24.0, 26.45], [32.0, 33.05], [37.0, 37.23], [38.0, 39.07], [40.0, 41.12], [42.0, 43.33], [46.0, 45.96], [47.0, 49.32], [51.0, 52.22], [56.0, 56.05], [57.0, 61.11], [63.0, 63.37], [64.0, 68.59], [70.0, 71.66], [73.0, 73.58], [77.0, 118.62], [119.0, 127.26], [131.0, 132.58], [134.0, 133.98], [135.0, 135.13], [136.0, 137.49], [139.0, 139.68], [144.0, 150.87], [152.0, 152.41], [155.0, 169.45], [195.0, 199.94]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, true, false, false, false], "silence_prob": [0.0, 0.0, 99.48, 99.96, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 53.97, 0.0, 0.0, 61.97, 0.0, 51.07, 0.0, 0.0, 0.0, 37.24, 0.0, 0.0, 0.0, 0.0, 0.0, 33.14, 0.0, 34.18, 61.67], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["animal", 21.42], ["sheep", 21.21], ["speech", 12.34]], null, null, null, null, null, [["music", 32.66], ["sonar", 8.38], ["cattle, bovinae", 8.12]], null, [["music", 66.64], ["didgeridoo", 23.64], ["throbbing", 2.27]], null], "duration": [0.82, 1.21, 2.06, 2.45, 1.05, 0.23, 1.07, 1.12, 1.33, -0.04, 2.32, 1.22, 0.05, 4.11, 0.37, 4.59, 1.66, 0.58, 41.62, 8.26, 1.58, -0.02, 0.13, 1.49, 0.68, 6.87, 0.41, 14.45, 4.94]}
|
annotations_filtered/i2xyQnF1kro_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.04], [5.0, 21.29], [25.0, 26.33], [27.0, 28.92], [30.0, 30.52], [39.0, 43.39], [44.0, 44.09], [45.0, 45.74], [47.0, 47.51], [53.0, 54.41], [55.0, 56.69], [61.0, 61.26], [62.0, 67.83], [83.0, 84.32], [86.0, 87.13], [88.0, 89.18], [91.0, 93.23]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false], "silence_prob": [0.0, 73.21, 0.0, 0.0, 0.0, 78.72, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 48.02, 0.0, 0.0, 0.0, 98.27], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 39.01], ["fly, housefly", 17.05], ["insect", 9.94]], null, null, null, null], "duration": [0.04, 16.29, 1.33, 1.92, 0.52, 4.39, 0.09, 0.74, 0.51, 1.41, 1.69, 0.26, 5.83, 1.32, 1.13, 1.18, 2.23]}
|
annotations_filtered/i31XFSORRfc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[27.0, 35.72], [37.0, 45.1], [46.0, 51.04], [58.0, 64.05], [66.0, 66.5], [68.0, 95.77], [96.0, 106.76], [108.0, 111.96], [113.0, 123.42], [125.0, 125.27], [126.0, 131.52], [133.0, 135.58], [139.0, 159.33]], "keep_status": [true, false, false, false, false, false, true, false, false, false, false, false, true], "silence_prob": [28.83, 29.49, 29.61, 34.56, 0.0, 32.56, 30.6, 42.58, 30.03, 0.0, 61.18, 29.71, 29.77], "audiomae_on_audioset": [[["music", 31.67], ["hum", 21.22], ["rumble", 10.66]], [["hum", 30.48], ["mains hum", 24.58], ["rumble", 15.42]], [["music", 60.87], ["hum", 5.59], ["mains hum", 4.6]], [["hum", 29.71], ["music", 24.93], ["mains hum", 21.21]], null, [["music", 43.6], ["hum", 29.88], ["mains hum", 12.04]], [["hum", 41.93], ["mains hum", 14.03], ["music", 14.03]], [["music", 67.71], ["sonar", 4.39], ["synthesizer", 3.43]], [["music", 57.83], ["scary music", 16.85], ["speech", 9.69]], null, null, [["music", 51.76], ["speech", 23.55], ["mains hum", 6.43]], [["music", 44.19], ["buzz", 9.36], ["mains hum", 5.14]]], "duration": [8.72, 8.1, 5.04, 6.05, 0.5, 27.77, 10.76, 3.96, 10.42, 0.27, 5.52, 2.58, 20.33]}
|
annotations_filtered/i3EF63p3v-I_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 6.1], [10.0, 11.82], [13.0, 16.02], [17.0, 23.95], [29.0, 35.56], [42.0, 43.44], [45.0, 47.19], [48.0, 48.74], [50.0, 50.6], [51.0, 52.05], [56.0, 56.35]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 68.54, 67.0, 64.18, 0.0, 90.08, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null], "duration": [0.1, 1.82, 3.02, 6.95, 6.56, 1.44, 2.19, 0.74, 0.6, 1.05, 0.35]}
|
annotations_filtered/i3JbGwGNRI8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 7.52], [9.0, 8.87], [11.0, 11.94], [19.0, 19.06], [20.0, 21.96], [23.0, 31.6]], "keep_status": [false, false, false, false, false, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 35.68], "audiomae_on_audioset": [null, null, null, null, null, [["hum", 24.86], ["music", 12.08], ["mains hum", 10.71]]], "duration": [0.52, -0.13, 0.94, 0.06, 1.96, 8.6]}
|
annotations_filtered/i3VNgECX8Ko_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[19.0, 20.49], [24.0, 26.49], [29.0, 61.16], [63.0, 70.71], [71.0, 71.0], [72.0, 72.13], [76.0, 111.48], [112.0, 111.52]], "keep_status": [false, true, false, true, false, false, false, false], "silence_prob": [0.0, 36.41, 0.0, 34.15, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["speech", 42.58], ["hum", 8.19], ["vehicle", 6.54]], null, [["music", 45.39], ["speech", 14.19], ["throbbing", 6.81]], null, null, null, null], "duration": [1.49, 2.49, 32.16, 7.71, 0.0, 0.13, 35.48, -0.48]}
|
annotations_filtered/i3xjZomB1s0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 4.38], [6.0, 18.35], [20.0, 19.7], [33.0, 40.29], [41.0, 41.76], [43.0, 43.6], [44.0, 45.06], [64.0, 66.68], [68.0, 69.01], [70.0, 70.56], [71.0, 72.35], [74.0, 74.58], [79.0, 79.66], [82.0, 82.46], [88.0, 89.01], [92.0, 92.52], [93.0, 94.14], [96.0, 96.47], [99.0, 100.01], [102.0, 107.74], [114.0, 114.39], [115.0, 115.94], [118.0, 118.2], [128.0, 129.9]], "keep_status": [false, true, false, true, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 30.31, 0.0, 32.8, 0.0, 0.0, 0.0, 34.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 30.67, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["sound effect", 12.61], ["vehicle", 12.56], ["buzz", 10.13]], null, [["music", 32.49], ["hum", 19.25], ["mains hum", 13.73]], null, null, null, [["music", 29.43], ["didgeridoo", 10.86], ["foghorn", 9.1]], null, null, null, null, null, null, null, null, null, null, null, [["music", 52.53], ["theremin", 19.7], ["brass instrument", 5.91]], null, null, null, null], "duration": [1.38, 12.35, -0.3, 7.29, 0.76, 0.6, 1.06, 2.68, 1.01, 0.56, 1.35, 0.58, 0.66, 0.46, 1.01, 0.52, 1.14, 0.47, 1.01, 5.74, 0.39, 0.94, 0.2, 1.9]}
|
annotations_filtered/i3yO0OagpNY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[17.0, 23.45], [24.0, 33.45], [34.0, 34.2], [37.0, 37.77], [41.0, 41.76], [42.0, 59.95], [61.0, 83.2], [83.0, 86.21], [91.0, 116.95], [118.0, 123.75], [124.0, 124.88], [128.0, 154.95]], "keep_status": [true, true, false, false, false, true, true, true, true, true, false, true], "silence_prob": [30.42, 33.9, 0.0, 0.0, 0.0, 35.73, 42.13, 33.85, 30.39, 28.88, 0.0, 28.82], "audiomae_on_audioset": [[["music", 34.23], ["brass instrument", 16.41], ["foghorn", 7.97]], [["music", 50.74], ["brass instrument", 8.72], ["didgeridoo", 8.42]], null, null, null, [["music", 47.46], ["effects unit", 10.6], ["musical instrument", 7.21]], [["music", 51.33], ["synthesizer", 11.76], ["theremin", 5.42]], [["music", 47.53], ["musical instrument", 6.59], ["foghorn", 2.95]], [["music", 26.82], ["theremin", 10.71], ["trombone", 7.77]], [["machine gun", 16.76], ["synthesizer", 13.69], ["music", 11.99]], null, [["music", 62.02], ["electronic music", 3.5], ["eruption", 2.69]]], "duration": [6.45, 9.45, 0.2, 0.77, 0.76, 17.95, 22.2, 3.21, 25.95, 5.75, 0.88, 26.95]}
|
annotations_filtered/i4GeD9FWdG4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.86], [9.0, 9.93], [14.0, 15.18], [17.0, 18.6], [20.0, 20.6], [27.0, 28.46], [32.0, 33.05], [35.0, 39.46], [42.0, 41.81], [44.0, 44.42], [45.0, 45.08], [49.0, 49.38], [53.0, 55.19], [57.0, 58.06], [60.0, 60.64], [64.0, 64.2], [66.0, 69.69], [70.0, 74.17], [77.0, 80.52], [82.0, 84.16], [84.0, 84.72], [86.0, 86.14], [86.0, 87.05]], "keep_status": [false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 48.65, 0.0, 0.0, 0.0, 0.0, 85.9, 0.0, 0.0, 0.0, 69.47, 88.1, 80.64, 88.46, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, [["rumble", 6.95], ["stomach rumble", 6.94], ["speech", 6.47]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.86, 0.93, 1.18, 1.6, 0.6, 1.46, 1.05, 4.46, -0.19, 0.42, 0.08, 0.38, 2.19, 1.06, 0.64, 0.2, 3.69, 4.17, 3.52, 2.16, 0.72, 0.14, 1.05]}
|
annotations_filtered/i4M2tehIejI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[29.0, 33.22], [34.0, 41.0], [43.0, 45.18], [48.0, 57.35], [58.0, 97.63]], "keep_status": [false, false, false, false, false], "silence_prob": [72.6, 96.77, 99.76, 58.72, 0.0], "audiomae_on_audioset": [null, null, null, null, null], "duration": [4.22, 7.0, 2.18, 9.35, 39.63]}
|
annotations_filtered/i4NIiCSEiTg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.48], [6.0, 6.08], [11.0, 11.65], [20.0, 20.83], [21.0, 30.48], [32.0, 32.44], [35.0, 41.69], [44.0, 45.0], [46.0, 47.71], [48.0, 48.86], [49.0, 52.07], [53.0, 54.09], [54.0, 54.67], [55.0, 56.13], [57.0, 57.82], [59.0, 61.33], [64.0, 65.67], [68.0, 73.84], [79.0, 87.59], [88.0, 91.74], [93.0, 96.15], [97.0, 98.54], [100.0, 111.79], [113.0, 114.12], [115.0, 123.87], [126.0, 126.1], [129.0, 130.77]], "keep_status": [false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, true, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 66.15, 0.0, 49.87, 0.0, 0.0, 0.0, 58.98, 0.0, 0.0, 0.0, 0.0, 73.97, 0.0, 84.07, 61.97, 76.53, 83.7, 0.0, 38.72, 0.0, 40.59, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, [["music", 46.88], ["guitar", 9.8], ["singing bowl", 6.34]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 43.5], ["hum", 7.69], ["cello", 4.24]], null, [["music", 45.64], ["hum", 4.52], ["musical instrument", 3.6]], null, null], "duration": [0.48, 0.08, 0.65, 0.83, 9.48, 0.44, 6.69, 1.0, 1.71, 0.86, 3.07, 1.09, 0.67, 1.13, 0.82, 2.33, 1.67, 5.84, 8.59, 3.74, 3.15, 1.54, 11.79, 1.12, 8.87, 0.1, 1.77]}
|
annotations_filtered/i4NRgUeziqA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 79.54], [84.0, 116.07], [117.0, 120.28], [121.0, 121.54], [124.0, 125.05]], "keep_status": [false, false, true, false, false], "silence_prob": [0.0, 0.0, 29.3, 0.0, 0.0], "audiomae_on_audioset": [null, null, [["speech", 46.52], ["eruption", 11.52], ["explosion", 5.15]], null, null], "duration": [75.54, 32.07, 3.28, 0.54, 1.05]}
|
annotations_filtered/i4h9xcdtyrE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[12.0, 12.88], [16.0, 16.34], [17.0, 18.42], [25.0, 25.29], [33.0, 33.89], [36.0, 36.27], [39.0, 44.73], [45.0, 45.5], [46.0, 45.93], [50.0, 51.11], [52.0, 52.0], [52.0, 52.32], [53.0, 53.3], [61.0, 61.05], [81.0, 81.72], [83.0, 83.44], [84.0, 84.27], [111.0, 111.27], [111.0, 116.19], [118.0, 119.18], [120.0, 120.34], [123.0, 123.85]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 64.18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 47.12, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 79.2], ["sidetone", 2.38], ["inside, small room", 1.45]], null, null, null], "duration": [0.88, 0.34, 1.42, 0.29, 0.89, 0.27, 5.73, 0.5, -0.07, 1.11, 0.0, 0.32, 0.3, 0.05, 0.72, 0.44, 0.27, 0.27, 5.19, 1.18, 0.34, 0.85]}
|
annotations_filtered/i5Y6BTlx37s_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 15.33], [16.0, 17.05], [19.0, 19.63], [26.0, 37.83], [39.0, 40.58], [46.0, 50.5], [53.0, 58.45], [60.0, 60.42]], "keep_status": [false, false, false, true, false, true, false, false], "silence_prob": [76.7, 0.0, 0.0, 37.62, 0.0, 35.89, 35.22, 0.0], "audiomae_on_audioset": [null, null, null, [["music", 26.19], ["hum", 14.35], ["gong", 11.92]], null, [["cattle, bovinae", 23.37], ["moo", 17.69], ["livestock, farm animals, working animals", 14.29]], [["music", 58.48], ["theremin", 9.94], ["speech", 6.05]], null], "duration": [7.33, 1.05, 0.63, 11.83, 1.58, 4.5, 5.45, 0.42]}
|
annotations_filtered/i5dTE5dgWOw_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[84.0, 93.66], [95.0, 179.34], [184.0, 184.5], [187.0, 188.03], [189.0, 190.38], [191.0, 208.95]], "keep_status": [false, false, false, false, false, false], "silence_prob": [29.22, 0.0, 0.0, 0.0, 0.0, 30.58], "audiomae_on_audioset": [[["music", 40.22], ["hum", 24.9], ["throbbing", 9.57]], null, null, null, null, [["speech", 63.8], ["hum", 11.05], ["music", 7.46]]], "duration": [9.66, 84.34, 0.5, 1.03, 1.38, 17.95]}
|
annotations_filtered/i5j1wWY-qus_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 4.5], [12.0, 11.67], [12.0, 13.49], [16.0, 16.28], [17.0, 19.3], [23.0, 23.55], [28.0, 33.47], [36.0, 37.54], [39.0, 40.12], [43.0, 44.66], [48.0, 49.52], [50.0, 52.0], [56.0, 57.1], [65.0, 65.11]], "keep_status": [true, false, false, false, true, false, false, false, false, false, false, true, false, false], "silence_prob": [34.88, 0.0, 0.0, 0.0, 34.23, 0.0, 42.15, 0.0, 0.0, 0.0, 0.0, 48.56, 0.0, 0.0], "audiomae_on_audioset": [[["speech", 44.91], ["hum", 7.62], ["mains hum", 4.51]], null, null, null, [["cattle, bovinae", 24.86], ["livestock, farm animals, working animals", 16.11], ["moo", 15.28]], null, [["livestock, farm animals, working animals", 35.7], ["cattle, bovinae", 24.48], ["moo", 15.94]], null, null, null, null, [["livestock, farm animals, working animals", 20.39], ["cattle, bovinae", 9.35], ["sidetone", 6.12]], null, null], "duration": [2.5, -0.33, 1.49, 0.28, 2.3, 0.55, 5.47, 1.54, 1.12, 1.66, 1.52, 2.0, 1.1, 0.11]}
|
annotations_filtered/i5jTH89HjTA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 7.1], [21.0, 22.17], [23.0, 26.42], [28.0, 28.59], [31.0, 31.46], [43.0, 44.19], [45.0, 49.94], [53.0, 53.2], [59.0, 79.95], [81.0, 90.19], [92.0, 94.1], [117.0, 118.37], [119.0, 123.16], [124.0, 126.18], [127.0, 128.39], [132.0, 132.88], [136.0, 136.88]], "keep_status": [false, false, false, false, false, false, true, false, true, true, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 45.4, 0.0, 0.0, 0.0, 43.77, 0.0, 31.21, 42.42, 53.78, 0.0, 64.86, 65.91, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, [["hum", 32.88], ["mains hum", 29.34], ["throbbing", 8.61]], null, null, null, [["fly, housefly", 29.01], ["frog", 25.17], ["insect", 12.64]], null, [["music", 49.38], ["theremin", 8.11], ["brass instrument", 5.95]], [["bee, wasp, etc.", 40.04], ["fly, housefly", 14.39], ["speech", 13.82]], null, null, null, null, null, null, null], "duration": [0.1, 1.17, 3.42, 0.59, 0.46, 1.19, 4.94, 0.2, 20.95, 9.19, 2.1, 1.37, 4.16, 2.18, 1.39, 0.88, 0.88]}
|
annotations_filtered/i5mSHPKEbas_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 0.5], [10.0, 13.12], [16.0, 16.58], [17.0, 17.79], [19.0, 19.87], [21.0, 21.02], [23.0, 48.68], [50.0, 50.43], [51.0, 51.6], [52.0, 54.14], [55.0, 56.07], [57.0, 58.4], [59.0, 62.13], [64.0, 67.0], [68.0, 69.48], [70.0, 70.26], [78.0, 78.7], [80.0, 81.18], [85.0, 84.97], [86.0, 87.61], [88.0, 88.92], [90.0, 91.5], [93.0, 94.15], [94.0, 94.69], [98.0, 98.79], [100.0, 101.01], [102.0, 105.63], [106.0, 108.18], [109.0, 109.83], [111.0, 112.51], [114.0, 117.75], [118.0, 119.89], [121.0, 122.72], [124.0, 125.95]], "keep_status": [false, true, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 44.29, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 100.0, 0.0, 0.0, 100.0, 40.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 99.31, 98.36, 0.0, 0.0, 92.8, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["sidetone", 25.59], ["fly, housefly", 11.73], ["radio", 7.79]], null, null, null, null, null, null, null, null, null, null, null, [["radio", 22.56], ["moo", 11.1], ["cattle, bovinae", 9.85]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.5, 3.12, 0.58, 0.79, 0.87, 0.02, 25.68, 0.43, 0.6, 2.14, 1.07, 1.4, 3.13, 3.0, 1.48, 0.26, 0.7, 1.18, -0.03, 1.61, 0.92, 1.5, 1.15, 0.69, 0.79, 1.01, 3.63, 2.18, 0.83, 1.51, 3.75, 1.89, 1.72, 1.95]}
|
annotations_filtered/i6OCtSqrOQ0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[14.0, 15.94], [18.0, 18.49], [20.0, 21.79], [23.0, 25.96], [29.0, 51.12], [53.0, 55.93], [63.0, 64.05], [65.0, 65.11], [67.0, 67.29], [68.0, 69.77], [71.0, 71.09], [72.0, 77.6], [80.0, 80.77], [83.0, 84.18], [85.0, 85.62], [88.0, 89.9], [90.0, 90.1], [95.0, 100.95], [105.0, 106.17], [107.0, 107.77], [116.0, 116.88], [118.0, 118.37], [121.0, 123.42], [124.0, 125.27], [127.0, 137.35], [138.0, 142.57], [144.0, 144.17], [152.0, 160.19]], "keep_status": [false, false, false, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, true, false, false], "silence_prob": [0.0, 0.0, 0.0, 36.32, 41.91, 99.9, 0.0, 0.0, 0.0, 0.0, 0.0, 94.22, 0.0, 0.0, 0.0, 0.0, 0.0, 44.6, 0.0, 0.0, 0.0, 0.0, 55.31, 0.0, 36.25, 33.75, 0.0, 33.26], "audiomae_on_audioset": [null, null, null, [["hum", 21.47], ["creak", 16.66], ["throbbing", 10.75]], [["synthesizer", 23.99], ["music", 16.39], ["effects unit", 7.47]], null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 28.97], ["beatboxing", 28.26], ["vocal music", 7.02]], null, null, null, null, null, null, [["dial tone", 42.47], ["speech", 33.27], ["burping, eructation", 6.19]], [["speech", 50.23], ["moo", 8.31], ["cattle, bovinae", 8.28]], null, [["speech", 66.96], ["sidetone", 6.69], ["radio", 4.21]]], "duration": [1.94, 0.49, 1.79, 2.96, 22.12, 2.93, 1.05, 0.11, 0.29, 1.77, 0.09, 5.6, 0.77, 1.18, 0.62, 1.9, 0.1, 5.95, 1.17, 0.77, 0.88, 0.37, 2.42, 1.27, 10.35, 4.57, 0.17, 8.19]}
|
annotations_filtered/i6klSHVWbrk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[12.0, 16.95], [23.0, 31.94], [42.0, 75.17]], "keep_status": [false, true, false], "silence_prob": [30.62, 31.85, 0.0], "audiomae_on_audioset": [[["whale vocalization", 79.55], ["noise", 3.06], ["stomach rumble", 2.88]], [["hum", 31.95], ["speech", 12.99], ["music", 12.65]], null], "duration": [4.95, 8.94, 33.17]}
|
annotations_filtered/i6n8VyqaCQ4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 5.29], [6.0, 6.64], [9.0, 8.94], [17.0, 16.95], [18.0, 18.93], [20.0, 21.25], [23.0, 24.7], [26.0, 27.41], [34.0, 34.72], [37.0, 37.66], [38.0, 38.64], [40.0, 40.76], [42.0, 42.04], [45.0, 49.71], [51.0, 52.03], [55.0, 55.75], [64.0, 75.1], [78.0, 79.69], [82.0, 83.02], [84.0, 92.08], [93.0, 94.29], [95.0, 105.44], [106.0, 107.92], [110.0, 115.52], [117.0, 117.56]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [99.94, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 99.92, 0.0, 0.0, 52.98, 0.0, 0.0, 90.08, 0.0, 66.63, 0.0, 91.81, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [2.29, 0.64, -0.06, -0.05, 0.93, 1.25, 1.7, 1.41, 0.72, 0.66, 0.64, 0.76, 0.04, 4.71, 1.03, 0.75, 11.1, 1.69, 1.02, 8.08, 1.29, 10.44, 1.92, 5.52, 0.56]}
|
annotations_filtered/i6oNzS6kCR8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 8.41], [9.0, 11.57], [14.0, 13.88], [15.0, 16.01], [19.0, 20.78], [23.0, 23.7], [25.0, 28.71], [31.0, 31.97], [33.0, 33.52], [34.0, 100.47], [102.0, 101.83]], "keep_status": [true, true, false, false, false, false, true, false, false, false, false], "silence_prob": [35.46, 42.74, 0.0, 0.0, 0.0, 0.0, 44.26, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["fly, housefly", 24.62], ["insect", 16.06], ["mains hum", 9.96]], [["speech", 14.46], ["music", 12.95], ["electric shaver, electric razor", 12.1]], null, null, null, null, [["music", 29.52], ["speech", 18.63], ["ding", 4.24]], null, null, null, null], "duration": [5.41, 2.57, -0.12, 1.01, 1.78, 0.7, 3.71, 0.97, 0.52, 66.47, -0.17]}
|
annotations_filtered/i6ymVjU5hno_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 6.72], [9.0, 9.0], [10.0, 11.5], [13.0, 14.62], [16.0, 19.23], [21.0, 22.65], [24.0, 26.92], [28.0, 30.47], [31.0, 43.11], [45.0, 45.25], [48.0, 48.74], [50.0, 50.57], [52.0, 52.29], [53.0, 54.41], [55.0, 56.71], [59.0, 61.48], [64.0, 66.56], [69.0, 70.44], [72.0, 72.72], [74.0, 76.3], [79.0, 81.33], [84.0, 85.43], [89.0, 89.51], [90.0, 93.31], [95.0, 96.3], [97.0, 98.47], [101.0, 102.64], [104.0, 105.36], [107.0, 107.43], [109.0, 109.48], [110.0, 111.65], [113.0, 113.8], [116.0, 115.67], [118.0, 119.74], [121.0, 123.6], [127.0, 129.03]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false], "silence_prob": [64.52, 0.0, 0.0, 0.0, 92.48, 0.0, 73.36, 85.9, 44.84, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 40.12, 55.89, 0.0, 0.0, 59.42, 83.88, 0.0, 0.0, 79.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 40.95, 37.98], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, [["music", 44.59], ["effects unit", 24.91], ["guitar", 8.76]], null, null, null, null, null, null, [["mains hum", 32.97], ["sidetone", 22.37], ["hum", 21.12]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["mains hum", 30.55], ["speech", 16.95], ["hum", 13.4]], [["speech", 62.34], ["mains hum", 5.85], ["noise", 4.55]]], "duration": [3.72, 0.0, 1.5, 1.62, 3.23, 1.65, 2.92, 2.47, 12.11, 0.25, 0.74, 0.57, 0.29, 1.41, 1.71, 2.48, 2.56, 1.44, 0.72, 2.3, 2.33, 1.43, 0.51, 3.31, 1.3, 1.47, 1.64, 1.36, 0.43, 0.48, 1.65, 0.8, -0.33, 1.74, 2.6, 2.03]}
|
annotations_filtered/i7AUpGXLDdk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[25.0, 25.22], [36.0, 37.72], [53.0, 52.91], [63.0, 63.71]], "keep_status": [false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null], "duration": [0.22, 1.72, -0.09, 0.71]}
|
annotations_filtered/i7Jg_6-fYF8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 5.24], [6.0, 11.72], [14.0, 32.58], [35.0, 36.88], [41.0, 41.05], [42.0, 42.72], [45.0, 45.08], [46.0, 46.5], [48.0, 50.04], [54.0, 54.08], [56.0, 56.57], [62.0, 67.98], [69.0, 76.77], [80.0, 81.26], [86.0, 86.31], [88.0, 118.2], [120.0, 152.44]], "keep_status": [false, true, true, false, false, false, false, false, true, false, false, true, false, false, false, false, false], "silence_prob": [0.0, 32.58, 30.45, 0.0, 0.0, 0.0, 0.0, 0.0, 33.56, 0.0, 0.0, 46.29, 51.02, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["speech", 44.86], ["livestock, farm animals, working animals", 6.42], ["fly, housefly", 5.37]], [["speech", 27.71], ["explosion", 21.89], ["artillery fire", 11.64]], null, null, null, null, null, [["speech", 32.85], ["whack, thwack", 23.92], ["hum", 5.26]], null, null, [["speech", 49.74], ["hum", 11.0], ["mains hum", 5.22]], null, null, null, null, null], "duration": [0.24, 5.72, 18.58, 1.88, 0.05, 0.72, 0.08, 0.5, 2.04, 0.08, 0.57, 5.98, 7.77, 1.26, 0.31, 30.2, 32.44]}
|
annotations_filtered/i7KcAEPxDwQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 8.16], [10.0, 10.18], [11.0, 34.86], [43.0, 43.93], [48.0, 56.94], [59.0, 74.76], [78.0, 78.83]], "keep_status": [false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 32.04, 0.0, 35.53, 69.74, 0.0], "audiomae_on_audioset": [null, null, [["speech", 73.25], ["music", 16.39], ["didgeridoo", 2.4]], null, [["hum", 50.14], ["mains hum", 27.54], ["music", 4.37]], null, null], "duration": [0.16, 0.18, 23.86, 0.93, 8.94, 15.76, 0.83]}
|
annotations_filtered/i7hF7BAKV_I_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[31.0, 55.81], [56.0, 56.81], [57.0, 62.33], [65.0, 82.26], [83.0, 83.91], [84.0, 86.36]], "keep_status": [true, false, false, false, false, false], "silence_prob": [28.66, 0.0, 34.68, 36.37, 0.0, 53.72], "audiomae_on_audioset": [[["music", 45.12], ["speech", 12.72], ["beatboxing", 6.75]], null, [["music", 52.12], ["speech", 14.91], ["didgeridoo", 4.78]], [["speech", 34.56], ["fart", 29.46], ["fly, housefly", 12.8]], null, null], "duration": [24.81, 0.81, 5.33, 17.26, 0.91, 2.36]}
|
annotations_filtered/i7hk-TupE5g_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.37], [6.0, 9.0], [14.0, 15.67], [17.0, 18.18], [20.0, 19.8], [21.0, 21.47], [23.0, 26.08], [27.0, 29.02], [35.0, 37.1], [40.0, 42.3], [46.0, 47.07], [50.0, 52.07], [54.0, 55.38], [62.0, 62.95], [64.0, 65.45], [68.0, 68.45], [70.0, 70.73], [72.0, 74.49], [76.0, 76.28], [77.0, 77.43], [80.0, 81.06], [83.0, 83.52], [85.0, 85.75], [87.0, 88.74], [98.0, 98.24], [101.0, 102.27], [105.0, 105.68], [110.0, 112.01], [114.0, 115.15], [120.0, 121.32], [127.0, 127.41], [131.0, 131.46]], "keep_status": [false, false, false, false, false, false, false, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false], "silence_prob": [0.0, 32.42, 0.0, 0.0, 0.0, 0.0, 38.37, 37.3, 33.25, 36.02, 0.0, 33.4, 0.0, 0.0, 0.0, 0.0, 0.0, 33.87, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 33.27, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 47.06], ["didgeridoo", 33.52], ["hum", 3.51]], null, null, null, null, [["mains hum", 47.05], ["hum", 35.17], ["music", 4.99]], [["wind instrument, woodwind instrument", 9.45], ["shofar", 8.14], ["speech", 7.77]], [["music", 23.11], ["mains hum", 11.33], ["hum", 10.84]], [["mains hum", 13.68], ["hum", 11.76], ["music", 7.19]], null, [["speech", 54.41], ["music", 13.1], ["hum", 7.43]], null, null, null, null, null, [["sidetone", 62.51], ["mains hum", 23.0], ["hum", 6.4]], null, null, null, null, null, null, null, null, null, [["music", 42.51], ["didgeridoo", 5.5], ["speech", 5.18]], null, null, null, null], "duration": [0.37, 3.0, 1.67, 1.18, -0.2, 0.47, 3.08, 2.02, 2.1, 2.3, 1.07, 2.07, 1.38, 0.95, 1.45, 0.45, 0.73, 2.49, 0.28, 0.43, 1.06, 0.52, 0.75, 1.74, 0.24, 1.27, 0.68, 2.01, 1.15, 1.32, 0.41, 0.46]}
|
annotations_filtered/i7tGEEWQIhQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.47], [3.0, 5.56], [6.0, 7.18], [9.0, 12.38], [14.0, 14.12], [15.0, 18.79], [20.0, 21.78], [23.0, 25.34], [28.0, 29.67], [31.0, 31.72], [32.0, 33.17], [37.0, 37.67], [38.0, 38.92], [40.0, 40.63], [44.0, 49.1], [50.0, 51.76], [52.0, 53.77], [54.0, 56.56], [59.0, 60.27], [63.0, 63.14], [69.0, 70.06], [71.0, 71.93], [95.0, 99.57], [101.0, 103.47]], "keep_status": [false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 59.68, 0.0, 82.07, 0.0, 90.08, 0.0, 49.54, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 81.89, 0.0, 0.0, 43.38, 0.0, 0.0, 0.0, 0.0, 100.0, 82.97], "audiomae_on_audioset": [null, null, null, null, null, null, null, [["speech", 29.34], ["sine wave", 16.85], ["sidetone", 10.24]], null, null, null, null, null, null, null, null, null, [["sidetone", 58.48], ["speech", 23.71], ["dial tone", 3.99]], null, null, null, null, null, null], "duration": [0.47, 2.56, 1.18, 3.38, 0.12, 3.79, 1.78, 2.34, 1.67, 0.72, 1.17, 0.67, 0.92, 0.63, 5.1, 1.76, 1.77, 2.56, 1.27, 0.14, 1.06, 0.93, 4.57, 2.47]}
|
annotations_filtered/i7vKbmKF9VI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 7.48], [11.0, 12.9], [13.0, 14.84], [16.0, 17.83], [29.0, 30.06], [34.0, 34.16], [40.0, 43.11], [44.0, 44.71], [50.0, 57.4], [59.0, 60.64], [62.0, 63.41], [70.0, 71.05], [73.0, 73.3], [77.0, 78.51], [91.0, 91.25], [93.0, 97.41], [98.0, 99.42], [101.0, 104.21]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 99.99, 0.0, 99.48], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.48, 1.9, 1.84, 1.83, 1.06, 0.16, 3.11, 0.71, 7.4, 1.64, 1.41, 1.05, 0.3, 1.51, 0.25, 4.41, 1.42, 3.21]}
|
annotations_filtered/i812ZsyyeLg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.57], [4.0, 7.96], [9.0, 10.74], [12.0, 12.88], [17.0, 17.52], [19.0, 19.55], [22.0, 23.41], [24.0, 24.63], [26.0, 46.16], [48.0, 65.74], [66.0, 66.45], [68.0, 68.59], [70.0, 69.82], [70.0, 70.33], [72.0, 73.13], [75.0, 74.92], [77.0, 77.41], [79.0, 78.97], [82.0, 84.5], [85.0, 85.63], [89.0, 89.7], [90.0, 92.13], [93.0, 94.56], [101.0, 101.66], [106.0, 114.59], [115.0, 116.34], [117.0, 123.74], [125.0, 125.27]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false], "silence_prob": [0.0, 50.86, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 49.82, 66.63, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 86.45, 0.0, 0.0, 96.29, 0.0, 0.0, 73.36, 0.0, 33.47, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, [["mosquito", 67.28], ["fly, housefly", 21.75], ["insect", 9.75]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["livestock, farm animals, working animals", 18.45], ["cattle, bovinae", 17.78], ["moo", 14.76]], null], "duration": [0.57, 3.96, 1.74, 0.88, 0.52, 0.55, 1.41, 0.63, 20.16, 17.74, 0.45, 0.59, -0.18, 0.33, 1.13, -0.08, 0.41, -0.03, 2.5, 0.63, 0.7, 2.13, 1.56, 0.66, 8.59, 1.34, 6.74, 0.27]}
|
annotations_filtered/i94ldGNNSQ0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 38.45], [40.0, 40.71], [41.0, 60.84], [63.0, 112.45], [112.0, 112.55]], "keep_status": [false, false, true, false, false], "silence_prob": [0.0, 0.0, 36.6, 0.0, 0.0], "audiomae_on_audioset": [null, null, [["hum", 29.41], ["music", 17.12], ["mains hum", 13.52]], null, null], "duration": [35.45, 0.71, 19.84, 49.45, 0.55]}
|
annotations_filtered/i9DMpMCCxuE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[12.0, 13.17], [14.0, 15.15], [17.0, 17.17], [27.0, 28.09], [32.0, 33.88], [34.0, 38.97], [42.0, 42.99]], "keep_status": [false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 31.35, 0.0], "audiomae_on_audioset": [null, null, null, null, null, [["music", 52.34], ["speech", 33.56], ["synthesizer", 5.91]], null], "duration": [1.17, 1.15, 0.17, 1.09, 1.88, 4.97, 0.99]}
|
annotations_filtered/i9Iy9amffa4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 6.4], [13.0, 13.05], [15.0, 15.9], [16.0, 16.7], [17.0, 17.68], [19.0, 19.53], [21.0, 21.59], [23.0, 24.17], [27.0, 26.81], [28.0, 28.14], [29.0, 29.49], [32.0, 32.85], [45.0, 47.6], [60.0, 61.38], [62.0, 62.43], [70.0, 69.94], [91.0, 92.7], [94.0, 94.54], [98.0, 111.82], [113.0, 144.31], [148.0, 150.16], [152.0, 153.71]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 33.25, 0.0, 0.0, 0.0, 0.0, 0.0, 31.2, 0.0, 31.36, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, [["music", 39.94], ["trombone", 19.42], ["brass instrument", 17.97]], null, null, null, null, null, [["music", 55.24], ["whistling", 29.99], ["theremin", 6.47]], null, [["music", 22.85], ["cacophony", 6.41], ["radio", 6.04]], null], "duration": [0.4, 0.05, 0.9, 0.7, 0.68, 0.53, 0.59, 1.17, -0.19, 0.14, 0.49, 0.85, 2.6, 1.38, 0.43, -0.06, 1.7, 0.54, 13.82, 31.31, 2.16, 1.71]}
|
annotations_filtered/i9KJXFbkMH0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 1.92], [4.0, 3.91], [7.0, 7.2], [12.0, 80.15]], "keep_status": [false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null], "duration": [-0.08, -0.09, 0.2, 68.15]}
|
annotations_filtered/i9NIwHKBqy0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 2.67], [25.0, 30.38], [31.0, 37.25], [39.0, 39.85], [53.0, 55.53], [61.0, 61.47], [64.0, 64.64], [70.0, 70.92], [74.0, 93.9], [95.0, 99.55], [114.0, 127.57], [130.0, 145.07], [146.0, 146.48], [148.0, 157.89], [158.0, 157.94], [158.0, 158.53]], "keep_status": [false, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 34.65, 32.77, 0.0, 31.91, 0.0, 0.0, 0.0, 30.01, 29.58, 29.74, 30.02, 0.0, 29.83, 0.0, 0.0], "audiomae_on_audioset": [null, [["whale vocalization", 45.08], ["speech", 14.03], ["animal", 6.72]], [["whale vocalization", 15.04], ["theremin", 9.6], ["speech", 8.74]], null, [["music", 71.97], ["speech", 11.51], ["musical instrument", 2.81]], null, null, null, [["music", 51.49], ["speech", 18.0], ["hum", 8.15]], [["theremin", 53.37], ["music", 33.38], ["musical instrument", 3.0]], [["music", 65.04], ["speech", 7.64], ["didgeridoo", 4.63]], [["music", 69.53], ["musical instrument", 10.85], ["theremin", 5.6]], null, [["music", 69.01], ["speech", 15.6], ["musical instrument", 1.99]], null, null], "duration": [-0.33, 5.38, 6.25, 0.85, 2.53, 0.47, 0.64, 0.92, 19.9, 4.55, 13.57, 15.07, 0.48, 9.89, -0.06, 0.53]}
|
annotations_filtered/i9_lCyG67Rc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[11.0, 10.98], [16.0, 16.93], [23.0, 24.02], [36.0, 36.56], [49.0, 49.57], [52.0, 52.44], [58.0, 61.2], [61.0, 66.88], [70.0, 69.94], [72.0, 71.98], [75.0, 77.18], [82.0, 84.65], [90.0, 92.15]], "keep_status": [false, false, false, false, false, false, true, false, false, false, true, false, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 35.16, 32.89, 0.0, 0.0, 31.06, 29.71, 29.9], "audiomae_on_audioset": [null, null, null, null, null, null, [["music", 40.74], ["foghorn", 15.5], ["theremin", 9.65]], [["foghorn", 82.07], ["music", 8.62], ["theremin", 2.12]], null, null, [["music", 22.46], ["synthesizer", 8.59], ["foghorn", 8.53]], [["music", 54.1], ["theremin", 28.32], ["musical instrument", 3.37]], [["speech", 41.88], ["vehicle", 5.78], ["crowd", 4.25]]], "duration": [-0.02, 0.93, 1.02, 0.56, 0.57, 0.44, 3.2, 5.88, -0.06, -0.02, 2.18, 2.65, 2.15]}
|