Spaces:
Build error
Build error
WIP
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- annotations_filtered/0-0MyjmphsA_filtered.json +1 -0
- annotations_filtered/0-81bpcuz44_filtered.json +1 -0
- annotations_filtered/0-EcLwovpbU_filtered.json +1 -0
- annotations_filtered/0-HM2VCdrC0_filtered.json +1 -0
- annotations_filtered/0-Whu5Hlbz8_filtered.json +1 -0
- annotations_filtered/0-lcqIuVaR8_filtered.json +1 -0
- annotations_filtered/003kLKX8n3E_filtered.json +1 -0
- annotations_filtered/009tNfQRd4o_filtered.json +1 -0
- annotations_filtered/00I2Ofraf4A_filtered.json +1 -0
- annotations_filtered/00QMS3Ldb20_filtered.json +1 -0
- annotations_filtered/00rpUGdvcY0_filtered.json +1 -0
- annotations_filtered/01ClRWyf9I4_filtered.json +1 -0
- annotations_filtered/01OfrTMVeD8_filtered.json +1 -0
- annotations_filtered/01RWw-3AKaE_filtered.json +1 -0
- annotations_filtered/01ZWXIY1mcs_filtered.json +1 -0
- annotations_filtered/01ovMSvDohw_filtered.json +1 -0
- annotations_filtered/01qhgR0WsnA_filtered.json +1 -0
- annotations_filtered/02064E1SHtQ_filtered.json +1 -0
- annotations_filtered/029Mdp9jYiY_filtered.json +1 -0
- annotations_filtered/02A2a-aEvmI_filtered.json +1 -0
- annotations_filtered/02AyhONR_DQ_filtered.json +1 -0
- annotations_filtered/02DzpeBF4es_filtered.json +1 -0
- annotations_filtered/02Or-Hx3yqc_filtered.json +1 -0
- annotations_filtered/03L12Mqkzg8_filtered.json +1 -0
- annotations_filtered/03NoI9KiZOk_filtered.json +1 -0
- annotations_filtered/03QHVB_n6N8_filtered.json +1 -0
- annotations_filtered/03Rl5exupSo_filtered.json +1 -0
- annotations_filtered/03WbdaZCGAA_filtered.json +1 -0
- annotations_filtered/03a-vG6wHDI_filtered.json +1 -0
- annotations_filtered/03jGqiF-0Gg_filtered.json +1 -0
- annotations_filtered/03uEq5dKcFs_filtered.json +1 -0
- annotations_filtered/049R_wOazQI_filtered.json +1 -0
- annotations_filtered/04BZh6E-Nck_filtered.json +1 -0
- annotations_filtered/04s96zDt1RE_filtered.json +1 -0
- annotations_filtered/04uN57jOg-Q_filtered.json +1 -0
- annotations_filtered/04xSMg03sZ0_filtered.json +1 -0
- annotations_filtered/04zHzVrubHk_filtered.json +1 -0
- annotations_filtered/05-e-YTw4r8_filtered.json +1 -0
- annotations_filtered/056HlHORCIU_filtered.json +1 -0
- annotations_filtered/05O77oX6bQE_filtered.json +1 -0
- annotations_filtered/05foBuX_brU_filtered.json +1 -0
- annotations_filtered/05nQ6FtAaYg_filtered.json +1 -0
- annotations_filtered/05qid4p_cfw_filtered.json +1 -0
- annotations_filtered/0668UNhYjXg_filtered.json +1 -0
- annotations_filtered/06B3m6L5fFw_filtered.json +1 -0
- annotations_filtered/06DLNzLaTlE_filtered.json +1 -0
- annotations_filtered/06Its9LhIHQ_filtered.json +1 -0
- annotations_filtered/06L5y4Z9KcE_filtered.json +1 -0
- annotations_filtered/06lJhEc7zIo_filtered.json +1 -0
- annotations_filtered/06qgu4XoNL4_filtered.json +1 -0
annotations_filtered/0-0MyjmphsA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[24.0, 29.22], [31.0, 31.62], [39.0, 49.87], [52.0, 66.21], [67.0, 79.44], [87.0, 87.57], [88.0, 88.74], [94.0, 93.93], [94.0, 93.97]], "keep_status": [false, false, false, false, false, false, false, false, false], "silence_prob": [34.68, 0.0, 31.74, 32.94, 98.36, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["speech", 62.3], ["crowd", 3.99], ["cheering", 3.84]], null, [["speech", 33.18], ["explosion", 20.64], ["artillery fire", 17.56]], [["explosion", 56.22], ["burst, pop", 15.34], ["speech", 7.0]], null, null, null, null, null], "duration": [5.22, 0.62, 10.87, 14.21, 12.44, 0.57, 0.74, -0.07, -0.03]}
|
annotations_filtered/0-81bpcuz44_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 4.16], [5.0, 81.45], [85.0, 91.74], [94.0, 97.38], [101.0, 139.5], [140.0, 141.42], [142.0, 162.73]], "keep_status": [false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 64.07, 40.93, 0.0, 0.0, 36.75], "audiomae_on_audioset": [null, null, null, [["music", 59.07], ["didgeridoo", 18.25], ["musical instrument", 7.92]], null, null, [["music", 76.76], ["boing", 5.26], ["didgeridoo", 3.37]]], "duration": [1.16, 76.45, 6.74, 3.38, 38.5, 1.42, 20.73]}
|
annotations_filtered/0-EcLwovpbU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[47.0, 47.04], [48.0, 73.89], [75.0, 95.35], [97.0, 99.5], [100.0, 126.87]], "keep_status": [false, false, false, true, false], "silence_prob": [0.0, 32.04, 30.73, 32.85, 32.24], "audiomae_on_audioset": [null, [["music", 72.36], ["didgeridoo", 7.21], ["musical instrument", 2.36]], [["music", 46.13], ["didgeridoo", 29.91], ["throbbing", 2.96]], [["music", 53.59], ["speech", 6.78], ["hum", 4.59]], [["hum", 55.11], ["throbbing", 20.92], ["mains hum", 8.2]]], "duration": [0.04, 25.89, 20.35, 2.5, 26.87]}
|
annotations_filtered/0-HM2VCdrC0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 8.72], [13.0, 13.83], [23.0, 23.73], [30.0, 31.18], [32.0, 32.68], [33.0, 33.83], [50.0, 51.24], [53.0, 54.03], [59.0, 59.71], [60.0, 61.05], [68.0, 69.33], [72.0, 72.55], [75.0, 75.96], [82.0, 95.12], [96.0, 98.58], [99.0, 100.58], [104.0, 104.58], [108.0, 109.44], [111.0, 117.71]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 95.64, 91.98, 0.0, 0.0, 0.0, 96.04], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.72, 0.83, 0.73, 1.18, 0.68, 0.83, 1.24, 1.03, 0.71, 1.05, 1.33, 0.55, 0.96, 13.12, 2.58, 1.58, 0.58, 1.44, 6.71]}
|
annotations_filtered/0-Whu5Hlbz8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[18.0, 18.33], [22.0, 30.75], [31.0, 30.79], [33.0, 34.32], [36.0, 36.8], [39.0, 39.41], [40.0, 40.81], [42.0, 43.07], [45.0, 46.38], [48.0, 48.49], [51.0, 52.22], [53.0, 54.21], [57.0, 56.83], [57.0, 59.88], [60.0, 61.48], [64.0, 65.85], [69.0, 71.04], [72.0, 72.62], [74.0, 75.59], [77.0, 77.5], [79.0, 79.2], [80.0, 80.25], [81.0, 82.12], [83.0, 83.67], [84.0, 85.77], [88.0, 88.94], [90.0, 89.88], [92.0, 108.6], [109.0, 117.73]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 64.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 72.9, 0.0, 0.0, 99.21, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 89.19, 47.62], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 68.71], ["fly, housefly", 4.16], ["explosion", 2.49]]], "duration": [0.33, 8.75, -0.21, 1.32, 0.8, 0.41, 0.81, 1.07, 1.38, 0.49, 1.22, 1.21, -0.17, 2.88, 1.48, 1.85, 2.04, 0.62, 1.59, 0.5, 0.2, 0.25, 1.12, 0.67, 1.77, 0.94, -0.12, 16.6, 8.73]}
|
annotations_filtered/0-lcqIuVaR8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.98], [5.0, 5.36], [6.0, 6.56], [11.0, 11.16], [20.0, 20.63], [22.0, 22.08], [27.0, 27.45], [28.0, 28.86], [45.0, 45.57], [47.0, 46.75], [55.0, 58.83]], "keep_status": [false, false, false, false, false, false, false, false, false, false, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 31.46], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, [["vehicle", 20.66], ["car", 13.55], ["noise", 7.39]]], "duration": [0.98, 0.36, 0.56, 0.16, 0.63, 0.08, 0.45, 0.86, 0.57, -0.25, 3.83]}
|
annotations_filtered/003kLKX8n3E_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.25], [4.0, 4.38], [5.0, 5.81], [6.0, 11.26], [12.0, 13.05], [14.0, 14.54], [16.0, 32.24], [33.0, 34.59], [37.0, 43.11], [46.0, 48.96], [55.0, 55.9], [57.0, 60.0], [60.0, 61.18], [62.0, 63.81], [66.0, 66.51], [68.0, 76.81], [77.0, 76.96], [77.0, 80.27], [81.0, 87.34], [89.0, 89.53], [90.0, 90.17], [91.0, 91.45], [92.0, 92.25], [93.0, 92.91], [94.0, 95.18], [96.0, 99.49], [100.0, 100.11], [100.0, 101.36], [102.0, 105.44], [106.0, 107.76], [109.0, 112.04], [113.0, 115.4], [117.0, 118.49], [120.0, 120.28], [121.0, 121.44], [122.0, 122.66], [123.0, 123.28], [125.0, 124.88], [125.0, 125.74]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 60.89, 0.0, 0.0, 40.68, 0.0, 58.13, 75.55, 0.0, 57.4, 0.0, 0.0, 0.0, 51.23, 0.0, 70.86, 53.72, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 47.43, 0.0, 0.0, 64.52, 0.0, 56.18, 56.18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, [["music", 64.46], ["didgeridoo", 8.5], ["musical instrument", 5.71]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 45.04], ["hum", 12.35], ["throbbing", 6.37]], null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.25, 0.38, 0.81, 5.26, 1.05, 0.54, 16.24, 1.59, 6.11, 2.96, 0.9, 3.0, 1.18, 1.81, 0.51, 8.81, -0.04, 3.27, 6.34, 0.53, 0.17, 0.45, 0.25, -0.09, 1.18, 3.49, 0.11, 1.36, 3.44, 1.76, 3.04, 2.4, 1.49, 0.28, 0.44, 0.66, 0.28, -0.12, 0.74]}
|
annotations_filtered/009tNfQRd4o_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 17.39], [35.0, 35.51], [54.0, 54.58], [56.0, 56.1], [58.0, 58.48], [63.0, 64.3], [68.0, 68.76], [74.0, 73.84], [77.0, 76.72], [91.0, 90.61], [96.0, 96.09], [98.0, 97.97], [104.0, 104.55], [108.0, 108.28], [112.0, 113.44], [116.0, 116.41], [126.0, 128.97], [130.0, 131.58]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false], "silence_prob": [32.98, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 33.26, 0.0], "audiomae_on_audioset": [[["music", 63.22], ["musical instrument", 6.87], ["clarinet", 4.07]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 46.69], ["didgeridoo", 8.81], ["theremin", 6.4]], null], "duration": [9.39, 0.51, 0.58, 0.1, 0.48, 1.3, 0.76, -0.16, -0.28, -0.39, 0.09, -0.03, 0.55, 0.28, 1.44, 0.41, 2.97, 1.58]}
|
annotations_filtered/00I2Ofraf4A_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 7.25], [10.0, 21.51], [22.0, 22.4], [24.0, 24.7], [25.0, 30.84], [32.0, 33.93], [34.0, 35.92], [36.0, 37.25], [40.0, 42.47], [43.0, 47.21], [49.0, 50.43], [53.0, 57.7], [62.0, 62.43], [65.0, 75.15], [78.0, 82.0], [86.0, 92.55], [98.0, 98.39], [99.0, 99.98], [105.0, 106.22], [111.0, 113.86], [114.0, 122.94], [125.0, 140.32], [142.0, 142.25]], "keep_status": [true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, true, false, false, false], "silence_prob": [41.2, 28.54, 0.0, 0.0, 35.62, 0.0, 0.0, 0.0, 54.36, 51.44, 0.0, 44.93, 0.0, 52.98, 55.11, 38.32, 0.0, 0.0, 0.0, 47.43, 39.94, 37.8, 0.0], "audiomae_on_audioset": [[["mechanical fan", 18.59], ["mains hum", 14.19], ["hum", 8.8]], [["music", 21.94], ["hum", 10.94], ["throbbing", 9.6]], null, null, [["sidetone", 40.89], ["speech", 37.8], ["music", 6.72]], null, null, null, null, null, null, [["sidetone", 43.6], ["speech", 20.46], ["hum", 6.8]], null, null, null, [["speech", 23.75], ["fly, housefly", 23.68], ["insect", 18.5]], null, null, null, [["speech", 20.82], ["hum", 18.78], ["rumble", 8.39]], [["speech", 64.62], ["music", 11.96], ["sidetone", 6.08]], [["speech", 56.82], ["hum", 16.89], ["sidetone", 7.17]], null], "duration": [2.25, 11.51, 0.4, 0.7, 5.84, 1.93, 1.92, 1.25, 2.47, 4.21, 1.43, 4.7, 0.43, 10.15, 4.0, 6.55, 0.39, 0.98, 1.22, 2.86, 8.94, 15.32, 0.25]}
|
annotations_filtered/00QMS3Ldb20_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[50.0, 57.5], [58.0, 59.88], [61.0, 64.22], [65.0, 65.91], [67.0, 70.65], [71.0, 73.95], [76.0, 105.14], [106.0, 136.36], [139.0, 156.24], [159.0, 203.03], [204.0, 204.42], [207.0, 209.16], [211.0, 215.13]], "keep_status": [false, false, true, false, false, false, false, false, true, false, false, true, false], "silence_prob": [29.2, 0.0, 29.97, 0.0, 29.52, 28.74, 28.53, 0.0, 28.74, 0.0, 0.0, 38.43, 33.42], "audiomae_on_audioset": [[["music", 53.55], ["speech", 21.69], ["hum", 4.38]], null, [["music", 25.39], ["hum", 17.56], ["mains hum", 16.62]], null, [["speech", 45.16], ["music", 44.0], ["musical instrument", 2.37]], [["throbbing", 30.63], ["music", 27.0], ["hum", 21.52]], [["music", 58.26], ["musical instrument", 11.4], ["effects unit", 2.72]], null, [["music", 46.43], ["mains hum", 17.38], ["hum", 5.1]], null, null, [["hum", 37.84], ["mains hum", 18.79], ["throbbing", 5.32]], [["hum", 45.32], ["mains hum", 18.49], ["throbbing", 9.37]]], "duration": [7.5, 1.88, 3.22, 0.91, 3.65, 2.95, 29.14, 30.36, 17.24, 44.03, 0.42, 2.16, 4.13]}
|
annotations_filtered/00rpUGdvcY0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 24.24], [28.0, 30.86], [38.0, 60.07], [61.0, 68.28], [73.0, 77.21], [78.0, 80.38], [88.0, 95.98], [97.0, 100.62]], "keep_status": [false, false, true, false, true, true, false, true], "silence_prob": [31.27, 30.37, 31.14, 31.47, 32.37, 30.23, 31.73, 36.17], "audiomae_on_audioset": [[["music", 63.73], ["smash, crash", 5.77], ["cacophony", 4.25]], [["music", 64.9], ["musical instrument", 5.34], ["smash, crash", 2.89]], [["music", 46.23], ["didgeridoo", 12.01], ["theremin", 10.29]], [["speech", 45.81], ["music", 21.66], ["thunk", 12.73]], [["music", 36.27], ["breaking", 16.28], ["smash, crash", 14.16]], [["music", 34.33], ["breaking", 27.74], ["crack", 4.61]], [["music", 55.63], ["hum", 10.76], ["mains hum", 4.77]], [["music", 51.15], ["fart", 6.72], ["musical instrument", 4.45]]], "duration": [14.24, 2.86, 22.07, 7.28, 4.21, 2.38, 7.98, 3.62]}
|
annotations_filtered/01ClRWyf9I4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 1.0], [2.0, 2.79], [5.0, 12.55], [13.0, 18.15], [20.0, 27.84], [29.0, 30.69], [31.0, 31.41], [33.0, 43.26], [49.0, 66.55], [68.0, 71.74], [73.0, 80.43], [81.0, 84.55], [85.0, 92.4], [95.0, 100.79], [104.0, 107.86], [110.0, 110.88], [113.0, 114.18], [122.0, 133.86], [135.0, 136.63], [140.0, 142.37], [144.0, 147.16], [148.0, 150.03], [152.0, 154.04], [157.0, 157.81], [162.0, 165.55], [168.0, 184.4], [186.0, 201.11], [204.0, 209.68], [210.0, 232.68], [234.0, 239.87], [241.0, 253.86]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 45.65, 81.71, 99.31, 0.0, 0.0, 83.88, 40.95, 80.82, 91.64, 78.89, 87.19, 89.01, 92.97, 0.0, 0.0, 91.3, 0.0, 75.55, 91.64, 89.01, 93.6, 0.0, 94.81, 83.52, 56.93, 58.64, 39.05, 88.64, 92.64], "audiomae_on_audioset": [null, null, [["music", 72.43], ["plucked string instrument", 2.37], ["guitar", 2.31]], null, null, null, null, null, [["music", 57.53], ["theremin", 18.38], ["whale vocalization", 8.56]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 82.77], ["scary music", 3.53], ["theremin", 2.28]], null, null], "duration": [1.0, 0.79, 7.55, 5.15, 7.84, 1.69, 0.41, 10.26, 17.55, 3.74, 7.43, 3.55, 7.4, 5.79, 3.86, 0.88, 1.18, 11.86, 1.63, 2.37, 3.16, 2.03, 2.04, 0.81, 3.55, 16.4, 15.11, 5.68, 22.68, 5.87, 12.86]}
|
annotations_filtered/01OfrTMVeD8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 4.06], [6.0, 5.81], [7.0, 8.5], [11.0, 12.55], [14.0, 15.23], [17.0, 17.86], [19.0, 19.18], [20.0, 21.46], [23.0, 24.24], [25.0, 25.52], [27.0, 28.07], [31.0, 31.55], [33.0, 33.47], [36.0, 36.19], [38.0, 37.89], [39.0, 39.07], [41.0, 42.35], [43.0, 43.98], [45.0, 46.18], [48.0, 48.17], [49.0, 49.59], [51.0, 52.42], [53.0, 53.3], [54.0, 55.17], [58.0, 57.96], [60.0, 60.94], [64.0, 64.62], [66.0, 66.36], [68.0, 68.0], [70.0, 70.09], [72.0, 71.68], [74.0, 74.36], [78.0, 78.29], [80.0, 80.28], [83.0, 84.03], [85.0, 85.67], [87.0, 87.13], [89.0, 88.86], [91.0, 91.93], [94.0, 94.53], [96.0, 96.8], [97.0, 97.65], [102.0, 103.55], [105.0, 106.25], [107.0, 107.86], [109.0, 109.27], [110.0, 111.21], [113.0, 114.76], [117.0, 117.34], [119.0, 120.45], [121.0, 121.93], [124.0, 124.31], [126.0, 127.3], [128.0, 128.53], [130.0, 131.13], [134.0, 134.81], [135.0, 135.46], [137.0, 136.95], [140.0, 140.88], [144.0, 144.15], [147.0, 147.33], [152.0, 151.82], [153.0, 154.4], [162.0, 163.16], [164.0, 164.81], [165.0, 165.77], [167.0, 167.27], [169.0, 169.11], [170.0, 169.94], [172.0, 172.15], [174.0, 174.82], [175.0, 175.8], [178.0, 178.92], [180.0, 181.72], [183.0, 187.0], [188.0, 188.64], [191.0, 191.98], [194.0, 195.07], [196.0, 196.94]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.06, -0.19, 1.5, 1.55, 1.23, 0.86, 0.18, 1.46, 1.24, 0.52, 1.07, 0.55, 0.47, 0.19, -0.11, 0.07, 1.35, 0.98, 1.18, 0.17, 0.59, 1.42, 0.3, 1.17, -0.04, 0.94, 0.62, 0.36, 0.0, 0.09, -0.32, 0.36, 0.29, 0.28, 1.03, 0.67, 0.13, -0.14, 0.93, 0.53, 0.8, 0.65, 1.55, 1.25, 0.86, 0.27, 1.21, 1.76, 0.34, 1.45, 0.93, 0.31, 1.3, 0.53, 1.13, 0.81, 0.46, -0.05, 0.88, 0.15, 0.33, -0.18, 1.4, 1.16, 0.81, 0.77, 0.27, 0.11, -0.06, 0.15, 0.82, 0.8, 0.92, 1.72, 4.0, 0.64, 0.98, 1.07, 0.94]}
|
annotations_filtered/01RWw-3AKaE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [], "keep_status": [], "silence_prob": [], "audiomae_on_audioset": [], "duration": []}
|
annotations_filtered/01ZWXIY1mcs_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 8.75], [10.0, 10.69], [11.0, 29.0], [31.0, 31.83], [34.0, 43.8], [46.0, 90.88]], "keep_status": [false, false, true, false, false, false], "silence_prob": [0.0, 0.0, 35.06, 0.0, 33.41, 0.0], "audiomae_on_audioset": [null, null, [["didgeridoo", 50.65], ["music", 12.04], ["throbbing", 6.76]], null, [["music", 47.13], ["gong", 20.76], ["timpani", 7.14]], null], "duration": [0.75, 0.69, 18.0, 0.83, 9.8, 44.88]}
|
annotations_filtered/01ovMSvDohw_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[21.0, 33.02], [35.0, 35.73], [36.0, 65.1], [65.0, 80.08], [81.0, 81.82], [82.0, 86.78], [88.0, 88.28], [89.0, 89.36], [90.0, 92.67], [94.0, 94.14], [94.0, 95.17], [96.0, 96.57], [98.0, 99.74], [102.0, 104.99]], "keep_status": [true, false, true, false, false, true, false, false, true, false, false, false, false, false], "silence_prob": [42.11, 0.0, 33.29, 34.92, 0.0, 37.06, 0.0, 0.0, 36.53, 0.0, 0.0, 0.0, 0.0, 51.82], "audiomae_on_audioset": [[["speech", 28.06], ["hum", 17.86], ["music", 9.98]], null, [["music", 48.39], ["speech", 10.94], ["buzz", 4.4]], [["speech", 57.0], ["music", 14.08], ["electric shaver, electric razor", 6.24]], null, [["speech", 20.12], ["hum", 6.43], ["horse", 5.42]], null, null, [["speech", 27.23], ["insect", 11.54], ["bee, wasp, etc.", 11.19]], null, null, null, null, null], "duration": [12.02, 0.73, 29.1, 15.08, 0.82, 4.78, 0.28, 0.36, 2.67, 0.14, 1.17, 0.57, 1.74, 2.99]}
|
annotations_filtered/01qhgR0WsnA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 38.45], [38.0, 48.96], [52.0, 52.74], [55.0, 92.96], [97.0, 97.78], [100.0, 113.44], [116.0, 141.96]], "keep_status": [false, true, false, false, false, true, true], "silence_prob": [0.0, 32.06, 0.0, 0.0, 0.0, 31.08, 30.85], "audiomae_on_audioset": [null, [["music", 28.72], ["speech", 21.6], ["hum", 18.25]], null, null, null, [["speech", 29.02], ["buzz", 19.39], ["music", 16.9]], [["music", 28.64], ["throbbing", 10.14], ["hum", 9.47]]], "duration": [36.45, 10.96, 0.74, 37.96, 0.78, 13.44, 25.96]}
|
annotations_filtered/02064E1SHtQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 11.08], [27.0, 28.63], [31.0, 31.82], [33.0, 34.69], [36.0, 37.05], [40.0, 41.44], [46.0, 48.88], [51.0, 90.36], [94.0, 95.2], [98.0, 98.9], [100.0, 103.47], [104.0, 105.33], [106.0, 106.3], [107.0, 108.77], [109.0, 110.86], [111.0, 115.84], [118.0, 126.4]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 54.36, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 99.98, 100.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.08, 1.63, 0.82, 1.69, 1.05, 1.44, 2.88, 39.36, 1.2, 0.9, 3.47, 1.33, 0.3, 1.77, 1.86, 4.84, 8.4]}
|
annotations_filtered/029Mdp9jYiY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 3.91], [5.0, 12.24], [14.0, 15.04], [16.0, 16.9], [17.0, 20.87], [21.0, 22.18], [23.0, 26.62], [27.0, 31.65], [34.0, 34.25], [35.0, 36.0], [37.0, 38.18], [39.0, 39.29], [40.0, 42.55], [43.0, 53.43], [55.0, 56.3], [58.0, 65.31], [67.0, 69.67], [71.0, 72.33], [74.0, 80.82], [83.0, 83.39], [84.0, 85.82], [89.0, 89.85], [94.0, 98.25], [99.0, 118.13], [120.0, 133.71]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 96.04, 0.0, 0.0, 79.76, 0.0, 97.22, 98.99, 0.0, 0.0, 0.0, 0.0, 99.96, 96.77, 0.0, 99.82, 99.36, 0.0, 99.62, 0.0, 0.0, 0.0, 99.62, 91.98, 97.73], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.91, 7.24, 1.04, 0.9, 3.87, 1.18, 3.62, 4.65, 0.25, 1.0, 1.18, 0.29, 2.55, 10.43, 1.3, 7.31, 2.67, 1.33, 6.82, 0.39, 1.82, 0.85, 4.25, 19.13, 13.71]}
|
annotations_filtered/02A2a-aEvmI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.1], [4.0, 4.33], [7.0, 7.33], [16.0, 17.19], [18.0, 18.35], [19.0, 20.8], [22.0, 22.52], [25.0, 28.24], [29.0, 30.43], [31.0, 31.77], [38.0, 43.04], [46.0, 46.63], [47.0, 48.98], [50.0, 49.89], [51.0, 51.58], [54.0, 54.7], [56.0, 55.8], [56.0, 55.98], [70.0, 72.27], [73.0, 73.08], [74.0, 74.61], [75.0, 74.66], [75.0, 75.37], [75.0, 75.57], [79.0, 79.86], [81.0, 80.59], [81.0, 80.65], [81.0, 81.89], [84.0, 84.06], [85.0, 85.01], [86.0, 88.37], [89.0, 88.87], [91.0, 90.81], [91.0, 94.68], [98.0, 98.14], [98.0, 98.17], [98.0, 99.72], [100.0, 101.9], [104.0, 104.46], [108.0, 108.72], [119.0, 119.23], [127.0, 128.43], [130.0, 132.31], [132.0, 132.53], [137.0, 138.79], [157.0, 157.52], [174.0, 174.33], [176.0, 177.13], [180.0, 180.47], [186.0, 186.56], [202.0, 202.04], [204.0, 204.2]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 41.78, 0.0, 0.0, 36.68, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 41.44, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.28, 0.0, 0.0, 31.34, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.82, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, [["music", 66.46], ["effects unit", 8.58], ["distortion", 4.68]], null, null, [["hum", 37.08], ["mains hum", 24.71], ["music", 11.21]], null, null, null, null, null, null, null, [["music", 21.35], ["whale vocalization", 9.24], ["theremin", 7.88]], null, null, null, null, null, null, null, null, null, null, null, [["radio", 48.02], ["speech", 46.87], ["telephone", 0.95]], null, null, [["radio", 61.6], ["speech", 30.85], ["sidetone", 2.92]], null, null, null, null, null, null, null, null, [["radio", 49.37], ["speech", 39.15], ["sidetone", 6.91]], null, null, null, null, null, null, null, null, null], "duration": [0.1, 0.33, 0.33, 1.19, 0.35, 1.8, 0.52, 3.24, 1.43, 0.77, 5.04, 0.63, 1.98, -0.11, 0.58, 0.7, -0.2, -0.02, 2.27, 0.08, 0.61, -0.34, 0.37, 0.57, 0.86, -0.41, -0.35, 0.89, 0.06, 0.01, 2.37, -0.13, -0.19, 3.68, 0.14, 0.17, 1.72, 1.9, 0.46, 0.72, 0.23, 1.43, 2.31, 0.53, 1.79, 0.52, 0.33, 1.13, 0.47, 0.56, 0.04, 0.2]}
|
annotations_filtered/02AyhONR_DQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.04], [7.0, 7.37], [8.0, 8.41], [15.0, 20.33], [28.0, 27.99], [32.0, 32.71], [35.0, 35.46], [37.0, 38.15], [38.0, 40.34], [43.0, 44.59], [45.0, 46.36], [51.0, 58.24], [60.0, 76.01], [76.0, 76.45], [77.0, 100.4], [105.0, 106.52], [113.0, 113.71], [115.0, 115.94], [118.0, 118.44], [119.0, 121.54]], "keep_status": [false, false, false, false, false, false, false, false, true, false, false, false, true, false, false, false, false, false, false, true], "silence_prob": [0.0, 0.0, 0.0, 35.1, 0.0, 0.0, 0.0, 0.0, 42.06, 0.0, 0.0, 42.74, 39.05, 0.0, 31.44, 0.0, 0.0, 0.0, 0.0, 41.36], "audiomae_on_audioset": [null, null, null, [["speech", 47.38], ["music", 24.17], ["animal", 8.14]], null, null, null, null, [["music", 28.5], ["burping, eructation", 22.16], ["effects unit", 14.45]], null, null, [["animal", 38.69], ["music", 23.98], ["dog", 9.46]], [["animal", 24.79], ["ding", 10.85], ["music", 10.73]], null, [["music", 67.66], ["electronic music", 5.3], ["boing", 5.22]], null, null, null, null, [["music", 38.73], ["musical instrument", 6.99], ["effects unit", 6.34]]], "duration": [0.04, 0.37, 0.41, 5.33, -0.01, 0.71, 0.46, 1.15, 2.34, 1.59, 1.36, 7.24, 16.01, 0.45, 23.4, 1.52, 0.71, 0.94, 0.44, 2.54]}
|
annotations_filtered/02DzpeBF4es_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 3.16], [4.0, 5.34], [7.0, 7.25], [9.0, 9.73], [13.0, 13.31], [20.0, 21.22], [23.0, 23.65], [25.0, 25.54], [26.0, 26.74], [28.0, 31.56], [32.0, 32.88], [36.0, 36.69], [40.0, 42.33], [48.0, 48.24], [55.0, 56.79], [65.0, 64.91], [66.0, 67.49], [71.0, 71.47], [79.0, 80.7], [84.0, 85.7], [86.0, 92.48], [97.0, 98.37], [101.0, 101.29], [105.0, 106.2], [108.0, 108.78]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 76.2, 0.0, 0.0, 85.72, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 64.52, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.16, 1.34, 0.25, 0.73, 0.31, 1.22, 0.65, 0.54, 0.74, 3.56, 0.88, 0.69, 2.33, 0.24, 1.79, -0.09, 1.49, 0.47, 1.7, 1.7, 6.48, 1.37, 0.29, 1.2, 0.78]}
|
annotations_filtered/02Or-Hx3yqc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.22], [10.0, 14.27], [15.0, 17.9], [21.0, 21.09], [22.0, 23.67], [26.0, 26.69], [31.0, 31.06], [52.0, 53.22], [57.0, 57.97], [63.0, 76.08], [79.0, 81.58], [83.0, 83.86], [86.0, 98.54], [99.0, 100.62], [101.0, 105.75], [106.0, 109.53], [110.0, 113.27], [114.0, 115.77], [117.0, 117.96], [119.0, 119.8], [120.0, 125.96], [126.0, 126.98]], "keep_status": [false, true, true, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 34.5, 30.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 38.62, 36.17, 0.0, 55.46, 0.0, 59.42, 57.97, 76.37, 0.0, 0.0, 0.0, 46.54, 0.0], "audiomae_on_audioset": [null, [["speech", 49.9], ["music", 13.08], ["hum", 6.32]], [["music", 32.64], ["speech", 12.52], ["bow-wow", 4.51]], null, null, null, null, null, null, [["mains hum", 52.12], ["hum", 21.12], ["speech", 12.05]], [["fart", 17.55], ["music", 11.33], ["creak", 7.07]], null, null, null, null, null, null, null, null, null, [["speech", 56.14], ["mains hum", 10.2], ["telephone", 7.34]], null], "duration": [0.22, 4.27, 2.9, 0.09, 1.67, 0.69, 0.06, 1.22, 0.97, 13.08, 2.58, 0.86, 12.54, 1.62, 4.75, 3.53, 3.27, 1.77, 0.96, 0.8, 5.96, 0.98]}
|
annotations_filtered/03L12Mqkzg8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[18.0, 20.16], [23.0, 29.93], [31.0, 32.36], [34.0, 38.26], [39.0, 40.51], [41.0, 56.47], [59.0, 60.39], [62.0, 62.67], [65.0, 64.74], [66.0, 82.46], [84.0, 84.25], [86.0, 87.1], [88.0, 93.85], [96.0, 106.32], [107.0, 108.23], [110.0, 111.89], [115.0, 117.42]], "keep_status": [false, false, false, true, false, true, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [91.81, 99.98, 0.0, 48.95, 0.0, 39.12, 0.0, 0.0, 0.0, 33.05, 0.0, 0.0, 44.63, 32.7, 0.0, 0.0, 36.34], "audiomae_on_audioset": [null, null, null, [["music", 33.34], ["speech", 23.71], ["musical instrument", 7.33]], null, [["music", 51.48], ["speech", 7.18], ["musical instrument", 5.96]], null, null, null, [["music", 37.94], ["synthesizer", 28.54], ["hum", 7.21]], null, null, [["music", 65.44], ["synthesizer", 9.84], ["drum machine", 6.67]], [["music", 64.75], ["speech", 9.08], ["throbbing", 3.92]], null, null, [["hum", 32.7], ["mains hum", 25.97], ["music", 13.33]]], "duration": [2.16, 6.93, 1.36, 4.26, 1.51, 15.47, 1.39, 0.67, -0.26, 16.46, 0.25, 1.1, 5.85, 10.32, 1.23, 1.89, 2.42]}
|
annotations_filtered/03NoI9KiZOk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 6.86], [9.0, 10.13], [13.0, 14.39], [16.0, 22.18], [26.0, 31.78], [32.0, 34.7], [35.0, 36.15], [36.0, 37.07], [38.0, 39.24], [40.0, 43.01], [45.0, 45.12], [48.0, 48.71], [50.0, 50.48], [52.0, 52.79], [59.0, 62.23], [65.0, 65.75], [73.0, 74.06], [77.0, 76.77], [80.0, 84.38], [86.0, 87.34], [88.0, 91.54], [96.0, 97.06], [99.0, 100.31], [102.0, 102.42], [105.0, 105.7], [107.0, 107.6], [110.0, 110.76], [115.0, 119.2], [119.0, 119.35], [119.0, 119.45], [120.0, 119.7]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 100.0, 98.73, 99.94, 0.0, 0.0, 0.0, 97.73, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 81.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 93.91, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.86, 1.13, 1.39, 6.18, 5.78, 2.7, 1.15, 1.07, 1.24, 3.01, 0.12, 0.71, 0.48, 0.79, 3.23, 0.75, 1.06, -0.23, 4.38, 1.34, 3.54, 1.06, 1.31, 0.42, 0.7, 0.6, 0.76, 4.2, 0.35, 0.45, -0.3]}
|
annotations_filtered/03QHVB_n6N8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.7], [5.0, 5.64], [7.0, 7.21], [8.0, 8.68], [10.0, 11.35], [15.0, 15.01], [24.0, 25.56], [50.0, 49.91], [51.0, 51.31], [52.0, 52.98], [61.0, 61.87], [62.0, 63.58], [65.0, 66.87], [71.0, 74.33], [79.0, 84.1], [101.0, 104.43], [108.0, 107.81], [110.0, 109.76], [113.0, 115.79], [117.0, 117.49], [118.0, 119.08]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 40.4, 44.29, 57.17, 0.0, 0.0, 50.56, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 31.33], ["livestock, farm animals, working animals", 8.76], ["honk", 8.07]], [["music", 51.62], ["didgeridoo", 14.47], ["speech", 10.27]], null, null, null, null, null, null], "duration": [0.7, 0.64, 0.21, 0.68, 1.35, 0.01, 1.56, -0.09, 0.31, 0.98, 0.87, 1.58, 1.87, 3.33, 5.1, 3.43, -0.19, -0.24, 2.79, 0.49, 1.08]}
|
annotations_filtered/03Rl5exupSo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 15.03], [16.0, 17.36], [18.0, 19.06], [20.0, 20.87], [24.0, 27.9], [29.0, 30.2], [32.0, 32.14], [37.0, 37.64], [41.0, 40.95], [47.0, 46.85], [48.0, 48.07], [49.0, 49.4], [49.0, 49.45], [64.0, 64.03], [65.0, 65.06], [67.0, 66.87], [68.0, 69.13], [70.0, 71.15], [77.0, 77.01], [78.0, 79.56], [80.0, 80.52], [82.0, 83.03], [86.0, 86.53], [88.0, 88.84], [100.0, 101.92]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [44.96, 0.0, 0.0, 0.0, 99.48, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["speech", 81.87], ["music", 4.05], ["electric shaver, electric razor", 1.46]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [11.03, 1.36, 1.06, 0.87, 3.9, 1.2, 0.14, 0.64, -0.05, -0.15, 0.07, 0.4, 0.45, 0.03, 0.06, -0.13, 1.13, 1.15, 0.01, 1.56, 0.52, 1.03, 0.53, 0.84, 1.92]}
|
annotations_filtered/03WbdaZCGAA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 7.5], [8.0, 31.95], [33.0, 34.21], [37.0, 49.27], [57.0, 57.28], [58.0, 57.87], [59.0, 67.31], [69.0, 69.25], [72.0, 78.0], [78.0, 78.73], [80.0, 80.01]], "keep_status": [false, false, false, true, false, false, false, false, true, false, false], "silence_prob": [0.0, 29.55, 0.0, 28.64, 0.0, 0.0, 29.84, 0.0, 28.52, 0.0, 0.0], "audiomae_on_audioset": [null, [["mains hum", 63.04], ["hum", 25.08], ["throbbing", 3.14]], null, [["burst, pop", 22.31], ["explosion", 19.99], ["fireworks", 10.51]], null, null, [["explosion", 35.26], ["speech", 24.99], ["burst, pop", 10.63]], null, [["music", 16.66], ["livestock, farm animals, working animals", 13.5], ["speech", 9.63]], null, null], "duration": [0.5, 23.95, 1.21, 12.27, 0.28, -0.13, 8.31, 0.25, 6.0, 0.73, 0.01]}
|
annotations_filtered/03a-vG6wHDI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.1], [9.0, 11.16], [12.0, 12.75], [20.0, 21.59], [25.0, 26.08], [26.0, 28.32], [30.0, 30.64], [31.0, 33.56], [35.0, 37.62], [41.0, 42.13], [42.0, 43.14], [43.0, 45.5], [63.0, 83.4], [84.0, 90.26], [92.0, 92.53], [96.0, 96.65], [100.0, 106.81], [113.0, 113.64], [116.0, 116.88], [126.0, 127.47], [134.0, 137.64], [141.0, 142.67], [144.0, 146.8], [148.0, 148.81], [151.0, 151.8], [154.0, 154.75], [159.0, 159.65], [162.0, 164.78], [168.0, 168.4], [171.0, 171.44]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 36.67, 0.0, 0.0, 0.0, 53.04, 0.0, 45.78, 89.54, 0.0, 0.0, 56.63, 38.7, 99.93, 0.0, 0.0, 39.78, 0.0, 0.0, 0.0, 81.71, 0.0, 32.03, 0.0, 0.0, 0.0, 0.0, 34.61, 0.0, 0.0], "audiomae_on_audioset": [null, [["speech", 40.69], ["radio", 29.94], ["scratching (performance technique)", 8.77]], null, null, null, null, null, [["speech", 43.77], ["telephone", 30.2], ["inside, small room", 4.83]], null, null, null, null, [["hum", 46.6], ["mains hum", 10.66], ["throbbing", 5.62]], null, null, null, [["speech", 49.61], ["sidetone", 23.54], ["noise", 4.92]], null, null, null, null, null, [["speech", 68.88], ["thunk", 5.52], ["telephone", 2.75]], null, null, null, null, [["speech", 81.1], ["telephone", 4.22], ["busy signal", 2.94]], null, null], "duration": [0.1, 2.16, 0.75, 1.59, 1.08, 2.32, 0.64, 2.56, 2.62, 1.13, 1.14, 2.5, 20.4, 6.26, 0.53, 0.65, 6.81, 0.64, 0.88, 1.47, 3.64, 1.67, 2.8, 0.81, 0.8, 0.75, 0.65, 2.78, 0.4, 0.44]}
|
annotations_filtered/03jGqiF-0Gg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 4.4], [6.0, 9.34], [11.0, 16.11], [17.0, 18.6], [21.0, 23.14], [24.0, 25.39], [26.0, 29.05], [33.0, 33.07], [37.0, 37.5], [52.0, 58.02]], "keep_status": [false, true, true, false, true, false, true, false, false, true], "silence_prob": [64.63, 40.26, 46.54, 0.0, 48.91, 0.0, 49.4, 0.0, 0.0, 37.85], "audiomae_on_audioset": [null, [["sidetone", 37.46], ["music", 10.98], ["speech", 10.23]], [["fly, housefly", 26.41], ["insect", 13.93], ["hum", 9.34]], null, [["music", 24.91], ["hum", 12.38], ["throbbing", 6.21]], null, [["hum", 25.63], ["music", 12.27], ["throbbing", 6.23]], null, null, [["mains hum", 23.56], ["hum", 15.96], ["noise", 12.73]]], "duration": [2.4, 3.34, 5.11, 1.6, 2.14, 1.39, 3.05, 0.07, 0.5, 6.02]}
|
annotations_filtered/03uEq5dKcFs_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[13.0, 13.47], [15.0, 15.3], [19.0, 27.03], [32.0, 33.52], [34.0, 33.81], [34.0, 34.92], [36.0, 36.19], [38.0, 38.37], [39.0, 39.77], [41.0, 42.13], [47.0, 47.92], [49.0, 49.55], [50.0, 51.26], [52.0, 52.42], [54.0, 53.94], [56.0, 58.01], [59.0, 59.12], [65.0, 65.38], [69.0, 70.78], [73.0, 75.1], [78.0, 78.04], [87.0, 87.03], [89.0, 91.1], [93.0, 93.72], [100.0, 100.38], [106.0, 106.3], [116.0, 118.18], [123.0, 123.87], [125.0, 125.24], [126.0, 144.0]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 45.24, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 89.01, 0.0, 0.0, 0.0, 99.99, 0.0, 0.0, 97.54, 0.0, 0.0, 0.0, 99.85, 0.0, 0.0, 92.8], "audiomae_on_audioset": [null, null, [["speech", 90.01], ["radio", 1.0], ["inside, small room", 0.9]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.47, 0.3, 8.03, 1.52, -0.19, 0.92, 0.19, 0.37, 0.77, 1.13, 0.92, 0.55, 1.26, 0.42, -0.06, 2.01, 0.12, 0.38, 1.78, 2.1, 0.04, 0.03, 2.1, 0.72, 0.38, 0.3, 2.18, 0.87, 0.24, 18.0]}
|
annotations_filtered/049R_wOazQI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 16.19], [17.0, 17.27], [19.0, 21.86], [23.0, 24.04], [27.0, 27.92], [29.0, 29.62], [30.0, 33.56], [35.0, 35.75], [37.0, 61.16], [62.0, 62.26]], "keep_status": [false, false, false, false, false, false, false, false, false, false], "silence_prob": [63.42, 0.0, 98.01, 0.0, 0.0, 0.0, 100.0, 0.0, 80.46, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null], "duration": [9.19, 0.27, 2.86, 1.04, 0.92, 0.62, 3.56, 0.75, 24.16, 0.26]}
|
annotations_filtered/04BZh6E-Nck_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.87], [8.0, 9.31], [15.0, 15.6], [17.0, 18.54], [21.0, 21.68], [23.0, 47.9], [50.0, 50.38], [60.0, 62.26], [63.0, 64.47], [65.0, 66.8], [68.0, 77.09], [80.0, 94.54], [96.0, 100.43], [101.0, 101.9], [110.0, 125.56], [126.0, 127.06], [128.0, 136.44], [150.0, 149.67], [153.0, 154.43]], "keep_status": [false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, true, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 29.01, 0.0, 45.27, 0.0, 0.0, 28.97, 32.2, 52.45, 0.0, 29.77, 0.0, 29.82, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, [["speech", 38.43], ["music", 17.54], ["mains hum", 6.22]], null, [["music", 78.38], ["video game music", 3.01], ["soundtrack music", 2.47]], null, null, [["music", 62.46], ["synthesizer", 8.26], ["musical instrument", 2.95]], [["throbbing", 43.91], ["hum", 24.76], ["music", 22.28]], null, null, [["music", 53.11], ["speech", 14.69], ["throbbing", 9.0]], null, [["cattle, bovinae", 22.69], ["livestock, farm animals, working animals", 21.94], ["music", 15.8]], null, null], "duration": [0.87, 1.31, 0.6, 1.54, 0.68, 24.9, 0.38, 2.26, 1.47, 1.8, 9.09, 14.54, 4.43, 0.9, 15.56, 1.06, 8.44, -0.33, 1.43]}
|
annotations_filtered/04s96zDt1RE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 3.1], [5.0, 6.78], [9.0, 10.17], [11.0, 12.77], [15.0, 14.96], [16.0, 17.29], [19.0, 19.48], [20.0, 40.98], [43.0, 51.68], [53.0, 63.63], [66.0, 82.36]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [99.96, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 78.21, 98.99, 63.42, 35.94], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, [["music", 41.05], ["hum", 17.51], ["throbbing", 12.6]]], "duration": [2.1, 1.78, 1.17, 1.77, -0.04, 1.29, 0.48, 20.98, 8.68, 10.63, 16.36]}
|
annotations_filtered/04uN57jOg-Q_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 10.74], [11.0, 12.39], [13.0, 13.0], [14.0, 14.86], [21.0, 21.69], [29.0, 30.16], [43.0, 43.9], [45.0, 44.59], [45.0, 46.25], [73.0, 74.73], [83.0, 86.44], [96.0, 109.31], [112.0, 113.58], [124.0, 125.79], [130.0, 131.85]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 58.98, 89.01, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.74, 1.39, 0.0, 0.86, 0.69, 1.16, 0.9, -0.41, 1.25, 1.73, 3.44, 13.31, 1.58, 1.79, 1.85]}
|
annotations_filtered/04xSMg03sZ0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 4.26], [8.0, 8.29], [11.0, 13.73], [18.0, 25.91], [31.0, 32.29], [33.0, 34.18], [36.0, 46.14], [47.0, 49.3], [51.0, 51.09], [54.0, 55.32], [57.0, 58.13], [59.0, 60.37], [61.0, 61.77], [62.0, 68.47], [70.0, 72.4], [80.0, 82.66], [92.0, 94.31], [96.0, 99.4], [103.0, 104.38], [105.0, 105.33], [108.0, 108.01], [114.0, 114.13], [117.0, 118.29], [122.0, 121.64], [123.0, 122.61], [126.0, 127.92], [132.0, 132.87]], "keep_status": [false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [44.99, 0.0, 41.62, 61.47, 0.0, 0.0, 51.39, 67.89, 0.0, 0.0, 0.0, 0.0, 0.0, 56.4, 61.77, 31.32, 41.34, 34.83, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["music", 47.85], ["speech", 14.12], ["hum", 8.75]], null, [["speech", 16.31], ["radio", 7.13], ["boing", 6.6]], null, null, null, null, null, null, null, null, null, null, null, null, [["music", 41.43], ["speech", 32.67], ["musical instrument", 3.04]], [["music", 45.38], ["breaking", 27.09], ["glass", 12.11]], [["music", 54.27], ["synthesizer", 12.56], ["speech", 9.18]], null, null, null, null, null, null, null, null, null], "duration": [3.26, 0.29, 2.73, 7.91, 1.29, 1.18, 10.14, 2.3, 0.09, 1.32, 1.13, 1.37, 0.77, 6.47, 2.4, 2.66, 2.31, 3.4, 1.38, 0.33, 0.01, 0.13, 1.29, -0.36, -0.39, 1.92, 0.87]}
|
annotations_filtered/04zHzVrubHk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[28.0, 48.32], [48.0, 51.92], [53.0, 55.36], [57.0, 58.06], [60.0, 60.88], [62.0, 64.62], [71.0, 72.86], [75.0, 76.74], [77.0, 78.73], [82.0, 82.44], [84.0, 86.61], [88.0, 115.79], [117.0, 118.17], [119.0, 119.64], [121.0, 128.06], [128.0, 130.45], [133.0, 140.44]], "keep_status": [true, false, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true], "silence_prob": [40.16, 35.16, 34.03, 0.0, 0.0, 77.7, 0.0, 0.0, 0.0, 0.0, 85.72, 50.81, 0.0, 0.0, 43.79, 34.15, 43.96], "audiomae_on_audioset": [[["hum", 27.26], ["mains hum", 16.03], ["fly, housefly", 11.92]], [["sidetone", 88.18], ["speech", 4.39], ["music", 2.84]], [["speech", 67.32], ["radio", 12.87], ["shuffling cards", 3.56]], null, null, null, null, null, null, null, null, null, null, null, [["echo", 36.12], ["hum", 8.59], ["fart", 7.84]], [["didgeridoo", 17.74], ["music", 14.5], ["theremin", 8.22]], [["music", 45.07], ["sonar", 12.15], ["synthesizer", 10.98]]], "duration": [20.32, 3.92, 2.36, 1.06, 0.88, 2.62, 1.86, 1.74, 1.73, 0.44, 2.61, 27.79, 1.17, 0.64, 7.06, 2.45, 7.44]}
|
annotations_filtered/05-e-YTw4r8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[18.0, 18.91], [20.0, 20.85], [22.0, 21.81], [23.0, 24.58], [26.0, 27.9], [29.0, 32.61], [33.0, 37.15], [39.0, 42.01], [43.0, 43.65], [44.0, 62.04], [63.0, 64.64], [66.0, 66.38], [69.0, 69.09], [73.0, 75.74], [77.0, 78.83], [85.0, 85.73], [90.0, 92.87], [97.0, 98.04], [101.0, 101.53], [106.0, 116.06], [117.0, 118.12], [119.0, 125.44], [126.0, 129.47]], "keep_status": [false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, true, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 55.67, 95.64, 79.24, 0.0, 43.4, 0.0, 0.0, 0.0, 61.27, 0.0, 0.0, 48.78, 0.0, 0.0, 49.4, 0.0, 50.36, 55.46], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, [["vehicle", 19.42], ["car", 18.72], ["race car, auto racing", 8.22]], null, null, null, null, null, null, [["eruption", 16.38], ["hum", 15.3], ["gong", 10.61]], null, null, [["sidetone", 62.19], ["hum", 6.88], ["speech", 6.44]], null, null, null], "duration": [0.91, 0.85, -0.19, 1.58, 1.9, 3.61, 4.15, 3.01, 0.65, 18.04, 1.64, 0.38, 0.09, 2.74, 1.83, 0.73, 2.87, 1.04, 0.53, 10.06, 1.12, 6.44, 3.47]}
|
annotations_filtered/056HlHORCIU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 1.02], [4.0, 4.58], [7.0, 7.89], [9.0, 10.69], [11.0, 20.46], [21.0, 21.61], [24.0, 28.38], [29.0, 29.3], [32.0, 37.32], [38.0, 48.74]], "keep_status": [false, false, false, false, true, false, true, false, true, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 31.1, 0.0, 30.59, 0.0, 29.99, 29.66], "audiomae_on_audioset": [null, null, null, null, [["hum", 23.75], ["speech", 21.77], ["mains hum", 14.05]], null, [["music", 49.09], ["buzz", 7.38], ["noise", 4.81]], null, [["music", 14.84], ["cacophony", 12.98], ["synthesizer", 10.44]], [["buzz", 32.4], ["vehicle", 14.19], ["music", 7.63]]], "duration": [1.02, 0.58, 0.89, 1.69, 9.46, 0.61, 4.38, 0.3, 5.32, 10.74]}
|
annotations_filtered/05O77oX6bQE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.65], [20.0, 20.83], [24.0, 24.06], [67.0, 67.2], [110.0, 117.75], [120.0, 122.94], [124.0, 125.22], [129.0, 130.72], [132.0, 135.9], [137.0, 139.19], [143.0, 155.83]], "keep_status": [false, false, false, false, true, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 30.98, 96.66, 0.0, 0.0, 80.64, 93.45, 55.89], "audiomae_on_audioset": [null, null, null, null, [["speech", 10.02], ["crowd", 9.26], ["livestock, farm animals, working animals", 6.91]], null, null, null, null, null, null], "duration": [0.65, 0.83, 0.06, 0.2, 7.75, 2.94, 1.22, 1.72, 3.9, 2.19, 12.83]}
|
annotations_filtered/05foBuX_brU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[9.0, 9.48], [13.0, 13.9], [18.0, 18.18], [19.0, 20.09], [20.0, 20.58], [26.0, 26.05], [32.0, 33.03], [34.0, 34.7], [36.0, 36.71], [42.0, 42.57], [43.0, 44.56], [45.0, 46.14], [46.0, 46.7], [65.0, 66.43], [71.0, 72.0], [73.0, 72.96], [75.0, 76.3], [83.0, 82.88], [84.0, 85.04], [86.0, 86.19], [86.0, 86.32]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.48, 0.9, 0.18, 1.09, 0.58, 0.05, 1.03, 0.7, 0.71, 0.57, 1.56, 1.14, 0.7, 1.43, 1.0, -0.04, 1.3, -0.12, 1.04, 0.19, 0.32]}
|
annotations_filtered/05nQ6FtAaYg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[21.0, 21.17], [25.0, 24.81], [25.0, 26.05], [27.0, 28.09], [33.0, 33.94], [41.0, 44.44], [46.0, 46.82]], "keep_status": [false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 35.54, 0.0], "audiomae_on_audioset": [null, null, null, null, null, [["music", 75.78], ["musical instrument", 5.3], ["theremin", 2.98]], null], "duration": [0.17, -0.19, 1.05, 1.09, 0.94, 3.44, 0.82]}
|
annotations_filtered/05qid4p_cfw_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 5.36], [10.0, 10.23], [12.0, 11.97], [15.0, 16.04], [17.0, 18.57], [21.0, 21.84], [24.0, 26.06], [31.0, 32.93], [38.0, 39.16], [51.0, 51.12], [79.0, 79.57], [89.0, 90.59], [107.0, 106.79], [109.0, 110.4], [136.0, 137.19], [147.0, 148.07], [151.0, 156.1], [160.0, 162.19], [164.0, 168.12], [181.0, 183.51], [184.0, 185.52], [187.0, 192.06], [197.0, 198.56], [202.0, 203.77], [213.0, 212.8], [215.0, 215.0], [223.0, 224.4], [229.0, 230.77], [231.0, 234.67], [238.0, 238.84]], "keep_status": [false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 30.67, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 30.72, 29.44, 29.05, 31.69, 0.0, 37.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.72, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, [["music", 59.19], ["electronic music", 5.43], ["trance music", 3.77]], null, null, null, null, null, null, null, null, null, [["throbbing", 48.34], ["hum", 38.96], ["music", 6.13]], [["music", 61.62], ["speech", 5.42], ["electronic music", 4.35]], [["music", 70.83], ["electronic music", 5.54], ["static", 3.39]], [["music", 37.68], ["speech", 10.42], ["moo", 4.79]], null, [["music", 67.66], ["whack, thwack", 7.8], ["thunk", 2.1]], null, null, null, null, null, null, [["music", 67.71], ["lullaby", 1.74], ["singing", 1.74]], null], "duration": [0.36, 0.23, -0.03, 1.04, 1.57, 0.84, 2.06, 1.93, 1.16, 0.12, 0.57, 1.59, -0.21, 1.4, 1.19, 1.07, 5.1, 2.19, 4.12, 2.51, 1.52, 5.06, 1.56, 1.77, -0.2, 0.0, 1.4, 1.77, 3.67, 0.84]}
|
annotations_filtered/0668UNhYjXg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[23.0, 24.04], [25.0, 26.15], [30.0, 30.91], [35.0, 35.87], [36.0, 36.76], [37.0, 37.42], [41.0, 41.99], [45.0, 44.85], [48.0, 55.02], [56.0, 56.22], [57.0, 57.79], [61.0, 95.3]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 40.7, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, [["music", 76.81], ["ambient music", 5.4], ["hum", 2.18]], null, null, null], "duration": [1.04, 1.15, 0.91, 0.87, 0.76, 0.42, 0.99, -0.15, 7.02, 0.22, 0.79, 34.3]}
|
annotations_filtered/06B3m6L5fFw_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[9.0, 9.42], [10.0, 10.81], [14.0, 13.71], [15.0, 16.28], [18.0, 19.55], [21.0, 23.68], [25.0, 26.81], [28.0, 30.65], [31.0, 35.87], [41.0, 41.5], [43.0, 48.32], [50.0, 54.89], [56.0, 58.68], [62.0, 62.31], [63.0, 67.32], [68.0, 68.55], [70.0, 69.72], [73.0, 72.98], [74.0, 76.23], [77.0, 81.36], [88.0, 90.41], [92.0, 94.04], [95.0, 100.89], [102.0, 105.41], [106.0, 107.82], [108.0, 109.09], [109.0, 110.64], [111.0, 113.36], [114.0, 116.63]], "keep_status": [false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 86.09, 0.0, 98.36, 70.58, 0.0, 35.58, 96.54, 93.45, 0.0, 36.94, 0.0, 0.0, 0.0, 97.92, 72.01, 66.51, 85.35, 65.44, 47.94, 0.0, 0.0, 0.0, 91.47, 37.25], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, [["speech", 33.98], ["hum", 14.66], ["mains hum", 3.77]], null, null, null, [["speech", 63.84], ["sidetone", 11.35], ["telephone", 4.85]], null, null, null, null, null, null, null, null, [["speech", 60.16], ["radio", 8.99], ["inside, small room", 3.07]], null, null, null, null, [["speech", 42.19], ["gong", 7.4], ["tuning fork", 7.26]]], "duration": [0.42, 0.81, -0.29, 1.28, 1.55, 2.68, 1.81, 2.65, 4.87, 0.5, 5.32, 4.89, 2.68, 0.31, 4.32, 0.55, -0.28, -0.02, 2.23, 4.36, 2.41, 2.04, 5.89, 3.41, 1.82, 1.09, 1.64, 2.36, 2.63]}
|
annotations_filtered/06DLNzLaTlE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.53], [8.0, 8.41], [9.0, 9.66], [14.0, 23.63], [24.0, 34.86], [40.0, 47.85], [49.0, 51.19], [53.0, 53.27], [59.0, 58.94], [60.0, 60.45], [61.0, 61.53], [66.0, 66.36], [68.0, 68.47], [69.0, 70.48], [72.0, 72.32], [75.0, 75.46], [78.0, 79.79], [82.0, 82.41], [84.0, 84.87], [88.0, 88.38]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 99.99, 31.5, 33.42, 61.57, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, [["music", 56.01], ["synthesizer", 10.4], ["theremin", 5.38]], [["music", 67.78], ["speech", 10.0], ["theremin", 4.21]], null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.53, 0.41, 0.66, 9.63, 10.86, 7.85, 2.19, 0.27, -0.06, 0.45, 0.53, 0.36, 0.47, 1.48, 0.32, 0.46, 1.79, 0.41, 0.87, 0.38]}
|
annotations_filtered/06Its9LhIHQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[13.0, 26.91], [28.0, 37.77], [39.0, 68.81], [70.0, 76.72], [78.0, 89.99], [92.0, 97.16], [99.0, 104.04], [107.0, 107.32], [109.0, 118.22], [118.0, 118.39], [118.0, 131.11], [135.0, 145.44]], "keep_status": [true, false, false, false, false, false, false, false, true, false, false, false], "silence_prob": [28.42, 28.21, 28.92, 29.72, 28.68, 29.33, 30.6, 0.0, 29.54, 0.0, 28.85, 28.54], "audiomae_on_audioset": [[["hum", 16.44], ["speech", 14.18], ["throbbing", 13.85]], [["speech", 37.3], ["vehicle", 23.46], ["boat, water vehicle", 10.19]], [["speech", 64.25], ["music", 10.29], ["hum", 4.25]], [["speech", 74.67], ["music", 16.74], ["hum", 1.28]], [["speech", 61.21], ["music", 10.18], ["hum", 5.67]], [["speech", 64.16], ["music", 9.49], ["synthesizer", 4.09]], [["music", 33.8], ["hum", 21.03], ["throbbing", 19.92]], null, [["music", 42.1], ["whack, thwack", 18.2], ["speech", 8.54]], null, [["speech", 55.26], ["explosion", 12.77], ["burst, pop", 7.2]], [["speech", 50.83], ["eruption", 18.21], ["explosion", 15.18]]], "duration": [13.91, 9.77, 29.81, 6.72, 11.99, 5.16, 5.04, 0.32, 9.22, 0.39, 13.11, 10.44]}
|
annotations_filtered/06L5y4Z9KcE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[15.0, 16.53], [21.0, 21.15], [28.0, 32.53], [34.0, 44.41], [44.0, 44.51], [45.0, 44.73], [45.0, 48.96], [50.0, 50.31], [52.0, 51.68], [54.0, 54.45], [69.0, 74.07], [75.0, 75.79], [81.0, 81.09], [83.0, 84.23], [86.0, 86.26], [88.0, 89.16], [92.0, 93.58], [95.0, 94.93], [97.0, 109.78], [112.0, 112.38], [113.0, 113.95], [124.0, 125.31], [126.0, 126.39], [128.0, 130.01], [132.0, 133.56], [138.0, 142.47], [145.0, 145.18], [146.0, 146.42], [147.0, 157.22], [158.0, 161.25], [165.0, 167.9], [168.0, 167.95], [169.0, 168.94]], "keep_status": [false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, true, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 41.16, 33.87, 0.0, 0.0, 95.64, 0.0, 0.0, 0.0, 34.12, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 33.85, 0.0, 0.0, 0.0, 0.0, 98.8, 0.0, 31.61, 0.0, 0.0, 33.0, 81.89, 82.97, 0.0, 0.0], "audiomae_on_audioset": [null, null, [["speech", 43.04], ["mosquito", 12.57], ["insect", 12.29]], [["electric shaver, electric razor", 43.47], ["mains hum", 25.92], ["hum", 14.2]], null, null, null, null, null, null, [["music", 63.24], ["effects unit", 9.91], ["guitar", 6.26]], null, null, null, null, null, null, null, [["speech", 31.88], ["mains hum", 14.2], ["hum", 14.01]], null, null, null, null, null, null, [["fart", 17.88], ["music", 13.06], ["explosion", 12.87]], null, null, [["buzz", 50.24], ["fly, housefly", 17.78], ["insect", 10.61]], null, null, null, null], "duration": [1.53, 0.15, 4.53, 10.41, 0.51, -0.27, 3.96, 0.31, -0.32, 0.45, 5.07, 0.79, 0.09, 1.23, 0.26, 1.16, 1.58, -0.07, 12.78, 0.38, 0.95, 1.31, 0.39, 2.01, 1.56, 4.47, 0.18, 0.42, 10.22, 3.25, 2.9, -0.05, -0.06]}
|
annotations_filtered/06lJhEc7zIo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 15.62], [16.0, 18.71], [19.0, 20.43], [21.0, 22.28], [23.0, 32.29], [33.0, 35.51], [36.0, 47.26], [48.0, 49.37], [54.0, 54.41], [57.0, 57.54], [60.0, 66.04], [77.0, 76.77], [83.0, 83.78], [90.0, 91.96], [106.0, 107.86], [113.0, 113.68], [117.0, 117.44], [121.0, 120.75], [121.0, 122.72]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [99.98, 84.43, 0.0, 0.0, 99.92, 98.59, 84.8, 0.0, 0.0, 0.0, 65.91, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [10.62, 2.71, 1.43, 1.28, 9.29, 2.51, 11.26, 1.37, 0.41, 0.54, 6.04, -0.23, 0.78, 1.96, 1.86, 0.68, 0.44, -0.25, 1.72]}
|
annotations_filtered/06qgu4XoNL4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 5.17], [7.0, 8.36], [9.0, 9.69], [10.0, 11.08], [12.0, 13.22], [15.0, 17.86], [18.0, 19.2], [20.0, 21.98], [22.0, 22.23], [22.0, 24.56], [31.0, 34.21], [39.0, 41.98], [43.0, 51.97], [54.0, 54.51], [61.0, 62.58], [67.0, 69.89], [70.0, 71.22], [74.0, 77.57], [78.0, 78.87], [81.0, 81.87], [83.0, 85.56], [87.0, 87.18], [89.0, 89.94], [91.0, 94.1], [95.0, 95.89], [100.0, 100.26], [101.0, 101.53], [102.0, 102.03], [104.0, 104.6], [117.0, 116.88], [118.0, 118.81], [119.0, 120.04], [121.0, 121.27], [122.0, 129.9], [145.0, 146.42], [147.0, 153.86], [157.0, 157.05], [158.0, 158.38]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, true, true, false, false, false, false, true, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 32.48, 0.0, 0.0, 0.0, 32.94, 36.32, 32.13, 42.79, 0.0, 0.0, 39.86, 0.0, 42.08, 0.0, 0.0, 47.86, 0.0, 0.0, 45.18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 31.44, 0.0, 41.26, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, [["music", 83.86], ["musical instrument", 3.73], ["electronic music", 2.81]], null, null, null, [["speech", 41.07], ["music", 39.75], ["sonar", 2.58]], [["livestock, farm animals, working animals", 43.24], ["cattle, bovinae", 31.91], ["moo", 16.67]], [["cattle, bovinae", 22.43], ["speech", 22.12], ["livestock, farm animals, working animals", 17.92]], [["music", 25.76], ["didgeridoo", 19.8], ["speech", 15.61]], null, null, [["speech", 87.55], ["radio", 3.48], ["speech synthesizer", 0.77]], null, [["speech", 38.27], ["music", 14.65], ["boing", 6.65]], null, null, [["heart sounds, heartbeat", 22.74], ["throbbing", 18.96], ["hum", 15.19]], null, null, [["speech", 57.35], ["sidetone", 11.02], ["music", 9.47]], null, null, null, null, null, null, null, null, null, [["music", 71.6], ["guitar", 6.11], ["bass guitar", 5.83]], null, [["music", 28.15], ["hum", 12.62], ["throbbing", 11.28]], null, null], "duration": [1.17, 1.36, 0.69, 1.08, 1.22, 2.86, 1.2, 1.98, 0.23, 2.56, 3.21, 2.98, 8.97, 0.51, 1.58, 2.89, 1.22, 3.57, 0.87, 0.87, 2.56, 0.18, 0.94, 3.1, 0.89, 0.26, 0.53, 0.03, 0.6, -0.12, 0.81, 1.04, 0.27, 7.9, 1.42, 6.86, 0.05, 0.38]}
|