Spaces:
Build error
Build error
WIP
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- annotations_filtered/x-FLqiu9nTs_filtered.json +1 -0
- annotations_filtered/x-Vvl8gkZAw_filtered.json +1 -0
- annotations_filtered/x-_17t-v9dA_filtered.json +1 -0
- annotations_filtered/x03pWg-naqg_filtered.json +1 -0
- annotations_filtered/x0D4unitqpE_filtered.json +1 -0
- annotations_filtered/x0Ev2qiY08M_filtered.json +1 -0
- annotations_filtered/x0Fnxdv5rJ8_filtered.json +1 -0
- annotations_filtered/x0XE7KFZook_filtered.json +1 -0
- annotations_filtered/x0YLLkr7VfU_filtered.json +1 -0
- annotations_filtered/x0yNzsNUoK4_filtered.json +1 -0
- annotations_filtered/x1-axqBZdNk_filtered.json +1 -0
- annotations_filtered/x11OTizHwfE_filtered.json +1 -0
- annotations_filtered/x12Dai43I8Y_filtered.json +1 -0
- annotations_filtered/x1BpKIb7Ces_filtered.json +1 -0
- annotations_filtered/x1FhrhoudSE_filtered.json +1 -0
- annotations_filtered/x1H6pD3vNwQ_filtered.json +1 -0
- annotations_filtered/x1YvX61qS0Q_filtered.json +1 -0
- annotations_filtered/x1gEy9LSa4A_filtered.json +1 -0
- annotations_filtered/x1srznPx1qA_filtered.json +1 -0
- annotations_filtered/x2-MCPa_3rU_filtered.json +1 -0
- annotations_filtered/x21gkEu5lKc_filtered.json +1 -0
- annotations_filtered/x22ZX9dGaKk_filtered.json +1 -0
- annotations_filtered/x24Olya2NLk_filtered.json +1 -0
- annotations_filtered/x26YFcaLiNk_filtered.json +1 -0
- annotations_filtered/x2BuNuwZ9mg_filtered.json +1 -0
- annotations_filtered/x2CizSzk9s4_filtered.json +1 -0
- annotations_filtered/x2K8I28zejw_filtered.json +1 -0
- annotations_filtered/x2S78gnCkRg_filtered.json +1 -0
- annotations_filtered/x2W8BqPt7mI_filtered.json +1 -0
- annotations_filtered/x2WK_eWihdU_filtered.json +1 -0
- annotations_filtered/x2lBq3c3AIY_filtered.json +1 -0
- annotations_filtered/x2vhOIjmS2s_filtered.json +1 -0
- annotations_filtered/x2wH5RS58lo_filtered.json +1 -0
- annotations_filtered/x2yXtHyhu-k_filtered.json +1 -0
- annotations_filtered/x35VnGsGrFc_filtered.json +1 -0
- annotations_filtered/x39ZG34sn28_filtered.json +1 -0
- annotations_filtered/x3OTeacsT84_filtered.json +1 -0
- annotations_filtered/x3jT6tQ_gJk_filtered.json +1 -0
- annotations_filtered/x421Na9VfNE_filtered.json +1 -0
- annotations_filtered/x4CEkYJNir0_filtered.json +1 -0
- annotations_filtered/x4IKGG_2L6I_filtered.json +1 -0
- annotations_filtered/x4L81QLGYuM_filtered.json +1 -0
- annotations_filtered/x4QJwGTOny8_filtered.json +1 -0
- annotations_filtered/x4oAO_kDHTY_filtered.json +1 -0
- annotations_filtered/x4utH5uWK6c_filtered.json +1 -0
- annotations_filtered/x5Gwzy2FY10_filtered.json +1 -0
- annotations_filtered/x5ajdqqytyA_filtered.json +1 -0
- annotations_filtered/x5bONeuC6BY_filtered.json +1 -0
- annotations_filtered/x5z9VZO--G4_filtered.json +1 -0
- annotations_filtered/x6FDJAu5yMc_filtered.json +1 -0
annotations_filtered/x-FLqiu9nTs_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[27.0, 32.04], [36.0, 40.56], [42.0, 49.3], [56.0, 61.08], [62.0, 63.68], [66.0, 66.33], [67.0, 77.09], [78.0, 79.24], [80.0, 81.55], [82.0, 84.64], [86.0, 86.07], [87.0, 95.27], [97.0, 107.74], [108.0, 110.79], [113.0, 128.07]], "keep_status": [true, true, false, false, false, false, true, false, false, false, false, false, false, false, true], "silence_prob": [31.0, 37.97, 35.55, 33.41, 0.0, 0.0, 45.46, 0.0, 0.0, 77.2, 0.0, 65.32, 65.67, 64.86, 43.03], "audiomae_on_audioset": [[["speech", 55.62], ["vehicle", 5.09], ["livestock, farm animals, working animals", 4.88]], [["music", 46.85], ["foghorn", 7.8], ["speech", 5.13]], [["music", 59.41], ["hum", 6.81], ["scary music", 3.95]], [["music", 30.97], ["foghorn", 27.71], ["speech", 22.1]], null, null, [["hum", 32.89], ["music", 21.05], ["throbbing", 10.52]], null, null, null, null, null, null, null, [["hum", 21.09], ["speech", 8.98], ["throbbing", 6.06]]], "duration": [5.04, 4.56, 7.3, 5.08, 1.68, 0.33, 10.09, 1.24, 1.55, 2.64, 0.07, 8.27, 10.74, 2.79, 15.07]}
|
annotations_filtered/x-Vvl8gkZAw_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 2.29], [3.0, 5.22], [7.0, 9.96], [11.0, 11.96], [24.0, 23.63], [24.0, 24.78], [35.0, 35.26], [36.0, 41.1], [42.0, 43.33], [45.0, 47.02], [48.0, 49.59], [50.0, 51.56], [53.0, 54.67], [55.0, 56.67], [58.0, 60.67], [61.0, 63.34], [66.0, 73.74], [77.0, 76.99], [85.0, 87.61], [90.0, 92.87], [94.0, 108.94], [110.0, 110.86], [112.0, 113.19], [113.0, 125.64], [127.0, 131.9]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, true, true, true, false, false, false, false], "silence_prob": [0.0, 35.43, 31.93, 0.0, 0.0, 0.0, 0.0, 99.99, 0.0, 99.16, 0.0, 0.0, 0.0, 0.0, 99.99, 100.0, 31.54, 0.0, 39.27, 31.75, 33.33, 0.0, 0.0, 35.37, 54.23], "audiomae_on_audioset": [null, [["sine wave", 70.32], ["chirp tone", 15.02], ["tuning fork", 4.82]], [["burping, eructation", 74.11], ["cough", 6.22], ["fart", 4.55]], null, null, null, null, null, null, null, null, null, null, null, null, null, [["animal", 16.2], ["speech", 9.75], ["whack, thwack", 7.63]], null, [["music", 30.93], ["mains hum", 15.58], ["hum", 13.0]], [["hum", 19.42], ["music", 16.44], ["mains hum", 11.72]], [["speech", 46.55], ["whack, thwack", 13.0], ["groan", 6.62]], null, null, [["hum", 44.96], ["mains hum", 22.54], ["electric shaver, electric razor", 9.81]], null], "duration": [1.29, 2.22, 2.96, 0.96, -0.37, 0.78, 0.26, 5.1, 1.33, 2.02, 1.59, 1.56, 1.67, 1.67, 2.67, 2.34, 7.74, -0.01, 2.61, 2.87, 14.94, 0.86, 1.19, 12.64, 4.9]}
|
annotations_filtered/x-_17t-v9dA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[17.0, 17.93], [24.0, 24.33], [25.0, 25.57], [26.0, 26.6], [47.0, 47.41], [111.0, 111.16], [122.0, 121.63], [122.0, 122.4], [132.0, 132.16]], "keep_status": [false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null], "duration": [0.93, 0.33, 0.57, 0.6, 0.41, 0.16, -0.37, 0.4, 0.16]}
|
annotations_filtered/x03pWg-naqg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 7.28], [8.0, 16.46], [19.0, 26.3], [36.0, 50.43], [51.0, 57.3], [57.0, 57.5], [58.0, 57.65], [58.0, 57.77], [58.0, 59.31], [62.0, 65.1], [67.0, 69.2], [70.0, 91.3], [100.0, 100.84], [101.0, 102.22], [104.0, 104.48], [106.0, 106.25], [109.0, 109.32], [112.0, 112.92], [116.0, 116.04], [118.0, 120.63], [122.0, 123.3], [125.0, 125.34], [126.0, 126.28], [127.0, 127.58]], "keep_status": [false, false, true, false, true, false, false, false, false, false, true, true, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 37.71, 33.22, 52.27, 45.72, 0.0, 0.0, 0.0, 0.0, 39.61, 32.39, 30.45, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 90.08, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["speech", 56.49], ["music", 19.29], ["noise", 2.35]], [["music", 40.26], ["hum", 13.78], ["mains hum", 8.45]], null, [["livestock, farm animals, working animals", 33.59], ["animal", 17.36], ["whale vocalization", 16.41]], null, null, null, null, [["speech", 55.19], ["roaring cats (lions, tigers)", 16.19], ["animal", 13.54]], [["sidetone", 38.79], ["speech", 25.37], ["music", 5.62]], [["grunt", 29.51], ["noise", 21.12], ["whale vocalization", 8.32]], null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.28, 8.46, 7.3, 14.43, 6.3, 0.5, -0.35, -0.23, 1.31, 3.1, 2.2, 21.3, 0.84, 1.22, 0.48, 0.25, 0.32, 0.92, 0.04, 2.63, 1.3, 0.34, 0.28, 0.58]}
|
annotations_filtered/x0D4unitqpE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 9.54], [10.0, 13.66], [15.0, 17.37], [19.0, 22.0], [24.0, 25.03], [27.0, 27.45], [29.0, 31.21], [35.0, 37.64], [38.0, 39.29], [40.0, 44.25], [45.0, 48.14], [49.0, 52.81], [54.0, 91.32], [93.0, 96.26], [98.0, 100.75], [102.0, 102.24], [104.0, 106.71], [107.0, 113.76]], "keep_status": [false, false, false, false, false, false, false, false, false, false, true, true, false, false, false, false, false, true], "silence_prob": [81.89, 97.43, 96.54, 99.94, 0.0, 0.0, 90.78, 92.97, 0.0, 79.24, 36.31, 46.19, 0.0, 47.31, 75.55, 0.0, 68.93, 39.21], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, [["dial tone", 25.91], ["sine wave", 9.41], ["telephone", 8.59]], [["hum", 30.71], ["sidetone", 29.59], ["mains hum", 7.15]], null, [["mains hum", 32.25], ["hum", 25.35], ["speech", 16.19]], null, null, null, [["music", 46.98], ["hum", 6.61], ["speech", 5.84]]], "duration": [2.54, 3.66, 2.37, 3.0, 1.03, 0.45, 2.21, 2.64, 1.29, 4.25, 3.14, 3.81, 37.32, 3.26, 2.75, 0.24, 2.71, 6.76]}
|
annotations_filtered/x0Ev2qiY08M_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[0.0, 1.65], [15.0, 16.16], [20.0, 20.83], [22.0, 22.81], [25.0, 25.96], [29.0, 29.84], [38.0, 39.29], [40.0, 40.88], [42.0, 43.71], [45.0, 45.0], [46.0, 51.07], [56.0, 55.88], [56.0, 55.92], [57.0, 57.16], [61.0, 61.21], [63.0, 64.39], [65.0, 65.08], [66.0, 73.11], [73.0, 74.11], [80.0, 81.78], [82.0, 82.97]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 67.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 66.27, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.65, 1.16, 0.83, 0.81, 0.96, 0.84, 1.29, 0.88, 1.71, 0.0, 5.07, -0.12, -0.08, 0.16, 0.21, 1.39, 0.08, 7.11, 1.11, 1.78, 0.97]}
|
annotations_filtered/x0Fnxdv5rJ8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 4.77], [13.0, 13.47], [22.0, 22.3], [23.0, 23.84], [26.0, 26.1], [28.0, 28.71], [30.0, 30.96], [35.0, 35.23], [36.0, 37.35], [40.0, 40.14], [49.0, 49.52], [52.0, 53.11]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null], "duration": [-0.23, 0.47, 0.3, 0.84, 0.1, 0.71, 0.96, 0.23, 1.35, 0.14, 0.52, 1.11]}
|
annotations_filtered/x0XE7KFZook_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.0], [3.0, 3.67], [5.0, 5.59], [8.0, 9.12], [10.0, 11.04], [11.0, 11.42], [11.0, 13.78], [14.0, 14.44], [15.0, 29.93], [31.0, 33.89], [35.0, 35.63], [37.0, 40.42], [41.0, 43.43], [47.0, 49.49], [51.0, 58.89], [59.0, 61.03], [61.0, 64.3], [67.0, 77.13], [78.0, 82.65], [83.0, 96.52], [97.0, 97.92], [99.0, 101.12], [105.0, 105.19], [106.0, 106.17], [109.0, 129.27], [130.0, 131.87]], "keep_status": [false, false, false, false, false, false, true, false, false, true, false, false, false, false, false, true, true, false, false, false, false, true, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 34.14, 0.0, 34.88, 42.19, 0.0, 66.76, 54.9, 61.18, 59.59, 45.08, 42.19, 40.55, 34.93, 31.57, 0.0, 40.19, 0.0, 0.0, 34.67, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, [["speech", 36.61], ["music", 22.28], ["drum machine", 3.85]], null, [["music", 83.56], ["musical instrument", 4.15], ["speech", 3.38]], [["music", 46.73], ["speech", 10.33], ["musical instrument", 6.87]], null, null, null, null, null, [["music", 36.78], ["musical instrument", 9.78], ["synthesizer", 5.5]], [["music", 26.33], ["didgeridoo", 20.69], ["speech", 15.1]], [["music", 68.72], ["musical instrument", 11.51], ["synthesizer", 3.43]], [["music", 72.73], ["musical instrument", 10.46], ["didgeridoo", 5.22]], [["cattle, bovinae", 41.45], ["moo", 18.67], ["livestock, farm animals, working animals", 14.38]], null, [["music", 48.07], ["moo", 6.96], ["cattle, bovinae", 5.77]], null, null, [["music", 54.27], ["didgeridoo", 11.54], ["livestock, farm animals, working animals", 7.76]], null], "duration": [0.0, 0.67, 0.59, 1.12, 1.04, 0.42, 2.78, 0.44, 14.93, 2.89, 0.63, 3.42, 2.43, 2.49, 7.89, 2.03, 3.3, 10.13, 4.65, 13.52, 0.92, 2.12, 0.19, 0.17, 20.27, 1.87]}
|
annotations_filtered/x0YLLkr7VfU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 6.56], [7.0, 9.85], [17.0, 23.36], [26.0, 51.22], [52.0, 53.86], [56.0, 62.95], [65.0, 66.75], [71.0, 71.93], [73.0, 80.79], [81.0, 95.89], [96.0, 96.8], [98.0, 98.44], [101.0, 110.79], [111.0, 130.1]], "keep_status": [false, false, false, true, false, false, false, false, false, true, false, false, false, false], "silence_prob": [0.0, 31.01, 30.35, 30.42, 0.0, 31.06, 0.0, 0.0, 30.44, 32.59, 0.0, 0.0, 30.48, 30.43], "audiomae_on_audioset": [null, [["music", 54.11], ["musical instrument", 9.57], ["didgeridoo", 8.05]], [["music", 67.35], ["sidetone", 4.81], ["musical instrument", 4.36]], [["music", 53.26], ["musical instrument", 10.72], ["hum", 4.55]], null, [["mains hum", 47.62], ["hum", 31.19], ["throbbing", 4.83]], null, null, [["music", 43.5], ["trombone", 22.62], ["foghorn", 14.98]], [["music", 55.42], ["timpani", 5.36], ["throbbing", 4.58]], null, null, [["music", 80.8], ["musical instrument", 6.32], ["didgeridoo", 3.09]], [["music", 76.03], ["musical instrument", 6.89], ["effects unit", 3.95]]], "duration": [0.56, 2.85, 6.36, 25.22, 1.86, 6.95, 1.75, 0.93, 7.79, 14.89, 0.8, 0.44, 9.79, 19.1]}
|
annotations_filtered/x0yNzsNUoK4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 9.53], [15.0, 18.99], [21.0, 22.05], [25.0, 31.83], [34.0, 35.33], [37.0, 37.99], [43.0, 43.73], [47.0, 46.8], [47.0, 57.25], [58.0, 58.48], [62.0, 63.27], [67.0, 69.21], [73.0, 72.81], [76.0, 77.01], [78.0, 79.24], [83.0, 85.28], [85.0, 85.4], [85.0, 85.89], [89.0, 101.78], [107.0, 106.93], [111.0, 112.16], [113.0, 114.52]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 58.64, 0.0, 47.35, 0.0, 0.0, 0.0, 0.0, 35.24, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 60.79, 0.0, 0.0, 98.36, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, [["speech", 50.61], ["music", 18.99], ["tap", 3.35]], null, null, null, null, [["speech", 71.22], ["music", 8.31], ["sidetone", 4.24]], null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.53, 3.99, 1.05, 6.83, 1.33, 0.99, 0.73, -0.2, 10.25, 0.48, 1.27, 2.21, -0.19, 1.01, 1.24, 2.28, 0.4, 0.89, 12.78, -0.07, 1.16, 1.52]}
|
annotations_filtered/x1-axqBZdNk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[9.0, 11.23], [15.0, 16.6], [18.0, 18.99], [31.0, 31.89], [51.0, 53.1], [54.0, 61.1], [62.0, 62.5], [67.0, 67.88], [73.0, 74.58], [80.0, 81.24], [88.0, 92.16], [98.0, 99.0], [104.0, 104.57], [106.0, 106.59], [108.0, 109.26], [110.0, 111.27], [112.0, 112.75], [114.0, 122.72], [125.0, 124.92], [127.0, 127.16], [130.0, 133.1]], "keep_status": [false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false], "silence_prob": [100.0, 0.0, 0.0, 0.0, 100.0, 68.93, 0.0, 0.0, 0.0, 0.0, 49.68, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 51.71, 0.0, 0.0, 51.99], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, [["music", 34.2], ["theremin", 24.77], ["whale vocalization", 10.09]], null, null, null, null, null, null, null, null, null, null], "duration": [2.23, 1.6, 0.99, 0.89, 2.1, 7.1, 0.5, 0.88, 1.58, 1.24, 4.16, 1.0, 0.57, 0.59, 1.26, 1.27, 0.75, 8.72, -0.08, 0.16, 3.1]}
|
annotations_filtered/x11OTizHwfE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[18.0, 17.86], [18.0, 17.9], [18.0, 17.93], [18.0, 18.1], [18.0, 25.84], [28.0, 34.72], [37.0, 37.57], [45.0, 46.01], [51.0, 51.44], [52.0, 53.4], [60.0, 60.88], [62.0, 63.17], [64.0, 64.77], [67.0, 68.42], [70.0, 71.71], [73.0, 76.84], [78.0, 79.88], [80.0, 82.43], [84.0, 85.63], [87.0, 88.4], [91.0, 95.81]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 51.44, 97.33, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 98.86, 0.0, 0.0, 99.98], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [-0.14, -0.1, -0.07, 0.1, 7.84, 6.72, 0.57, 1.01, 0.44, 1.4, 0.88, 1.17, 0.77, 1.42, 1.71, 3.84, 1.88, 2.43, 1.63, 1.4, 4.81]}
|
annotations_filtered/x12Dai43I8Y_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[6.0, 6.35], [67.0, 67.41], [70.0, 71.69], [117.0, 117.85], [118.0, 118.76], [124.0, 128.66]], "keep_status": [false, false, false, false, false, true], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 32.03], "audiomae_on_audioset": [null, null, null, null, null, [["music", 28.77], ["brass instrument", 11.75], ["glass", 10.07]]], "duration": [0.35, 0.41, 1.69, 0.85, 0.76, 4.66]}
|
annotations_filtered/x1BpKIb7Ces_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[15.0, 18.35], [20.0, 25.39], [27.0, 32.83], [34.0, 43.58], [44.0, 44.0], [44.0, 44.29], [45.0, 47.07], [53.0, 53.57], [54.0, 57.79], [64.0, 65.92], [76.0, 91.47], [95.0, 96.85]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [88.28, 47.27, 57.09, 47.62, 0.0, 0.0, 36.31, 0.0, 34.45, 0.0, 31.55, 0.0], "audiomae_on_audioset": [null, [["humming", 32.85], ["music", 32.27], ["lullaby", 7.53]], null, [["music", 59.02], ["theremin", 13.35], ["humming", 3.96]], null, null, [["music", 56.9], ["carnatic music", 7.47], ["didgeridoo", 6.39]], null, [["music", 83.06], ["yodeling", 3.28], ["lullaby", 1.47]], null, [["music", 71.58], ["jingle, tinkle", 3.43], ["carnatic music", 1.87]], null], "duration": [3.35, 5.39, 5.83, 9.58, 0.0, 0.29, 2.07, 0.57, 3.79, 1.92, 15.47, 1.85]}
|
annotations_filtered/x1FhrhoudSE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 17.81], [19.0, 19.09], [24.0, 26.03], [30.0, 30.43], [32.0, 32.14], [33.0, 33.86], [38.0, 38.38], [41.0, 45.15], [48.0, 48.95], [53.0, 53.54], [59.0, 60.57], [65.0, 65.45], [70.0, 70.23], [71.0, 71.66], [74.0, 75.02], [82.0, 81.99], [84.0, 83.88], [89.0, 91.74], [93.0, 93.77], [95.0, 95.81], [97.0, 96.9], [107.0, 107.3], [115.0, 116.51], [122.0, 122.74], [123.0, 123.23], [137.0, 138.52]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [99.99, 0.0, 99.26, 0.0, 0.0, 0.0, 0.0, 52.86, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [15.81, 0.09, 2.03, 0.43, 0.14, 0.86, 0.38, 4.15, 0.95, 0.54, 1.57, 0.45, 0.23, 0.66, 1.02, -0.01, -0.12, 2.74, 0.77, 0.81, -0.1, 0.3, 1.51, 0.74, 0.23, 1.52]}
|
annotations_filtered/x1H6pD3vNwQ_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[16.0, 31.65], [32.0, 38.75], [39.0, 39.07], [39.0, 52.79], [58.0, 67.63], [68.0, 67.68], [68.0, 79.95]], "keep_status": [false, true, false, false, true, false, true], "silence_prob": [76.04, 36.25, 0.0, 33.68, 34.66, 0.0, 36.05], "audiomae_on_audioset": [null, [["livestock, farm animals, working animals", 29.49], ["sheep", 17.53], ["music", 12.11]], null, [["speech", 64.5], ["music", 9.68], ["outside, rural or natural", 3.38]], [["music", 39.8], ["speech", 20.28], ["didgeridoo", 6.01]], null, [["speech", 36.97], ["fly, housefly", 13.86], ["cattle, bovinae", 6.59]]], "duration": [15.65, 6.75, 0.07, 13.79, 9.63, -0.32, 11.95]}
|
annotations_filtered/x1YvX61qS0Q_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.51], [2.0, 6.78], [9.0, 12.39], [15.0, 16.93], [18.0, 18.89], [21.0, 22.38], [24.0, 25.37], [26.0, 28.39], [33.0, 34.28], [35.0, 36.1], [37.0, 38.38], [39.0, 42.89], [43.0, 50.95], [51.0, 51.92], [52.0, 53.18], [58.0, 59.31], [61.0, 71.74]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "silence_prob": [0.0, 59.86, 58.81, 0.0, 0.0, 0.0, 0.0, 97.83, 0.0, 0.0, 0.0, 95.51, 55.67, 0.0, 0.0, 0.0, 37.47], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 42.39], ["frog", 8.97], ["fart", 5.64]]], "duration": [0.51, 4.78, 3.39, 1.93, 0.89, 1.38, 1.37, 2.39, 1.28, 1.1, 1.38, 3.89, 7.95, 0.92, 1.18, 1.31, 10.74]}
|
annotations_filtered/x1gEy9LSa4A_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[50.0, 51.65], [53.0, 53.4], [55.0, 59.48], [62.0, 127.79], [129.0, 132.0]], "keep_status": [false, false, false, false, false], "silence_prob": [0.0, 0.0, 31.66, 0.0, 44.49], "audiomae_on_audioset": [null, null, [["speech", 46.43], ["music", 30.9], ["vehicle", 3.06]], null, [["whale vocalization", 96.4], ["hum", 0.47], ["sine wave", 0.42]]], "duration": [1.65, 0.4, 4.48, 65.79, 3.0]}
|
annotations_filtered/x1srznPx1qA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [], "keep_status": [], "silence_prob": [], "audiomae_on_audioset": [], "duration": []}
|
annotations_filtered/x2-MCPa_3rU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[11.0, 12.29], [17.0, 21.17], [23.0, 25.14], [28.0, 29.96], [30.0, 45.59], [47.0, 80.45], [83.0, 84.5], [87.0, 87.27], [88.0, 90.32], [91.0, 91.89], [93.0, 94.22], [96.0, 110.44], [113.0, 112.87], [113.0, 121.1], [124.0, 129.91]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, true, true], "silence_prob": [0.0, 33.59, 38.23, 0.0, 47.08, 0.0, 0.0, 0.0, 55.04, 0.0, 0.0, 33.42, 0.0, 33.91, 36.26], "audiomae_on_audioset": [null, [["speech", 65.28], ["vehicle", 4.78], ["boat, water vehicle", 2.9]], [["speech", 60.9], ["fart", 6.23], ["fireworks", 4.19]], null, [["speech", 76.56], ["animal", 3.74], ["horse", 2.79]], null, null, null, null, null, null, [["music", 63.9], ["speech", 6.87], ["theremin", 3.5]], null, [["music", 47.11], ["theremin", 5.13], ["musical instrument", 4.95]], [["music", 38.27], ["musical instrument", 6.48], ["hum", 5.54]]], "duration": [1.29, 4.17, 2.14, 1.96, 15.59, 33.45, 1.5, 0.27, 2.32, 0.89, 1.22, 14.44, -0.13, 8.1, 5.91]}
|
annotations_filtered/x21gkEu5lKc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[18.0, 19.13], [20.0, 20.82], [22.0, 22.17], [25.0, 25.49], [26.0, 29.24], [33.0, 33.05], [37.0, 38.94], [51.0, 51.33], [55.0, 55.97], [94.0, 97.63], [100.0, 100.4]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 39.85, 0.0, 0.0, 0.0, 0.0, 31.67, 0.0], "audiomae_on_audioset": [null, null, null, null, [["speech", 71.9], ["explosion", 5.54], ["eruption", 3.61]], null, null, null, null, [["music", 44.6], ["speech", 24.56], ["cacophony", 8.94]], null], "duration": [1.13, 0.82, 0.17, 0.49, 3.24, 0.05, 1.94, 0.33, 0.97, 3.63, 0.4]}
|
annotations_filtered/x22ZX9dGaKk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.35], [4.0, 7.26], [15.0, 18.94], [21.0, 22.13], [29.0, 154.48], [155.0, 156.91]], "keep_status": [false, true, true, false, false, false], "silence_prob": [0.0, 41.81, 48.56, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 30.42], ["mantra", 10.44], ["hum", 5.38]], [["music", 31.66], ["animal", 17.75], ["duck", 10.14]], null, null, null], "duration": [0.35, 3.26, 3.94, 1.13, 125.48, 1.91]}
|
annotations_filtered/x24Olya2NLk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 7.26], [9.0, 9.66], [11.0, 12.39], [14.0, 35.63], [37.0, 37.15], [37.0, 41.22], [42.0, 41.57], [42.0, 63.1], [65.0, 72.57], [74.0, 74.55], [76.0, 192.87]], "keep_status": [false, false, false, false, false, true, false, true, true, false, false], "silence_prob": [0.0, 0.0, 0.0, 28.22, 0.0, 28.26, 0.0, 28.5, 28.55, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, [["music", 61.84], ["musical instrument", 5.29], ["hum", 4.61]], null, [["music", 24.51], ["whack, thwack", 15.93], ["whip", 7.21]], null, [["music", 18.31], ["hum", 17.61], ["mains hum", 10.57]], [["music", 31.81], ["sound effect", 9.48], ["hum", 7.66]], null, null], "duration": [0.26, 0.66, 1.39, 21.63, 0.15, 4.22, -0.43, 21.1, 7.57, 0.55, 116.87]}
|
annotations_filtered/x26YFcaLiNk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 4.5], [5.0, 11.55], [26.0, 41.45], [43.0, 43.16], [45.0, 45.4], [46.0, 53.28], [54.0, 54.57], [55.0, 55.9], [57.0, 59.66], [64.0, 67.74], [68.0, 69.18]], "keep_status": [false, false, true, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 39.25, 33.62, 0.0, 0.0, 33.85, 0.0, 0.0, 36.09, 33.91, 0.0], "audiomae_on_audioset": [null, [["sidetone", 51.59], ["music", 22.2], ["speech", 9.08]], [["speech", 34.64], ["music", 30.58], ["scratching (performance technique)", 3.77]], null, null, [["music", 43.51], ["scratching (performance technique)", 15.28], ["speech", 11.39]], null, null, [["music", 83.26], ["electronic music", 3.51], ["drum machine", 1.66]], [["music", 63.58], ["speech", 14.13], ["drum machine", 3.09]], null], "duration": [0.5, 6.55, 15.45, 0.16, 0.4, 7.28, 0.57, 0.9, 2.66, 3.74, 1.18]}
|
annotations_filtered/x2BuNuwZ9mg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 5.66], [6.0, 8.29], [12.0, 13.29], [16.0, 17.52], [20.0, 20.29], [21.0, 22.1], [24.0, 25.0], [26.0, 27.13], [30.0, 33.25], [35.0, 37.22], [38.0, 41.05], [43.0, 44.76], [46.0, 47.48], [49.0, 50.8], [58.0, 59.21], [60.0, 60.99], [65.0, 66.56], [68.0, 71.25], [75.0, 75.32], [78.0, 78.78], [80.0, 81.36], [84.0, 84.23], [86.0, 86.75]], "keep_status": [true, true, false, false, false, false, false, false, true, true, true, false, false, false, false, false, false, true, false, false, false, false, false], "silence_prob": [46.5, 42.88, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 48.06, 39.08, 45.18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 46.54, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["vehicle", 11.1], ["mains hum", 9.53], ["fly, housefly", 4.98]], [["fly, housefly", 34.55], ["insect", 17.37], ["bee, wasp, etc.", 11.65]], null, null, null, null, null, null, [["hum", 29.44], ["mains hum", 28.41], ["music", 4.98]], [["chainsaw", 16.03], ["vehicle", 7.83], ["speech", 6.13]], [["whale vocalization", 48.39], ["cacophony", 4.85], ["motorboat, speedboat", 4.4]], null, null, null, null, null, null, [["water", 25.25], ["music", 10.25], ["whir", 5.54]], null, null, null, null, null], "duration": [2.66, 2.29, 1.29, 1.52, 0.29, 1.1, 1.0, 1.13, 3.25, 2.22, 3.05, 1.76, 1.48, 1.8, 1.21, 0.99, 1.56, 3.25, 0.32, 0.78, 1.36, 0.23, 0.75]}
|
annotations_filtered/x2CizSzk9s4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[8.0, 9.05], [16.0, 23.79], [25.0, 26.92], [28.0, 31.35], [67.0, 67.54], [70.0, 71.46], [72.0, 73.5], [75.0, 78.0], [80.0, 80.27], [85.0, 86.86], [88.0, 90.42], [92.0, 92.33]], "keep_status": [false, false, false, true, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 69.88, 0.0, 49.78, 0.0, 0.0, 0.0, 61.57, 0.0, 0.0, 50.61, 0.0], "audiomae_on_audioset": [null, null, null, [["music", 33.08], ["noise", 12.39], ["hum", 6.85]], null, null, null, null, null, null, null, null], "duration": [1.05, 7.79, 1.92, 3.35, 0.54, 1.46, 1.5, 3.0, 0.27, 1.86, 2.42, 0.33]}
|
annotations_filtered/x2K8I28zejw_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.56], [12.0, 12.5], [14.0, 21.84], [24.0, 25.66], [30.0, 31.45], [33.0, 33.72], [35.0, 35.29], [37.0, 37.4], [39.0, 39.38], [44.0, 44.15], [45.0, 48.51], [59.0, 59.17], [60.0, 60.32], [62.0, 62.87], [64.0, 67.44], [67.0, 70.16], [74.0, 76.5], [101.0, 101.92], [112.0, 112.85], [123.0, 123.06], [124.0, 124.55], [140.0, 141.34], [154.0, 155.19], [164.0, 165.33], [166.0, 165.86]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, true, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 35.16, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 44.07, 0.0, 0.0, 0.0, 44.87, 50.66, 35.22, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, [["music", 75.88], ["theremin", 3.56], ["throbbing", 2.85]], null, null, null, null, null, null, null, [["speech", 49.77], ["sidetone", 21.14], ["hum", 5.9]], null, null, null, [["music", 26.14], ["hum", 19.11], ["mains hum", 9.0]], null, [["speech", 51.59], ["mains hum", 7.29], ["buzz", 6.86]], null, null, null, null, null, null, null, null], "duration": [0.56, 0.5, 7.84, 1.66, 1.45, 0.72, 0.29, 0.4, 0.38, 0.15, 3.51, 0.17, 0.32, 0.87, 3.44, 3.16, 2.5, 0.92, 0.85, 0.06, 0.55, 1.34, 1.19, 1.33, -0.14]}
|
annotations_filtered/x2S78gnCkRg_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[40.0, 41.27], [43.0, 59.1], [61.0, 61.28], [63.0, 63.14], [69.0, 73.04], [74.0, 74.58], [80.0, 80.69], [81.0, 160.54]], "keep_status": [false, false, false, false, false, false, false, false], "silence_prob": [0.0, 37.31, 0.0, 0.0, 39.34, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 61.56], ["electronic music", 4.9], ["speech", 3.83]], null, null, [["music", 61.54], ["throbbing", 11.51], ["hum", 3.93]], null, null, null], "duration": [1.27, 16.1, 0.28, 0.14, 4.04, 0.58, 0.69, 79.54]}
|
annotations_filtered/x2W8BqPt7mI_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 13.37], [15.0, 14.93], [16.0, 16.73], [19.0, 20.02], [25.0, 25.42], [31.0, 31.8], [50.0, 50.75], [55.0, 55.53]], "keep_status": [false, false, false, false, false, false, false, false], "silence_prob": [72.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null], "duration": [8.37, -0.07, 0.73, 1.02, 0.42, 0.8, 0.75, 0.53]}
|
annotations_filtered/x2WK_eWihdU_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 5.04], [5.0, 7.23], [8.0, 8.87], [12.0, 13.49], [22.0, 22.94], [33.0, 33.98], [37.0, 39.01], [57.0, 58.02], [59.0, 60.25], [65.0, 65.69], [68.0, 69.23], [70.0, 70.21], [78.0, 78.83], [92.0, 92.79], [107.0, 107.59]], "keep_status": [false, false, false, false, false, false, true, false, false, false, false, false, false, false, false], "silence_prob": [89.54, 30.31, 0.0, 0.0, 0.0, 0.0, 29.87, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["speech", 60.99], ["fart", 23.46], ["boing", 1.95]], null, null, null, null, [["speech", 18.25], ["quack", 17.04], ["duck", 14.66]], null, null, null, null, null, null, null, null], "duration": [2.04, 2.23, 0.87, 1.49, 0.94, 0.98, 2.01, 1.02, 1.25, 0.69, 1.23, 0.21, 0.83, 0.79, 0.59]}
|
annotations_filtered/x2lBq3c3AIY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[19.0, 19.28], [21.0, 23.01], [26.0, 48.44], [52.0, 74.68], [76.0, 77.19], [80.0, 80.94], [81.0, 82.51], [84.0, 83.84], [89.0, 89.97], [94.0, 95.59], [99.0, 99.57], [101.0, 102.71], [104.0, 104.57], [109.0, 110.49], [112.0, 111.94], [113.0, 115.86], [118.0, 121.26], [123.0, 128.93], [130.0, 132.34], [136.0, 136.68], [138.0, 140.36], [143.0, 144.26], [145.0, 147.43], [154.0, 165.94], [170.0, 177.77]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "silence_prob": [0.0, 78.21, 90.08, 60.51, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 80.82, 69.61, 63.96, 65.67, 0.0, 59.77, 0.0, 76.37, 57.32, 46.29], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["music", 49.54], ["speech", 11.89], ["scary music", 7.87]]], "duration": [0.28, 2.01, 22.44, 22.68, 1.19, 0.94, 1.51, -0.16, 0.97, 1.59, 0.57, 1.71, 0.57, 1.49, -0.06, 2.86, 3.26, 5.93, 2.34, 0.68, 2.36, 1.26, 2.43, 11.94, 7.77]}
|
annotations_filtered/x2vhOIjmS2s_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[10.0, 12.11], [13.0, 13.26], [17.0, 20.46], [22.0, 49.89], [57.0, 95.23], [99.0, 100.16], [102.0, 102.3]], "keep_status": [true, false, true, false, false, false, false], "silence_prob": [32.51, 0.0, 32.4, 36.77, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["siren", 34.83], ["emergency vehicle", 14.29], ["vehicle", 10.73]], null, [["speech", 44.46], ["vehicle", 13.7], ["car", 6.72]], [["music", 68.78], ["hum", 8.65], ["noise", 3.37]], null, null, null], "duration": [2.11, 0.26, 3.46, 27.89, 38.23, 1.16, 0.3]}
|
annotations_filtered/x2wH5RS58lo_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[5.0, 6.51], [7.0, 7.5], [10.0, 11.11], [13.0, 14.22], [19.0, 21.76], [23.0, 28.97], [30.0, 32.93], [34.0, 35.87], [37.0, 39.61], [41.0, 46.3], [47.0, 49.54], [51.0, 53.82], [55.0, 56.52], [60.0, 65.53], [66.0, 68.77], [72.0, 72.62], [74.0, 79.89], [81.0, 85.38], [87.0, 89.68], [90.0, 93.73], [95.0, 101.34], [102.0, 103.45], [105.0, 112.4], [114.0, 114.81], [117.0, 129.24], [135.0, 135.77], [137.0, 137.22], [139.0, 152.83], [154.0, 156.07], [157.0, 158.7], [160.0, 160.49], [162.0, 164.67], [173.0, 176.49], [183.0, 187.52], [189.0, 191.47], [205.0, 205.98]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 100.0, 100.0, 100.0, 0.0, 100.0, 100.0, 100.0, 100.0, 0.0, 100.0, 100.0, 0.0, 100.0, 100.0, 100.0, 100.0, 100.0, 0.0, 100.0, 0.0, 100.0, 0.0, 0.0, 98.66, 100.0, 0.0, 0.0, 100.0, 99.87, 97.83, 95.23, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.51, 0.5, 1.11, 1.22, 2.76, 5.97, 2.93, 1.87, 2.61, 5.3, 2.54, 2.82, 1.52, 5.53, 2.77, 0.62, 5.89, 4.38, 2.68, 3.73, 6.34, 1.45, 7.4, 0.81, 12.24, 0.77, 0.22, 13.83, 2.07, 1.7, 0.49, 2.67, 3.49, 4.52, 2.47, 0.98]}
|
annotations_filtered/x2yXtHyhu-k_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 4.01], [5.0, 5.81], [9.0, 11.62], [12.0, 16.93], [18.0, 18.42], [20.0, 21.41], [23.0, 49.6], [50.0, 62.06], [63.0, 63.71], [65.0, 106.0]], "keep_status": [false, false, false, false, false, false, true, true, false, false], "silence_prob": [89.01, 0.0, 36.91, 30.38, 0.0, 0.0, 31.82, 29.89, 0.0, 0.0], "audiomae_on_audioset": [null, null, [["foghorn", 38.2], ["music", 27.36], ["brass instrument", 8.25]], [["moo", 44.44], ["cattle, bovinae", 23.24], ["music", 8.9]], null, null, [["hum", 27.48], ["mains hum", 22.61], ["music", 18.33]], [["buzz", 38.84], ["music", 15.17], ["electric shaver, electric razor", 10.29]], null, null], "duration": [2.01, 0.81, 2.62, 4.93, 0.42, 1.41, 26.6, 12.06, 0.71, 41.0]}
|
annotations_filtered/x35VnGsGrFc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[21.0, 21.1], [21.0, 22.18], [25.0, 25.08], [26.0, 26.0], [26.0, 50.67], [52.0, 53.79], [54.0, 102.84], [108.0, 127.99], [130.0, 131.8]], "keep_status": [false, false, false, false, false, false, false, true, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 32.17, 0.0, 0.0, 30.22, 0.0], "audiomae_on_audioset": [null, null, null, null, [["music", 47.61], ["speech", 37.61], ["whack, thwack", 2.44]], null, null, [["music", 31.87], ["throbbing", 22.05], ["hum", 14.58]], null], "duration": [0.1, 1.18, 0.08, 0.0, 24.67, 1.79, 48.84, 19.99, 1.8]}
|
annotations_filtered/x39ZG34sn28_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[71.0, 71.95], [74.0, 77.33], [79.0, 91.42], [92.0, 95.94], [97.0, 101.31], [102.0, 104.87], [105.0, 111.64], [113.0, 114.54], [117.0, 117.02], [118.0, 121.64], [122.0, 122.94], [123.0, 124.12]], "keep_status": [false, false, false, false, false, false, false, false, false, true, false, false], "silence_prob": [0.0, 69.61, 68.28, 65.67, 75.39, 83.7, 77.03, 0.0, 0.0, 42.65, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, [["music", 46.55], ["speech", 17.48], ["hum", 3.06]], null, null], "duration": [0.95, 3.33, 12.42, 3.94, 4.31, 2.87, 6.64, 1.54, 0.02, 3.64, 0.94, 1.12]}
|
annotations_filtered/x3OTeacsT84_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.42], [5.0, 5.58], [25.0, 28.63], [30.0, 31.77], [33.0, 34.32], [35.0, 36.22], [39.0, 48.59], [50.0, 51.33], [53.0, 55.17], [58.0, 58.13], [59.0, 60.74], [62.0, 62.7], [67.0, 68.34], [72.0, 74.11], [76.0, 76.92], [78.0, 79.69], [81.0, 86.64], [87.0, 89.13], [92.0, 93.75], [96.0, 97.21], [98.0, 101.06], [105.0, 108.92], [109.0, 111.23], [112.0, 113.09], [114.0, 114.49], [117.0, 117.73], [119.0, 129.39], [130.0, 130.6], [132.0, 136.8], [138.0, 138.62], [142.0, 143.72], [145.0, 144.64], [145.0, 147.82], [149.0, 156.98], [162.0, 161.91], [164.0, 164.03], [166.0, 166.23], [167.0, 167.34], [176.0, 176.69], [178.0, 178.02], [183.0, 183.91], [185.0, 187.49], [188.0, 190.14], [191.0, 192.6], [195.0, 195.84], [199.0, 201.48], [203.0, 203.23], [203.0, 206.39], [210.0, 225.22]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false], "silence_prob": [0.0, 0.0, 99.62, 0.0, 0.0, 0.0, 77.87, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 99.52, 0.0, 0.0, 99.95, 99.98, 0.0, 0.0, 90.95, 99.36, 47.86, 0.0, 0.0, 0.0, 98.51, 0.0, 99.73, 0.0, 0.0, 0.0, 32.6, 53.34, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 52.92, 53.65, 0.0, 0.0, 64.86, 0.0, 47.9, 38.91], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 39.22], ["whale vocalization", 15.77], ["hum", 5.46]], null, null, null, null, null, null, null, null, null, [["music", 36.81], ["snicker", 11.26], ["laughter", 10.83]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 37.13], ["music", 27.96], ["throbbing", 4.74]], [["music", 31.28], ["throbbing", 23.86], ["hum", 20.74]]], "duration": [0.42, 0.58, 3.63, 1.77, 1.32, 1.22, 9.59, 1.33, 2.17, 0.13, 1.74, 0.7, 1.34, 2.11, 0.92, 1.69, 5.64, 2.13, 1.75, 1.21, 3.06, 3.92, 2.23, 1.09, 0.49, 0.73, 10.39, 0.6, 4.8, 0.62, 1.72, -0.36, 2.82, 7.98, -0.09, 0.03, 0.23, 0.34, 0.69, 0.02, 0.91, 2.49, 2.14, 1.6, 0.84, 2.48, 0.23, 3.39, 15.22]}
|
annotations_filtered/x3jT6tQ_gJk_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.12], [2.0, 10.93], [12.0, 12.09], [13.0, 13.14], [20.0, 20.66], [24.0, 24.6], [33.0, 34.1], [36.0, 36.09], [38.0, 38.6], [52.0, 53.42], [54.0, 56.78], [65.0, 65.5], [67.0, 67.68], [73.0, 74.43], [82.0, 83.84], [87.0, 88.38], [89.0, 92.79], [93.0, 94.15], [96.0, 96.52], [98.0, 99.44], [103.0, 111.28], [112.0, 113.0], [122.0, 122.91]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 57.97, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 51.34, 0.0, 0.0, 0.0, 0.0, 0.0, 57.89, 0.0, 0.0, 0.0, 58.47, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.12, 8.93, 0.09, 0.14, 0.66, 0.6, 1.1, 0.09, 0.6, 1.42, 2.78, 0.5, 0.68, 1.43, 1.84, 1.38, 3.79, 1.15, 0.52, 1.44, 8.28, 1.0, 0.91]}
|
annotations_filtered/x421Na9VfNE_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[15.0, 33.44], [40.0, 40.19], [40.0, 46.43], [52.0, 57.87], [61.0, 64.32], [65.0, 82.04], [83.0, 85.5], [87.0, 114.61], [115.0, 173.38], [178.0, 177.94]], "keep_status": [true, false, true, true, true, true, false, true, false, false], "silence_prob": [32.36, 0.0, 36.17, 30.75, 35.49, 31.77, 34.32, 31.96, 0.0, 0.0], "audiomae_on_audioset": [[["music", 50.77], ["theremin", 10.98], ["brass instrument", 6.36]], null, [["noise", 12.63], ["music", 12.52], ["hum", 12.17]], [["groan", 22.78], ["cattle, bovinae", 9.01], ["livestock, farm animals, working animals", 8.36]], [["noise", 18.78], ["music", 16.32], ["fly, housefly", 11.52]], [["fly, housefly", 45.05], ["insect", 16.22], ["bee, wasp, etc.", 6.02]], [["music", 61.39], ["musical instrument", 5.24], ["guitar", 4.08]], [["music", 57.94], ["throbbing", 7.01], ["electronic music", 4.61]], null, null], "duration": [18.44, 0.19, 6.43, 5.87, 3.32, 17.04, 2.5, 27.61, 58.38, -0.06]}
|
annotations_filtered/x4CEkYJNir0_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.56], [4.0, 4.82], [6.0, 7.18], [12.0, 12.85], [21.0, 21.54], [24.0, 31.14], [33.0, 33.18], [37.0, 37.64], [39.0, 39.16], [41.0, 41.59], [45.0, 45.33], [49.0, 49.17], [50.0, 50.11], [52.0, 52.03], [59.0, 59.88], [61.0, 61.79], [71.0, 71.44], [73.0, 76.35], [78.0, 80.11], [85.0, 84.81], [109.0, 109.43], [114.0, 116.4], [117.0, 120.55], [123.0, 133.49], [134.0, 135.85], [140.0, 140.64], [142.0, 144.98]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 99.84, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 89.19, 98.01, 0.0, 0.0, 74.44, 97.22, 65.32, 0.0, 0.0, 100.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.56, 0.82, 1.18, 0.85, 0.54, 7.14, 0.18, 0.64, 0.16, 0.59, 0.33, 0.17, 0.11, 0.03, 0.88, 0.79, 0.44, 3.35, 2.11, -0.19, 0.43, 2.4, 3.55, 10.49, 1.85, 0.64, 2.98]}
|
annotations_filtered/x4IKGG_2L6I_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[4.0, 12.21], [13.0, 13.81], [18.0, 18.5], [25.0, 28.43], [29.0, 33.59], [34.0, 36.07], [39.0, 40.69], [43.0, 47.66], [48.0, 49.4], [50.0, 51.06], [53.0, 54.82], [64.0, 68.64], [73.0, 73.82], [78.0, 79.95], [83.0, 82.9], [83.0, 83.02], [83.0, 84.79], [86.0, 87.0], [89.0, 88.74], [89.0, 89.4], [92.0, 92.85], [95.0, 94.81], [97.0, 97.09], [98.0, 99.2], [100.0, 100.36], [101.0, 101.44], [102.0, 102.81], [107.0, 108.4], [110.0, 113.49], [116.0, 116.65], [118.0, 121.05], [123.0, 126.77], [129.0, 137.1], [139.0, 139.19], [159.0, 159.39], [172.0, 173.97], [176.0, 177.06], [178.0, 181.79]], "keep_status": [false, false, false, false, true, true, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, true, false, false, false, false, false, true], "silence_prob": [34.26, 0.0, 0.0, 53.22, 39.83, 39.66, 0.0, 49.82, 0.0, 0.0, 0.0, 30.39, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.2, 0.0, 61.47, 35.98, 42.13, 0.0, 0.0, 0.0, 0.0, 35.59], "audiomae_on_audioset": [[["music", 53.47], ["speech", 33.02], ["sidetone", 1.41]], null, null, null, [["music", 47.13], ["speech", 13.18], ["hum", 3.26]], [["speech", 50.76], ["hum", 9.61], ["music", 6.49]], null, [["sidetone", 67.38], ["speech", 17.51], ["hum", 3.24]], null, null, null, [["fly, housefly", 44.54], ["insect", 13.25], ["speech", 10.25]], null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 15.62], ["whack, thwack", 15.12], ["echo", 10.7]], null, null, [["music", 25.59], ["hum", 23.17], ["throbbing", 13.52]], [["throbbing", 36.49], ["hum", 25.06], ["music", 11.33]], null, null, null, null, [["whack, thwack", 10.54], ["music", 10.45], ["throbbing", 7.23]]], "duration": [8.21, 0.81, 0.5, 3.43, 4.59, 2.07, 1.69, 4.66, 1.4, 1.06, 1.82, 4.64, 0.82, 1.95, -0.1, 0.02, 1.79, 1.0, -0.26, 0.4, 0.85, -0.19, 0.09, 1.2, 0.36, 0.44, 0.81, 1.4, 3.49, 0.65, 3.05, 3.77, 8.1, 0.19, 0.39, 1.97, 1.06, 3.79]}
|
annotations_filtered/x4L81QLGYuM_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[7.0, 8.31], [10.0, 13.93], [16.0, 20.39], [24.0, 24.68], [27.0, 33.91], [34.0, 33.94], [36.0, 36.41], [38.0, 38.18], [39.0, 43.93], [50.0, 51.92], [53.0, 56.98], [60.0, 60.94], [62.0, 63.14], [67.0, 69.11], [69.0, 74.44], [77.0, 88.32], [90.0, 105.51], [107.0, 108.08], [109.0, 108.94], [110.0, 114.51], [116.0, 119.77], [121.0, 122.69], [123.0, 124.12], [125.0, 171.04], [177.0, 180.94], [185.0, 184.84], [185.0, 186.95], [188.0, 192.94], [195.0, 201.41], [202.0, 220.94]], "keep_status": [false, false, false, false, false, false, false, false, true, false, false, false, false, true, true, false, false, false, false, true, false, false, false, false, true, false, false, true, false, false], "silence_prob": [0.0, 29.71, 30.65, 0.0, 31.28, 0.0, 0.0, 0.0, 29.62, 0.0, 29.86, 0.0, 0.0, 34.66, 29.49, 30.28, 30.94, 0.0, 0.0, 31.6, 29.95, 0.0, 0.0, 0.0, 34.72, 0.0, 0.0, 28.66, 28.91, 28.97], "audiomae_on_audioset": [null, [["music", 61.79], ["electronic music", 5.32], ["hum", 3.92]], [["music", 54.42], ["throbbing", 14.14], ["hum", 12.71]], null, [["hum", 33.97], ["mains hum", 21.12], ["speech", 15.79]], null, null, null, [["speech", 40.66], ["music", 12.87], ["breaking", 5.61]], null, [["livestock, farm animals, working animals", 54.47], ["cattle, bovinae", 29.8], ["moo", 15.24]], null, null, [["mains hum", 26.58], ["hum", 23.86], ["music", 16.08]], [["speech", 28.8], ["explosion", 18.98], ["burst, pop", 7.04]], [["speech", 68.89], ["music", 14.27], ["sidetone", 1.85]], [["speech", 48.73], ["music", 16.26], ["whack, thwack", 8.57]], null, null, [["music", 55.32], ["hum", 5.19], ["throbbing", 4.75]], [["speech", 50.14], ["music", 12.28], ["thunk", 10.89]], null, null, null, [["music", 49.39], ["hum", 8.0], ["mains hum", 5.97]], null, null, [["music", 19.72], ["cattle, bovinae", 9.5], ["whack, thwack", 9.26]], [["music", 58.62], ["throbbing", 24.23], ["hum", 4.19]], [["music", 57.21], ["speech", 13.2], ["hum", 4.53]]], "duration": [1.31, 3.93, 4.39, 0.68, 6.91, -0.06, 0.41, 0.18, 4.93, 1.92, 3.98, 0.94, 1.14, 2.11, 5.44, 11.32, 15.51, 1.08, -0.06, 4.51, 3.77, 1.69, 1.12, 46.04, 3.94, -0.16, 1.95, 4.94, 6.41, 18.94]}
|
annotations_filtered/x4QJwGTOny8_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 1.76], [3.0, 4.02], [9.0, 48.57], [52.0, 52.22]], "keep_status": [false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null], "duration": [0.76, 1.02, 39.57, 0.22]}
|
annotations_filtered/x4oAO_kDHTY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[13.0, 14.81], [19.0, 20.75], [26.0, 26.97], [28.0, 32.0], [33.0, 33.72], [37.0, 38.65], [39.0, 40.32], [42.0, 43.98], [46.0, 75.69], [79.0, 79.32], [80.0, 83.15], [86.0, 86.51], [87.0, 121.78], [122.0, 126.13], [128.0, 129.05], [131.0, 135.72], [137.0, 138.38], [141.0, 158.85], [164.0, 165.86], [169.0, 170.41], [172.0, 176.96]], "keep_status": [false, false, false, false, false, false, false, false, true, false, false, false, false, true, false, true, false, false, false, false, true], "silence_prob": [0.0, 0.0, 0.0, 54.83, 0.0, 0.0, 0.0, 0.0, 30.24, 0.0, 31.71, 0.0, 0.0, 46.61, 0.0, 35.24, 0.0, 29.49, 0.0, 0.0, 28.78], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, [["speech", 26.31], ["music", 25.51], ["hum", 13.35]], null, [["mains hum", 40.86], ["hum", 21.81], ["music", 14.56]], null, null, [["music", 30.54], ["buzz", 11.86], ["hum", 5.74]], null, [["music", 29.63], ["speech", 18.2], ["creak", 13.35]], null, [["buzz", 54.69], ["speech", 8.89], ["mains hum", 8.75]], null, null, [["mains hum", 44.69], ["hum", 16.03], ["music", 5.75]]], "duration": [1.81, 1.75, 0.97, 4.0, 0.72, 1.65, 1.32, 1.98, 29.69, 0.32, 3.15, 0.51, 34.78, 4.13, 1.05, 4.72, 1.38, 17.85, 1.86, 1.41, 4.96]}
|
annotations_filtered/x4utH5uWK6c_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.54], [4.0, 4.51], [7.0, 7.84], [10.0, 10.47], [11.0, 15.82], [19.0, 19.72], [28.0, 27.87], [28.0, 29.81], [31.0, 31.33], [37.0, 40.49], [46.0, 47.36], [54.0, 55.22], [60.0, 61.42], [64.0, 64.81], [67.0, 73.67], [80.0, 79.84], [88.0, 89.4], [92.0, 92.96], [94.0, 96.18], [104.0, 104.28], [106.0, 113.19], [116.0, 116.7], [118.0, 119.18], [122.0, 122.91], [125.0, 125.81]], "keep_status": [false, false, false, false, true, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 35.38, 0.0, 0.0, 0.0, 0.0, 28.99, 0.0, 0.0, 0.0, 0.0, 32.46, 0.0, 0.0, 0.0, 89.54, 0.0, 31.12, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, [["speech", 32.3], ["fly, housefly", 5.32], ["cattle, bovinae", 5.09]], null, null, null, null, [["car", 12.2], ["vehicle", 9.15], ["speech", 7.97]], null, null, null, null, [["livestock, farm animals, working animals", 34.65], ["cattle, bovinae", 33.48], ["moo", 24.91]], null, null, null, null, null, [["music", 42.75], ["synthesizer", 20.24], ["sampler", 5.46]], null, null, null, null], "duration": [0.54, 0.51, 0.84, 0.47, 4.82, 0.72, -0.13, 1.81, 0.33, 3.49, 1.36, 1.22, 1.42, 0.81, 6.67, -0.16, 1.4, 0.96, 2.18, 0.28, 7.19, 0.7, 1.18, 0.91, 0.81]}
|
annotations_filtered/x5Gwzy2FY10_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[3.0, 3.82], [5.0, 7.79], [9.0, 11.64], [13.0, 14.07], [15.0, 15.13], [19.0, 18.99], [19.0, 20.58], [21.0, 22.67], [23.0, 23.45], [27.0, 28.16], [30.0, 32.19], [33.0, 34.21], [41.0, 40.9], [43.0, 43.85], [45.0, 46.62], [49.0, 51.73], [54.0, 54.99], [56.0, 68.3], [72.0, 73.11], [74.0, 74.63], [77.0, 77.53], [79.0, 93.8], [100.0, 100.38], [101.0, 101.14], [103.0, 104.06], [105.0, 105.71], [109.0, 108.9], [109.0, 111.18], [112.0, 113.16], [115.0, 115.53], [116.0, 117.81]], "keep_status": [false, true, true, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false], "silence_prob": [0.0, 34.19, 38.97, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 46.75, 0.0, 0.0, 0.0, 0.0, 62.27, 0.0, 30.71, 0.0, 0.0, 0.0, 57.97, 0.0, 0.0, 0.0, 0.0, 0.0, 46.29, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 38.29], ["synthesizer", 12.46], ["electronic music", 7.01]], [["music", 34.77], ["speech", 11.61], ["synthesizer", 7.45]], null, null, null, null, null, null, null, [["music", 15.72], ["boing", 12.17], ["sidetone", 10.33]], null, null, null, null, null, null, [["music", 47.81], ["throbbing", 20.43], ["hum", 13.54]], null, null, null, null, null, null, null, null, null, [["speech", 62.55], ["fart", 3.3], ["burping, eructation", 3.23]], null, null, null], "duration": [0.82, 2.79, 2.64, 1.07, 0.13, -0.01, 1.58, 1.67, 0.45, 1.16, 2.19, 1.21, -0.1, 0.85, 1.62, 2.73, 0.99, 12.3, 1.11, 0.63, 0.53, 14.8, 0.38, 0.14, 1.06, 0.71, -0.1, 2.18, 1.16, 0.53, 1.81]}
|
annotations_filtered/x5ajdqqytyA_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[9.0, 11.03], [12.0, 23.11], [24.0, 27.48], [29.0, 31.97], [33.0, 43.97], [46.0, 59.8], [61.0, 62.87], [64.0, 64.67], [68.0, 73.2], [74.0, 76.69], [80.0, 83.12], [86.0, 97.9], [99.0, 102.15], [103.0, 105.98], [108.0, 113.04], [114.0, 124.65], [127.0, 127.38], [128.0, 128.7], [130.0, 130.94], [134.0, 134.74], [137.0, 138.0], [140.0, 141.83], [144.0, 144.12], [146.0, 147.36], [150.0, 151.66], [152.0, 152.31], [152.0, 152.51], [153.0, 153.66], [155.0, 155.95], [157.0, 158.28], [159.0, 160.51], [162.0, 163.44], [166.0, 167.66], [170.0, 171.71], [173.0, 173.72], [175.0, 175.86]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [99.88, 97.22, 98.73, 98.36, 96.77, 65.2, 0.0, 0.0, 55.89, 99.99, 99.91, 97.0, 81.71, 99.8, 99.98, 86.27, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [2.03, 11.11, 3.48, 2.97, 10.97, 13.8, 1.87, 0.67, 5.2, 2.69, 3.12, 11.9, 3.15, 2.98, 5.04, 10.65, 0.38, 0.7, 0.94, 0.74, 1.0, 1.83, 0.12, 1.36, 1.66, 0.31, 0.51, 0.66, 0.95, 1.28, 1.51, 1.44, 1.66, 1.71, 0.72, 0.86]}
|
annotations_filtered/x5bONeuC6BY_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[25.0, 26.15], [27.0, 28.61], [33.0, 34.79], [37.0, 41.81], [44.0, 51.78], [56.0, 55.97], [57.0, 62.16], [63.0, 63.44], [65.0, 66.21], [68.0, 71.02], [75.0, 75.64], [77.0, 79.07], [83.0, 85.02], [87.0, 89.8], [92.0, 92.45], [94.0, 97.33], [98.0, 110.0], [110.0, 126.96]], "keep_status": [false, false, false, false, false, false, true, false, false, false, false, false, false, true, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 96.29, 33.71, 0.0, 33.74, 0.0, 0.0, 64.29, 0.0, 80.82, 74.6, 48.95, 0.0, 63.42, 73.82, 32.12], "audiomae_on_audioset": [null, null, null, null, [["clang", 54.27], ["ding", 16.12], ["speech", 12.82]], null, [["music", 34.29], ["hum", 20.39], ["rumble", 6.64]], null, null, null, null, null, null, [["music", 32.65], ["theremin", 22.15], ["speech", 11.9]], null, null, null, [["speech", 38.47], ["music", 29.44], ["didgeridoo", 9.98]]], "duration": [1.15, 1.61, 1.79, 4.81, 7.78, -0.03, 5.16, 0.44, 1.21, 3.02, 0.64, 2.07, 2.02, 2.8, 0.45, 3.33, 12.0, 16.96]}
|
annotations_filtered/x5z9VZO--G4_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[2.0, 2.88], [3.0, 4.04], [8.0, 7.84], [10.0, 10.72], [20.0, 20.61], [25.0, 24.75], [27.0, 27.62], [28.0, 29.05], [37.0, 39.38], [41.0, 43.31], [44.0, 44.12], [45.0, 46.43], [50.0, 54.16], [55.0, 57.35], [58.0, 60.39], [63.0, 63.9], [67.0, 69.31], [71.0, 71.15], [72.0, 73.09], [75.0, 75.52], [79.0, 81.35], [83.0, 84.77], [87.0, 92.67], [93.0, 95.45], [102.0, 103.11], [105.0, 104.75], [107.0, 112.95]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 99.44, 59.77, 0.0, 0.0, 100.0, 100.0, 100.0, 0.0, 100.0, 0.0, 0.0, 0.0, 100.0, 0.0, 100.0, 97.73, 0.0, 0.0, 95.78], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.88, 1.04, -0.16, 0.72, 0.61, -0.25, 0.62, 1.05, 2.38, 2.31, 0.12, 1.43, 4.16, 2.35, 2.39, 0.9, 2.31, 0.15, 1.09, 0.52, 2.35, 1.77, 5.67, 2.45, 1.11, -0.25, 5.95]}
|
annotations_filtered/x6FDJAu5yMc_filtered.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"non_speech_segments": [[1.0, 7.1], [12.0, 12.93], [14.0, 14.57], [15.0, 16.02], [17.0, 22.92], [24.0, 26.62], [28.0, 31.24], [33.0, 33.67], [47.0, 47.39], [50.0, 52.79], [62.0, 63.46], [66.0, 67.22], [69.0, 72.13], [74.0, 77.46], [81.0, 92.06], [97.0, 97.58], [103.0, 103.15], [105.0, 106.68], [115.0, 118.07], [120.0, 120.6], [124.0, 125.0], [126.0, 129.64], [132.0, 134.1]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [80.64, 0.0, 0.0, 0.0, 97.43, 68.15, 100.0, 0.0, 0.0, 99.92, 0.0, 0.0, 100.0, 100.0, 93.76, 0.0, 0.0, 0.0, 80.29, 0.0, 0.0, 100.0, 97.83], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [6.1, 0.93, 0.57, 1.02, 5.92, 2.62, 3.24, 0.67, 0.39, 2.79, 1.46, 1.22, 3.13, 3.46, 11.06, 0.58, 0.15, 1.68, 3.07, 0.6, 1.0, 3.64, 2.1]}
|