bpiyush commited on
Commit
b9c85ed
·
1 Parent(s): 7115d9c
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. annotations_filtered/u-2jqTXKQyU_filtered.json +1 -0
  2. annotations_filtered/u-ApxFOpl28_filtered.json +1 -0
  3. annotations_filtered/u-BIr0fW5cU_filtered.json +1 -0
  4. annotations_filtered/u-M2Zb_B7BY_filtered.json +1 -0
  5. annotations_filtered/u-bWIkGa0QA_filtered.json +1 -0
  6. annotations_filtered/u-eTCyG0jpA_filtered.json +1 -0
  7. annotations_filtered/u-pvs7gVNHo_filtered.json +1 -0
  8. annotations_filtered/u-z5139CW1I_filtered.json +1 -0
  9. annotations_filtered/u0RqfETo2ok_filtered.json +1 -0
  10. annotations_filtered/u0fi902X3qo_filtered.json +1 -0
  11. annotations_filtered/u0kF24ceZMI_filtered.json +1 -0
  12. annotations_filtered/u0ttQ8Dn7LM_filtered.json +1 -0
  13. annotations_filtered/u0wNMcfOIXM_filtered.json +1 -0
  14. annotations_filtered/u17vCX9koaI_filtered.json +1 -0
  15. annotations_filtered/u1MRGbWEI9M_filtered.json +1 -0
  16. annotations_filtered/u1Pgftn5H94_filtered.json +1 -0
  17. annotations_filtered/u1QIbENq66w_filtered.json +1 -0
  18. annotations_filtered/u1pJJOaKdiQ_filtered.json +1 -0
  19. annotations_filtered/u2107BTcDbs_filtered.json +1 -0
  20. annotations_filtered/u2D0kDFKaxE_filtered.json +1 -0
  21. annotations_filtered/u2pu0m9iTo4_filtered.json +1 -0
  22. annotations_filtered/u3UyGrnv1-A_filtered.json +1 -0
  23. annotations_filtered/u3XXKF0oDtU_filtered.json +1 -0
  24. annotations_filtered/u3_3EUKbY00_filtered.json +1 -0
  25. annotations_filtered/u3mupIlFIYQ_filtered.json +1 -0
  26. annotations_filtered/u3oi4L5tWQg_filtered.json +1 -0
  27. annotations_filtered/u3xIs0aajN4_filtered.json +1 -0
  28. annotations_filtered/u455yxBv35A_filtered.json +1 -0
  29. annotations_filtered/u4T5X47MKm4_filtered.json +1 -0
  30. annotations_filtered/u4T7slD8Mq4_filtered.json +1 -0
  31. annotations_filtered/u4gz2yNW_Go_filtered.json +1 -0
  32. annotations_filtered/u56OqFjs1dg_filtered.json +1 -0
  33. annotations_filtered/u5hpQ0KeRgY_filtered.json +1 -0
  34. annotations_filtered/u6GTs78NHzQ_filtered.json +1 -0
  35. annotations_filtered/u6HHla9ApmI_filtered.json +1 -0
  36. annotations_filtered/u6IAct0ow4c_filtered.json +1 -0
  37. annotations_filtered/u6W5OFK9jpU_filtered.json +1 -0
  38. annotations_filtered/u73HoUZD7tc_filtered.json +1 -0
  39. annotations_filtered/u74DpEZeHbg_filtered.json +1 -0
  40. annotations_filtered/u7DV5coBXSA_filtered.json +1 -0
  41. annotations_filtered/u7IXETT9OEQ_filtered.json +1 -0
  42. annotations_filtered/u7kInn-7hcA_filtered.json +1 -0
  43. annotations_filtered/u7tSASIBz4Y_filtered.json +1 -0
  44. annotations_filtered/u7yQ7qs6Zew_filtered.json +1 -0
  45. annotations_filtered/u83fkqXPIGE_filtered.json +1 -0
  46. annotations_filtered/u8QMY9JKlDk_filtered.json +1 -0
  47. annotations_filtered/u8TwN5M1fEY_filtered.json +1 -0
  48. annotations_filtered/u8oHCJ8LxtY_filtered.json +1 -0
  49. annotations_filtered/u9A2CYMFfNo_filtered.json +1 -0
  50. annotations_filtered/u9O_Xs8wAZk_filtered.json +1 -0
annotations_filtered/u-2jqTXKQyU_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[5.0, 5.7], [7.0, 7.57], [12.0, 13.98], [15.0, 16.61], [19.0, 20.38], [22.0, 22.49], [27.0, 27.57], [29.0, 29.96], [36.0, 36.83], [44.0, 44.51], [45.0, 45.94], [48.0, 48.44], [54.0, 54.87], [62.0, 62.77], [64.0, 64.61], [83.0, 99.84], [101.0, 102.03], [103.0, 104.09], [106.0, 107.2], [110.0, 110.37]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 65.44, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.7, 0.57, 1.98, 1.61, 1.38, 0.49, 0.57, 0.96, 0.83, 0.51, 0.94, 0.44, 0.87, 0.77, 0.61, 16.84, 1.03, 1.09, 1.2, 0.37]}
annotations_filtered/u-ApxFOpl28_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[4.0, 41.62], [42.0, 44.37], [45.0, 48.56], [50.0, 58.8]], "keep_status": [false, true, false, false], "silence_prob": [0.0, 31.28, 29.78, 32.1], "audiomae_on_audioset": [null, [["music", 32.65], ["mains hum", 16.54], ["hum", 10.62]], [["speech", 63.14], ["explosion", 6.11], ["eruption", 4.38]], [["mains hum", 48.7], ["hum", 39.79], ["music", 4.65]]], "duration": [37.62, 2.37, 3.56, 8.8]}
annotations_filtered/u-BIr0fW5cU_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[19.0, 21.95], [23.0, 26.2], [27.0, 30.86], [32.0, 39.16], [40.0, 50.95], [51.0, 51.02], [52.0, 51.95], [52.0, 56.66], [58.0, 72.79], [77.0, 77.23], [80.0, 81.4], [82.0, 83.42], [85.0, 85.62], [87.0, 87.72], [89.0, 94.88], [100.0, 101.16], [103.0, 106.81], [108.0, 111.25], [116.0, 122.89], [125.0, 128.61]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, true, true, false, false], "silence_prob": [100.0, 100.0, 99.98, 98.51, 84.62, 0.0, 0.0, 38.94, 34.99, 0.0, 0.0, 0.0, 0.0, 0.0, 38.7, 0.0, 34.46, 42.88, 55.67, 75.72], "audiomae_on_audioset": [null, null, null, null, null, null, null, [["music", 73.96], ["musical instrument", 2.79], ["hum", 2.52]], [["music", 60.68], ["synthesizer", 6.87], ["chop", 5.16]], null, null, null, null, null, [["music", 54.19], ["musical instrument", 4.57], ["hum", 3.91]], null, [["speech", 40.49], ["music", 15.34], ["hum", 8.4]], [["music", 46.48], ["hum", 6.28], ["didgeridoo", 5.74]], null, null], "duration": [2.95, 3.2, 3.86, 7.16, 10.95, 0.02, -0.05, 4.66, 14.79, 0.23, 1.4, 1.42, 0.62, 0.72, 5.88, 1.16, 3.81, 3.25, 6.89, 3.61]}
annotations_filtered/u-M2Zb_B7BY_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[7.0, 10.62], [22.0, 28.29], [31.0, 31.94], [43.0, 47.61], [52.0, 58.94], [61.0, 61.03], [66.0, 76.52], [77.0, 81.23]], "keep_status": [false, false, false, false, false, false, false, true], "silence_prob": [70.58, 63.42, 0.0, 33.93, 34.86, 0.0, 48.95, 49.45], "audiomae_on_audioset": [null, null, null, [["music", 50.74], ["hum", 14.7], ["mains hum", 6.6]], [["hum", 47.23], ["music", 19.3], ["mains hum", 12.41]], null, [["music", 70.57], ["didgeridoo", 15.75], ["musical instrument", 3.01]], [["music", 21.77], ["hum", 21.63], ["mains hum", 15.53]]], "duration": [3.62, 6.29, 0.94, 4.61, 6.94, 0.03, 10.52, 4.23]}
annotations_filtered/u-bWIkGa0QA_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[10.0, 11.96], [20.0, 23.77], [33.0, 33.37], [34.0, 34.55], [35.0, 36.14], [46.0, 46.23], [59.0, 59.32], [59.0, 59.39], [59.0, 59.43], [59.0, 59.49], [77.0, 80.96], [88.0, 92.37], [101.0, 105.51], [110.0, 111.84], [116.0, 122.25], [123.0, 122.61], [123.0, 123.13]], "keep_status": [false, true, false, false, false, false, false, false, false, false, true, true, false, false, true, false, false], "silence_prob": [0.0, 31.69, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 31.61, 30.93, 31.56, 0.0, 32.73, 0.0, 0.0], "audiomae_on_audioset": [null, [["speech", 30.69], ["music", 14.86], ["beatboxing", 7.82]], null, null, null, null, null, null, null, null, [["speech", 36.71], ["noise", 14.62], ["groan", 8.51]], [["speech", 53.76], ["electric shaver, electric razor", 4.65], ["whimper", 4.19]], [["cattle, bovinae", 43.67], ["moo", 32.31], ["speech", 11.13]], null, [["speech", 41.73], ["noise", 12.29], ["radio", 9.51]], null, null], "duration": [1.96, 3.77, 0.37, 0.55, 1.14, 0.23, 0.32, 0.39, 0.43, 0.49, 3.96, 4.37, 4.51, 1.84, 6.25, -0.39, 0.13]}
annotations_filtered/u-eTCyG0jpA_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[11.0, 11.58], [16.0, 17.95], [27.0, 27.57], [34.0, 36.48], [37.0, 37.79], [40.0, 49.72], [70.0, 77.72], [82.0, 84.67], [85.0, 88.01], [90.0, 95.5], [98.0, 103.05], [106.0, 114.22], [116.0, 116.01], [126.0, 136.0], [144.0, 150.5], [165.0, 166.78], [169.0, 170.07]], "keep_status": [false, false, false, false, false, true, true, false, true, true, true, true, false, true, true, false, false], "silence_prob": [0.0, 0.0, 0.0, 34.82, 0.0, 35.54, 29.5, 36.34, 33.77, 29.79, 29.37, 29.07, 0.0, 29.75, 30.05, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, [["music", 51.76], ["theremin", 27.78], ["cacophony", 2.98]], null, [["music", 28.49], ["fly, housefly", 21.12], ["insect", 7.26]], [["music", 22.03], ["fart", 13.49], ["speech", 12.4]], [["speech", 40.22], ["music", 28.55], ["throbbing", 10.59]], [["music", 37.77], ["hum", 9.76], ["throbbing", 8.43]], [["music", 22.53], ["whack, thwack", 18.31], ["speech", 9.88]], [["speech", 19.04], ["music", 14.61], ["outside, rural or natural", 6.32]], [["livestock, farm animals, working animals", 25.47], ["moo", 21.82], ["cattle, bovinae", 21.65]], null, [["speech", 52.85], ["music", 9.4], ["outside, rural or natural", 3.59]], [["music", 21.33], ["moo", 20.45], ["livestock, farm animals, working animals", 16.94]], null, null], "duration": [0.58, 1.95, 0.57, 2.48, 0.79, 9.72, 7.72, 2.67, 3.01, 5.5, 5.05, 8.22, 0.01, 10.0, 6.5, 1.78, 1.07]}
annotations_filtered/u-pvs7gVNHo_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[0.0, 0.33], [7.0, 40.14], [41.0, 43.02], [43.0, 46.6], [48.0, 48.19], [54.0, 54.87], [56.0, 59.64], [62.0, 62.94], [65.0, 68.52], [69.0, 70.77], [72.0, 71.76], [72.0, 97.6], [98.0, 98.58], [99.0, 110.02], [113.0, 123.9], [126.0, 143.6]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, true], "silence_prob": [0.0, 0.0, 62.07, 67.89, 0.0, 0.0, 58.55, 0.0, 91.64, 0.0, 0.0, 42.76, 0.0, 38.33, 28.28, 37.81], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, [["hum", 41.02], ["mains hum", 40.73], ["speech", 3.89]], null, [["rumble", 36.86], ["speech", 18.92], ["hum", 17.48]], [["explosion", 21.57], ["music", 11.17], ["hum", 9.69]], [["hum", 32.01], ["throbbing", 16.66], ["speech", 10.28]]], "duration": [0.33, 33.14, 2.02, 3.6, 0.19, 0.87, 3.64, 0.94, 3.52, 1.77, -0.24, 25.6, 0.58, 11.02, 10.9, 17.6]}
annotations_filtered/u-z5139CW1I_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[2.0, 7.26], [9.0, 11.55], [18.0, 22.49], [27.0, 33.47], [34.0, 36.73], [37.0, 36.95], [37.0, 36.98], [37.0, 37.02], [37.0, 45.91], [47.0, 49.47], [63.0, 73.62], [75.0, 92.37], [93.0, 117.49], [118.0, 117.75]], "keep_status": [false, true, true, false, true, false, false, false, false, true, true, false, false, false], "silence_prob": [49.73, 34.37, 31.65, 32.17, 32.04, 0.0, 0.0, 0.0, 30.85, 31.37, 37.27, 31.65, 91.47, 0.0], "audiomae_on_audioset": [[["fly, housefly", 51.91], ["insect", 23.96], ["mosquito", 13.88]], [["music", 22.27], ["speech", 16.26], ["sonar", 8.91]], [["throbbing", 32.32], ["hum", 28.1], ["mains hum", 7.11]], [["speech", 64.77], ["music", 10.58], ["whack, thwack", 3.99]], [["sidetone", 28.28], ["whack, thwack", 19.62], ["music", 6.65]], null, null, null, [["speech", 68.8], ["music", 4.84], ["sound effect", 2.9]], [["music", 24.1], ["whale vocalization", 21.71], ["synthesizer", 6.56]], [["bee, wasp, etc.", 22.09], ["fly, housefly", 20.42], ["insect", 19.0]], [["fly, housefly", 51.59], ["insect", 18.3], ["mosquito", 14.4]], null, null], "duration": [5.26, 2.55, 4.49, 6.47, 2.73, -0.05, -0.02, 0.02, 8.91, 2.47, 10.62, 17.37, 24.49, -0.25]}
annotations_filtered/u0RqfETo2ok_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[4.0, 5.75], [13.0, 13.68], [17.0, 17.22], [19.0, 23.23], [26.0, 26.97], [36.0, 42.11], [44.0, 46.77], [51.0, 51.26], [53.0, 53.6], [59.0, 59.8], [61.0, 61.45], [63.0, 64.47], [67.0, 67.49], [69.0, 69.62], [80.0, 80.82], [90.0, 91.22], [104.0, 106.17], [112.0, 114.32]], "keep_status": [false, false, false, false, false, true, true, false, false, false, false, false, false, false, false, false, true, true], "silence_prob": [0.0, 0.0, 0.0, 30.13, 0.0, 28.42, 29.17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.32, 32.79], "audiomae_on_audioset": [null, null, null, [["speech", 45.76], ["music", 25.83], ["didgeridoo", 4.77]], null, [["speech", 55.85], ["music", 7.03], ["throbbing", 4.83]], [["speech", 49.02], ["crowd", 7.65], ["cheering", 6.03]], null, null, null, null, null, null, null, null, null, [["theremin", 31.45], ["whale vocalization", 20.98], ["music", 15.3]], [["music", 32.63], ["didgeridoo", 5.71], ["gong", 5.69]]], "duration": [1.75, 0.68, 0.22, 4.23, 0.97, 6.11, 2.77, 0.26, 0.6, 0.8, 0.45, 1.47, 0.49, 0.62, 0.82, 1.22, 2.17, 2.32]}
annotations_filtered/u0fi902X3qo_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[14.0, 15.16], [16.0, 16.85], [18.0, 20.75], [22.0, 22.62], [24.0, 35.77], [37.0, 38.47], [39.0, 40.22], [41.0, 47.22], [48.0, 63.96], [65.0, 77.89], [79.0, 83.98], [85.0, 86.51], [88.0, 89.68], [90.0, 92.53], [94.0, 94.32], [99.0, 101.82], [103.0, 137.13], [139.0, 141.32], [142.0, 144.31]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 99.94, 0.0, 99.93, 0.0, 0.0, 99.97, 96.54, 72.6, 66.76, 0.0, 0.0, 60.05, 0.0, 60.98, 0.0, 78.04, 91.81], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.16, 0.85, 2.75, 0.62, 11.77, 1.47, 1.22, 6.22, 15.96, 12.89, 4.98, 1.51, 1.68, 2.53, 0.32, 2.82, 34.13, 2.32, 2.31]}
annotations_filtered/u0kF24ceZMI_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[9.0, 12.04], [13.0, 13.73], [20.0, 25.49], [27.0, 33.99], [47.0, 51.39], [52.0, 53.72], [55.0, 55.29], [57.0, 57.79], [59.0, 59.81], [61.0, 61.53], [64.0, 64.72], [69.0, 69.62], [78.0, 78.17], [82.0, 83.05], [84.0, 83.88]], "keep_status": [false, false, true, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [30.94, 0.0, 32.01, 31.14, 31.13, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["music", 61.36], ["speech", 5.02], ["crowd", 5.01]], null, [["music", 52.35], ["crowd", 9.85], ["speech", 7.67]], [["moo", 29.14], ["music", 20.63], ["cattle, bovinae", 20.47]], [["music", 67.7], ["speech", 3.51], ["crowd", 2.66]], null, null, null, null, null, null, null, null, null, null], "duration": [3.04, 0.73, 5.49, 6.99, 4.39, 1.72, 0.29, 0.79, 0.81, 0.53, 0.72, 0.62, 0.17, 1.05, -0.12]}
annotations_filtered/u0ttQ8Dn7LM_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[8.0, 15.36], [16.0, 17.22], [20.0, 20.19], [21.0, 21.93], [24.0, 24.09], [24.0, 31.68], [33.0, 42.16], [47.0, 46.87], [47.0, 52.57], [55.0, 68.96], [72.0, 74.65], [76.0, 80.03], [83.0, 83.71], [87.0, 89.41], [91.0, 93.09], [94.0, 96.5], [99.0, 100.08], [102.0, 103.1], [105.0, 107.49], [108.0, 110.13], [112.0, 113.31], [116.0, 117.85], [119.0, 120.77], [122.0, 130.28], [131.0, 139.6], [142.0, 151.5], [152.0, 154.92], [160.0, 162.33], [163.0, 164.4], [165.0, 165.52], [168.0, 168.51], [170.0, 170.75], [171.0, 171.8], [173.0, 173.01], [177.0, 177.67]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [99.84, 0.0, 0.0, 0.0, 0.0, 100.0, 99.99, 0.0, 100.0, 99.94, 100.0, 100.0, 0.0, 100.0, 100.0, 100.0, 0.0, 0.0, 100.0, 100.0, 0.0, 0.0, 0.0, 100.0, 100.0, 100.0, 100.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [7.36, 1.22, 0.19, 0.93, 0.09, 7.68, 9.16, -0.13, 5.57, 13.96, 2.65, 4.03, 0.71, 2.41, 2.09, 2.5, 1.08, 1.1, 2.49, 2.13, 1.31, 1.85, 1.77, 8.28, 8.6, 9.5, 2.92, 2.33, 1.4, 0.52, 0.51, 0.75, 0.8, 0.01, 0.67]}
annotations_filtered/u0wNMcfOIXM_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[6.0, 14.25], [16.0, 16.33], [18.0, 18.42], [25.0, 24.63], [28.0, 27.85], [29.0, 30.15], [34.0, 34.08], [37.0, 39.61], [42.0, 47.34], [48.0, 48.46], [49.0, 57.7], [59.0, 60.15], [61.0, 61.5], [67.0, 68.49], [69.0, 73.2], [76.0, 77.87], [81.0, 82.44], [84.0, 87.39]], "keep_status": [true, false, false, false, false, false, false, true, true, false, true, false, false, false, false, false, false, true], "silence_prob": [40.75, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 38.02, 34.77, 0.0, 37.43, 0.0, 0.0, 0.0, 36.77, 0.0, 0.0, 31.15], "audiomae_on_audioset": [[["music", 29.79], ["clip-clop", 11.99], ["horse", 9.65]], null, null, null, null, null, null, [["music", 35.07], ["coin (dropping)", 6.91], ["radio", 3.53]], [["music", 55.15], ["synthesizer", 5.76], ["effects unit", 4.94]], null, [["music", 36.39], ["echo", 15.64], ["effects unit", 8.86]], null, null, null, [["music", 49.13], ["throbbing", 12.15], ["hum", 8.81]], null, null, [["fart", 46.21], ["whir", 4.91], ["effects unit", 3.14]]], "duration": [8.25, 0.33, 0.42, -0.37, -0.15, 1.15, 0.08, 2.61, 5.34, 0.46, 8.7, 1.15, 0.5, 1.49, 4.2, 1.87, 1.44, 3.39]}
annotations_filtered/u17vCX9koaI_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[6.0, 6.59], [9.0, 9.26], [15.0, 15.25]], "keep_status": [false, false, false], "silence_prob": [0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null], "duration": [0.59, 0.26, 0.25]}
annotations_filtered/u1MRGbWEI9M_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[6.0, 9.76], [11.0, 20.09], [21.0, 22.5], [23.0, 24.16], [25.0, 25.64], [27.0, 29.2], [30.0, 31.19], [33.0, 37.0], [38.0, 40.29], [41.0, 81.24], [83.0, 119.58], [120.0, 120.63]], "keep_status": [false, false, false, false, false, false, false, false, true, false, false, false], "silence_prob": [76.37, 98.99, 0.0, 0.0, 0.0, 99.99, 0.0, 82.07, 45.05, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, [["speech", 49.66], ["burping, eructation", 11.09], ["wail, moan", 6.49]], null, null, null], "duration": [3.76, 9.09, 1.5, 1.16, 0.64, 2.2, 1.19, 4.0, 2.29, 40.24, 36.58, 0.63]}
annotations_filtered/u1Pgftn5H94_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[13.0, 13.95], [17.0, 19.18], [25.0, 25.34], [31.0, 46.5], [49.0, 53.91], [62.0, 62.68], [64.0, 68.34], [71.0, 75.51], [86.0, 87.02], [88.0, 88.97], [96.0, 96.77], [97.0, 111.92], [115.0, 119.99], [121.0, 121.44], [122.0, 128.11], [130.0, 131.5], [140.0, 141.0], [149.0, 151.7], [157.0, 156.76]], "keep_status": [false, false, false, true, true, false, false, false, false, false, false, true, true, false, true, false, false, true, false], "silence_prob": [0.0, 99.84, 0.0, 44.2, 47.01, 0.0, 57.25, 30.54, 0.0, 0.0, 0.0, 32.42, 35.95, 0.0, 37.51, 0.0, 0.0, 32.42, 0.0], "audiomae_on_audioset": [null, null, null, [["music", 49.43], ["speech", 15.9], ["outside, rural or natural", 2.28]], [["music", 37.96], ["hum", 12.44], ["throbbing", 9.18]], null, null, [["speech", 67.55], ["hum", 7.26], ["rumble", 4.96]], null, null, null, [["music", 26.36], ["speech", 19.75], ["honk", 13.4]], [["music", 45.37], ["speech", 17.13], ["musical instrument", 4.29]], null, [["music", 58.19], ["double bass", 5.6], ["cello", 4.1]], null, null, [["music", 44.64], ["fart", 9.63], ["fly, housefly", 5.25]], null], "duration": [0.95, 2.18, 0.34, 15.5, 4.91, 0.68, 4.34, 4.51, 1.02, 0.97, 0.77, 14.92, 4.99, 0.44, 6.11, 1.5, 1.0, 2.7, -0.24]}
annotations_filtered/u1QIbENq66w_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[68.0, 75.15], [75.0, 76.6], [79.0, 97.83]], "keep_status": [false, false, true], "silence_prob": [33.02, 0.0, 31.66], "audiomae_on_audioset": [[["boing", 78.03], ["music", 8.06], ["fly, housefly", 2.27]], null, [["boing", 46.23], ["music", 15.22], ["frog", 6.09]]], "duration": [7.15, 1.6, 18.83]}
annotations_filtered/u1pJJOaKdiQ_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[19.0, 21.93], [23.0, 23.72], [24.0, 25.15], [28.0, 29.61], [31.0, 32.27], [33.0, 34.15], [43.0, 43.06], [51.0, 53.65], [55.0, 57.52], [58.0, 58.82], [60.0, 61.11], [62.0, 63.37], [64.0, 65.79], [66.0, 66.88], [67.0, 68.55], [69.0, 70.29], [71.0, 72.05], [74.0, 83.81], [90.0, 91.77], [95.0, 97.63], [100.0, 101.26], [102.0, 102.49], [103.0, 104.11], [106.0, 106.34], [108.0, 109.24], [111.0, 110.73], [118.0, 118.4], [120.0, 119.94], [121.0, 120.77], [122.0, 125.46], [126.0, 125.73], [127.0, 127.43], [128.0, 131.5], [134.0, 134.97], [139.0, 139.4], [140.0, 140.41], [142.0, 143.46], [145.0, 145.42], [147.0, 148.47], [150.0, 150.38], [151.0, 151.43]], "keep_status": [true, false, false, false, false, false, false, true, true, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [37.19, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 47.16, 43.28, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 48.31, 0.0, 60.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 49.92, 0.0, 0.0, 58.38, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["speech", 20.37], ["radio", 18.24], ["horse", 4.48]], null, null, null, null, null, null, [["speech", 37.34], ["radio", 6.11], ["creak", 4.72]], [["frog", 17.3], ["whack, thwack", 13.75], ["radio", 6.29]], null, null, null, null, null, null, null, null, [["insect", 19.98], ["fly, housefly", 16.41], ["radio", 13.11]], null, null, null, null, null, null, null, null, null, null, null, [["livestock, farm animals, working animals", 13.68], ["insect", 6.24], ["animal", 6.2]], null, null, null, null, null, null, null, null, null, null, null], "duration": [2.93, 0.72, 1.15, 1.61, 1.27, 1.15, 0.06, 2.65, 2.52, 0.82, 1.11, 1.37, 1.79, 0.88, 1.55, 1.29, 1.05, 9.81, 1.77, 2.63, 1.26, 0.49, 1.11, 0.34, 1.24, -0.27, 0.4, -0.06, -0.23, 3.46, -0.27, 0.43, 3.5, 0.97, 0.4, 0.41, 1.46, 0.42, 1.47, 0.38, 0.43]}
annotations_filtered/u2107BTcDbs_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[37.0, 39.18], [40.0, 40.96], [43.0, 43.01], [44.0, 44.78], [46.0, 62.8], [69.0, 71.47], [74.0, 91.32], [92.0, 92.4], [94.0, 94.66], [97.0, 97.26], [104.0, 105.12], [112.0, 113.53], [115.0, 116.14]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [78.38, 0.0, 0.0, 0.0, 57.17, 75.72, 49.13, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, [["music", 73.47], ["speech", 8.23], ["music for children", 1.84]], null, null, null, null, null, null], "duration": [2.18, 0.96, 0.01, 0.78, 16.8, 2.47, 17.32, 0.4, 0.66, 0.26, 1.12, 1.53, 1.14]}
annotations_filtered/u2D0kDFKaxE_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[6.0, 7.28], [10.0, 10.94], [13.0, 14.76], [17.0, 17.98], [24.0, 24.73], [34.0, 35.26], [36.0, 37.62], [39.0, 50.19], [60.0, 60.93], [98.0, 100.8], [101.0, 100.84], [101.0, 100.87], [112.0, 118.44], [125.0, 131.62], [140.0, 140.8], [151.0, 150.7], [151.0, 153.99], [159.0, 165.75], [170.0, 175.39]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, true, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 72.01, 0.0, 31.1, 0.0, 0.0, 31.34, 30.45, 0.0, 0.0, 29.95, 30.32, 30.94], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, [["music", 53.16], ["didgeridoo", 15.84], ["synthesizer", 9.19]], null, null, [["music", 79.88], ["electronic music", 3.96], ["synthesizer", 3.7]], [["music", 53.99], ["throbbing", 7.97], ["hum", 7.67]], null, null, [["speech", 30.94], ["mains hum", 17.39], ["hum", 15.32]], [["music", 67.13], ["trance music", 11.16], ["electronic music", 7.88]], [["music", 66.18], ["synthesizer", 8.23], ["electronic music", 6.18]]], "duration": [1.28, 0.94, 1.76, 0.98, 0.73, 1.26, 1.62, 11.19, 0.93, 2.8, -0.16, -0.13, 6.44, 6.62, 0.8, -0.3, 2.99, 6.75, 5.39]}
annotations_filtered/u2pu0m9iTo4_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[42.0, 43.21], [45.0, 48.93], [50.0, 52.79], [54.0, 54.92], [56.0, 57.7], [59.0, 62.21], [64.0, 64.42], [66.0, 66.53], [67.0, 68.25], [69.0, 76.84], [77.0, 78.22], [84.0, 107.45], [109.0, 108.89], [109.0, 109.17], [110.0, 110.17], [110.0, 113.19]], "keep_status": [false, false, false, false, false, true, false, false, false, true, false, true, false, false, false, true], "silence_prob": [0.0, 49.31, 37.41, 0.0, 0.0, 30.14, 0.0, 0.0, 0.0, 33.53, 0.0, 36.21, 0.0, 0.0, 0.0, 31.3], "audiomae_on_audioset": [null, [["music", 76.12], ["didgeridoo", 3.63], ["musical instrument", 2.05]], [["cattle, bovinae", 32.25], ["moo", 25.43], ["music", 13.31]], null, null, [["music", 28.9], ["speech", 12.82], ["foghorn", 10.52]], null, null, null, [["wail, moan", 42.1], ["speech", 7.7], ["grunt", 4.76]], null, [["music", 38.72], ["fly, housefly", 19.53], ["insect", 7.59]], null, null, null, [["music", 48.27], ["theremin", 12.84], ["musical instrument", 7.46]]], "duration": [1.21, 3.93, 2.79, 0.92, 1.7, 3.21, 0.42, 0.53, 1.25, 7.84, 1.22, 23.45, -0.11, 0.17, 0.17, 3.19]}
annotations_filtered/u3UyGrnv1-A_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[8.0, 7.67], [10.0, 9.76], [11.0, 12.26], [14.0, 13.64], [18.0, 18.81], [19.0, 20.56], [26.0, 26.33], [28.0, 28.71], [34.0, 39.34], [42.0, 42.69], [46.0, 45.91], [52.0, 60.0], [60.0, 67.07], [67.0, 69.15], [70.0, 71.76], [73.0, 73.3], [76.0, 78.09], [80.0, 80.76], [84.0, 89.11], [93.0, 95.23], [98.0, 98.29], [101.0, 105.6], [108.0, 111.27], [116.0, 116.5], [117.0, 117.68], [121.0, 121.54], [122.0, 122.15], [126.0, 131.94]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, true, false, true, true, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 36.49, 0.0, 0.0, 57.01, 88.46, 60.42, 0.0, 0.0, 40.02, 0.0, 43.61, 43.53, 0.0, 36.41, 37.93, 0.0, 0.0, 0.0, 0.0, 29.86], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, [["music", 65.42], ["speech", 5.57], ["musical instrument", 4.25]], null, null, null, null, null, null, null, [["speech", 58.65], ["glass", 3.97], ["dishes, pots, and pans", 3.96]], null, [["speech", 73.07], ["fart", 3.93], ["music", 2.42]], [["music", 24.47], ["thunk", 10.77], ["fart", 7.74]], null, [["thunk", 52.9], ["whack, thwack", 8.02], ["breaking", 4.6]], [["livestock, farm animals, working animals", 10.33], ["music", 9.51], ["moo", 5.55]], null, null, null, null, [["machine gun", 76.57], ["speech", 9.89], ["gunshot, gunfire", 5.9]]], "duration": [-0.33, -0.24, 1.26, -0.36, 0.81, 1.56, 0.33, 0.71, 5.34, 0.69, -0.09, 8.0, 7.07, 2.15, 1.76, 0.3, 2.09, 0.76, 5.11, 2.23, 0.29, 4.6, 3.27, 0.5, 0.68, 0.54, 0.15, 5.94]}
annotations_filtered/u3XXKF0oDtU_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[3.0, 3.15], [4.0, 5.17], [8.0, 11.03], [13.0, 13.2], [17.0, 19.36], [23.0, 23.4], [25.0, 48.61], [50.0, 50.38], [52.0, 68.4], [74.0, 93.51], [95.0, 103.17], [103.0, 105.16], [106.0, 123.65], [124.0, 130.0], [132.0, 135.9], [138.0, 139.9], [142.0, 143.8], [145.0, 149.34]], "keep_status": [false, false, true, false, false, false, false, false, false, false, true, false, true, false, false, false, false, true], "silence_prob": [0.0, 0.0, 49.22, 0.0, 68.02, 0.0, 72.9, 0.0, 37.15, 36.92, 35.22, 33.32, 30.15, 31.91, 30.87, 0.0, 0.0, 35.73], "audiomae_on_audioset": [null, null, [["speech", 26.68], ["music", 25.2], ["hum", 3.8]], null, null, null, null, null, [["music", 61.26], ["theremin", 18.82], ["speech", 7.23]], [["music", 90.87], ["throbbing", 1.74], ["electronic music", 0.84]], [["music", 33.04], ["theremin", 28.57], ["synthesizer", 5.36]], [["music", 33.0], ["quack", 32.62], ["duck", 4.57]], [["music", 32.08], ["sound effect", 7.35], ["whoosh, swoosh, swish", 5.62]], [["speech", 61.12], ["music", 13.34], ["didgeridoo", 2.16]], [["music", 51.29], ["explosion", 15.63], ["burst, pop", 3.54]], null, null, [["mains hum", 33.4], ["hum", 22.33], ["speech", 12.67]]], "duration": [0.15, 1.17, 3.03, 0.2, 2.36, 0.4, 23.61, 0.38, 16.4, 19.51, 8.17, 2.16, 17.65, 6.0, 3.9, 1.9, 1.8, 4.34]}
annotations_filtered/u3_3EUKbY00_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[3.0, 3.62], [6.0, 6.59], [8.0, 10.52], [28.0, 29.51], [38.0, 39.88], [42.0, 48.79], [49.0, 50.6], [51.0, 61.43], [62.0, 63.17], [65.0, 67.86], [68.0, 70.75], [71.0, 84.89], [86.0, 89.26], [93.0, 93.51], [96.0, 101.46], [103.0, 104.68], [107.0, 115.08], [117.0, 117.63], [120.0, 121.42], [123.0, 125.81], [127.0, 128.16], [130.0, 129.91]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 99.71, 0.0, 0.0, 93.29, 0.0, 99.44, 0.0, 99.73, 94.37, 99.44, 97.92, 0.0, 98.44, 0.0, 94.95, 0.0, 0.0, 99.93, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.62, 0.59, 2.52, 1.51, 1.88, 6.79, 1.6, 10.43, 1.17, 2.86, 2.75, 13.89, 3.26, 0.51, 5.46, 1.68, 8.08, 0.63, 1.42, 2.81, 1.16, -0.09]}
annotations_filtered/u3mupIlFIYQ_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[3.0, 3.45], [4.0, 5.17], [6.0, 7.28], [8.0, 9.34], [10.0, 25.83], [26.0, 28.0], [29.0, 30.33], [31.0, 32.85], [35.0, 38.89], [42.0, 42.13], [43.0, 44.81], [45.0, 46.14], [47.0, 83.32], [84.0, 144.93], [146.0, 147.66], [149.0, 152.59], [153.0, 153.89], [154.0, 159.65], [161.0, 160.76], [162.0, 163.04], [166.0, 166.87], [172.0, 173.42], [175.0, 176.47], [178.0, 177.79]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 57.09, 35.78, 0.0, 0.0, 99.87, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 52.22, 0.0, 45.52, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, [["speech", 71.12], ["sidetone", 6.72], ["busy signal", 5.23]], null, null, null, null, null, null, null, null, null, null, null, [["speech", 35.16], ["whack, thwack", 11.26], ["crushing", 9.61]], null, null, null, null, null, null], "duration": [0.45, 1.17, 1.28, 1.34, 15.83, 2.0, 1.33, 1.85, 3.89, 0.13, 1.81, 1.14, 36.32, 60.93, 1.66, 3.59, 0.89, 5.65, -0.24, 1.04, 0.87, 1.42, 1.47, -0.21]}
annotations_filtered/u3oi4L5tWQg_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[17.0, 48.39], [49.0, 52.35], [53.0, 55.29], [57.0, 58.56], [61.0, 62.11], [64.0, 64.72], [65.0, 65.92], [68.0, 68.82], [78.0, 78.54], [83.0, 84.08], [85.0, 85.82]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 90.08, 99.56, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null], "duration": [31.39, 3.35, 2.29, 1.56, 1.11, 0.72, 0.92, 0.82, 0.54, 1.08, 0.82]}
annotations_filtered/u3xIs0aajN4_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[2.0, 6.45], [19.0, 22.59], [25.0, 38.79], [40.0, 40.42], [42.0, 45.42], [46.0, 46.26], [48.0, 48.59], [50.0, 54.04], [55.0, 55.61], [63.0, 64.17], [71.0, 74.53], [76.0, 76.67], [85.0, 90.04], [94.0, 95.17], [101.0, 101.09]], "keep_status": [false, false, false, false, false, false, false, false, false, false, true, false, false, false, false], "silence_prob": [62.58, 95.91, 91.64, 0.0, 76.53, 0.0, 0.0, 87.19, 0.0, 0.0, 48.35, 0.0, 84.8, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, [["ping", 11.96], ["music", 8.32], ["synthesizer", 5.47]], null, null, null, null], "duration": [4.45, 3.59, 13.79, 0.42, 3.42, 0.26, 0.59, 4.04, 0.61, 1.17, 3.53, 0.67, 5.04, 1.17, 0.09]}
annotations_filtered/u455yxBv35A_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[1.0, 1.93], [2.0, 6.24], [24.0, 24.56], [26.0, 26.23], [39.0, 43.29], [50.0, 64.25], [68.0, 68.67], [75.0, 75.1], [77.0, 77.58], [78.0, 81.62], [82.0, 92.65], [93.0, 94.19], [101.0, 104.48], [104.0, 104.57], [106.0, 122.61], [124.0, 127.45], [130.0, 137.56], [146.0, 146.75], [149.0, 156.79], [157.0, 158.19], [161.0, 161.92]], "keep_status": [false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false], "silence_prob": [0.0, 32.28, 0.0, 0.0, 42.98, 33.23, 0.0, 0.0, 0.0, 35.29, 30.43, 0.0, 30.56, 0.0, 31.97, 33.53, 30.58, 0.0, 31.76, 0.0, 0.0], "audiomae_on_audioset": [null, [["throbbing", 35.97], ["music", 26.97], ["hum", 16.15]], null, null, [["music", 46.8], ["speech", 21.33], ["foghorn", 5.68]], [["music", 42.19], ["speech", 15.91], ["musical instrument", 3.02]], null, null, null, [["music", 44.62], ["boing", 20.86], ["theremin", 6.74]], [["music", 76.77], ["sound effect", 2.22], ["hum", 2.0]], null, [["livestock, farm animals, working animals", 40.74], ["cattle, bovinae", 33.28], ["moo", 21.43]], null, [["music", 60.61], ["speech", 11.22], ["buzz", 2.14]], [["music", 45.35], ["theremin", 16.28], ["didgeridoo", 12.37]], [["music", 70.66], ["boing", 2.89], ["cacophony", 2.84]], null, [["music", 25.51], ["speech", 23.47], ["vehicle", 12.48]], null, null], "duration": [0.93, 4.24, 0.56, 0.23, 4.29, 14.25, 0.67, 0.1, 0.58, 3.62, 10.65, 1.19, 3.48, 0.57, 16.61, 3.45, 7.56, 0.75, 7.79, 1.19, 0.92]}
annotations_filtered/u4T5X47MKm4_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[4.0, 10.98], [13.0, 14.91], [15.0, 38.18], [39.0, 53.22]], "keep_status": [false, false, true, true], "silence_prob": [53.22, 0.0, 33.24, 30.41], "audiomae_on_audioset": [null, null, [["speech", 29.3], ["hum", 22.63], ["mains hum", 17.21]], [["speech", 39.18], ["music", 11.98], ["livestock, farm animals, working animals", 6.12]]], "duration": [6.98, 1.91, 23.18, 14.22]}
annotations_filtered/u4T7slD8Mq4_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[3.0, 51.06], [53.0, 83.0], [85.0, 97.24], [98.0, 105.14], [112.0, 122.05]], "keep_status": [false, true, false, false, false], "silence_prob": [0.0, 30.87, 30.2, 31.99, 35.71], "audiomae_on_audioset": [null, [["music", 50.95], ["speech", 7.71], ["breaking", 6.06]], [["music", 55.61], ["throbbing", 24.32], ["hum", 3.8]], [["music", 70.18], ["speech", 12.42], ["thunk", 1.65]], [["music", 55.17], ["speech", 10.46], ["didgeridoo", 10.04]]], "duration": [48.06, 30.0, 12.24, 7.14, 10.05]}
annotations_filtered/u4gz2yNW_Go_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[48.0, 51.56], [52.0, 75.05], [76.0, 88.92], [93.0, 95.27], [97.0, 97.02], [98.0, 99.3], [100.0, 102.66], [104.0, 105.22], [105.0, 107.82], [109.0, 114.24], [118.0, 132.44], [135.0, 140.63], [147.0, 151.38], [152.0, 152.74], [156.0, 163.56], [168.0, 168.27], [176.0, 175.98], [176.0, 176.01]], "keep_status": [true, false, false, true, false, false, false, false, false, true, false, false, false, false, false, false, false, false], "silence_prob": [32.44, 31.81, 32.09, 31.36, 0.0, 0.0, 32.5, 0.0, 32.9, 31.6, 30.99, 30.78, 30.62, 0.0, 30.53, 0.0, 0.0, 0.0], "audiomae_on_audioset": [[["speech", 21.6], ["duck", 14.95], ["quack", 10.51]], [["quack", 46.3], ["music", 18.85], ["animal", 9.53]], [["speech", 37.86], ["music", 26.37], ["fly, housefly", 7.02]], [["livestock, farm animals, working animals", 17.86], ["mosquito", 15.15], ["fly, housefly", 13.81]], null, null, [["music", 71.86], ["musical instrument", 5.42], ["drum", 2.23]], null, [["speech", 34.24], ["music", 23.73], ["fart", 15.55]], [["music", 19.63], ["speech", 13.46], ["fly, housefly", 11.72]], [["cattle, bovinae", 30.83], ["livestock, farm animals, working animals", 26.37], ["moo", 19.11]], [["honk", 42.34], ["goose", 21.57], ["music", 17.53]], [["music", 43.17], ["speech", 30.33], ["whack, thwack", 4.43]], null, [["cattle, bovinae", 53.16], ["moo", 20.71], ["livestock, farm animals, working animals", 18.58]], null, null, null], "duration": [3.56, 23.05, 12.92, 2.27, 0.02, 1.3, 2.66, 1.22, 2.82, 5.24, 14.44, 5.63, 4.38, 0.74, 7.56, 0.27, -0.02, 0.01]}
annotations_filtered/u56OqFjs1dg_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[1.0, 1.44], [2.0, 3.99], [5.0, 6.78], [8.0, 9.04], [11.0, 12.31], [14.0, 14.86], [16.0, 17.49], [18.0, 19.13], [20.0, 20.83], [22.0, 23.82], [25.0, 25.44], [28.0, 36.88], [38.0, 39.75], [40.0, 41.91], [43.0, 43.33], [49.0, 51.14], [52.0, 53.05], [54.0, 54.41], [64.0, 69.31], [71.0, 71.44]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 93.6, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 35.07, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 31.31], ["livestock, farm animals, working animals", 9.52], ["goat", 8.83]], null], "duration": [0.44, 1.99, 1.78, 1.04, 1.31, 0.86, 1.49, 1.13, 0.83, 1.82, 0.44, 8.88, 1.75, 1.91, 0.33, 2.14, 1.05, 0.41, 5.31, 0.44]}
annotations_filtered/u5hpQ0KeRgY_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[34.0, 34.37], [35.0, 35.68], [37.0, 47.88], [49.0, 55.58], [56.0, 57.94], [61.0, 61.72], [62.0, 64.25], [66.0, 67.53], [69.0, 70.01], [74.0, 75.52], [77.0, 77.62], [78.0, 79.47], [89.0, 89.53], [94.0, 94.53], [95.0, 95.0], [96.0, 109.14], [110.0, 117.53], [119.0, 127.2], [128.0, 130.08], [131.0, 131.95], [133.0, 133.42]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 94.22, 95.78, 0.0, 0.0, 61.18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 79.07, 53.47, 97.33, 97.73, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.37, 0.68, 10.88, 6.58, 1.94, 0.72, 2.25, 1.53, 1.01, 1.52, 0.62, 1.47, 0.53, 0.53, 0.0, 13.14, 7.53, 8.2, 2.08, 0.95, 0.42]}
annotations_filtered/u6GTs78NHzQ_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[2.0, 22.4], [29.0, 29.4], [45.0, 50.03]], "keep_status": [false, false, false], "silence_prob": [29.89, 0.0, 30.13], "audiomae_on_audioset": [[["speech", 45.44], ["music", 35.23], ["hum", 3.9]], null, [["livestock, farm animals, working animals", 71.25], ["cattle, bovinae", 16.12], ["moo", 6.78]]], "duration": [20.4, 0.4, 5.03]}
annotations_filtered/u6HHla9ApmI_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[5.0, 6.91], [23.0, 23.21], [23.0, 23.45], [29.0, 30.97], [32.0, 34.35], [36.0, 36.17], [40.0, 42.28], [47.0, 55.39], [61.0, 61.18], [62.0, 65.43], [68.0, 69.03], [79.0, 80.38], [83.0, 83.66], [86.0, 86.8], [87.0, 89.19], [92.0, 93.93], [104.0, 104.82], [110.0, 114.29], [116.0, 117.15], [118.0, 118.2], [124.0, 125.47], [126.0, 127.57], [129.0, 129.25], [137.0, 137.54], [141.0, 141.1], [141.0, 141.15]], "keep_status": [false, false, false, false, false, false, true, false, false, true, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 30.79, 0.0, 30.92, 30.82, 0.0, 30.26, 0.0, 0.0, 0.0, 0.0, 35.64, 0.0, 0.0, 34.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, [["music", 47.73], ["speech", 23.03], ["boing", 4.98]], null, [["music", 47.2], ["boing", 17.78], ["livestock, farm animals, working animals", 3.69]], [["music", 75.15], ["boing", 5.49], ["speech", 4.9]], null, [["music", 38.51], ["speech", 18.3], ["mosquito", 5.39]], null, null, null, null, [["music", 38.53], ["speech", 25.83], ["boing", 17.83]], null, null, [["speech", 51.31], ["music", 8.1], ["sidetone", 7.08]], null, null, null, null, null, null, null, null], "duration": [1.91, 0.21, 0.45, 1.97, 2.35, 0.17, 2.28, 8.39, 0.18, 3.43, 1.03, 1.38, 0.66, 0.8, 2.19, 1.93, 0.82, 4.29, 1.15, 0.2, 1.47, 1.57, 0.25, 0.54, 0.1, 0.15]}
annotations_filtered/u6IAct0ow4c_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[12.0, 11.84], [24.0, 24.85], [40.0, 39.97], [42.0, 41.72], [50.0, 51.36], [54.0, 64.94], [67.0, 70.95], [73.0, 75.54], [77.0, 85.13], [89.0, 89.63], [93.0, 95.62], [102.0, 107.6], [115.0, 116.34], [117.0, 119.03], [123.0, 123.63], [130.0, 130.2], [131.0, 131.26], [132.0, 132.36], [133.0, 133.93], [143.0, 144.26]], "keep_status": [false, false, false, false, false, false, true, true, true, false, false, false, false, true, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 31.97, 28.28, 40.33, 30.51, 0.0, 33.16, 34.93, 0.0, 33.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, [["music", 31.35], ["speech", 24.0], ["mains hum", 15.3]], [["didgeridoo", 44.8], ["boing", 7.92], ["music", 7.58]], [["speech", 29.74], ["civil defense siren", 19.09], ["singing bowl", 10.71]], [["explosion", 16.81], ["music", 11.81], ["hum", 6.89]], null, [["speech", 75.91], ["music", 12.04], ["inside, large room or hall", 1.45]], [["music", 53.61], ["hum", 11.43], ["ambient music", 6.42]], null, [["hum", 31.0], ["mains hum", 16.94], ["speech", 11.41]], null, null, null, null, null, null], "duration": [-0.16, 0.85, -0.03, -0.28, 1.36, 10.94, 3.95, 2.54, 8.13, 0.63, 2.62, 5.6, 1.34, 2.03, 0.63, 0.2, 0.26, 0.36, 0.93, 1.26]}
annotations_filtered/u6W5OFK9jpU_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[8.0, 9.02], [13.0, 13.9], [21.0, 21.86], [25.0, 25.62], [45.0, 46.08], [49.0, 52.15], [54.0, 56.05], [60.0, 61.42]], "keep_status": [false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 30.18, 65.44, 0.0], "audiomae_on_audioset": [null, null, null, null, null, [["cattle, bovinae", 32.81], ["moo", 24.02], ["livestock, farm animals, working animals", 18.48]], null, null], "duration": [1.02, 0.9, 0.86, 0.62, 1.08, 3.15, 2.05, 1.42]}
annotations_filtered/u73HoUZD7tc_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[2.0, 3.03], [4.0, 5.39], [6.0, 7.5], [9.0, 11.06], [12.0, 13.02], [21.0, 21.09], [28.0, 28.41], [29.0, 28.83], [33.0, 33.03], [35.0, 36.91], [38.0, 38.11], [48.0, 50.16], [59.0, 59.56], [65.0, 66.48], [72.0, 73.87], [77.0, 77.33], [80.0, 82.02], [88.0, 92.28], [94.0, 95.39], [99.0, 99.91], [104.0, 105.54], [107.0, 107.45], [108.0, 108.3], [111.0, 110.73], [112.0, 113.46], [117.0, 117.42], [121.0, 121.74], [124.0, 126.23], [127.0, 127.8], [128.0, 128.73], [131.0, 130.65]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 99.97, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 99.73, 0.0, 0.0, 0.0, 0.0, 99.36, 99.97, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 98.27, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [1.03, 1.39, 1.5, 2.06, 1.02, 0.09, 0.41, -0.17, 0.03, 1.91, 0.11, 2.16, 0.56, 1.48, 1.87, 0.33, 2.02, 4.28, 1.39, 0.91, 1.54, 0.45, 0.3, -0.27, 1.46, 0.42, 0.74, 2.23, 0.8, 0.73, -0.35]}
annotations_filtered/u74DpEZeHbg_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[0.0, 1.0], [6.0, 7.15], [11.0, 11.31], [13.0, 12.78], [29.0, 29.47], [30.0, 30.38], [31.0, 31.8], [34.0, 34.35], [36.0, 36.39], [38.0, 42.09], [51.0, 50.82], [54.0, 54.72], [63.0, 64.29], [84.0, 84.48], [90.0, 91.05], [92.0, 106.89], [109.0, 110.51]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 58.38, 0.0, 0.0, 0.0, 0.0, 0.0, 36.75, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, [["speech", 54.0], ["hum", 11.54], ["music", 4.13]], null], "duration": [1.0, 1.15, 0.31, -0.22, 0.47, 0.38, 0.8, 0.35, 0.39, 4.09, -0.18, 0.72, 1.29, 0.48, 1.05, 14.89, 1.51]}
annotations_filtered/u7DV5coBXSA_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[11.0, 10.54], [11.0, 12.75], [15.0, 15.85], [18.0, 20.34], [21.0, 20.97], [23.0, 22.92], [24.0, 25.03], [31.0, 32.43], [34.0, 39.78], [40.0, 60.24], [61.0, 60.81], [65.0, 75.25]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 65.79, 0.0, 0.0, 0.0, 0.0, 99.16, 65.2, 0.0, 48.61], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, [["sine wave", 52.71], ["singing bowl", 21.99], ["chirp tone", 13.24]]], "duration": [-0.46, 1.75, 0.85, 2.34, -0.03, -0.08, 1.03, 1.43, 5.78, 20.24, -0.19, 10.25]}
annotations_filtered/u7IXETT9OEQ_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[5.0, 5.86], [8.0, 8.02], [11.0, 11.45], [13.0, 13.47], [17.0, 17.56], [18.0, 19.2], [21.0, 22.35], [26.0, 27.01], [28.0, 30.6], [32.0, 32.58], [33.0, 34.08], [35.0, 40.15], [41.0, 41.94], [45.0, 45.49], [49.0, 50.97], [54.0, 56.86], [60.0, 60.74], [66.0, 66.68], [68.0, 69.15], [71.0, 72.81], [74.0, 78.49], [79.0, 80.22], [81.0, 83.66], [88.0, 89.73], [92.0, 93.06]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 80.29, 0.0, 0.0, 0.0, 83.34, 0.0, 0.0, 0.0, 0.0, 71.72, 0.0, 58.81, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.86, 0.02, 0.45, 0.47, 0.56, 1.2, 1.35, 1.01, 2.6, 0.58, 1.08, 5.15, 0.94, 0.49, 1.97, 2.86, 0.74, 0.68, 1.15, 1.81, 4.49, 1.22, 2.66, 1.73, 1.06]}
annotations_filtered/u7kInn-7hcA_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[12.0, 12.31], [23.0, 25.66], [27.0, 28.09], [38.0, 38.43], [49.0, 50.01], [51.0, 51.0], [57.0, 61.48], [62.0, 65.4], [66.0, 74.9], [83.0, 84.75], [102.0, 101.97], [103.0, 105.24], [108.0, 108.99], [112.0, 114.67], [115.0, 122.99]], "keep_status": [false, false, false, false, false, false, false, false, true, false, false, false, false, false, false], "silence_prob": [0.0, 40.79, 0.0, 0.0, 0.0, 0.0, 33.88, 38.74, 42.33, 0.0, 0.0, 30.56, 0.0, 30.77, 30.15], "audiomae_on_audioset": [null, [["music", 55.37], ["theremin", 18.62], ["mantra", 3.13]], null, null, null, null, [["music", 45.24], ["theremin", 21.53], ["synthesizer", 4.93]], [["music", 54.11], ["theremin", 22.21], ["synthesizer", 2.23]], [["music", 36.22], ["effects unit", 10.95], ["chorus effect", 6.29]], null, null, [["music", 61.77], ["carnatic music", 8.25], ["musical instrument", 3.97]], null, [["music", 73.98], ["didgeridoo", 6.62], ["foghorn", 1.19]], [["music", 57.82], ["theremin", 10.02], ["yodeling", 7.94]]], "duration": [0.31, 2.66, 1.09, 0.43, 1.01, 0.0, 4.48, 3.4, 8.9, 1.75, -0.03, 2.24, 0.99, 2.67, 7.99]}
annotations_filtered/u7tSASIBz4Y_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[9.0, 12.09], [13.0, 62.23], [64.0, 64.94], [66.0, 100.01]], "keep_status": [false, false, false, false], "silence_prob": [78.55, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null], "duration": [3.09, 49.23, 0.94, 34.01]}
annotations_filtered/u7yQ7qs6Zew_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[12.0, 62.45], [65.0, 67.71], [68.0, 70.14], [71.0, 76.54], [81.0, 82.12], [92.0, 92.3], [98.0, 100.18], [102.0, 102.27], [103.0, 103.6], [105.0, 105.6], [109.0, 111.06], [112.0, 112.41], [116.0, 116.43], [117.0, 118.05], [124.0, 128.16], [128.0, 128.33], [129.0, 130.35], [134.0, 135.11], [139.0, 140.93], [143.0, 146.01], [148.0, 149.03], [150.0, 152.14], [155.0, 157.92], [159.0, 160.46], [162.0, 163.09], [165.0, 166.45], [167.0, 167.63]], "keep_status": [false, false, false, false, false, false, false, false, false, false, true, false, false, false, true, false, false, false, false, true, false, true, true, false, false, false, false], "silence_prob": [0.0, 78.21, 77.36, 69.2, 0.0, 0.0, 77.36, 0.0, 0.0, 0.0, 45.92, 0.0, 0.0, 0.0, 35.91, 0.0, 0.0, 0.0, 0.0, 39.05, 0.0, 47.74, 43.18, 0.0, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, [["whale vocalization", 28.85], ["cattle, bovinae", 9.64], ["theremin", 8.08]], null, null, null, [["music", 42.37], ["speech", 14.72], ["hum", 9.52]], null, null, null, null, [["music", 47.86], ["throbbing", 16.53], ["hum", 5.48]], null, [["music", 31.96], ["hum", 22.75], ["throbbing", 11.16]], [["music", 27.72], ["speech", 23.19], ["throbbing", 10.93]], null, null, null, null], "duration": [50.45, 2.71, 2.14, 5.54, 1.12, 0.3, 2.18, 0.27, 0.6, 0.6, 2.06, 0.41, 0.43, 1.05, 4.16, 0.33, 1.35, 1.11, 1.93, 3.01, 1.03, 2.14, 2.92, 1.46, 1.09, 1.45, 0.63]}
annotations_filtered/u83fkqXPIGE_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[4.0, 3.87], [5.0, 4.9], [6.0, 6.51], [8.0, 9.46], [10.0, 11.01], [11.0, 13.04], [16.0, 17.68], [19.0, 19.11], [20.0, 20.07], [21.0, 21.63], [22.0, 22.33], [23.0, 46.6], [48.0, 48.1], [50.0, 50.25], [51.0, 52.59], [55.0, 65.25], [71.0, 84.54], [85.0, 85.01], [92.0, 100.14], [102.0, 103.52], [104.0, 104.26], [105.0, 105.27], [107.0, 107.94], [109.0, 110.02], [112.0, 125.85]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 0.0, 71.72, 0.0, 0.0, 0.0, 0.0, 0.0, 36.59, 0.0, 0.0, 0.0, 55.46, 57.17, 0.0, 65.55, 0.0, 0.0, 0.0, 0.0, 0.0, 63.42], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, [["speech", 20.99], ["music", 16.31], ["electric shaver, electric razor", 8.82]], null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [-0.13, -0.1, 0.51, 1.46, 1.01, 2.04, 1.68, 0.11, 0.07, 0.63, 0.33, 23.6, 0.1, 0.25, 1.59, 10.25, 13.54, 0.01, 8.14, 1.52, 0.26, 0.27, 0.94, 1.02, 13.85]}
annotations_filtered/u8QMY9JKlDk_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[38.0, 42.8], [45.0, 72.74], [73.0, 88.4]], "keep_status": [true, true, false], "silence_prob": [30.99, 30.8, 33.14], "audiomae_on_audioset": [[["speech", 29.64], ["music", 13.11], ["animal", 9.92]], [["noise", 44.52], ["sidetone", 5.56], ["hum", 5.26]], [["music", 46.81], ["buzz", 16.26], ["hum", 9.37]]], "duration": [4.8, 27.74, 15.4]}
annotations_filtered/u8TwN5M1fEY_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[7.0, 7.65], [9.0, 9.93], [11.0, 11.6], [12.0, 13.61], [14.0, 16.48], [17.0, 17.64], [22.0, 23.13], [29.0, 29.88], [32.0, 32.39], [34.0, 34.79], [35.0, 36.41], [39.0, 38.97], [42.0, 42.21], [43.0, 43.77], [45.0, 45.79], [47.0, 48.3], [50.0, 51.85], [54.0, 54.68], [56.0, 56.05], [58.0, 58.29], [59.0, 59.95], [62.0, 63.76], [69.0, 69.13], [70.0, 70.38], [72.0, 72.55], [74.0, 74.55], [75.0, 75.51], [76.0, 76.96], [78.0, 80.96], [82.0, 82.27], [84.0, 84.92], [86.0, 88.13], [89.0, 90.9], [91.0, 94.31]], "keep_status": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], "silence_prob": [0.0, 0.0, 0.0, 0.0, 86.82, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 89.19, 0.0, 0.0, 100.0, 0.0, 74.29], "audiomae_on_audioset": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], "duration": [0.65, 0.93, 0.6, 1.61, 2.48, 0.64, 1.13, 0.88, 0.39, 0.79, 1.41, -0.03, 0.21, 0.77, 0.79, 1.3, 1.85, 0.68, 0.05, 0.29, 0.95, 1.76, 0.13, 0.38, 0.55, 0.55, 0.51, 0.96, 2.96, 0.27, 0.92, 2.13, 1.9, 3.31]}
annotations_filtered/u8oHCJ8LxtY_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[2.0, 2.24], [5.0, 7.5], [9.0, 12.18], [14.0, 20.16], [21.0, 24.76], [26.0, 28.98], [31.0, 33.51], [36.0, 38.77], [41.0, 46.43], [48.0, 58.9]], "keep_status": [false, true, true, true, true, false, false, false, true, false], "silence_prob": [0.0, 32.27, 44.93, 34.32, 41.36, 63.85, 47.82, 59.86, 49.87, 59.07], "audiomae_on_audioset": [null, [["music", 34.15], ["theremin", 5.66], ["hum", 4.63]], [["music", 40.46], ["effects unit", 11.03], ["synthesizer", 5.89]], [["music", 29.07], ["hum", 12.23], ["mains hum", 9.4]], [["music", 37.12], ["speech", 22.39], ["hum", 10.37]], null, [["speech", 54.65], ["music", 17.13], ["sidetone", 3.74]], null, [["music", 34.52], ["hum", 16.4], ["speech", 6.26]], null], "duration": [0.24, 2.5, 3.18, 6.16, 3.76, 2.98, 2.51, 2.77, 5.43, 10.9]}
annotations_filtered/u9A2CYMFfNo_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[10.0, 10.99], [11.0, 14.88], [16.0, 16.75], [18.0, 18.81], [21.0, 30.06], [31.0, 33.94], [35.0, 35.88], [37.0, 38.87], [39.0, 39.93], [41.0, 41.66], [42.0, 42.52], [46.0, 48.66], [51.0, 52.05], [54.0, 54.08], [54.0, 54.46], [55.0, 59.04], [60.0, 68.55], [74.0, 76.84], [78.0, 80.23], [83.0, 84.96], [90.0, 92.47], [96.0, 117.24], [119.0, 119.43], [129.0, 128.88], [131.0, 130.99]], "keep_status": [false, false, false, false, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false], "silence_prob": [0.0, 40.93, 0.0, 0.0, 33.46, 37.88, 0.0, 0.0, 0.0, 0.0, 0.0, 45.82, 0.0, 0.0, 0.0, 35.62, 32.89, 52.16, 44.09, 0.0, 33.97, 38.06, 0.0, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 55.57], ["speech", 16.04], ["throbbing", 3.5]], null, null, [["music", 39.4], ["musical instrument", 10.32], ["brass instrument", 8.33]], [["croak", 23.46], ["music", 23.39], ["speech", 14.76]], null, null, null, null, null, [["music", 69.91], ["throbbing", 6.28], ["hum", 4.64]], null, null, null, [["music", 58.6], ["speech", 15.81], ["synthesizer", 4.78]], [["music", 58.13], ["speech", 12.35], ["musical instrument", 3.21]], null, [["music", 66.11], ["synthesizer", 5.14], ["speech", 3.94]], null, [["music", 18.17], ["speech", 15.11], ["fart", 4.86]], [["music", 65.63], ["quack", 13.13], ["animal", 4.96]], null, null, null], "duration": [0.99, 3.88, 0.75, 0.81, 9.06, 2.94, 0.88, 1.87, 0.93, 0.66, 0.52, 2.66, 1.05, 0.08, 0.46, 4.04, 8.55, 2.84, 2.23, 1.96, 2.47, 21.24, 0.43, -0.12, -0.01]}
annotations_filtered/u9O_Xs8wAZk_filtered.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"non_speech_segments": [[18.0, 52.1], [56.0, 64.1], [65.0, 71.1], [72.0, 72.44], [73.0, 80.77], [82.0, 156.95], [157.0, 158.35]], "keep_status": [false, true, true, false, false, false, false], "silence_prob": [0.0, 32.89, 31.03, 0.0, 31.93, 0.0, 0.0], "audiomae_on_audioset": [null, [["music", 27.79], ["speech", 10.52], ["fart", 8.25]], [["music", 47.93], ["didgeridoo", 11.55], ["musical instrument", 5.96]], null, [["speech", 31.19], ["mains hum", 21.07], ["hum", 18.85]], null, null], "duration": [34.1, 8.1, 6.1, 0.44, 7.77, 74.95, 1.35]}