{"segments": [[71.0, 72.1], [96.0, 101.0], [118.0, 119.87]], "p_music": [5.41, 0.0, 0.0], "p_speech": [47.69, 38.41, 53.97], "labels": ["P(~Music) = 5.41 | P(~Speech) = 47.69", "P(~Music) = 0.0 | P(~Speech) = 38.41", "P(~Music) = 0.0 | P(~Speech) = 53.97"]}