{"segments": [[4.0, 8.92]], "p_music": [0.0], "p_speech": [32.79], "labels": ["P(~Music) = 0.0 | P(~Speech) = 32.79"]}