whisper-timestamped / tests /expected /corner_cases /large_apollo11.mp3.words.json
acharyasagar's picture
Upload folder using huggingface_hub
89c4248 verified
{
"text": " Apollo 11, Houston. We got a recommendation for you on your DOJ's E-A limb, E-G-E-A's, over. Go ahead. Okay, we'd like to have, say, a selected one or two on the helmet. We're going to have B-1. And you can put the other one on Mike's helmet. We're still seeing the bleeper, over. We got a little bit on the helmet and B1. The other one might go under sleep restraint. We got them in their helmet bags and I guess, excuse me, the helmet bags. The leaflet bags. Roger. Roger, I'm taking the next A out of the CLS. Okay, we weren't sure that this was a suggestion. We thought we'd, you could check it out. So I guess whatever you come up with just let us know. Okay, no problem. Okay, no problem, we'll let you know where the end of the line is.",
"segments": [
{
"id": 0,
"seek": 0,
"start": 0.36,
"end": 6.96,
"text": " Apollo 11, Houston. We got a recommendation for you on your DOJ's E-A limb, E-G-E-A's, over.",
"tokens": [
50364,
25187,
2975,
11,
18717,
13,
492,
658,
257,
11879,
337,
291,
322,
428,
10699,
41,
311,
462,
12,
32,
30390,
11,
462,
12,
38,
12,
36,
12,
32,
311,
11,
670,
13,
50714
],
"temperature": 0.4,
"avg_logprob": -0.6358732926218134,
"compression_ratio": 1.443298969072165,
"no_speech_prob": 0.334128201007843,
"confidence": 0.595,
"words": [
{
"text": "Apollo",
"start": 0.36,
"end": 0.92,
"confidence": 0.791
},
{
"text": "11,",
"start": 0.92,
"end": 1.32,
"confidence": 0.878
},
{
"text": "Houston.",
"start": 1.5,
"end": 1.74,
"confidence": 0.829
},
{
"text": "We",
"start": 1.9,
"end": 1.94,
"confidence": 0.836
},
{
"text": "got",
"start": 1.94,
"end": 2.1,
"confidence": 0.645
},
{
"text": "a",
"start": 2.1,
"end": 2.3,
"confidence": 0.995
},
{
"text": "recommendation",
"start": 2.3,
"end": 3.08,
"confidence": 0.708
},
{
"text": "for",
"start": 3.08,
"end": 3.46,
"confidence": 0.856
},
{
"text": "you",
"start": 3.46,
"end": 3.62,
"confidence": 0.857
},
{
"text": "on",
"start": 3.62,
"end": 3.76,
"confidence": 0.87
},
{
"text": "your",
"start": 3.76,
"end": 4.06,
"confidence": 0.764
},
{
"text": "DOJ's",
"start": 4.06,
"end": 4.88,
"confidence": 0.162
},
{
"text": "E-A",
"start": 4.88,
"end": 5.3,
"confidence": 0.409
},
{
"text": "limb,",
"start": 5.3,
"end": 5.62,
"confidence": 0.263
},
{
"text": "E-G-E-A's,",
"start": 5.76,
"end": 6.8,
"confidence": 0.768
},
{
"text": "over.",
"start": 6.82,
"end": 6.96,
"confidence": 0.818
}
]
},
{
"id": 1,
"seek": 0,
"start": 10.8,
"end": 11.16,
"text": " Go ahead.",
"tokens": [
50914,
1037,
2286,
13,
50964
],
"temperature": 0.4,
"avg_logprob": -0.6358732926218134,
"compression_ratio": 1.443298969072165,
"no_speech_prob": 0.334128201007843,
"confidence": 0.781,
"words": [
{
"text": "Go",
"start": 10.8,
"end": 10.98,
"confidence": 0.706
},
{
"text": "ahead.",
"start": 10.98,
"end": 11.16,
"confidence": 0.863
}
]
},
{
"id": 2,
"seek": 0,
"start": 11.88,
"end": 19.12,
"text": " Okay, we'd like to have, say, a selected one or two on the helmet. We're going to have B-1.",
"tokens": [
50964,
1033,
11,
321,
1116,
411,
281,
362,
11,
584,
11,
257,
8209,
472,
420,
732,
322,
264,
15922,
13,
492,
434,
516,
281,
362,
363,
12,
16,
13,
51314
],
"temperature": 0.4,
"avg_logprob": -0.6358732926218134,
"compression_ratio": 1.443298969072165,
"no_speech_prob": 0.334128201007843,
"confidence": 0.465,
"words": [
{
"text": "Okay,",
"start": 11.88,
"end": 12.28,
"confidence": 0.467
},
{
"text": "we'd",
"start": 12.5,
"end": 13.16,
"confidence": 0.624
},
{
"text": "like",
"start": 13.16,
"end": 13.32,
"confidence": 0.876
},
{
"text": "to",
"start": 13.32,
"end": 13.56,
"confidence": 0.734
},
{
"text": "have,",
"start": 13.56,
"end": 14.24,
"confidence": 0.125
},
{
"text": "say,",
"start": 14.74,
"end": 15.0,
"confidence": 0.153
},
{
"text": "a",
"start": 15.1,
"end": 15.16,
"confidence": 0.061
},
{
"text": "selected",
"start": 15.16,
"end": 15.76,
"confidence": 0.143
},
{
"text": "one",
"start": 15.76,
"end": 16.08,
"confidence": 0.753
},
{
"text": "or",
"start": 16.08,
"end": 16.22,
"confidence": 0.662
},
{
"text": "two",
"start": 16.22,
"end": 16.36,
"confidence": 0.594
},
{
"text": "on",
"start": 16.36,
"end": 16.58,
"confidence": 0.877
},
{
"text": "the",
"start": 16.58,
"end": 16.82,
"confidence": 0.807
},
{
"text": "helmet.",
"start": 16.82,
"end": 17.32,
"confidence": 0.65
},
{
"text": "We're",
"start": 17.58,
"end": 17.78,
"confidence": 0.566
},
{
"text": "going",
"start": 17.78,
"end": 17.94,
"confidence": 0.493
},
{
"text": "to",
"start": 17.94,
"end": 18.06,
"confidence": 0.896
},
{
"text": "have",
"start": 18.06,
"end": 18.22,
"confidence": 0.841
},
{
"text": "B-1.",
"start": 18.22,
"end": 19.12,
"confidence": 0.413
}
]
},
{
"id": 3,
"seek": 0,
"start": 20.08,
"end": 24.9,
"text": " And you can put the other one on Mike's helmet. We're still seeing the bleeper, over.",
"tokens": [
51364,
400,
291,
393,
829,
264,
661,
472,
322,
6602,
311,
15922,
13,
492,
434,
920,
2577,
264,
5408,
595,
260,
11,
670,
13,
51614
],
"temperature": 0.4,
"avg_logprob": -0.6358732926218134,
"compression_ratio": 1.443298969072165,
"no_speech_prob": 0.334128201007843,
"confidence": 0.559,
"words": [
{
"text": "And",
"start": 20.08,
"end": 20.24,
"confidence": 0.834
},
{
"text": "you",
"start": 20.24,
"end": 20.34,
"confidence": 0.882
},
{
"text": "can",
"start": 20.34,
"end": 20.48,
"confidence": 0.476
},
{
"text": "put",
"start": 20.48,
"end": 20.66,
"confidence": 0.891
},
{
"text": "the",
"start": 20.66,
"end": 20.84,
"confidence": 0.816
},
{
"text": "other",
"start": 20.84,
"end": 21.02,
"confidence": 0.777
},
{
"text": "one",
"start": 21.02,
"end": 21.22,
"confidence": 0.778
},
{
"text": "on",
"start": 21.22,
"end": 21.46,
"confidence": 0.905
},
{
"text": "Mike's",
"start": 21.46,
"end": 22.72,
"confidence": 0.852
},
{
"text": "helmet.",
"start": 22.72,
"end": 22.86,
"confidence": 0.793
},
{
"text": "We're",
"start": 23.08,
"end": 23.14,
"confidence": 0.803
},
{
"text": "still",
"start": 23.14,
"end": 23.34,
"confidence": 0.819
},
{
"text": "seeing",
"start": 23.34,
"end": 23.64,
"confidence": 0.682
},
{
"text": "the",
"start": 23.64,
"end": 23.88,
"confidence": 0.103
},
{
"text": "bleeper,",
"start": 23.88,
"end": 24.28,
"confidence": 0.257
},
{
"text": "over.",
"start": 24.56,
"end": 24.9,
"confidence": 0.179
}
]
},
{
"id": 4,
"seek": 3000,
"start": 31.18,
"end": 35.04,
"text": " We got a little bit on the helmet and B1.",
"tokens": [
50364,
220,
4360,
658,
257,
707,
857,
322,
264,
15922,
293,
363,
16,
13,
50714
],
"temperature": 0.4,
"avg_logprob": -0.8986218935483462,
"compression_ratio": 1.4807692307692308,
"no_speech_prob": 0.7222229242324829,
"confidence": 0.242,
"words": [
{
"text": "We",
"start": 31.18,
"end": 31.62,
"confidence": 0.075
},
{
"text": "got",
"start": 31.62,
"end": 31.84,
"confidence": 0.153
},
{
"text": "a",
"start": 31.84,
"end": 33.32,
"confidence": 0.226
},
{
"text": "little",
"start": 33.32,
"end": 33.54,
"confidence": 0.177
},
{
"text": "bit",
"start": 33.54,
"end": 33.74,
"confidence": 0.645
},
{
"text": "on",
"start": 33.74,
"end": 33.94,
"confidence": 0.438
},
{
"text": "the",
"start": 33.94,
"end": 34.02,
"confidence": 0.108
},
{
"text": "helmet",
"start": 34.02,
"end": 34.22,
"confidence": 0.661
},
{
"text": "and",
"start": 34.22,
"end": 34.46,
"confidence": 0.483
},
{
"text": "B1.",
"start": 34.46,
"end": 35.04,
"confidence": 0.346
}
]
},
{
"id": 5,
"seek": 3000,
"start": 37.56,
"end": 39.26,
"text": " The other one might go under sleep restraint.",
"tokens": [
50714,
440,
661,
472,
1062,
352,
833,
2817,
49281,
13,
50864
],
"temperature": 0.4,
"avg_logprob": -0.8986218935483462,
"compression_ratio": 1.4807692307692308,
"no_speech_prob": 0.7222229242324829,
"confidence": 0.591,
"words": [
{
"text": "The",
"start": 37.56,
"end": 37.86,
"confidence": 0.529
},
{
"text": "other",
"start": 37.86,
"end": 37.9,
"confidence": 0.754
},
{
"text": "one",
"start": 37.9,
"end": 37.92,
"confidence": 0.693
},
{
"text": "might",
"start": 37.92,
"end": 37.94,
"confidence": 0.719
},
{
"text": "go",
"start": 37.94,
"end": 38.08,
"confidence": 0.717
},
{
"text": "under",
"start": 38.08,
"end": 38.34,
"confidence": 0.529
},
{
"text": "sleep",
"start": 38.34,
"end": 38.76,
"confidence": 0.416
},
{
"text": "restraint.",
"start": 38.76,
"end": 39.26,
"confidence": 0.476
}
]
},
{
"id": 6,
"seek": 3000,
"start": 39.78,
"end": 47.42,
"text": " We got them in their helmet bags and I guess, excuse me, the helmet bags.",
"tokens": [
50864,
492,
658,
552,
294,
641,
15922,
10405,
293,
286,
2041,
11,
8960,
385,
11,
264,
15922,
10405,
13,
51264
],
"temperature": 0.4,
"avg_logprob": -0.8986218935483462,
"compression_ratio": 1.4807692307692308,
"no_speech_prob": 0.7222229242324829,
"confidence": 0.454,
"words": [
{
"text": "We",
"start": 39.78,
"end": 40.26,
"confidence": 0.752
},
{
"text": "got",
"start": 40.26,
"end": 40.5,
"confidence": 0.406
},
{
"text": "them",
"start": 40.5,
"end": 40.64,
"confidence": 0.503
},
{
"text": "in",
"start": 40.64,
"end": 40.78,
"confidence": 0.877
},
{
"text": "their",
"start": 40.78,
"end": 41.06,
"confidence": 0.695
},
{
"text": "helmet",
"start": 41.06,
"end": 41.58,
"confidence": 0.699
},
{
"text": "bags",
"start": 41.58,
"end": 42.08,
"confidence": 0.43
},
{
"text": "and",
"start": 42.08,
"end": 43.48,
"confidence": 0.264
},
{
"text": "I",
"start": 43.48,
"end": 44.02,
"confidence": 0.371
},
{
"text": "guess,",
"start": 44.02,
"end": 44.24,
"confidence": 0.447
},
{
"text": "excuse",
"start": 44.44,
"end": 45.06,
"confidence": 0.36
},
{
"text": "me,",
"start": 45.06,
"end": 45.3,
"confidence": 0.84
},
{
"text": "the",
"start": 46.14,
"end": 46.16,
"confidence": 0.122
},
{
"text": "helmet",
"start": 46.16,
"end": 46.5,
"confidence": 0.364
},
{
"text": "bags.",
"start": 46.5,
"end": 47.42,
"confidence": 0.435
}
]
},
{
"id": 7,
"seek": 3000,
"start": 48.18,
"end": 49.56,
"text": " The leaflet bags.",
"tokens": [
51264,
440,
476,
64,
69,
2631,
10405,
13,
51364
],
"temperature": 0.4,
"avg_logprob": -0.8986218935483462,
"compression_ratio": 1.4807692307692308,
"no_speech_prob": 0.7222229242324829,
"confidence": 0.316,
"words": [
{
"text": "The",
"start": 48.18,
"end": 48.46,
"confidence": 0.618
},
{
"text": "leaflet",
"start": 48.46,
"end": 48.82,
"confidence": 0.278
},
{
"text": "bags.",
"start": 48.82,
"end": 49.56,
"confidence": 0.269
}
]
},
{
"id": 8,
"seek": 3000,
"start": 50.06,
"end": 50.5,
"text": " Roger.",
"tokens": [
51364,
17666,
13,
51414
],
"temperature": 0.4,
"avg_logprob": -0.8986218935483462,
"compression_ratio": 1.4807692307692308,
"no_speech_prob": 0.7222229242324829,
"confidence": 0.538,
"words": [
{
"text": "Roger.",
"start": 50.06,
"end": 50.5,
"confidence": 0.538
}
]
},
{
"id": 9,
"seek": 3000,
"start": 51.38,
"end": 55.38,
"text": " Roger, I'm taking the next A out of the CLS.",
"tokens": [
51414,
17666,
11,
286,
478,
1940,
264,
958,
316,
484,
295,
264,
12855,
50,
13,
51664
],
"temperature": 0.4,
"avg_logprob": -0.8986218935483462,
"compression_ratio": 1.4807692307692308,
"no_speech_prob": 0.7222229242324829,
"confidence": 0.298,
"words": [
{
"text": "Roger,",
"start": 51.38,
"end": 51.86,
"confidence": 0.451
},
{
"text": "I'm",
"start": 51.98,
"end": 52.06,
"confidence": 0.765
},
{
"text": "taking",
"start": 52.06,
"end": 52.28,
"confidence": 0.611
},
{
"text": "the",
"start": 52.28,
"end": 53.84,
"confidence": 0.106
},
{
"text": "next",
"start": 53.84,
"end": 53.94,
"confidence": 0.128
},
{
"text": "A",
"start": 53.94,
"end": 54.1,
"confidence": 0.181
},
{
"text": "out",
"start": 54.1,
"end": 54.26,
"confidence": 0.178
},
{
"text": "of",
"start": 54.26,
"end": 54.38,
"confidence": 0.356
},
{
"text": "the",
"start": 54.38,
"end": 54.6,
"confidence": 0.169
},
{
"text": "CLS.",
"start": 54.6,
"end": 55.38,
"confidence": 0.342
}
]
},
{
"id": 10,
"seek": 5600,
"start": 56.0,
"end": 61.92,
"text": " Okay, we weren't sure that this was a suggestion.",
"tokens": [
50364,
1033,
11,
321,
4999,
380,
988,
300,
341,
390,
257,
16541,
13,
50714
],
"temperature": 0.4,
"avg_logprob": -0.7243160101083609,
"compression_ratio": 1.2635658914728682,
"no_speech_prob": 0.6283825635910034,
"confidence": 0.454,
"words": [
{
"text": "Okay,",
"start": 56.0,
"end": 56.62,
"confidence": 0.097
},
{
"text": "we",
"start": 56.72,
"end": 56.76,
"confidence": 0.236
},
{
"text": "weren't",
"start": 56.76,
"end": 57.16,
"confidence": 0.727
},
{
"text": "sure",
"start": 57.16,
"end": 58.84,
"confidence": 0.633
},
{
"text": "that",
"start": 58.84,
"end": 60.26,
"confidence": 0.436
},
{
"text": "this",
"start": 60.26,
"end": 61.28,
"confidence": 0.557
},
{
"text": "was",
"start": 61.28,
"end": 61.56,
"confidence": 0.599
},
{
"text": "a",
"start": 61.56,
"end": 61.6,
"confidence": 0.71
},
{
"text": "suggestion.",
"start": 61.6,
"end": 61.92,
"confidence": 0.472
}
]
},
{
"id": 11,
"seek": 5600,
"start": 65.2,
"end": 67.82,
"text": " We thought we'd, you could check it out.",
"tokens": [
50714,
492,
1194,
321,
1116,
11,
220,
5616,
727,
1520,
309,
484,
13,
50964
],
"temperature": 0.4,
"avg_logprob": -0.7243160101083609,
"compression_ratio": 1.2635658914728682,
"no_speech_prob": 0.6283825635910034,
"confidence": 0.526,
"words": [
{
"text": "We",
"start": 65.2,
"end": 65.38,
"confidence": 0.848
},
{
"text": "thought",
"start": 65.38,
"end": 65.56,
"confidence": 0.802
},
{
"text": "we'd,",
"start": 65.56,
"end": 65.86,
"confidence": 0.537
},
{
"text": "you",
"start": 66.32,
"end": 67.04,
"confidence": 0.154
},
{
"text": "could",
"start": 67.04,
"end": 67.22,
"confidence": 0.617
},
{
"text": "check",
"start": 67.22,
"end": 67.44,
"confidence": 0.774
},
{
"text": "it",
"start": 67.44,
"end": 67.58,
"confidence": 0.899
},
{
"text": "out.",
"start": 67.58,
"end": 67.82,
"confidence": 0.813
}
]
},
{
"id": 12,
"seek": 5600,
"start": 69.4,
"end": 72.44,
"text": " So I guess whatever you come up with just let us know.",
"tokens": [
50964,
407,
286,
2041,
2035,
291,
808,
493,
365,
445,
718,
505,
458,
13,
51214
],
"temperature": 0.4,
"avg_logprob": -0.7243160101083609,
"compression_ratio": 1.2635658914728682,
"no_speech_prob": 0.6283825635910034,
"confidence": 0.609,
"words": [
{
"text": "So",
"start": 69.4,
"end": 69.46,
"confidence": 0.173
},
{
"text": "I",
"start": 69.46,
"end": 69.84,
"confidence": 0.48
},
{
"text": "guess",
"start": 69.84,
"end": 70.68,
"confidence": 0.887
},
{
"text": "whatever",
"start": 70.68,
"end": 71.12,
"confidence": 0.433
},
{
"text": "you",
"start": 71.12,
"end": 71.26,
"confidence": 0.882
},
{
"text": "come",
"start": 71.26,
"end": 71.42,
"confidence": 0.646
},
{
"text": "up",
"start": 71.42,
"end": 71.62,
"confidence": 0.893
},
{
"text": "with",
"start": 71.62,
"end": 71.8,
"confidence": 0.808
},
{
"text": "just",
"start": 71.8,
"end": 71.98,
"confidence": 0.309
},
{
"text": "let",
"start": 71.98,
"end": 72.12,
"confidence": 0.901
},
{
"text": "us",
"start": 72.12,
"end": 72.26,
"confidence": 0.899
},
{
"text": "know.",
"start": 72.26,
"end": 72.44,
"confidence": 0.789
}
]
},
{
"id": 13,
"seek": 5600,
"start": 74.12,
"end": 75.11,
"text": " Okay, no problem.",
"tokens": [
51214,
1033,
11,
572,
1154,
13,
51314
],
"temperature": 0.4,
"avg_logprob": -0.7243160101083609,
"compression_ratio": 1.2635658914728682,
"no_speech_prob": 0.6283825635910034,
"confidence": 0.678,
"words": [
{
"text": "Okay,",
"start": 74.12,
"end": 74.5,
"confidence": 0.676
},
{
"text": "no",
"start": 74.6,
"end": 74.8,
"confidence": 0.591
},
{
"text": "problem.",
"start": 74.8,
"end": 75.11,
"confidence": 0.78
}
]
},
{
"id": 14,
"seek": 7500,
"start": 75.11,
"end": 78.48,
"text": " Okay, no problem, we'll let you know where the end of the line is.",
"tokens": [
50364,
1033,
11,
572,
1154,
11,
321,
603,
718,
291,
458,
689,
264,
917,
295,
264,
1622,
307,
13,
50564
],
"temperature": 0.4,
"avg_logprob": -0.5957319622948056,
"compression_ratio": 0.9565217391304348,
"no_speech_prob": 0.16674424707889557,
"confidence": 0.57,
"words": [
{
"text": "Okay,",
"start": 75.11,
"end": 75.84,
"confidence": 0.243
},
{
"text": "no",
"start": 76.0,
"end": 76.2,
"confidence": 0.336
},
{
"text": "problem,",
"start": 76.2,
"end": 76.54,
"confidence": 0.687
},
{
"text": "we'll",
"start": 76.64,
"end": 76.82,
"confidence": 0.753
},
{
"text": "let",
"start": 76.82,
"end": 76.98,
"confidence": 0.87
},
{
"text": "you",
"start": 76.98,
"end": 77.06,
"confidence": 0.667
},
{
"text": "know",
"start": 77.06,
"end": 77.2,
"confidence": 0.828
},
{
"text": "where",
"start": 77.2,
"end": 77.32,
"confidence": 0.261
},
{
"text": "the",
"start": 77.32,
"end": 77.42,
"confidence": 0.614
},
{
"text": "end",
"start": 77.42,
"end": 77.54,
"confidence": 0.593
},
{
"text": "of",
"start": 77.54,
"end": 77.82,
"confidence": 0.814
},
{
"text": "the",
"start": 77.82,
"end": 78.08,
"confidence": 0.26
},
{
"text": "line",
"start": 78.08,
"end": 78.3,
"confidence": 0.838
},
{
"text": "is.",
"start": 78.3,
"end": 78.48,
"confidence": 0.852
}
]
}
],
"language": "en"
}