codewithdark committed on
Commit
320f388
·
verified ·
1 Parent(s): 72cdd0e

Upload utility%2Fcaptions%20%2Ftimed_captions_generator.py

Browse files
utility/utility%2Fcaptions%20%2Ftimed_captions_generator.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper_timestamped as whisper
2
+ from whisper_timestamped import load_model, transcribe_timestamped
3
+ import re
4
+
5
def generate_timed_captions(audio_filename, model_size="base"):
    """Transcribe an audio file and return its timed captions.

    Loads the model named by ``model_size`` through ``load_model``, runs
    ``transcribe_timestamped`` on ``audio_filename`` (with ``verbose=False``
    and ``fp16=False``), and hands the resulting analysis to
    ``getCaptionsWithTime`` using that function's default settings.

    Args:
        audio_filename: Audio input forwarded unchanged to
            ``transcribe_timestamped``.
        model_size: Model identifier forwarded to ``load_model``.

    Returns:
        Whatever ``getCaptionsWithTime`` returns for the transcription.
    """
    # NOTE(review): the model is loaded on every call; callers transcribing
    # many files may want to cache it -- confirm against usage.
    model = load_model(model_size)
    analysis = transcribe_timestamped(
        model,
        audio_filename,
        verbose=False,
        fp16=False,
    )
    return getCaptionsWithTime(analysis)
11
+
12
def splitWordsBySize(words, maxCaptionSize):
    """Group consecutive words into space-joined captions of bounded length.

    Words are consumed in order. A caption always starts with the next
    unused word (even if that word alone is longer than ``maxCaptionSize``)
    and keeps absorbing following words while the joined text stays within
    ``maxCaptionSize`` characters. As soon as an absorbed word brings the
    caption to at least half of ``maxCaptionSize``, the caption is closed,
    so captions tend to land between half and full size.

    Args:
        words: Iterable of word strings. It is not modified. A falsy value
            (empty sequence, ``None``) yields an empty result.
        maxCaptionSize: Maximum caption length in characters.

    Returns:
        A list of caption strings, in input order.
    """
    # Materialise once so any iterable works and the walk below can use an
    # index cursor; repeatedly slicing ``words[1:]`` would copy the remaining
    # list for every consumed word (quadratic in the number of words).
    words = list(words) if words else []
    halfCaptionSize = maxCaptionSize / 2
    total = len(words)
    captions = []
    cursor = 0
    while cursor < total:
        caption = words[cursor]
        cursor += 1
        while (
            cursor < total
            and len(caption) + 1 + len(words[cursor]) <= maxCaptionSize
        ):
            caption += ' ' + words[cursor]
            cursor += 1
            # Close the caption once it is "long enough"; if no words are
            # left the inner loop would end anyway, so breaking is harmless.
            if len(caption) >= halfCaptionSize:
                break
        captions.append(caption)
    return captions
26
+
27
def getTimestampMapping(whisper_analysis):
    """Map character-offset spans to the end time of each transcribed word.

    Walks every entry of ``segment['words']`` for every segment in
    ``whisper_analysis['segments']``, in order. Each word occupies a span of
    ``len(word['text']) + 1`` offsets (the extra one accounts for a
    separator), starting where the previous word's span ended.

    Args:
        whisper_analysis: Mapping with a ``'segments'`` list whose items
            carry a ``'words'`` list of dicts with ``'text'`` and ``'end'``.

    Returns:
        Dict keyed by ``(span_start, span_end)`` tuples whose values are the
        corresponding word's ``'end'`` value.
    """
    spans = {}
    cursor = 0
    every_word = (
        entry
        for seg in whisper_analysis['segments']
        for entry in seg['words']
    )
    for entry in every_word:
        span_end = cursor + len(entry['text']) + 1
        spans[(cursor, span_end)] = entry['end']
        cursor = span_end
    return spans
37
+
38
def cleanWord(word):
    """Return ``word`` with disallowed punctuation removed.

    Characters that survive: word characters (``\\w``), whitespace, hyphen,
    underscore, double quote and single quote. Everything else is deleted.
    """
    disallowed = r'[^\w\s\-_"\'\']'
    stripped = re.sub(disallowed, '', word)
    return stripped
41
+
42
def interpolateTimeFromDict(word_position, d):
    """Look up the value whose key span contains ``word_position``.

    ``d`` is scanned in its iteration order; each key is indexed as
    ``(low, high)`` and matches when ``low <= word_position <= high``
    (both ends inclusive). The first match wins, so a position sitting on
    the boundary shared by two spans resolves to the earlier entry.

    Returns:
        The matching value, or ``None`` when no span contains the position.
    """
    hits = (
        stamp
        for span, stamp in d.items()
        if span[0] <= word_position <= span[1]
    )
    return next(hits, None)
48
+
49
def getCaptionsWithTime(whisper_analysis, maxCaptionSize=15, considerPunctuation=False):
    """Build ``((start_time, end_time), caption)`` pairs from a transcription.

    The transcript in ``whisper_analysis['text']`` is cut into captions of at
    most ``maxCaptionSize`` characters with ``splitWordsBySize``. Each
    caption's end time is looked up, via ``interpolateTimeFromDict``, in the
    offset-to-time table produced by ``getTimestampMapping``. A caption
    starts where the previous emitted caption ended; the first starts at 0.

    Args:
        whisper_analysis: Mapping providing ``'text'`` and the data consumed
            by ``getTimestampMapping`` (presumably the result of
            ``transcribe_timestamped`` -- confirm against the caller).
        maxCaptionSize: Maximum caption length in characters.
        considerPunctuation: When true, the text is first split into
            sentences after ``.``, ``!`` or ``?`` so captions never span a
            sentence boundary, and punctuation is kept. When false, the text
            is split on whitespace only and each caption is passed through
            ``cleanWord`` before being emitted.

    Returns:
        List of ``((start_time, end_time), caption_text)`` tuples. Captions
        whose end time lookup yields a falsy value (including ``None``), or
        whose text is empty after cleaning, are skipped.
    """
    wordLocationToTime = getTimestampMapping(whisper_analysis)
    text = whisper_analysis['text']

    if considerPunctuation:
        sentences = re.split(r'(?<=[.!?]) +', text)
        raw_captions = [
            caption
            for sentence in sentences
            for caption in splitWordsBySize(sentence.split(), maxCaptionSize)
        ]
    else:
        raw_captions = splitWordsBySize(text.split(), maxCaptionSize)

    captionsPairs = []
    position = 0
    start_time = 0
    for raw_caption in raw_captions:
        # Advance by the length of the *uncleaned* caption. The lookup table
        # is built from each word's raw text length plus one, so advancing
        # by the cleaned length would fall behind by one offset for every
        # stripped punctuation character and shift later captions to
        # earlier timestamps.
        position += len(raw_caption) + 1
        shown = raw_caption if considerPunctuation else cleanWord(raw_caption)
        end_time = interpolateTimeFromDict(position, wordLocationToTime)
        if end_time and shown:
            captionsPairs.append(((start_time, end_time), shown))
            start_time = end_time

    return captionsPairs