Spaces:
Sleeping
Sleeping
LittleLirow
committed on
Commit
•
600bbf7
1
Parent(s):
c36acea
Add subtitles file
Browse files- subtitles.py +48 -0
subtitles.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
from rev_ai import apiclient
|
4 |
+
|
5 |
+
def timestamp2frame(ts, fps=10):
    """Convert a split SRT timestamp into a frame index.

    Args:
        ts: Sequence whose first four items are the hours, minutes, seconds
            and milliseconds of the timestamp, each accepted by ``int()``
            (for example the strings obtained by splitting
            ``"00:01:02,345"`` on ``:`` and ``,``).
        fps: Frames per second used for the conversion.

    Returns:
        The number of whole frames elapsed at that timestamp, as a decimal
        string.
    """
    parts = [int(x) for x in ts]
    # Work in whole milliseconds: adding ``ms / 1000`` as a float and then
    # truncating can land one frame low (0.29 s * 100 fps -> 28.999... -> 28).
    total_ms = (parts[0] * 3600 + parts[1] * 60 + parts[2]) * 1000 + parts[3]
    if isinstance(fps, int):
        # Exact integer arithmetic for the common integer-fps case.
        return str(total_ms * fps // 1000)
    # Non-integer rates (e.g. 29.97) still need floating point.
    return str(int(total_ms * fps / 1000))
|
9 |
+
|
10 |
+
def audio2subtitle(rev_ai_token, fps=10, poll_interval=2.0):
    """Transcribe ``audio_out.mp3`` with Rev AI and build a frame-keyed caption string.

    The audio file is submitted as a Rev AI job, the resulting captions are
    written to ``audio_out.srt``, and that file is then converted into a
    single string of the form ``"<frame>: <text> | <frame>: <text> "`` where
    each frame number is the caption's start time converted with
    ``timestamp2frame``.

    Args:
        rev_ai_token: Access token passed to ``RevAiAPIClient``.
        fps: Frames per second used to convert caption start times to frames.
        poll_interval: Seconds to wait between job status checks.

    Returns:
        A ``(deforum_str, last_frame)`` tuple: the caption string and the
        frame number (``int``) of the last caption start found.

    Raises:
        RuntimeError: If the transcription job reports a failed status.
        ValueError: If the captions contain no timestamps.
    """
    import time  # local import: only needed to pause between status polls

    speech_file_path = "audio_out.mp3"
    client = apiclient.RevAiAPIClient(rev_ai_token)
    job = client.submit_job_local_file(speech_file_path)

    # Poll until the job is transcribed. Sleep between requests instead of
    # hammering the API, and stop on failure instead of looping forever.
    while True:
        job_details = client.get_job_details(job.id)
        status = str(job_details.status)
        if status == "JobStatus.TRANSCRIBED":
            break
        # NOTE(review): assumes the SDK's failed state stringifies as
        # "JobStatus.FAILED" -- confirm against the installed rev_ai version.
        if status == "JobStatus.FAILED":
            detail = getattr(job_details, "failure_detail", None)
            raise RuntimeError(
                "Rev AI transcription job {} failed: {}".format(job.id, detail)
            )
        time.sleep(poll_interval)
    transcript_srt = client.get_captions(job.id)

    # Explicit encoding so non-ASCII captions do not depend on the platform
    # default; the ``with`` block closes the file on exit.
    with open("audio_out.srt", "w", encoding="utf-8") as f:
        f.write(transcript_srt)

    timestamp_re = re.compile(r"(\d+:\d+:\d+,\d+)")
    # Caption text: starts with a letter and contains no '>' (which excludes
    # the numeric index lines and the "start --> end" timing lines).
    # NOTE(review): captions that start with a digit or punctuation are
    # skipped by this pattern as well.
    string_re = re.compile(r"^[a-zA-Z][^>]+$")

    pieces = []
    last_frame = None

    with open("audio_out.srt", "r", encoding="utf-8") as f:
        for line in f:
            timestamps = timestamp_re.findall(line)
            if timestamps:
                # Only the first timestamp on the line (the start time) is used.
                timestamp = re.split(":|,", timestamps[0])
                frame = timestamp2frame(timestamp, fps)
                pieces.append(frame)
                pieces.append(": ")
                # Remember the frame directly; scanning the final string for
                # the last number breaks when a caption itself contains digits.
                last_frame = int(frame)
            if string_re.findall(line):
                pieces.append(line)
                pieces.append(" | ")

    if last_frame is None:
        raise ValueError("audio_out.srt contains no caption timestamps")

    deforum_str = "".join(pieces).replace("\n", "").strip()
    # Drop separators that are followed by more text, so the lines of a
    # multi-line caption are merged under a single frame key.
    deforum_str = re.sub(r"\|\s+(?=[a-zA-Z])", r"", deforum_str)
    # Remove the trailing separator only when it is actually there.
    if deforum_str.endswith("|"):
        deforum_str = deforum_str[:-1]

    return deforum_str, last_frame
|