Spaces:
Sleeping
Sleeping
import os | |
import re | |
from rev_ai import apiclient | |
def timestamp2frame(ts, fps=10):
    """Convert a split SRT timestamp into a frame index, returned as a string.

    Args:
        ts: Iterable of int-convertible fields in the order hours, minutes,
            seconds, milliseconds. Fields past the fourth are ignored.
        fps: Frames per second used for the conversion.

    Returns:
        The frame index, truncated to a whole number, as a decimal string.

    Raises:
        ValueError: If a field cannot be converted with ``int``.
        IndexError: If fewer than four fields are supplied.
    """
    fields = [int(x) for x in ts]
    # Accumulate whole milliseconds as an integer and scale once. Adding
    # ``ms / 1000`` as a float first can land just below the exact frame
    # (e.g. 0.29 * 100 == 28.999999999999996) and truncate one frame short.
    total_ms = (fields[0] * 3600 + fields[1] * 60 + fields[2]) * 1000 + fields[3]
    return str(int(total_ms * fps / 1000))
def _srt_lines_to_deforum(lines, fps):
    """Turn SRT lines into a Deforum prompt string plus the last cue's frame.

    Args:
        lines: Iterable of SRT lines (newline terminators are tolerated).
        fps: Frames per second forwarded to ``timestamp2frame``.

    Returns:
        A ``(prompt, last_frame)`` tuple. ``last_frame`` is the frame string of
        the last timestamp line seen, or ``None`` if there was none.
    """
    timestamp_re = re.compile(r"(\d+:\d+:\d+,\d+)")
    # A caption text line must start with an ASCII letter and contain no ">".
    # That excludes the numeric cue index and the "start --> end" line.
    text_re = re.compile(r"^[a-zA-Z][^>]+$")

    parts = []
    last_frame = None
    for line in lines:
        stamp = timestamp_re.search(line)
        if stamp:
            # Only the first timestamp on the line (the cue start) is used.
            last_frame = timestamp2frame(re.split(":|,", stamp.group(1)), fps)
            parts.append(last_frame + ": ")
        if text_re.search(line):
            parts.append(line + " | ")

    prompt = "".join(parts).replace("\n", "").strip()
    # A "| " directly followed by a letter sits between two text lines of the
    # same cue; dropping it merges them into a single prompt.
    prompt = re.sub(r"\|\s+(?=[a-zA-Z])", "", prompt)
    # Drop the final character: the trailing "|" when the last cue had text.
    return prompt[:-1], last_frame


def audio2subtitle(rev_ai_token, fps=10, poll_interval=2.0):
    """Transcribe ``audio_out.mp3`` with Rev AI and build a Deforum prompt string.

    Submits the local file ``audio_out.mp3``, waits for the job to finish,
    writes the SRT captions to ``audio_out.srt``, and converts them into a
    ``"<frame>: <text> | <frame>: <text> "`` string.

    Args:
        rev_ai_token: Access token passed to ``apiclient.RevAiAPIClient``.
        fps: Frames per second used to convert cue start times to frames.
        poll_interval: Seconds to wait between job status checks.

    Returns:
        A ``(deforum_str, last_frame)`` tuple, where ``last_frame`` is the
        frame index (int) of the last cue's start time.

    Raises:
        RuntimeError: If the job status becomes ``JobStatus.FAILED``.
        ValueError: If the captions contain no timestamp lines.
    """
    import time  # local import so this block does not depend on file-level edits

    speech_file_path = "audio_out.mp3"
    client = apiclient.RevAiAPIClient(rev_ai_token)
    job = client.submit_job_local_file(speech_file_path)

    # Poll until the job is transcribed. Sleep between checks instead of
    # spinning, and stop on failure rather than looping forever.
    while True:
        job_details = client.get_job_details(job.id)
        status = str(job_details.status)
        if status == "JobStatus.TRANSCRIBED":
            break
        if status == "JobStatus.FAILED":
            detail = getattr(job_details, "failure_detail", None)
            raise RuntimeError(f"Rev AI job {job.id} failed: {detail}")
        time.sleep(poll_interval)

    transcript_srt = client.get_captions(job.id)
    # Explicit UTF-8 so non-ASCII captions do not depend on the platform default.
    with open("audio_out.srt", "w", encoding="utf-8") as f:
        f.write(transcript_srt)

    with open("audio_out.srt", "r", encoding="utf-8") as f:
        deforum_str, last_frame = _srt_lines_to_deforum(f, fps)

    if last_frame is None:
        raise ValueError("No caption timestamps found in the Rev AI transcript")
    # Use the frame tracked during parsing, not the last digit run in the
    # prompt, which could be a number spoken in the final caption.
    return deforum_str, int(last_frame)