artificialguybr committed on
Commit
01c4960
·
verified ·
1 Parent(s): 60774d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -15
app.py CHANGED
@@ -58,26 +58,27 @@ def process_video(Video, target_language):
58
  with open(transcript_file, "w+", encoding="utf-8") as f:
59
  counter = 1
60
  for line in transcript_with_timestamps:
61
- # Extract timestamp and word from the line
62
- timestamp, word = re.match(r"\[(.*?)s -> (.*?)s\] (.*)", line).groups()
63
- start_time, end_time = timestamp.split(' -> ')
 
64
 
65
- # Convert timestamps to SRT format
66
- formatted_start = str(datetime.timedelta(seconds=float(start_time)))
67
- formatted_end = str(datetime.timedelta(seconds=float(end_time)))
68
-
69
- # Write to SRT file
70
- f.write(f"{counter}\n")
71
- f.write(f"{formatted_start} --> {formatted_end}\n")
72
- f.write(f"{word}\n\n")
73
- counter += 1
74
-
75
-
76
 
 
 
 
 
 
 
 
 
77
  # Move the file pointer to the beginning of the file.
78
  f.seek(0)
79
 
80
- # Translating the SRT from Whisper with NLLB.
81
  target_language_code = google_lang_codes.get(target_language, "en")
82
  paragraph = ""
83
  for line in f:
 
58
  with open(transcript_file, "w+", encoding="utf-8") as f:
59
  counter = 1
60
  for line in transcript_with_timestamps:
61
+ # Use a regular expression to extract timestamp and word from the line
62
+ match = re.match(r"\[(.*?)s -> (.*?)s\] (.*)", line)
63
+ if match:
64
+ start_time, end_time, word = match.groups()
65
 
66
+ # Convert timestamps to SRT format
67
+ formatted_start = str(datetime.timedelta(seconds=float(start_time)))
68
+ formatted_end = str(datetime.timedelta(seconds=float(end_time)))
 
 
 
 
 
 
 
 
69
 
70
+ # Write to SRT file
71
+ f.write(f"{counter}\n")
72
+ f.write(f"{formatted_start} --> {formatted_end}\n")
73
+ f.write(f"{word}\n\n")
74
+ counter += 1
75
+ else:
76
+ # Handle the case where the line does not match the pattern
77
+ print(f"Line does not match expected format: {line}")
78
  # Move the file pointer to the beginning of the file.
79
  f.seek(0)
80
 
81
+ # Translating the SRT from Whisper with Google Translate.
82
  target_language_code = google_lang_codes.get(target_language, "en")
83
  paragraph = ""
84
  for line in f: