artificialguybr committed on
Commit
6acbc07
·
1 Parent(s): 43c56bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -76,14 +76,18 @@ def process_video(Video, target_language):
76
 
77
  # Translating the SRT from Whisper with NLLB.
78
  flores_code = lang_codes.get(target_language, "eng_Latn")
 
79
  for line in f:
80
  if line.strip().isnumeric() or "-->" in line:
 
 
 
 
 
 
81
  translated_lines.append(line)
82
  elif line.strip() != "":
83
- inputs = tokenizer(line.strip(), return_tensors="pt")
84
- translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id[flores_code], max_length=100)
85
- translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
86
- translated_lines.append(translated_text + "\n")
87
  else:
88
  translated_lines.append("\n")
89
 
 
76
 
77
  # Translating the SRT from Whisper with NLLB.
78
  flores_code = lang_codes.get(target_language, "eng_Latn")
79
+ paragraph = ""
80
  for line in f:
81
  if line.strip().isnumeric() or "-->" in line:
82
+ if paragraph:
83
+ inputs = tokenizer(paragraph, return_tensors="pt")
84
+ translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id[flores_code], max_length=100)
85
+ translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
86
+ translated_lines.append(translated_text + "\n")
87
+ paragraph = ""
88
  translated_lines.append(line)
89
  elif line.strip() != "":
90
+ paragraph += " " + line.strip()
 
 
 
91
  else:
92
  translated_lines.append("\n")
93