minnehwg committed on
Commit
2878127
·
verified ·
1 Parent(s): bb82ac6

Update util.py

Browse files
Files changed (1) hide show
  1. util.py +5 -1
util.py CHANGED
@@ -8,7 +8,6 @@ import torch
8
  import re
9
 
10
 
11
-
12
  def load_model(cp):
13
  tokenizer = AutoTokenizer.from_pretrained("VietAI/vit5-base")
14
  model = AutoModelForSeq2SeqLM.from_pretrained(cp)
@@ -44,6 +43,8 @@ def get_subtitles(video_url):
44
  except Exception as e:
45
  return [], f"An error occurred: {e}"
46
 
 
 
47
 
48
  def restore_punctuation(text):
49
  model = PunctuationModel()
@@ -115,12 +116,15 @@ def post_processing(text):
115
  text = " ".join(sentences)
116
  return text
117
 
 
118
  def display(text):
119
  sentences = re.split(r'(?<=[.!?])\s*', text)
120
  unique_sentences = list(dict.fromkeys(sentences[:-1]))
121
  formatted_sentences = [f"• {sentence}" for sentence in unique_sentences]
122
  return formatted_sentences
123
 
 
 
124
  def pipeline(url, model, tokenizer):
125
  trans, sub = get_subtitles(url)
126
  sub = restore_punctuation(sub)
 
8
  import re
9
 
10
 
 
11
  def load_model(cp):
12
  tokenizer = AutoTokenizer.from_pretrained("VietAI/vit5-base")
13
  model = AutoModelForSeq2SeqLM.from_pretrained(cp)
 
43
  except Exception as e:
44
  return [], f"An error occurred: {e}"
45
 
46
+ from youtube_transcript_api import YouTubeTranscriptApi
47
+
48
 
49
  def restore_punctuation(text):
50
  model = PunctuationModel()
 
116
  text = " ".join(sentences)
117
  return text
118
 
119
+
120
  def display(text):
121
  sentences = re.split(r'(?<=[.!?])\s*', text)
122
  unique_sentences = list(dict.fromkeys(sentences[:-1]))
123
  formatted_sentences = [f"• {sentence}" for sentence in unique_sentences]
124
  return formatted_sentences
125
 
126
+
127
+
128
  def pipeline(url, model, tokenizer):
129
  trans, sub = get_subtitles(url)
130
  sub = restore_punctuation(sub)