akshatsanghvi committed on
Commit
e06bf99
·
1 Parent(s): 5bc4016

Create data_loader.py

Browse files
Files changed (1) hide show
  1. utils/data_loader.py +28 -0
utils/data_loader.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from glob import glob
3
+
4
def _episode_number(sub_path):
    """Return the episode number encoded in a subtitle file name.

    The number is taken from the trailing run of digits in the second
    ``-``-separated field of the file's base name, e.g.
    ``"Show - S01E12 - Title.srt"`` -> ``12``.

    Raises:
        ValueError: if the base name has no second ``-``-separated field or
            that field does not end in a digit.
    """
    import os
    import re

    # Only look at the file name: a "-" in a directory name must not shift
    # which field is treated as the episode field.
    fields = os.path.basename(sub_path).split("-")
    if len(fields) < 2:
        raise ValueError(f"no '-' separated episode field in {sub_path!r}")

    # Take the whole trailing digit run, not just the final character, so
    # that multi-digit episodes (10, 11, ...) are not truncated.
    match = re.search(r"\d+$", fields[1].strip())
    if match is None:
        raise ValueError(f"episode field does not end in a number: {sub_path!r}")
    return int(match.group())


def load_subs(path):
    """Load every ``*.srt`` file directly under *path* into a DataFrame.

    For each file, lines that are purely numeric (cue indices) or contain
    ``"-->"`` (cue timings) are dropped; every other line is stripped and the
    results are joined with single spaces. Blank lines are kept as empty
    strings, so consecutive cues end up separated by two spaces.
    NOTE(review): a spoken line consisting only of digits is dropped as well.

    Args:
        path: directory containing the ``.srt`` files.

    Returns:
        A DataFrame with one row per file and the columns ``episode`` (int)
        and ``script`` (str), ordered by file path.

    Raises:
        ValueError: if an episode number cannot be parsed from a file name.
    """
    episodes = []
    scripts = []

    # glob() returns files in arbitrary filesystem order; sort so that the
    # resulting rows are reproducible between runs and machines.
    for sub in sorted(glob(path + "/*.srt")):
        # "utf-8-sig" reads plain UTF-8 and also strips a leading BOM, which
        # would otherwise stop the first cue index from looking numeric.
        with open(sub, "r", encoding="utf-8-sig") as f:
            kept = [
                line
                for line in map(str.strip, f)
                if not (line.isnumeric() or "-->" in line)
            ]

        episodes.append(_episode_number(sub))
        scripts.append(" ".join(kept))

    return pd.DataFrame({"episode": episodes, "script": scripts})