File size: 756 Bytes
e06bf99
 
 
 
 
 
 
 
 
 
 
 
 
 
5ba7c45
e06bf99
5ba7c45
e06bf99
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import os
import re
from glob import glob

import pandas as pd

def _episode_number(sub_path):
    """Return the episode number encoded in a subtitle file name.

    The file name (extension removed) is split on "-" and the second field
    is expected to end with the episode number, e.g.
    ``"Show Season 1 - 12 - Title.srt"`` -> ``12``.

    Raises:
        IndexError: If the file name contains no "-" separator.
        ValueError: If the second field does not end with digits.
    """
    # Look only at the file name: a hyphen in a parent directory must not
    # shift which field is treated as the episode field.
    stem = os.path.splitext(os.path.basename(sub_path))[0]
    field = stem.split("-")[1].strip()
    # Take the whole trailing run of digits, not just the final character,
    # so episodes 10 and above are not truncated to their last digit.
    match = re.search(r"\d+$", field)
    if match is None:
        raise ValueError(
            f"cannot find an episode number in subtitle file name: {sub_path!r}"
        )
    return int(match.group())


def load_subs(path):
    """Load every ``.srt`` subtitle file in a directory into a DataFrame.

    For each file, cue-index lines (purely numeric) and timestamp lines
    (containing "-->") are dropped; the remaining lines are stripped, have
    the substrings "Sync" and "vNaru" removed, and are joined with single
    spaces into one script string. Blank separator lines are kept, so
    consecutive cues end up separated by two spaces.

    Args:
        path: Directory containing the ``.srt`` files (string or path-like).

    Returns:
        A ``pandas.DataFrame`` with one row per file and two columns:
        ``episode`` (int parsed from the file name) and ``script`` (str).
        Rows follow the sorted order of the matched file paths.

    Raises:
        IndexError: If a file name contains no "-" separator.
        ValueError: If no episode number can be parsed from a file name.
    """
    pattern = os.path.join(path, "*.srt")
    episodes = []
    scripts = []

    # glob() yields files in arbitrary, OS-dependent order; sort so the
    # resulting DataFrame is reproducible across runs and platforms.
    for sub in sorted(glob(pattern)):
        kept = []
        # "utf-8-sig" transparently drops a leading BOM (and still reads
        # BOM-less files); otherwise the BOM sticks to the first cue index,
        # which then fails isnumeric() and leaks into the script.
        with open(sub, "r", encoding="utf-8-sig") as f:
            for raw in f:
                line = raw.strip().replace("Sync", "").replace("vNaru", "")
                if line.isnumeric() or "-->" in line:
                    continue
                kept.append(line)

        episodes.append(_episode_number(sub))
        scripts.append(" ".join(kept))

    return pd.DataFrame({"episode": episodes, "script": scripts})