Spaces:
Sleeping
Sleeping
import pandas as pd | |
from glob import glob | |
def _read_script(srt_path):
    """Return the text of one ``.srt`` file as a single space-joined string.

    Lines that are purely numeric (cue indices) or contain ``-->`` (cue
    timestamps) are dropped. The literal substrings ``"Sync"`` and
    ``"vNaru"`` are removed from every remaining line.
    """
    # "utf-8-sig" decodes plain UTF-8 as well, but additionally strips a
    # leading byte-order mark. Without that, a BOM-prefixed file's first cue
    # index reads as "\ufeff1", fails isnumeric(), and leaks into the script.
    with open(srt_path, "r", encoding="utf-8-sig") as f:
        kept = []
        for raw in f:
            line = raw.strip().replace("Sync", "").replace("vNaru", "")
            if line.isnumeric() or "-->" in line:
                continue
            # Blank lines are kept on purpose: they were part of the joined
            # output before, so the script text stays unchanged.
            kept.append(line)
    return " ".join(kept)


def _parse_episode_number(srt_path):
    """Return the episode number encoded in ``srt_path``.

    The number is the trailing run of decimal digits in the second
    hyphen-separated field of the path, e.g. ``"Show - Episode 12 - x.srt"``
    gives ``12``.

    Raises:
        IndexError: if the path contains no hyphen.
        ValueError: if the second field does not end in a digit.
    """
    # NOTE: the split is applied to the full path (directory included), the
    # same field the original single-digit lookup used.
    field = srt_path.split("-")[1].strip()

    # Walk back over the whole trailing digit run instead of taking only the
    # last character, so "Episode 12" yields 12 rather than 2.
    start = len(field)
    while start > 0 and field[start - 1].isdecimal():
        start -= 1
    digits = field[start:]
    if not digits:
        raise ValueError(
            f"cannot find an episode number in {srt_path!r}"
        )
    return int(digits)


def load_subs(path: str) -> pd.DataFrame:
    """Load every ``*.srt`` file directly inside ``path`` into a DataFrame.

    Args:
        path: Directory to search; ``"/*.srt"`` is appended to form the
            glob pattern.

    Returns:
        A DataFrame with one row per subtitle file and two columns:
        ``"episode"`` (int parsed from the file path) and ``"script"``
        (the file's text joined into one string). Rows follow the sorted
        order of the matched paths.

    Raises:
        IndexError: if a matched path contains no hyphen.
        ValueError: if no episode number can be parsed from a matched path.
    """
    # glob() returns matches in arbitrary order; sort for reproducible rows.
    subs = sorted(glob(path + "/*.srt"))

    episodes = []
    scripts = []
    for sub in subs:
        script = _read_script(sub)
        epno = _parse_episode_number(sub)
        episodes.append(epno)
        scripts.append(script)

    return pd.DataFrame({"episode": episodes, "script": scripts})