File size: 1,860 Bytes
cb22296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from src.scrape_char_details import scrape_char_details
from src.scraper_chap_appearance import scrape_chap_appearances
from src.scraper_char_list import scrape_char_links
from configparser import ConfigParser, ExtendedInterpolation
import argparse

import pandas as pd
import os.path

def main_scraper(path_to_config):
    """Run the scraping stages enabled in the ``[SCRAPER]`` config section.

    The config file is parsed with ``ExtendedInterpolation``. The following
    keys are read from ``[SCRAPER]``:

    * ``end_chap`` (int) -- last chapter; ``end_chap + 1`` is what gets passed
      to the scrapers.
    * ``char_link_fp``, ``chap_appearance_fp``, ``char_details_fp`` -- CSV
      paths used by the stages below.
    * ``char_link``, ``chap_appearance``, ``char_details`` (bool) -- switches
      that enable each stage independently.

    Args:
        path_to_config: Path to the ``.ini`` file (anything accepted by
            ``ConfigParser.read``).

    Raises:
        FileNotFoundError: If no config file could be read from
            ``path_to_config``.
    """
    pl_config = ConfigParser(interpolation=ExtendedInterpolation())
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it did parse. Without this check a wrong path surfaces
    # later as an opaque ``KeyError: 'SCRAPER'``.
    if not pl_config.read(path_to_config):
        raise FileNotFoundError(
            "could not read config file: {}".format(path_to_config)
        )

    scraper_cfg = pl_config['SCRAPER']

    # NOTE(review): the +1 presumably makes the configured chapter inclusive
    # for scrapers that treat end_chap as an exclusive bound -- confirm
    # against the scraper implementations.
    end_chap = scraper_cfg.getint('end_chap') + 1
    char_link_fp = scraper_cfg.get('char_link_fp')
    chap_appearance_fp = scraper_cfg.get('chap_appearance_fp')
    char_details_fp = scraper_cfg.get('char_details_fp')

    if scraper_cfg.getboolean('char_link'):
        print ("scraping char links")
        # The scraper is handed an empty dict and its return value is not
        # used, so it is expected to populate the dict in place.
        char_dict = {}
        scrape_char_links(char_dict, end_chap=end_chap)
        df = pd.DataFrame.from_dict(char_dict, orient='index', columns=['Link'])
        # Turn the dict keys (the frame's index) into a regular column so
        # they are kept when writing with index=False.
        df = df.reset_index()
        df.to_csv(char_link_fp, index=False)

    if scraper_cfg.getboolean('chap_appearance'):
        print ("scraping char appearance")
        # Pass any previously saved results to the scraper; start from an
        # empty frame when no file exists yet.
        if os.path.exists(chap_appearance_fp):
            df = pd.read_csv(chap_appearance_fp)
        else:
            df = pd.DataFrame()
        newdf = scrape_chap_appearances(df=df, end_chap=end_chap)
        newdf.to_csv(chap_appearance_fp, index=False)

    if scraper_cfg.getboolean('char_details'):
        print ("scraping character details")
        # Requires the CSV written by the char_link stage (now or in an
        # earlier run) to exist at char_link_fp.
        char_link_df = pd.read_csv(char_link_fp)
        scrape_char_details(char_link_df, save_file_name=char_details_fp)

if __name__ == '__main__':
    # Command-line entry point: the only option is the config file location,
    # which defaults to cfg/cfg.ini relative to the working directory.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--path_to_config',
        default='cfg/cfg.ini',
        help='path to config file',
    )
    main_scraper(path_to_config=cli.parse_args().path_to_config)