Spaces:
Sleeping
Sleeping
File size: 1,204 Bytes
cb22296 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import requests
from bs4 import BeautifulSoup
import pandas as pd
def scrape_chap_appearances(df, start_chap = 1, end_chap =5000, continue_last = True):
if df.empty == True:
curr_chapts = []
else:
if continue_last:
curr_chapts = df['Chapter'].tolist()
else: curr_chapts = []
for i in range(start_chap, end_chap):
if i in curr_chapts:
continue
else:
if i % 100 == 0:
print (i)
# char_list = []
URL = f'https://onepiece.fandom.com/wiki/Chapter_{i}'
page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')
table = soup.find('table', class_='CharTable')
for elem in table.find_all('li'):
# char_list.append(elem.text)
df = df.append({'Chapter': int(i), 'Character': elem.text}, ignore_index=True)
return df
# appearance_dict[i] = char_list
# if __name__ == '__main__':
# df = pd.read_csv("data/onedash_chap_appearance.csv")
# newdf = scrape_chap_appearances(df = df, end_chap = 1006)
# newdf.to_csv("data/onedash_chap_appearance.csv", index=False) |