Yoon-gu Hwang
committed on
Commit
·
6cd90ae
1
Parent(s):
2562105
upload files
Browse files- app.py +105 -0
- make_dataset.py +66 -0
- pokemon.json +0 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
import docx
|
5 |
+
from docx.oxml.ns import qn
|
6 |
+
from docx import Document
|
7 |
+
from docx.shared import Inches, Pt, Cm, Mm, RGBColor
|
8 |
+
from docx.enum.table import WD_TABLE_ALIGNMENT
|
9 |
+
from docx2pdf import convert
|
10 |
+
import pandas as pd
|
11 |
+
|
12 |
+
# Load the scraped Pokémon records once at start-up. The names and types are
# Korean text, so the encoding is given explicitly instead of relying on the
# platform's default locale encoding.
with open('pokemon.json', 'r', encoding='utf-8') as f:
    pokemons = json.load(f)
|
14 |
+
|
15 |
+
# Number of the last Pokémon in each generation, in order (1세대 ... 9세대).
_GEN_LAST_NUMBERS = [151, 251, 386, 493, 649, 721, 809, 905, 1017]

# Maps a generation label to its inclusive [first, last] Pokémon number range.
# Every generation starts right after the previous one ends, so the first
# numbers are derived from the list of last numbers.
GEN_RANGE = {
    f"{gen}세대": [first, last]
    for gen, (first, last) in enumerate(
        zip([1] + [n + 1 for n in _GEN_LAST_NUMBERS[:-1]], _GEN_LAST_NUMBERS),
        start=1,
    )
}
|
26 |
+
|
27 |
+
# Generation selector. The choices are taken from GEN_RANGE itself so that
# every option offered in the UI is guaranteed to have a number range that
# write_docx can look up (no second hard-coded list to keep in sync).
generation = gr.Dropdown(
    list(GEN_RANGE),
    value="1세대",
    label="포켓몬 세대",
    info="원하는 포켓몬 세대를 선택하세요.",
)

# Output components: the generated file and the table of listed Pokémon.
download = gr.File(label="Download a file")
text = gr.DataFrame()
|
33 |
+
|
34 |
+
def _new_a4_document():
    """Return an empty A4 document with 1.27 cm margins and the Nanum font."""
    margin = Cm(1.27)
    document = Document()
    for section in document.sections:
        section.page_height = Mm(297)
        section.page_width = Mm(210)
        section.top_margin = margin
        section.bottom_margin = margin
        section.left_margin = margin
        section.right_margin = margin
    normal = document.styles['Normal']
    normal.font.name = 'NanumSquareRound'
    # The East-Asian font slot is set on the underlying XML element so that
    # Korean text is rendered with the same font.
    normal._element.rPr.rFonts.set(qn('w:eastAsia'), 'NanumSquareRound')
    return document


def _set_cell_text(cell, text, size, color=None):
    """Write centred *text* into *cell* at *size* points, optionally coloured."""
    cell.text = text
    paragraph = cell.paragraphs[0]
    run = paragraph.runs[0]
    run.font.size = Pt(size)
    if color is not None:
        run.font.color.rgb = color
    paragraph.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER


def _add_pokemon_page(document, pokemon):
    """Append one page for *pokemon*: a 4-row table followed by a page break.

    Rows from top to bottom: number, picture, types, name.
    """
    table = document.add_table(rows=4, cols=1)
    table.alignment = WD_TABLE_ALIGNMENT.CENTER
    table.style = 'Table Grid'
    number_cell, picture_cell, types_cell, name_cell = (
        row.cells[0] for row in table.rows
    )

    _set_cell_text(number_cell, f"{pokemon['number']}", 50)

    paragraph = picture_cell.add_paragraph()
    paragraph.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
    image_path = pokemon['image_path']
    # The dataset script stores None when an image could not be downloaded;
    # leave the picture row empty in that case instead of failing the export.
    if image_path:
        run = paragraph.add_run()
        run.add_picture(image_path, width=Cm(14.5), height=Cm(14.5))
        run.add_break(docx.enum.text.WD_BREAK.LINE)

    gray = RGBColor(192, 192, 192)
    _set_cell_text(types_cell, '+'.join(pokemon['types']), 70, gray)
    _set_cell_text(name_cell, f"{pokemon['name']}", 70, gray)

    document.add_page_break()


def write_docx(gen):
    """Build the picture book for generation *gen* and stream progress.

    This is a generator: after every Pokémon it yields ``(table, file)``,
    where ``table`` is a DataFrame with the columns ``No``, ``이름``, ``타입``
    for the Pokémon processed so far and ``file`` is the path of the PDF, or
    ``None`` while the PDF does not exist yet. The last yielded pair always
    carries the finished PDF path.

    Args:
        gen: A key of ``GEN_RANGE`` (e.g. ``"1세대"``).

    Raises:
        KeyError: If *gen* is not a key of ``GEN_RANGE``.
    """
    filename = f'포켓몬{gen}.docx'
    pdf_filename = os.path.splitext(filename)[0] + '.pdf'
    columns = ['No', '이름', '타입']

    # Reuse a previously exported PDF. The check is on the PDF (not the
    # intermediate .docx) so a run whose conversion failed is retried.
    needs_export = not os.path.exists(pdf_filename)
    document = _new_a4_document() if needs_export else None
    # Never hand the File output a path that does not exist yet.
    available_file = None if needs_export else pdf_filename

    rows = []
    start, end = GEN_RANGE[gen]
    # Pokémon numbers are 1-based while the list is 0-based.
    for pokemon in pokemons[start - 1:end]:
        rows.append(
            dict(이름=pokemon['name'], No=pokemon['number'], 타입='+'.join(pokemon['types']))
        )
        if document is not None:
            _add_pokemon_page(document, pokemon)
        # The table is rebuilt on each step so the UI can show progress.
        yield pd.DataFrame(rows, columns=columns), available_file

    if document is not None:
        document.save(filename)
        convert(filename)

    # A generator's `return` value is not yielded like the results above, so
    # the finished result is yielded as well.
    yield pd.DataFrame(rows, columns=columns), pdf_filename
|
101 |
+
|
102 |
+
# Wire write_docx into a Gradio interface: the generation dropdown is the
# input, the table and the downloadable file are the outputs.
demo = gr.Interface(write_docx, generation, [text, download], title="대치동 포켓몬 도감 생성기",
                    description="원하는 포켓몬 세대를 선택하고, 다운로드를 눌러주세요.")
# NOTE(review): `concurrency_count` looks like a Gradio 3.x queue option that
# later releases may no longer accept, and requirements.txt does not pin a
# version; confirm against the installed Gradio.
demo.queue(concurrency_count=3)
demo.launch()
|
make_dataset.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import urllib.request
|
2 |
+
import json
|
3 |
+
import urllib.parse
|
4 |
+
from urllib.parse import urlsplit, quote
|
5 |
+
from urllib.request import Request, urlopen
|
6 |
+
from bs4 import BeautifulSoup
|
7 |
+
from tqdm import tqdm
|
8 |
+
import pandas as pd
|
9 |
+
|
10 |
+
# url = 'https://pokemon.fandom.com/ko/wiki/흥나숭_(포켓몬)'
|
11 |
+
# url = 'https://pokemon.fandom.com/ko/wiki/나몰빼미_(포켓몬)'
|
12 |
+
# url = 'https://pokemon.fandom.com/ko/wiki/도치마론_(포켓몬)'
|
13 |
+
# url = 'https://pokemon.fandom.com/ko/wiki/비크티니_(포켓몬)'
|
14 |
+
# url = 'https://pokemon.fandom.com/ko/wiki/모부기_(포켓몬)'
|
15 |
+
# url = 'https://pokemon.fandom.com/ko/wiki/나무지기_(포켓몬)'
|
16 |
+
# url = 'https://pokemon.fandom.com/ko/wiki/치코리타_(포켓몬)'
|
17 |
+
# url = 'https://pokemon.fandom.com/ko/wiki/토게틱_(포켓몬)'
|
18 |
+
# url = 'https://pokemon.fandom.com/ko/wiki/포푸니_(포켓몬)'
|
19 |
+
import os  # needed only to create the image output directory below

# Page of the first Pokémon to scrape; each page links to the next one.
url = 'https://pokemon.fandom.com/ko/wiki/이상해씨_(포켓몬)'
# url = 'https://pokemon.fandom.com/ko/wiki/레트라_(포켓몬)'
# url = 'https://pokemon.fandom.com/ko/wiki/신비록_(포켓몬)'

# Percent-encode the Korean path so urllib can request it.
url_info = urlsplit(url)
encoded_url = f'{url_info.scheme}://{url_info.netloc}{quote(url_info.path)}'

info = []    # one record per scraped Pokémon
errors = []  # one record per image that could not be downloaded
target_number = 1017  # number of the last Pokémon to scrape

# urlretrieve does not create directories; without this every image download
# would fail and every record would end up with image_path=None.
os.makedirs('images', exist_ok=True)

# At most `target_number` pages are visited, even if the expected final
# number is never seen.
for _ in tqdm(range(target_number)):
    req = Request(encoded_url, headers={'User-Agent': 'Mozilla/5.0'})
    with urlopen(req) as res:
        html = res.read()
    soup = BeautifulSoup(html, 'html.parser')

    name = soup.find("div", {"class": "name-ko"}).text.strip()
    number = soup.find("div", {"class": "index"}).text.strip()

    # The image is best-effort: a missing tag or a failed download must not
    # abort the crawl, but the failure is recorded instead of being dropped.
    try:
        img_url = soup.find("div", {"class": "image rounded"}).find("img")['data-src']
        filepath = f"images/{number.replace('.', '_')}_{name}.png"
        urllib.request.urlretrieve(img_url, filepath)
    except Exception as exc:
        errors.append(dict(number=number, name=name, url=encoded_url, error=repr(exc)))
        filepath = None

    doc_text = '\n'.join([p.text.replace('\n', '').strip() for p in soup.find_all("p")])
    types = [
        poke_type['title'].split(' ')[0].strip()
        for poke_type in soup.select('tbody > tr > td > div')[0].select('span > a')
    ]

    info.append(dict(
        name=name,
        number=number,
        types=types,
        doc_text=doc_text,
        image_path=filepath,
        url=encoded_url,
    ))

    if number == f"No.{target_number:04d}":
        break

    # The last link of the first table is used as the link to the next page.
    next_monster = soup.find("table").find_all("a")[-1]['href']
    encoded_url = "https://pokemon.fandom.com" + next_monster

if errors:
    print(f"{len(errors)} image(s) could not be downloaded:")
    for error in errors:
        print(f"  {error['number']} {error['name']}: {error['error']}")

pd.DataFrame(info).to_csv('pokemon.csv', index=False)
# ensure_ascii=False writes the Korean text as-is, so the file encoding must
# be explicit rather than the platform default.
with open('pokemon.json', 'w', encoding='utf-8') as f:
    json.dump(info, f, ensure_ascii=False, indent=4)
|
pokemon.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio
python-docx
pandas
# app.py imports `convert` from docx2pdf at module load time
docx2pdf
|