szk1ck committed on
Commit
4543aa3
·
1 Parent(s): 728d2ce

スクレイピング機能の追加

Browse files
Files changed (1) hide show
  1. app.py +51 -10
app.py CHANGED
@@ -2,12 +2,12 @@ from janome.tokenizer import Tokenizer
2
  from wordcloud import WordCloud
3
  import gradio as gr
4
 
5
- def generate_cloud(text, width, height):
6
-
7
- # default setting
8
- width = width if width is not None else 1024
9
- height = height if height is not None else 768
10
 
 
 
11
  t = Tokenizer()
12
 
13
  tokens = t.tokenize(text)
@@ -24,13 +24,54 @@ def generate_cloud(text, width, height):
24
  return wc.to_array()
25
 
26
 
27
- gr.Interface(
28
- fn=generate_cloud,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  inputs=[
30
  gr.Textbox(label="入力テキスト"),
31
- gr.Number(label="横幅(デフォルト値:1024)"),
32
- gr.Number(label="高さ(デフォルト値:768)")
 
 
 
 
 
 
 
 
 
 
 
 
33
  ],
34
  outputs=gr.Image(type="pil"),
35
  title="☁️にほんご わーどくらうど☁"
36
- ).launch()
 
 
 
 
 
 
2
  from wordcloud import WordCloud
3
  import gradio as gr
4
 
5
+ from urllib.request import urlopen
6
+ from bs4 import BeautifulSoup
7
+
 
 
8
 
9
+
10
+ def generate_cloud(text, width, height):
11
  t = Tokenizer()
12
 
13
  tokens = t.tokenize(text)
 
24
  return wc.to_array()
25
 
26
 
27
def generate_cloud_from_text(text, width, height):
    """Build a word cloud from text entered directly by the user.

    Args:
        text: The text to hand to ``generate_cloud``.
        width: Requested image width; ``None`` falls back to 1024.
        height: Requested image height; ``None`` falls back to 768.

    Returns:
        Whatever ``generate_cloud`` returns for the resolved arguments.
    """
    # Only a missing value (None) triggers the fallback; any other value,
    # including 0, is forwarded unchanged.
    if width is None:
        width = 1024
    if height is None:
        height = 768

    return generate_cloud(text, width, height)
34
+
35
+
36
def generate_cloud_from_url(url, width, height):
    """Fetch a web page and build a word cloud from its text.

    Args:
        url: Address of the page to scrape. Only ``http`` and ``https``
            URLs are accepted; surrounding whitespace is ignored.
        width: Requested image width; ``None`` falls back to 1024.
        height: Requested image height; ``None`` falls back to 768.

    Returns:
        Whatever ``generate_cloud`` returns for the extracted page text.

    Raises:
        ValueError: If ``url`` is empty or does not use http/https.
        urllib.error.URLError: If the page cannot be fetched.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from urllib.parse import urlparse

    url = (url or "").strip()

    # The URL comes straight from a form field. urlopen would also accept
    # schemes such as file://, which would let a visitor read files on the
    # server, so anything other than plain web URLs is rejected up front.
    if urlparse(url).scheme not in ("http", "https"):
        raise ValueError(f"unsupported URL (http/https only): {url!r}")

    # Close the connection deterministically and bound the wait so that a
    # stalled server cannot hang the request forever.
    with urlopen(url, timeout=10) as response:
        declared_charset = response.headers.get_content_charset()
        raw_html = response.read()

    # Pass the charset from the HTTP header (if any) as a decoding hint;
    # None leaves BeautifulSoup's own detection in charge.
    soup = BeautifulSoup(raw_html, "html.parser", from_encoding=declared_charset)

    # Documents without a <body> element yield soup.body == None; fall back
    # to the whole document instead of failing with AttributeError.
    root = soup.body if soup.body is not None else soup
    body_text = root.get_text()

    # default setting
    width = width if width is not None else 1024
    height = height if height is not None else 768

    return generate_cloud(body_text, width, height)
49
+
50
+
51
# Title displayed on both interfaces.
_TITLE = "☁️にほんご わーどくらうど☁"


def _size_inputs():
    """Return fresh width/height number inputs pre-filled with the defaults."""
    return [
        gr.Number(value=1024, label="横幅(デフォルト値:1024)"),
        gr.Number(value=768, label="高さ(デフォルト値:768)"),
    ]


# Tab 1: word cloud from text typed into a textbox.
from_text = gr.Interface(
    fn=generate_cloud_from_text,
    inputs=[gr.Textbox(label="入力テキスト"), *_size_inputs()],
    outputs=gr.Image(type="pil"),
    title=_TITLE,
)

# Tab 2: word cloud from the text of a page fetched by URL.
from_url = gr.Interface(
    fn=generate_cloud_from_url,
    inputs=[gr.Textbox(label="URL"), *_size_inputs()],
    outputs=gr.Image(type="pil"),
    title=_TITLE,
)

# Combine both interfaces into one tabbed app and start serving it.
demo = gr.TabbedInterface(
    [from_text, from_url],
    ["from_text", "from_url"],
)

demo.launch()