Spaces:
Sleeping
Sleeping
A-baoYang
committed on
Commit
·
a7cb959
1
Parent(s):
9e554da
Add app.py
Browse files
app.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from pathlib import Path
|
3 |
+
from crawler.main import main
|
4 |
+
from crawler.config import Config
|
5 |
+
import asyncio
|
6 |
+
|
7 |
+
|
8 |
+
ROOT_DIR = Path(__file__).absolute().parents[0]
|
9 |
+
|
10 |
+
|
11 |
+
def crawl(url):
    """Crawl the site rooted at *url* and return the path of the JSON result.

    Args:
        url: Start URL typed into the "URL to crawl" textbox. Surrounding
            whitespace is ignored.

    Returns:
        Absolute path (as a string) of the ``output.json`` file handed to
        the crawler as ``output_file_name``; presumably the crawler writes
        its results there (confirm against ``crawler.main``).

    Raises:
        gr.Error: If *url* is empty or only whitespace, so the UI shows a
            readable message instead of a crawler traceback.
    """
    # Function-scope import so this block does not depend on a file-level one.
    import tempfile

    start_url = (url or "").strip()
    if not start_url:
        raise gr.Error("Please enter a URL to crawl.")

    # Strip trailing slashes only for the glob: "https://site/" must become
    # "https://site/**", not "https://site//**".
    match_pattern = f"{start_url.rstrip('/')}/**"

    # The app queue processes several requests at once; a single shared
    # output file would let concurrent crawls overwrite each other. Give
    # every request its own scratch directory while keeping the file name.
    output_path = str(Path(tempfile.mkdtemp(prefix="autocrawler-")) / "output.json")

    config = Config(
        url=start_url,
        match=match_pattern,
        selector="body",
        max_pages_to_crawl=100,
        output_file_name=output_path,
    )
    # `main(config)` is awaited to completion on a fresh event loop.
    asyncio.run(main(config))
    return output_path
|
21 |
+
|
22 |
+
|
23 |
+
# Custom page CSS: centres elements tagged with elem_id="center", hides the
# page footer, and recolours links.
css = """
#center {text-align: center}
footer {visibility: hidden}
a {color: rgb(255, 206, 10) !important}
"""
with gr.Blocks(css=css) as demo:
    # Hidden textbox; it is not wired to any event in this file.
    user_identify = gr.Textbox(visible=False)

    # Centred title and credit line, followed by a horizontal rule.
    for heading in ("# AutoCrawler", "Made by `LaplaceAI`"):
        gr.Markdown(heading, elem_id="center")
    gr.Markdown("---")

    input_url = gr.Textbox(
        label="URL to crawl",
        placeholder="Enter your website...",
        visible=True,
        interactive=True,
    )
    crawl_btn = gr.Button("Start Crawl!")
    # NOTE: a read-only "Crawler Status" textbox was sketched here but is
    # currently disabled.
    output_file = gr.File()

    # Clicking the button runs `crawl` on the entered URL and offers the
    # returned file path for download.
    crawl_btn.click(fn=crawl, inputs=input_url, outputs=output_file)
|
51 |
+
|
52 |
+
|
53 |
+
|
54 |
+
if __name__ == "__main__":
    # Turn on request queuing (concurrency_count=3), then start the app
    # server. `queue()` returns the Blocks instance, so the calls chain.
    demo.queue(concurrency_count=3).launch()
|