A-baoYang committed on
Commit
a7cb959
·
1 Parent(s): 9e554da

Add app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ from crawler.main import main
4
+ from crawler.config import Config
5
+ import asyncio
6
+
7
+
8
# Absolute path of the directory that contains this file; the crawl output is written here.
ROOT_DIR = Path(__file__).absolute().parent
9
+
10
+
11
def crawl(url):
    """Crawl the site rooted at *url* and return the path of the result file.

    Builds a ``Config`` that starts at *url*, follows every link matching
    ``<url>/**``, selects the ``body`` element and stops after 100 pages,
    then runs the asynchronous ``main`` crawler to completion.

    Args:
        url: Start URL typed by the user. Surrounding whitespace is ignored.

    Returns:
        Path (as a string) of the JSON file passed to the crawler as
        ``output_file_name``; presumably the crawler writes its results
        there -- confirm against ``crawler.main``.

    Raises:
        gr.Error: If *url* is empty or only whitespace, so the UI shows a
            readable message instead of an opaque crawler failure.
    """
    import uuid  # local import keeps this block self-contained

    start_url = (url or "").strip()
    if not start_url:
        raise gr.Error("Please enter a URL to crawl.")

    # Drop trailing slashes before building the glob; otherwise
    # "https://site/" would become "https://site//**".
    match_pattern = f"{start_url.rstrip('/')}/**"

    # The app queue runs several crawls concurrently, so each call gets its
    # own output file rather than sharing (and clobbering) one output.json.
    output_path = f"{ROOT_DIR}/output-{uuid.uuid4().hex}.json"

    config = Config(
        url=start_url,
        match=match_pattern,
        selector="body",
        max_pages_to_crawl=100,
        output_file_name=output_path,
    )
    asyncio.run(main(config))
    return output_path
21
+
22
+
23
# Page-level CSS: centre anything tagged "center", hide the footer element,
# and recolour links.
css = """
#center {text-align: center}
footer {visibility: hidden}
a {color: rgb(255, 206, 10) !important}
"""

# UI layout: heading, URL input, start button and a downloadable result file.
with gr.Blocks(css=css) as demo:
    # Hidden textbox; not wired to any event in this file.
    user_identify = gr.Textbox(visible=False)

    # Centred title and attribution, followed by a horizontal rule.
    for heading in ("# AutoCrawler", "Made by `LaplaceAI`"):
        gr.Markdown(heading, elem_id="center")
    gr.Markdown("---")

    input_url = gr.Textbox(
        label="URL to crawl",
        placeholder="Enter your website...",
        visible=True,
        interactive=True,
    )
    crawl_btn = gr.Button("Start Crawl!")
    # A read-only "Crawler Status" textbox was sketched here but is disabled.
    output_file = gr.File()

    # Clicking the button runs crawl() on the URL and publishes the returned
    # file path through the File component.
    crawl_btn.click(fn=crawl, inputs=input_url, outputs=output_file)
51
+
52
+
53
+
54
# Script entry point: enable the request queue with three concurrent workers,
# then start the app server.
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch()