A-baoYang committed on
Commit
499a05b
·
1 Parent(s): a7cb959

Add Dockerfile

Browse files
Files changed (3) hide show
  1. dockerfile → Dockerfile +0 -0
  2. app.py +0 -56
  3. requirements.txt +2 -0
dockerfile → Dockerfile RENAMED
File without changes
app.py DELETED
@@ -1,56 +0,0 @@
1
- import gradio as gr
2
- from pathlib import Path
3
- from crawler.main import main
4
- from crawler.config import Config
5
- import asyncio
6
-
7
-
8
- ROOT_DIR = Path(__file__).absolute().parents[0]
9
-
10
-
11
- def crawl(url):
12
- config = Config(
13
- url=url,
14
- match=f"{url}/**",
15
- selector="body",
16
- max_pages_to_crawl=100,
17
- output_file_name=f"{ROOT_DIR}/output.json"
18
- )
19
- asyncio.run(main(config))
20
- return f"{ROOT_DIR}/output.json"
21
-
22
-
23
- css = """
24
- #center {text-align: center}
25
- footer {visibility: hidden}
26
- a {color: rgb(255, 206, 10) !important}
27
- """
28
- with gr.Blocks(css=css) as demo:
29
- user_identify = gr.Textbox(visible=False)
30
-
31
- gr.Markdown("# AutoCrawler", elem_id="center")
32
- gr.Markdown("Made by `LaplaceAI`", elem_id="center")
33
- gr.Markdown("---")
34
-
35
- input_url = gr.Textbox(
36
- placeholder="Enter your website...",
37
- label="URL to crawl",
38
- interactive=True,
39
- visible=True,
40
- )
41
- crawl_btn = gr.Button("Start Crawl!")
42
- # status = gr.Textbox(
43
- # label="Crawler Status",
44
- # interactive=False,
45
- # )
46
- output_file = gr.File()
47
-
48
- crawl_btn.click(
49
- inputs=input_url, fn=crawl, outputs=output_file
50
- )
51
-
52
-
53
-
54
- if __name__ == "__main__":
55
- demo.queue(concurrency_count=3)
56
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ playwright
2
+ asyncio