Spaces:
Sleeping
Sleeping
A-baoYang
committed on
Commit
·
499a05b
1
Parent(s):
a7cb959
Add Dockerfile
Browse files
- dockerfile → Dockerfile +0 -0
- app.py +0 -56
- requirements.txt +2 -0
dockerfile → Dockerfile
RENAMED
File without changes
|
app.py
DELETED
@@ -1,56 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
from pathlib import Path
|
3 |
-
from crawler.main import main
|
4 |
-
from crawler.config import Config
|
5 |
-
import asyncio
|
6 |
-
|
7 |
-
|
8 |
-
ROOT_DIR = Path(__file__).absolute().parents[0]
|
9 |
-
|
10 |
-
|
11 |
-
def crawl(url):
|
12 |
-
config = Config(
|
13 |
-
url=url,
|
14 |
-
match=f"{url}/**",
|
15 |
-
selector="body",
|
16 |
-
max_pages_to_crawl=100,
|
17 |
-
output_file_name=f"{ROOT_DIR}/output.json"
|
18 |
-
)
|
19 |
-
asyncio.run(main(config))
|
20 |
-
return f"{ROOT_DIR}/output.json"
|
21 |
-
|
22 |
-
|
23 |
-
css = """
|
24 |
-
#center {text-align: center}
|
25 |
-
footer {visibility: hidden}
|
26 |
-
a {color: rgb(255, 206, 10) !important}
|
27 |
-
"""
|
28 |
-
with gr.Blocks(css=css) as demo:
|
29 |
-
user_identify = gr.Textbox(visible=False)
|
30 |
-
|
31 |
-
gr.Markdown("# AutoCrawler", elem_id="center")
|
32 |
-
gr.Markdown("Made by `LaplaceAI`", elem_id="center")
|
33 |
-
gr.Markdown("---")
|
34 |
-
|
35 |
-
input_url = gr.Textbox(
|
36 |
-
placeholder="Enter your website...",
|
37 |
-
label="URL to crawl",
|
38 |
-
interactive=True,
|
39 |
-
visible=True,
|
40 |
-
)
|
41 |
-
crawl_btn = gr.Button("Start Crawl!")
|
42 |
-
# status = gr.Textbox(
|
43 |
-
# label="Crawler Status",
|
44 |
-
# interactive=False,
|
45 |
-
# )
|
46 |
-
output_file = gr.File()
|
47 |
-
|
48 |
-
crawl_btn.click(
|
49 |
-
inputs=input_url, fn=crawl, outputs=output_file
|
50 |
-
)
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
if __name__ == "__main__":
|
55 |
-
demo.queue(concurrency_count=3)
|
56 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
playwright
|
2 |
+
asyncio
|