Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -102,7 +102,7 @@ def main():
|
|
102 |
st.title("Website Crawler")
|
103 |
|
104 |
domains = st.text_area("Enter the website URLs (one per line):", value="")
|
105 |
-
page_count = st.number_input("Enter the maximum number of pages to crawl:", value=
|
106 |
col1, col2 = st.columns(2)
|
107 |
with col1:
|
108 |
concurrent_requests = st.number_input("Enter the number of concurrent requests per domain:", value=8, min_value=1, step=1)
|
@@ -111,7 +111,7 @@ def main():
|
|
111 |
|
112 |
col1, col2 = st.columns(2)
|
113 |
with col1:
|
114 |
-
exclude_url_regex_input = st.text_area("Enter exclude URL regex patterns (one per line):", value="cdn")
|
115 |
with col2:
|
116 |
domain_filter_regex_input = st.text_area("Filter our unique domains with regex (one per line):", value="instagram\nfacebook\ntwitter\nlinkedin\nsnapchat\ntiktok\nreddit\npinterest\namazon\ncdn\nyoutube\nyoutu.be")
|
117 |
|
|
|
102 |
st.title("Website Crawler")
|
103 |
|
104 |
domains = st.text_area("Enter the website URLs (one per line):", value="")
|
105 |
+
page_count = st.number_input("Enter the maximum number of pages to crawl:", value=5000, min_value=1, step=1)
|
106 |
col1, col2 = st.columns(2)
|
107 |
with col1:
|
108 |
concurrent_requests = st.number_input("Enter the number of concurrent requests per domain:", value=8, min_value=1, step=1)
|
|
|
111 |
|
112 |
col1, col2 = st.columns(2)
|
113 |
with col1:
|
114 |
+
exclude_url_regex_input = st.text_area("Enter exclude URL regex patterns (one per line):", value="cdn\nwp-content")
|
115 |
with col2:
|
116 |
domain_filter_regex_input = st.text_area("Filter our unique domains with regex (one per line):", value="instagram\nfacebook\ntwitter\nlinkedin\nsnapchat\ntiktok\nreddit\npinterest\namazon\ncdn\nyoutube\nyoutu.be")
|
117 |
|