joshuadunlop committed on
Commit
834b992
·
verified ·
1 Parent(s): 488759f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -102,7 +102,7 @@ def main():
102
  st.title("Website Crawler")
103
 
104
  domains = st.text_area("Enter the website URLs (one per line):", value="")
105
- page_count = st.number_input("Enter the maximum number of pages to crawl:", value=100, min_value=1, step=1)
106
  col1, col2 = st.columns(2)
107
  with col1:
108
  concurrent_requests = st.number_input("Enter the number of concurrent requests per domain:", value=8, min_value=1, step=1)
@@ -111,7 +111,7 @@ def main():
111
 
112
  col1, col2 = st.columns(2)
113
  with col1:
114
- exclude_url_regex_input = st.text_area("Enter exclude URL regex patterns (one per line):", value="cdn")
115
  with col2:
116
  domain_filter_regex_input = st.text_area("Filter our unique domains with regex (one per line):", value="instagram\nfacebook\ntwitter\nlinkedin\nsnapchat\ntiktok\nreddit\npinterest\namazon\ncdn\nyoutube\nyoutu.be")
117
 
 
102
  st.title("Website Crawler")
103
 
104
  domains = st.text_area("Enter the website URLs (one per line):", value="")
105
+ page_count = st.number_input("Enter the maximum number of pages to crawl:", value=5000, min_value=1, step=1)
106
  col1, col2 = st.columns(2)
107
  with col1:
108
  concurrent_requests = st.number_input("Enter the number of concurrent requests per domain:", value=8, min_value=1, step=1)
 
111
 
112
  col1, col2 = st.columns(2)
113
  with col1:
114
+ exclude_url_regex_input = st.text_area("Enter exclude URL regex patterns (one per line):", value="cdn\nwp-content")
115
  with col2:
116
  domain_filter_regex_input = st.text_area("Filter our unique domains with regex (one per line):", value="instagram\nfacebook\ntwitter\nlinkedin\nsnapchat\ntiktok\nreddit\npinterest\namazon\ncdn\nyoutube\nyoutu.be")
117