Germano Cavalcante committed on
Commit
f8ca02c
·
1 Parent(s): af0d9b7

Add crawl delay for requests

Browse files

Ref. https://projects.blender.org/Alaska/needs-information-reports/src/branch/main/needs_info_from_users.py

Files changed (1) hide show
  1. routers/utils_gitea.py +24 -0
routers/utils_gitea.py CHANGED
@@ -1,12 +1,35 @@
1
  # utils_gitea.py
2
 
3
  import json
 
4
  import urllib.error
5
  import urllib.parse
6
  import urllib.request
7
  from concurrent.futures import ThreadPoolExecutor, as_completed
8
 
9
  BASE_API_URL = "https://projects.blender.org/api/v1"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  def url_json_get(url, data=None):
@@ -18,6 +41,7 @@ def url_json_get(url, data=None):
18
  else:
19
  request = urllib.request.Request(url)
20
 
 
21
  response = urllib.request.urlopen(request)
22
  response_data = json.loads(response.read())
23
  return response_data
 
1
  # utils_gitea.py
2
 
3
  import json
4
+ import time
5
  import urllib.error
6
  import urllib.parse
7
  import urllib.request
8
  from concurrent.futures import ThreadPoolExecutor, as_completed
9
 
10
  BASE_API_URL = "https://projects.blender.org/api/v1"
11
+ CRAWL_DELAY = 2
12
+ last_request_time = None
13
+ # Conform to Blenders crawl delay request:
14
+ # https://projects.blender.org/robots.txt
15
+ try:
16
+ projects = urllib.robotparser.RobotFileParser(
17
+ url="https://projects.blender.org/robots.txt")
18
+ projects.read()
19
+ projects_crawl_delay = projects.crawl_delay("*")
20
+ if projects_crawl_delay is not None:
21
+ CRAWL_DELAY = projects_crawl_delay
22
+ except:
23
+ pass
24
+
25
+
26
def wait_for_crawl_delay() -> None:
    """Space consecutive requests at least CRAWL_DELAY seconds apart.

    Sleeps for whatever portion of the delay has not yet elapsed since
    the previous call, then records the current time as the new
    last-request timestamp.

    NOTE(review): this mutates module state without a lock; the file
    fans out requests via ThreadPoolExecutor, so concurrent callers may
    race on ``last_request_time`` — confirm whether that is acceptable.
    """
    global last_request_time

    # First request ever — no prior timestamp, nothing to wait for.
    if last_request_time is not None:
        elapsed = time.time() - last_request_time
        remaining = CRAWL_DELAY - elapsed
        if remaining > 0:
            time.sleep(remaining)
    last_request_time = time.time()
33
 
34
 
35
  def url_json_get(url, data=None):
 
41
  else:
42
  request = urllib.request.Request(url)
43
 
44
+ wait_for_crawl_delay()
45
  response = urllib.request.urlopen(request)
46
  response_data = json.loads(response.read())
47
  return response_data