Spaces:
Running
Running
Create jobs.py
Browse files
jobs.py
ADDED
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datetime
|
2 |
+
import time
|
3 |
+
import re
|
4 |
+
|
5 |
+
import requests
|
6 |
+
from requests.adapters import HTTPAdapter
|
7 |
+
from requests.packages.urllib3.util.retry import Retry
|
8 |
+
from requests.exceptions import HTTPError
|
9 |
+
|
10 |
+
# CONSTANTS
# OAuth2 endpoint used to obtain an access token (client credentials grant).
ENDPOINT_ACCESS_TOKEN = "https://entreprise.francetravail.fr/connexion/oauth2/access_token"
# Root of the 'Offres d'emploi v2' API. The value keeps its trailing slash so
# that any code concatenating paths directly onto it keeps working.
OFFRES_DEMPLOI_V2_BASE = "https://api.francetravail.io/partenaire/offresdemploi/v2/"
# The endpoints below are derived from the base with its trailing slash removed,
# otherwise the resulting URLs would contain a double slash ("/v2//referentiel").
REFERENTIEL_ENDPOINT = "{}/referentiel".format(OFFRES_DEMPLOI_V2_BASE.rstrip("/"))
SEARCH_ENDPOINT = "{}/offres/search".format(OFFRES_DEMPLOI_V2_BASE.rstrip("/"))
|
15 |
+
|
16 |
+
|
17 |
+
class Api:
    """
    Client for the 'API Offres d'emploi v2' served from the France Travail endpoints.

    The client authenticates with the OAuth client credentials grant, keeps the
    token on the instance and transparently renews it once it has expired.
    """

    # Shape of the 'Content-Range' header parsed by `search`:
    # "offres <first_index>-<last_index>/<max_results>".
    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    _CONTENT_RANGE_PATTERN = re.compile(
        r"offres (?P<first_index>\d+)-(?P<last_index>\d+)/(?P<max_results>\d+)"
    )

    def __init__(self, client_id, client_secret, verbose=False, proxies=None):
        """
        Store the credentials and prepare an HTTP session with automatic retries.

        No network call is made here: the token is requested lazily on the first
        call that needs it (see `get_headers`).

        Retry mechanisms are implemented in case the user does too many requests
        (code 429: too many requests) or just because the API might sometimes be
        unreliable (code 502: bad gateway).

        :param client_id: the client ID
        :type client_id: str
        :param client_secret: the client secret
        :type client_secret: str
        :param verbose: whether to print progress messages
        :type verbose: bool
        :param proxies: (optional) The proxies configuration
        :type proxies: dict with keys 'http' and/or 'https'
        :returns: None


        :Example 1:

        >>> from offres_demploi import Api
        >>> client = Api(client_id="<your_client_id>", client_secret="<your_client_secret>")

        :Example 2:
        >>> from offres_demploi import Api
        >>> proxy = "localhost:3128"
        >>> proxies = {"http": proxy, "https": proxy}
        >>> client_id = "<your_client_id>"
        >>> client_secret = "<your_client_secret>"
        >>> client = Api(client_id=client_id, client_secret=client_secret, proxies=proxies)
        """
        self.client_id = client_id
        self.client_secret = client_secret
        self.verbose = verbose
        self.proxies = proxies
        self.timeout = 60  # seconds, applied to every request
        self.token = None  # filled in by get_token()

        retry = Retry(
            total=3,
            backoff_factor=1,
            status_forcelist=(
                502,
                429,
            ),  # 429 for too many requests and 502 for bad gateway
            respect_retry_after_header=False,
        )
        adapter = HTTPAdapter(max_retries=retry)
        session = requests.Session()
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        self.session = session

    @staticmethod
    def _error_details(response, key=None):
        """
        Extract a printable description of an error response.

        :param response: the HTTP response carrying the error
        :param key: (optional) field to pick from the decoded JSON body
        :returns: the value under 'key' when the body is a JSON object holding
            it, the decoded JSON body otherwise, or the raw text when the body
            is not valid JSON (so that building the error message never fails).
        """
        try:
            body = response.json()
        except ValueError:
            return response.text
        if key is not None and isinstance(body, dict) and key in body:
            return body[key]
        return body

    @classmethod
    def _parse_content_range(cls, header_value):
        """
        Parse a 'Content-Range' header value.

        :param header_value: the raw header value, or None when it is absent
        :rtype: dict or None
        :returns: a dictionary with the string fields 'first_index',
            'last_index' and 'max_results', or None when the header is missing
            or does not have the expected shape.
        """
        if not header_value:
            return None
        found = cls._CONTENT_RANGE_PATTERN.search(header_value)
        return found.groupdict() if found else None

    def get_token(self):
        """
        Request a new token and store it on the instance (for subsequent use).

        :rtype: dict
        :returns: The token with the fields from the API + the custom field
            'expires_at' (a naive local datetime)

        :raises HTTPError: Error when requesting the resource. For a 400
            response, the message also contains the body returned by the server.
        """
        payload = dict(
            grant_type="client_credentials",
            client_id=self.client_id,
            client_secret=self.client_secret,
            scope="api_offresdemploiv2 o2dsoffre application_{}".format(
                self.client_id
            ),
        )
        headers = {"content-type": "application/x-www-form-urlencoded"}
        params = dict(realm="/partenaire")
        # Taken before the request so that 'expires_at' is never later than the
        # real expiry.
        requested_at = datetime.datetime.today()
        # Goes through the shared session, like every other call of this client.
        r = self.session.post(
            url=ENDPOINT_ACCESS_TOKEN,
            headers=headers,
            data=payload,
            params=params,
            timeout=self.timeout,
            proxies=self.proxies,
        )
        try:
            r.raise_for_status()
        except HTTPError as error:
            if r.status_code != 400:
                raise
            complete_message = "{}\n{}".format(error, self._error_details(r))
            # Keep the response and the original exception attached for callers.
            raise HTTPError(complete_message, response=r) from error

        token = r.json()
        token["expires_at"] = requested_at + datetime.timedelta(
            seconds=token["expires_in"]
        )
        self.token = token
        return token

    def is_expired(self):
        """
        Test if the token has expired (based on the 'expires_at' field).

        :rtype: boolean
        :returns: True if the token has expired or has never been requested,
            False otherwise
        """
        token = getattr(self, "token", None)
        if token is None:
            return True
        return datetime.datetime.today() >= token["expires_at"]

    def get_headers(self):
        """
        Build the headers necessary to do requests.

        A new token is requested first if none has been requested yet or if the
        current one has expired.

        :rtype: dict
        :returns: The 'Authorization' header carrying the bearer token
        """
        if getattr(self, "token", None) is None:
            if self.verbose:
                print("Token has not been requested yet. Requesting token")
            self.get_token()
        elif self.is_expired():
            if self.verbose:
                print("Token is expired. Requesting new token")
            self.get_token()
        return {"Authorization": "Bearer {}".format(self.token["access_token"])}

    def referentiel(self, referentiel):
        """
        Get dictionary of 'referentiel'.
        'Referentiel' available: domaine, appellations (domaines professionnelles ROME), metiers, themes, continents,
        pays, regions, departements , communes , secteursActivites, naturesContrats, typesContrats, niveauxFormations,
        permis, langues

        Full list available at: https://www.emploi-store-dev.fr/portail-developpeur-cms/home/catalogue-des-api/documentation-des-api/api/api-offres-demploi-v2/referentiels.html

        :param referentiel: The 'referentiel' to look for
        :type referentiel: str
        :raises HTTPError: Error when requesting the resource
        :rtype: dict
        :returns: The 'referentiel' with the keys 'code' for the acronym/abbreviation and 'libelle' for the full name.

        :Example:

        >>> client.referentiel("themes")

        """
        r = self.session.get(
            url="{}/{}".format(REFERENTIEL_ENDPOINT, referentiel),
            headers=self.get_headers(),
            timeout=self.timeout,
            proxies=self.proxies,
        )
        r.raise_for_status()
        return r.json()

    def search(self, params=None, silent_http_errors=False):
        """
        Make job search based on parameters defined in:
        https://www.emploi-store-dev.fr/portail-developpeur-cms/home/catalogue-des-api/documentation-des-api/api/api-offres-demploi-v2/rechercher-par-criteres.html

        :param params: The parameters of the search request
        :type params: dict
        :param silent_http_errors: Silent HTTP errors if True (the error is
            printed and None is returned), raise error otherwise. Default is False
        :type silent_http_errors: bool

        :raises HTTPError: Error when requesting the resource (unless
            'silent_http_errors' is True)

        :rtype: dict or None
        :returns: A dictionary with three fields:
            - 'filtresPossibles', that display the aggregates output
            - 'resultats': that is the job offers
            - 'Content-Range': the current range index ('first_index' and 'last_index') and the maximum result index ('max_results'), or None when the server did not send a usable 'Content-Range' header


        :Example:
        >>> params = {}
        >>> params.update({"MotsCles": "Ouvrier"})
        >>> params.update({"minCreationDate": "2020-01-01T00:00:00Z"})
        >>> client.search(params=params)
        """
        if self.verbose:
            print('Making request with params {}'.format(params))
        r = self.session.get(
            url=SEARCH_ENDPOINT,
            params=params,
            headers=self.get_headers(),
            timeout=self.timeout,
            proxies=self.proxies,
        )

        try:
            r.raise_for_status()
        except HTTPError as error:
            if r.status_code == 400:
                # A 400 body is expected to explain the rejected parameter
                # under 'message'; fall back to the whole body otherwise.
                complete_message = "{}\n{}".format(
                    error, self._error_details(r, key="message")
                )
                if not silent_http_errors:
                    raise HTTPError(complete_message, response=r) from error
            else:
                if not silent_http_errors:
                    raise
                complete_message = str(error)
            print(complete_message)
            return None

        if r.status_code == 204:
            # NOTE(review): the API is understood to answer 204 with an empty
            # body when no offer matches - confirm against the API reference.
            # Decoding an empty body as JSON would fail, so report no results.
            out = {"filtresPossibles": [], "resultats": []}
        else:
            out = r.json()
        out.update(
            {"Content-Range": self._parse_content_range(r.headers.get("Content-Range"))}
        )
        return out
|