Dryanvfi committed on
Commit
e268015
·
1 Parent(s): 0c4e22a

Update app.py

Browse files

Updated the API call for the certificate inventory to retrieve greater data volumes than offered by the previous API.
Added error handling to skip API calls that may result in errors due to environment configurations.
Also made the prompt engineering more dynamic: the retrieved dataframes are kept in a dictionary, so the prompt describes only the data that was actually loaded, which avoids errors and keeps the instructions to the LLM specific and unambiguous.

Files changed (1) hide show
  1. app.py +178 -102
app.py CHANGED
@@ -78,8 +78,16 @@ def prompt_analyze_questions(prompt, passed_args):
78
 
79
  # Data Retrieval
80
 
 
 
81
  def getData(tlspc_api_key, openai_api_key):
82
- #Get Cert Requests
 
 
 
 
 
 
83
  def getCertRequests():
84
  currentPage = 0
85
  cert_requests_url = "https://api.venafi.cloud/outagedetection/v1/certificaterequestssearch"
@@ -103,38 +111,74 @@ def getData(tlspc_api_key, openai_api_key):
103
  data = response.json()
104
  cert_requests += data['certificateRequests']
105
  return cert_requests
106
- try:
107
- # Store OpenAI API Key
108
- openai.api_key = openai_api_key
109
- # Get Certificate Data
110
- cert_url = "https://api.venafi.cloud/outagedetection/v1/certificates?ownershipTree=false&excludeSupersededInstances=false&limit=10000"
111
 
112
- headers = {
113
- "accept": "application/json",
114
- "tppl-api-key": tlspc_api_key
115
- }
116
-
117
- cert_response = requests.get(cert_url, headers=headers)
118
-
119
- certs_df = pd.json_normalize(cert_response.json()['certificates']).convert_dtypes()
120
- certs_df.rename(columns = {'id':'certificateId'}, inplace = True)
121
- certs_df.drop(['companyId'],axis=1,inplace=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
- certs_df['validityStart'] = pd.to_datetime(certs_df['validityStart']).dt.date
124
- certs_df['validityEnd'] = pd.to_datetime(certs_df['validityEnd']).dt.date
 
 
 
 
 
125
 
126
- # Application Data and Formatting
127
- application_url = "https://api.venafi.cloud/outagedetection/v1/applications"
128
 
129
- headers = {
130
- "accept": "application/json",
131
- "tppl-api-key": tlspc_api_key
132
- }
133
 
 
134
  application_response = requests.get(application_url, headers=headers)
135
 
136
  application_df = pd.json_normalize(application_response.json()['applications']).convert_dtypes()
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  application_df_2 = application_df[['id',
139
  'name',
140
  'description',
@@ -150,19 +194,23 @@ def getData(tlspc_api_key, openai_api_key):
150
  record_path = ['ownerIdsAndTypes'],
151
  meta = ['id']).convert_dtypes()
152
 
153
- application_df = pd.merge(application_df_2, application_owners, left_on = 'id', right_on = 'id')
154
- application_df.rename(columns = {'id':'application_id',
155
  'creationDate':'application_creationDate',
156
  'modificationDate':'application_modificationDate'}, inplace = True)
 
 
 
157
 
158
- # User Data
159
- users_url = "https://api.venafi.cloud/v1/users"
160
 
161
- headers = {
162
- "accept": "application/json",
163
- "tppl-api-key": tlspc_api_key
164
- }
165
 
 
166
  users_response = requests.get(users_url, headers=headers)
167
 
168
  users_df = pd.json_normalize(users_response.json()['users']).convert_dtypes()
@@ -171,14 +219,19 @@ def getData(tlspc_api_key, openai_api_key):
171
 
172
  users_df.drop(['companyId'],axis=1,inplace=True)
173
 
174
- # Teams Data
175
- teams_url = "https://api.venafi.cloud/v1/teams"
 
176
 
177
- headers = {
178
- "accept": "application/json",
179
- "tppl-api-key": tlspc_api_key
180
- }
 
 
 
181
 
 
182
  teams_response = requests.get(teams_url, headers=headers)
183
 
184
  teams_df = pd.json_normalize(teams_response.json()['teams']).convert_dtypes()
@@ -186,15 +239,19 @@ def getData(tlspc_api_key, openai_api_key):
186
  'modificationDate':'teams_modificationDate'}, inplace = True)
187
 
188
  teams_df.drop(['companyId'],axis=1,inplace=True)
 
 
 
189
 
190
- # Machines Data
191
- machines_url = "https://api.venafi.cloud/v1/machines"
192
 
193
- headers = {
194
- "accept": "application/json",
195
- "tppl-api-key": tlspc_api_key
196
- }
197
 
 
198
  machines_response = requests.get(machines_url, headers=headers)
199
 
200
  machines_df = pd.json_normalize(machines_response.json()['machines']).convert_dtypes()
@@ -203,15 +260,19 @@ def getData(tlspc_api_key, openai_api_key):
203
  'modificationDate':'machine_modificationDate'}, inplace = True)
204
 
205
  machines_df.drop(['companyId'],axis=1,inplace=True)
 
 
 
206
 
207
- # Machine Identities Data
208
- machine_identities_url = "https://api.venafi.cloud/v1/machineidentities"
209
 
210
- headers = {
211
- "accept": "application/json",
212
- "tppl-api-key": tlspc_api_key
213
- }
214
 
 
215
  machine_identities_response = requests.get(machine_identities_url, headers=headers)
216
 
217
  machine_identities_df = pd.json_normalize(machine_identities_response.json()['machineIdentities']).convert_dtypes().iloc[:,:7]
@@ -221,20 +282,19 @@ def getData(tlspc_api_key, openai_api_key):
221
  'modificationDate':'machine_identities_modificationDate'}, inplace = True)
222
 
223
  machine_identities_df.drop(['companyId'],axis=1,inplace=True)
 
 
 
224
 
225
- cert_requests_json = getCertRequests()
226
- cert_requests_df = pd.json_normalize(cert_requests_json).convert_dtypes()
227
- cert_requests_df.rename(columns = {'id':'cert_request_id', 'creationDate':'cert_request_creationDate'}, inplace = True)
228
- cert_requests_df.drop(['companyId'],axis=1,inplace=True)
229
-
230
- # Issuing Templates
231
- issuing_template_url = "https://api.venafi.cloud/v1/certificateissuingtemplates"
232
 
233
- headers = {
234
- "accept": "application/json",
235
- "tppl-api-key": tlspc_api_key
236
- }
237
 
 
238
  issuing_template_response = requests.get(issuing_template_url, headers=headers)
239
 
240
  issuing_templates_df = pd.json_normalize(issuing_template_response.json()['certificateIssuingTemplates']).convert_dtypes()
@@ -242,47 +302,64 @@ def getData(tlspc_api_key, openai_api_key):
242
  'creationDate':'issuing_template_creationDate'}, inplace = True)
243
 
244
  issuing_templates_df.drop(['companyId'],axis=1,inplace=True)
 
 
 
245
 
246
- # Prompt Engineering
247
-
248
- # Get data structure for each dataframe to be passed in initial prompt
249
- users_data_description = users_df.dtypes.apply(lambda x: x.name).to_dict()
250
- application_data_description = application_df.dtypes.apply(lambda x: x.name).to_dict()
251
- certificate_data_description = certs_df.dtypes.apply(lambda x: x.name).to_dict()
252
- teams_data_description = teams_df.dtypes.apply(lambda x: x.name).to_dict()
253
- machines_data_description = machines_df.dtypes.apply(lambda x: x.name).to_dict()
254
- machine_identities_data_description = machine_identities_df.dtypes.apply(lambda x: x.name).to_dict()
255
- cert_requests_data_description = cert_requests_df.dtypes.apply(lambda x: x.name).to_dict()
256
- issuing_templates_data_description = issuing_templates_df.dtypes.apply(lambda x: x.name).to_dict()
257
-
258
- data_structure_overview = f"""I have multiple python pandas dataframes.
259
- One is named application_df which contains data on applications and has the following structure: {application_data_description}.
260
- Another python pandas dataframe is named users_df and contains user information and has the following structure: {users_data_description}.
261
- Another python pandas dataframe is named certs_df and contains certificate information and has the following structure: {certificate_data_description}.
262
- Another python pandas dataframe is named teams_df and contains teams information and has the following structure: {teams_data_description}.
263
- Another python pandas dataframe is named machines_df and contains machine information and has the following structure: {machines_data_description}.
264
- Another python pandas dataframe is named machine_identities_df and contains machine identity information and has the following structure: {machine_identities_data_description}.
265
- Another python pandas dataframe is named cert_requests_df and contains certificate request information and has the following structure: {cert_requests_data_description}
266
- Another python pandas dataframe is named issuing_templates_df and contains issuing template information and has the following structure: {issuing_templates_data_description}
267
- """
268
-
269
- data_relationships_overview = """The dataframes relate to eachother in the following manner.
270
- The column values in the 'user_id' column in users_df match the column values in the 'ownerId' column in application_df.
271
- The column values in the 'team_id' column in teams_df match the column values in the 'owningTeamId' column in machines_df.
272
- The column values in the 'certificateOwnerUserId' column in cert_requests_df match the column values in the 'user_id' column in users_df.
273
- The column values in the 'certificateIssuingTemplateId' column in cert_requests_df match the column values in the 'issuing_template_id' column in issuing_templates_df.
274
- The column values in the 'certificateOwnerUserId' column in cert_requests_df match the column values in the 'user_id' column in users_df.
275
- The column values in the 'certificateIssuingTemplateId' column in certs_request_df match the column values in the 'issuing_template_id' column in issuing_templates_df.
276
- """
277
- passed_args = {'application_df': application_df, 'users_df': users_df, 'certs_df': certs_df,
278
- 'teams_df': teams_df, 'machines_df': machines_df, 'machine_identities_df': machine_identities_df,
279
- 'cert_requests_df': cert_requests_df, 'issuing_templates_df': issuing_templates_df,
280
- 'data_structure_overview': data_structure_overview,
281
- 'data_relationships_overview': data_relationships_overview}
282
-
283
- return 'Data successfully loaded!', passed_args
284
- except Exception as e:
285
- raise e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
 
287
  # User facing application
288
  with gr.Blocks(theme='aliabid94/new-theme') as demo:
@@ -298,7 +375,6 @@ with gr.Blocks(theme='aliabid94/new-theme') as demo:
298
  To get started, navigate to the API Keys tab. This will connect to your TLS Protect Cloud and OpenAI accounts.
299
 
300
  Signup for TLS Protect Cloud at [venafi.com/signup](https://venafi.com/signup).
301
-
302
  Use of this project either when run locally in your environment or on Hugging Face may introduce risks. Running this project accesses data from your TLS Protect Cloud account. While this project does not store your TLS Protect Cloud data or send that data to OpenAI, the prompts entered are sent to OpenAI for Python code generation.
303
 
304
  As with every opensource project, application, or online service that uses your API keys, you are strongly recommended to rotate your API keys after use.
 
78
 
79
  # Data Retrieval
80
 
81
+ # Some API calls may fail if customers haven't fully configured their environment
82
+
83
  def getData(tlspc_api_key, openai_api_key):
84
+ # Store OpenAI API Key
85
+ openai.api_key = openai_api_key
86
+
87
+ # Create dictionary for adding variables
88
+ dfs = {}
89
+
90
+ # Get Cert Requests
91
  def getCertRequests():
92
  currentPage = 0
93
  cert_requests_url = "https://api.venafi.cloud/outagedetection/v1/certificaterequestssearch"
 
111
  data = response.json()
112
  cert_requests += data['certificateRequests']
113
  return cert_requests
 
 
 
 
 
114
 
115
+ try:
116
+ cert_requests_json = getCertRequests()
117
+ certificate_requests_df = pd.json_normalize(cert_requests_json).convert_dtypes()
118
+ certificate_requests_df.rename(columns = {'id':'cert_request_id', 'creationDate':'cert_request_creationDate'}, inplace = True)
119
+ certificate_requests_df.drop(['companyId'],axis=1,inplace=True)
120
+ dfs['certificate_requests_df'] = certificate_requests_df
121
+ except:
122
+ pass
123
+
124
+ # Certificate inventory
125
+ cert_url = "https://api.venafi.cloud/outagedetection/v1/certificatesearch?ownershipTree=true&excludeSupersededInstances=false"
126
+
127
def getCerts():
    """Retrieve every certificate from the certificate search endpoint.

    Posts to ``cert_url`` one page at a time, starting at page 0, and keeps
    requesting the next page until a page reports a ``count`` below 1.

    Returns:
        list: the ``certificates`` entries of all retrieved pages, in order.

    Raises:
        Exception: if any page request returns a non-200 status code.
    """
    # Build the headers locally. The enclosing function only assigns its own
    # ``headers`` after this helper has already been called, so reading that
    # closure variable raises a NameError which the caller silently swallows.
    request_headers = {
        "accept": "application/json",
        "tppl-api-key": tlspc_api_key,
    }

    def fetch_page(page_number):
        # Request a single page and fail loudly on any non-200 response, so a
        # failing follow-up page is reported the same way as a failing first page.
        payload = {"paging": {"pageNumber": page_number}}
        response = requests.post(url=cert_url, headers=request_headers, json=payload)
        if response.status_code != 200:
            raise Exception('Error retrieving certificates:' + "\n" + response.text + "\n=============\n")
        return response.json()

    currentPage = 0
    data = fetch_page(currentPage)
    certs = list(data['certificates'])

    # A page whose count is below 1 marks the end of the result set.
    while data['count'] >= 1:
        currentPage += 1
        print('Getting page ' + str(currentPage) + ' - Count:' + str(data['count']))
        data = fetch_page(currentPage)
        certs.extend(data['certificates'])

    return certs
145
 
146
+ try:
147
+ certs_json = getCerts()
148
+ certificates_df = pd.json_normalize(certs_json).convert_dtypes()
149
+ certificates_df.rename(columns = {'id':'certificateId'}, inplace = True)
150
+ dfs['certificates_df'] = certificates_df
151
+ except Exception:
152
+ pass
153
 
154
+ # Application Data and Formatting
155
+ application_url = "https://api.venafi.cloud/outagedetection/v1/applications"
156
 
157
+ headers = {
158
+ "accept": "application/json",
159
+ "tppl-api-key": tlspc_api_key
160
+ }
161
 
162
+ try:
163
  application_response = requests.get(application_url, headers=headers)
164
 
165
  application_df = pd.json_normalize(application_response.json()['applications']).convert_dtypes()
166
 
167
+ cols = ['id',
168
+ 'name',
169
+ 'description',
170
+ 'fullyQualifiedDomainNames',
171
+ 'ipRanges',
172
+ 'ports',
173
+ 'modificationDate',
174
+ 'creationDate','ownership.owningUsers',
175
+ 'ownership.owningTeams']
176
+
177
+ for c in cols:
178
+ df_cols = application_df.columns
179
+ if c not in df_cols:
180
+ application_df[c] = np.nan
181
+
182
  application_df_2 = application_df[['id',
183
  'name',
184
  'description',
 
194
  record_path = ['ownerIdsAndTypes'],
195
  meta = ['id']).convert_dtypes()
196
 
197
+ applications_df = pd.merge(application_df_2, application_owners, left_on = 'id', right_on = 'id')
198
+ applications_df.rename(columns = {'id':'application_id',
199
  'creationDate':'application_creationDate',
200
  'modificationDate':'application_modificationDate'}, inplace = True)
201
+ dfs['applications_df'] = applications_df
202
+ except Exception:
203
+ pass
204
 
205
+ # User Data
206
+ users_url = "https://api.venafi.cloud/v1/users"
207
 
208
+ headers = {
209
+ "accept": "application/json",
210
+ "tppl-api-key": tlspc_api_key
211
+ }
212
 
213
+ try:
214
  users_response = requests.get(users_url, headers=headers)
215
 
216
  users_df = pd.json_normalize(users_response.json()['users']).convert_dtypes()
 
219
 
220
  users_df.drop(['companyId'],axis=1,inplace=True)
221
 
222
+ dfs['users_df'] = users_df
223
+ except Exception:
224
+ pass
225
 
226
+ # Teams Data
227
+ teams_url = "https://api.venafi.cloud/v1/teams"
228
+
229
+ headers = {
230
+ "accept": "application/json",
231
+ "tppl-api-key": tlspc_api_key
232
+ }
233
 
234
+ try:
235
  teams_response = requests.get(teams_url, headers=headers)
236
 
237
  teams_df = pd.json_normalize(teams_response.json()['teams']).convert_dtypes()
 
239
  'modificationDate':'teams_modificationDate'}, inplace = True)
240
 
241
  teams_df.drop(['companyId'],axis=1,inplace=True)
242
+ dfs['teams_df'] = teams_df
243
+ except Exception:
244
+ pass
245
 
246
+ # Machines Data
247
+ machines_url = "https://api.venafi.cloud/v1/machines"
248
 
249
+ headers = {
250
+ "accept": "application/json",
251
+ "tppl-api-key": tlspc_api_key
252
+ }
253
 
254
+ try:
255
  machines_response = requests.get(machines_url, headers=headers)
256
 
257
  machines_df = pd.json_normalize(machines_response.json()['machines']).convert_dtypes()
 
260
  'modificationDate':'machine_modificationDate'}, inplace = True)
261
 
262
  machines_df.drop(['companyId'],axis=1,inplace=True)
263
+ dfs['machines_df'] = machines_df
264
+ except Exception:
265
+ pass
266
 
267
+ # Machine Identities Data
268
+ machine_identities_url = "https://api.venafi.cloud/v1/machineidentities"
269
 
270
+ headers = {
271
+ "accept": "application/json",
272
+ "tppl-api-key": tlspc_api_key
273
+ }
274
 
275
+ try:
276
  machine_identities_response = requests.get(machine_identities_url, headers=headers)
277
 
278
  machine_identities_df = pd.json_normalize(machine_identities_response.json()['machineIdentities']).convert_dtypes().iloc[:,:7]
 
282
  'modificationDate':'machine_identities_modificationDate'}, inplace = True)
283
 
284
  machine_identities_df.drop(['companyId'],axis=1,inplace=True)
285
+ dfs['machine_identities_df'] = machine_identities_df
286
+ except Exception:
287
+ pass
288
 
289
+ # Issuing Templates
290
+ issuing_template_url = "https://api.venafi.cloud/v1/certificateissuingtemplates"
 
 
 
 
 
291
 
292
+ headers = {
293
+ "accept": "application/json",
294
+ "tppl-api-key": tlspc_api_key
295
+ }
296
 
297
+ try:
298
  issuing_template_response = requests.get(issuing_template_url, headers=headers)
299
 
300
  issuing_templates_df = pd.json_normalize(issuing_template_response.json()['certificateIssuingTemplates']).convert_dtypes()
 
302
  'creationDate':'issuing_template_creationDate'}, inplace = True)
303
 
304
  issuing_templates_df.drop(['companyId'],axis=1,inplace=True)
305
+ dfs['issuing_templates_df'] = issuing_templates_df
306
+ except Exception:
307
+ pass
308
 
309
+ # Remove dictionary objects that aren't dataframes
310
+
311
def dataframe_filtering(pair):
    """Tell whether the value of a ``(key, value)`` pair is a pandas DataFrame.

    Intended as a predicate for ``filter`` over ``dict.items()``.

    Args:
        pair: a two-item ``(key, value)`` sequence.

    Returns:
        bool: True if ``value`` is a ``pd.DataFrame``, otherwise False.
    """
    # The key is irrelevant to the decision; only the value's type matters.
    _, value = pair
    return isinstance(value, pd.DataFrame)
317
+
318
+ dfs = dict(filter(dataframe_filtering, dfs.items()))
319
+
320
+ # Prompt Engineering
321
+
322
+ # Get data structure for each dataframe to be passed in initial prompt
323
+ dict_data_types = {}
324
+ for i in dfs.keys():
325
+ dict_data_types[i] = dfs[i].dtypes.apply(lambda x: x.name).to_dict()
326
+
327
+ data_structure_overview = 'I have multiple python pandas dataframes.'
328
+
329
+ for i in dict_data_types:
330
+ data_structure_overview += str('\nOne is named '
331
+ + i +
332
+ ' and contains ' +
333
+ i.split('df')[0].replace('_',' ') +
334
+ ' information and has the following structure: '
335
+ + str(dict_data_types[i]))
336
+
337
+ # Define the relationships of the dataframes to each other
338
+ data_relationships_overview = "The dataframes relate to eachother in the following manner:"
339
+
340
+ if 'users_df' in dfs.keys() and 'applications_df' in dfs.keys():
341
+ data_relationships_overview += "/nThe column values in the 'user_id' column in users_df match the column values in the 'ownerId' column in application_df."
342
+ if 'teams_df' in dfs.keys() and 'machines_df' in dfs.keys():
343
+ data_relationships_overview += "/nThe column values in the 'team_id' column in teams_df match the column values in the 'owningTeamId' column in machines_df."
344
+ if 'certificate_requests_df' in dfs.keys() and 'users_df' in dfs.keys():
345
+ data_relationships_overview += "/nThe column values in the 'certificateOwnerUserId' column in cert_requests_df match the column values in the 'user_id' column in users_df."
346
+ if 'certificate_requests_df' in dfs.keys() and 'issuing_templates_df' in dfs.keys():
347
+ data_relationships_overview += "/nThe column values in the 'certificateIssuingTemplateId' column in cert_requests_df match the column values in the 'issuing_template_id' column in issuing_templates_df."
348
+ if 'machine_identities_df' in dfs.keys() and 'certificates_df' in dfs.keys():
349
+ data_relationships_overview += "/nThe column values in the 'certificateId' column in machine_identities_df match the column values in the 'certificateId' column in certificates_df."
350
+ if 'machine_identities_df' in dfs.keys() and 'machines_df' in dfs.keys():
351
+ data_relationships_overview += "/nThe column values in the 'machine_id' column in machine_identities_df match the column values in the 'machine_id' column in machines_df."
352
+
353
+ # If the data relationship overview is blank, just use a '' to avoid confusing the LLM
354
+ if data_relationships_overview == "The dataframes relate to eachother in the following manner:":
355
+ data_relationships_overview = ''
356
+
357
+ dfs['data_structure_overview'] = data_structure_overview
358
+ dfs['data_relationships_overview'] = data_relationships_overview
359
+
360
+ passed_args = dfs
361
+
362
+ return 'Data successfully loaded!', passed_args
363
 
364
  # User facing application
365
  with gr.Blocks(theme='aliabid94/new-theme') as demo:
 
375
  To get started, navigate to the API Keys tab. This will connect to your TLS Protect Cloud and OpenAI accounts.
376
 
377
  Signup for TLS Protect Cloud at [venafi.com/signup](https://venafi.com/signup).
 
378
  Use of this project either when run locally in your environment or on Hugging Face may introduce risks. Running this project accesses data from your TLS Protect Cloud account. While this project does not store your TLS Protect Cloud data or send that data to OpenAI, the prompts entered are sent to OpenAI for Python code generation.
379
 
380
  As with every opensource project, application, or online service that uses your API keys, you are strongly recommended to rotate your API keys after use.