seanpedrickcase committed on
Commit
651ef78
1 Parent(s): 30b5dc1

Added additional password auth for AWS-based files. Changed 'Clean' default to no

Browse files
Files changed (3) hide show
  1. .gitignore +3 -1
  2. app.py +3 -2
  3. search_funcs/aws_functions.py +36 -33
.gitignore CHANGED
@@ -17,6 +17,7 @@
17
  *.pkl.gz
18
  *.pem
19
  *.json.out
 
20
  docs/*
21
  build/*
22
  dist/*
@@ -25,4 +26,5 @@ db/*
25
  experiments/*
26
  model/*
27
  build_deps/*
28
- build_deps_old/*
 
 
17
  *.pkl.gz
18
  *.pem
19
  *.json.out
20
+ *.env
21
  docs/*
22
  build/*
23
  dist/*
 
26
  experiments/*
27
  model/*
28
  build_deps/*
29
+ build_deps_old/*
30
+ AWS errors 19-03.txt
app.py CHANGED
@@ -157,6 +157,7 @@ depends on factors such as the type of documents or queries. Information taken f
157
  in_join_column = gr.Dropdown(label="Column to join in new data frame")
158
  search_df_join_column = gr.Dropdown(label="Column to join in search data frame")
159
  with gr.Accordion(label = "AWS data access", open = False):
 
160
  with gr.Row():
161
  in_aws_keyword_file = gr.Dropdown(label="Choose keyword file to load from AWS (only valid for API Gateway app)", choices=["None", "Bioasq - Biomedical example data - keyword search"])
162
  load_aws_keyword_data_button = gr.Button(value="Load keyword data from AWS", variant="secondary")
@@ -176,8 +177,8 @@ depends on factors such as the type of documents or queries. Information taken f
176
  in_no_search_results_button.click(display_info, inputs=in_no_search_info)
177
 
178
  ### Loading AWS data ###
179
- load_aws_keyword_data_button.click(fn=load_data_from_aws, inputs=[in_aws_keyword_file], outputs=[in_bm25_file, out_aws_data_message])
180
- load_aws_semantic_data_button.click(fn=load_data_from_aws, inputs=[in_aws_semantic_file], outputs=[in_semantic_file, out_aws_data_message])
181
 
182
 
183
  ### BM25 SEARCH ###
 
157
  in_join_column = gr.Dropdown(label="Column to join in new data frame")
158
  search_df_join_column = gr.Dropdown(label="Column to join in search data frame")
159
  with gr.Accordion(label = "AWS data access", open = False):
160
+ aws_password_box = gr.Textbox(label="Password for AWS data access (ask Data team if you don't have this)")
161
  with gr.Row():
162
  in_aws_keyword_file = gr.Dropdown(label="Choose keyword file to load from AWS (only valid for API Gateway app)", choices=["None", "Bioasq - Biomedical example data - keyword search"])
163
  load_aws_keyword_data_button = gr.Button(value="Load keyword data from AWS", variant="secondary")
 
177
  in_no_search_results_button.click(display_info, inputs=in_no_search_info)
178
 
179
  ### Loading AWS data ###
180
+ load_aws_keyword_data_button.click(fn=load_data_from_aws, inputs=[in_aws_keyword_file, aws_password_box], outputs=[in_bm25_file, out_aws_data_message])
181
+ load_aws_semantic_data_button.click(fn=load_data_from_aws, inputs=[in_aws_semantic_file, aws_password_box], outputs=[in_semantic_file, out_aws_data_message])
182
 
183
 
184
  ### BM25 SEARCH ###
search_funcs/aws_functions.py CHANGED
@@ -106,60 +106,63 @@ def download_files_from_s3(bucket_name, s3_folder, local_folder, filenames):
106
 
107
 
108
 
109
- def load_data_from_aws(in_aws_keyword_file, bucket_name=bucket_name):
110
 
111
  temp_dir = tempfile.mkdtemp()
112
  local_keyword_stub = temp_dir + '/keyword/'
113
  local_semantic_stub = temp_dir + '/semantic/'
114
 
115
  files = []
 
 
116
 
117
- if "Bioasq - Biomedical example data" in in_aws_keyword_file:
118
 
119
- s3_folder_stub = 'example_data/bioasq/latest/'
120
-
121
- if 'keyword' in in_aws_keyword_file:
122
- s3_folder_stub = s3_folder_stub + 'keyword/'
123
- local_folder_path = local_keyword_stub
124
 
125
- if 'semantic' in in_aws_keyword_file:
126
- s3_folder_stub = s3_folder_stub + 'semantic/'
127
- local_folder_path = local_semantic_stub
128
-
129
 
130
- # Check if folder exists
131
- if not os.path.exists(local_folder_path):
132
- print(f"Folder {local_folder_path} does not exist! Making folder.")
133
 
134
- os.mkdir(local_folder_path)
135
 
136
- # Check if folder is empty
137
- if len(os.listdir(local_folder_path)) == 0:
138
- print(f"Folder {local_folder_path} is empty")
139
 
140
- if 'keyword' in in_aws_keyword_file:
141
- # Download keyword folder
142
- download_files_from_s3(bucket_name, s3_folder_stub, local_folder_path, filenames='*')
143
 
144
- if 'semantic' in in_aws_keyword_file:
145
- # Download keyword folder
146
- download_files_from_s3(bucket_name, s3_folder_stub, local_folder_path, filenames=['mini-bioasq-0000_cleaned_bge_embedding_compress.npz', 'mini-bioasq-0000_cleaned_prepared_docs.pkl.gz'])
147
 
148
- print("AWS data downloaded")
149
 
150
- else:
151
- print(f"Folder {local_folder_path} is not empty")
152
 
153
- #files = os.listdir(local_folder_stub)
154
- #print(files)
155
 
156
- files = [os.path.join(local_folder_path, f) for f in os.listdir(local_folder_path) if os.path.isfile(os.path.join(local_folder_path, f))]
157
 
158
- out_message = "Data successfully loaded from AWS"
159
- print(out_message)
160
 
 
 
 
161
  else:
162
- out_message = "Data not loaded from AWS"
163
  print(out_message)
164
 
165
  return files, out_message
 
106
 
107
 
108
 
109
+ def load_data_from_aws(in_aws_keyword_file, aws_password="", bucket_name=bucket_name):
110
 
111
  temp_dir = tempfile.mkdtemp()
112
  local_keyword_stub = temp_dir + '/keyword/'
113
  local_semantic_stub = temp_dir + '/semantic/'
114
 
115
  files = []
116
+ if aws_password:
117
+ if "Bioasq - Biomedical example data" in in_aws_keyword_file and aws_password == os.environ['BIOASQ_PASSWORD']:
118
 
119
+ s3_folder_stub = 'example_data/bioasq/latest/'
120
 
121
+ if 'keyword' in in_aws_keyword_file:
122
+ s3_folder_stub = s3_folder_stub + 'keyword/'
123
+ local_folder_path = local_keyword_stub
 
 
124
 
125
+ if 'semantic' in in_aws_keyword_file:
126
+ s3_folder_stub = s3_folder_stub + 'semantic/'
127
+ local_folder_path = local_semantic_stub
128
+
129
 
130
+ # Check if folder exists
131
+ if not os.path.exists(local_folder_path):
132
+ print(f"Folder {local_folder_path} does not exist! Making folder.")
133
 
134
+ os.mkdir(local_folder_path)
135
 
136
+ # Check if folder is empty
137
+ if len(os.listdir(local_folder_path)) == 0:
138
+ print(f"Folder {local_folder_path} is empty")
139
 
140
+ if 'keyword' in in_aws_keyword_file:
141
+ # Download keyword folder
142
+ download_files_from_s3(bucket_name, s3_folder_stub, local_folder_path, filenames='*')
143
 
144
+ if 'semantic' in in_aws_keyword_file:
145
+ # Download the listed semantic files
146
+ download_files_from_s3(bucket_name, s3_folder_stub, local_folder_path, filenames=['mini-bioasq-0000_cleaned_bge_embedding_compress.npz', 'mini-bioasq-0000_cleaned_prepared_docs.pkl.gz'])
147
 
148
+ print("AWS data downloaded")
149
 
150
+ else:
151
+ print(f"Folder {local_folder_path} is not empty")
152
 
153
+ #files = os.listdir(local_folder_stub)
154
+ #print(files)
155
 
156
+ files = [os.path.join(local_folder_path, f) for f in os.listdir(local_folder_path) if os.path.isfile(os.path.join(local_folder_path, f))]
157
 
158
+ out_message = "Data successfully loaded from AWS"
159
+ print(out_message)
160
 
161
+ else:
162
+ out_message = "Data not loaded from AWS"
163
+ print(out_message)
164
  else:
165
+ out_message = "No password provided. Please ask the data team for access if you need this."
166
  print(out_message)
167
 
168
  return files, out_message