mtyrrell committed on
Commit
3f5be66
·
1 Parent(s): d256f72

start analysis button, bug fixes, authentication

Browse files
Files changed (4) hide show
  1. .gitignore +4 -1
  2. app.py +46 -21
  3. logs/app.log +0 -103
  4. modules/utils.py +7 -2
.gitignore CHANGED
@@ -4,4 +4,7 @@
4
  *.xlsx
5
  /testing/
6
  /modules/__pycache__/
7
- /logs/
 
 
 
 
4
  *.xlsx
5
  /testing/
6
  /modules/__pycache__/
7
+ /logs/
8
+ app.log
9
+ logs
10
+ logs/
app.py CHANGED
@@ -34,7 +34,7 @@ load_dotenv()
34
  def main():
35
  # Temporarily set authentication to True for testing
36
  if 'authenticated' not in st.session_state:
37
- st.session_state['authenticated'] = True
38
 
39
  if st.session_state['authenticated']:
40
  # Remove login success message for testing
@@ -88,7 +88,8 @@ def main():
88
  applications being filtered out. At the same time, this also \
89
  increases the probability of false negatives (FNs). The rate of \
90
  FNs at the lowest setting is approximately 6 percent, and \
91
- approaches 13 percent at the highest setting. ',
 
92
  options = list(sens_options.keys()),
93
  horizontal = False)
94
 
@@ -108,54 +109,78 @@ def main():
108
 
109
  uploaded_file = st.file_uploader("Select a file containing MAF application pre-filtering data (see instructions in the sidebar)")
110
 
111
- if uploaded_file is not None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  try:
113
  logger.info(f"File uploaded: {uploaded_file.name}")
114
 
115
  if not st.session_state['data_processed']:
116
- logger.info("Starting data processing")
117
  try:
118
  st.session_state['df'] = process_data(uploaded_file, sens_level)
119
  logger.info("Data processing completed successfully")
120
  st.session_state['data_processed'] = True
121
  except Exception as e:
122
  logger.error(f"Error in process_data: {str(e)}")
123
- raise # Re-raise the exception to be caught by outer try-except
124
-
125
  df = st.session_state['df']
 
 
126
 
127
- # Get the current date
128
  current_datetime = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
129
  output_filename = f'processed_applications_{current_datetime}.csv'
130
-
131
- # Convert DataFrame to CSV string buffer instead of file
132
  csv_buffer = df.to_csv(index=False).encode()
133
  logger.info("CSV buffer created successfully")
134
 
 
 
 
 
 
135
  st.download_button(
136
  label="Download data as CSV",
137
  data=csv_buffer,
138
  file_name=output_filename,
139
  mime='text/csv',
 
140
  )
141
 
142
  except Exception as e:
143
  logger.error(f"Error processing file: {str(e)}")
144
  st.error("Failed to process the file. Please ensure your column names match the template file.")
 
 
 
145
 
146
 
147
  # Comment out or remove the else block containing login form
148
- # else:
149
- # username = st.text_input("Username")
150
- # password = st.text_input("Password", type="password")
151
- # if st.button("Login"):
152
- # if validate_login(username, password):
153
- # st.session_state['authenticated'] = True
154
- # st.experimental_rerun()
155
- # else:
156
- # st.error("Incorrect username or password")
157
-
158
-
159
- # Run the main function
160
  main()
161
 
 
34
  def main():
35
  # Temporarily set authentication to True for testing
36
  if 'authenticated' not in st.session_state:
37
+ st.session_state['authenticated'] = False
38
 
39
  if st.session_state['authenticated']:
40
  # Remove login success message for testing
 
88
  applications being filtered out. At the same time, this also \
89
  increases the probability of false negatives (FNs). The rate of \
90
  FNs at the lowest setting is approximately 6 percent, and \
91
+ approaches 13 percent at the highest setting. \
92
+ NOTE: changing this setting does not affect the raw data in the CSV output file (only the REVIEW/REJECT labels)',
93
  options = list(sens_options.keys()),
94
  horizontal = False)
95
 
 
109
 
110
  uploaded_file = st.file_uploader("Select a file containing MAF application pre-filtering data (see instructions in the sidebar)")
111
 
112
+ # Add session state variables if they don't exist
113
+ if 'show_button' not in st.session_state:
114
+ st.session_state['show_button'] = True
115
+ if 'processing' not in st.session_state:
116
+ st.session_state['processing'] = False
117
+ if 'data_processed' not in st.session_state:
118
+ st.session_state['data_processed'] = False
119
+
120
+ # Only show the button if show_button is True and file is uploaded and not processing
121
+ if uploaded_file is not None and st.session_state['show_button'] and not st.session_state['processing']:
122
+ if st.button("Start Analysis", key="start_analysis"):
123
+ st.session_state['show_button'] = False
124
+ st.session_state['processing'] = True
125
+ st.rerun()
126
+
127
+ # If we're processing, show the processing logic
128
+ if st.session_state['processing']:
129
  try:
130
  logger.info(f"File uploaded: {uploaded_file.name}")
131
 
132
  if not st.session_state['data_processed']:
133
+ logger.info("Starting data processing...")
134
  try:
135
  st.session_state['df'] = process_data(uploaded_file, sens_level)
136
  logger.info("Data processing completed successfully")
137
  st.session_state['data_processed'] = True
138
  except Exception as e:
139
  logger.error(f"Error in process_data: {str(e)}")
140
+ raise
141
+
142
  df = st.session_state['df']
143
+ logger.info(f"DataFrame columns: {list(df.columns)}")
144
+ logger.info(f"DataFrame shape: {df.shape}")
145
 
 
146
  current_datetime = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
147
  output_filename = f'processed_applications_{current_datetime}.csv'
 
 
148
  csv_buffer = df.to_csv(index=False).encode()
149
  logger.info("CSV buffer created successfully")
150
 
151
+ def reset_button_state():
152
+ st.session_state['show_button'] = True
153
+ st.session_state['processing'] = False
154
+ st.session_state['data_processed'] = False
155
+
156
  st.download_button(
157
  label="Download data as CSV",
158
  data=csv_buffer,
159
  file_name=output_filename,
160
  mime='text/csv',
161
+ on_click=reset_button_state
162
  )
163
 
164
  except Exception as e:
165
  logger.error(f"Error processing file: {str(e)}")
166
  st.error("Failed to process the file. Please ensure your column names match the template file.")
167
+ st.session_state['show_button'] = True
168
+ st.session_state['processing'] = False
169
+ st.rerun()
170
 
171
 
172
  # Comment out or remove the else block containing login form
173
+ else:
174
+ username = st.text_input("Username")
175
+ password = st.text_input("Password", type="password")
176
+ if st.button("Login"):
177
+ if validate_login(username, password):
178
+ st.session_state['authenticated'] = True
179
+ st.rerun()
180
+ else:
181
+ st.error("Incorrect username or password")
182
+
183
+
184
+
185
  main()
186
 
logs/app.log DELETED
@@ -1,103 +0,0 @@
1
- 2025-02-04 20:16:29,467 - datasets - INFO - PyTorch version 2.5.1 available.
2
- 2025-02-04 20:16:31,199 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
3
- 2025-02-04 20:16:37,895 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
4
- 2025-02-04 20:16:38,062 - modules.utils - INFO - T1 df import
5
- 2025-02-04 20:16:38,062 - modules.utils - INFO - T2 columns renamed
6
- 2025-02-04 20:16:38,249 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
7
- 2025-02-04 20:16:38,249 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab1
8
- 2025-02-04 20:16:44,645 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
9
- 2025-02-04 20:16:44,645 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab2
10
- 2025-02-04 20:16:49,350 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
11
- 2025-02-04 20:16:49,350 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab1
12
- 2025-02-04 20:16:55,639 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
13
- 2025-02-04 20:16:55,639 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab3
14
- 2025-02-04 20:17:00,538 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
15
- 2025-02-04 20:17:00,539 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_fin_lab2
16
- 2025-02-04 20:21:02,708 - datasets - INFO - PyTorch version 2.5.1 available.
17
- 2025-02-04 20:21:05,106 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
18
- 2025-02-04 20:21:10,682 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
19
- 2025-02-04 20:21:10,849 - modules.utils - INFO - T1 df import
20
- 2025-02-04 20:21:10,850 - modules.utils - INFO - T2 columns renamed
21
- 2025-02-04 20:21:10,994 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
22
- 2025-02-04 20:21:10,994 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab1
23
- 2025-02-04 20:21:17,001 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
24
- 2025-02-04 20:21:17,001 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab2
25
- 2025-02-04 20:21:21,035 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
26
- 2025-02-04 20:21:21,035 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab1
27
- 2025-02-04 20:21:27,247 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
28
- 2025-02-04 20:21:27,247 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab3
29
- 2025-02-04 20:21:31,121 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
30
- 2025-02-04 20:21:31,121 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_fin_lab2
31
- 2025-02-04 20:23:19,122 - datasets - INFO - PyTorch version 2.5.1 available.
32
- 2025-02-04 20:23:21,471 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
33
- 2025-02-04 20:23:26,565 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
34
- 2025-02-04 20:23:26,732 - modules.utils - INFO - T1 df import
35
- 2025-02-04 20:23:26,732 - modules.utils - INFO - T2 columns renamed
36
- 2025-02-04 20:23:26,875 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
37
- 2025-02-04 20:23:26,875 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab1
38
- 2025-02-04 20:23:30,614 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
39
- 2025-02-04 20:23:30,614 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab2
40
- 2025-02-04 20:23:33,378 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
41
- 2025-02-04 20:23:33,378 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab1
42
- 2025-02-04 20:23:37,329 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
43
- 2025-02-04 20:23:37,329 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab3
44
- 2025-02-04 20:23:41,129 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
45
- 2025-02-04 20:23:41,130 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_fin_lab2
46
- 2025-02-04 20:26:27,528 - datasets - INFO - PyTorch version 2.5.1 available.
47
- 2025-02-04 20:26:29,749 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
48
- 2025-02-04 20:26:35,354 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
49
- 2025-02-04 20:26:35,512 - __main__ - INFO - File uploaded: MAF_2024_TEST2.xlsx
50
- 2025-02-04 20:26:35,513 - __main__ - INFO - Starting data processing...
51
- 2025-02-04 20:26:35,519 - modules.utils - INFO - T1 df import
52
- 2025-02-04 20:26:35,520 - modules.utils - INFO - T2 columns renamed
53
- 2025-02-04 20:26:35,658 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
54
- 2025-02-04 20:26:35,658 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab1
55
- 2025-02-04 20:26:39,590 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
56
- 2025-02-04 20:26:39,591 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab2
57
- 2025-02-04 20:26:43,283 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
58
- 2025-02-04 20:26:43,283 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab1
59
- 2025-02-04 20:26:47,454 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
60
- 2025-02-04 20:26:47,455 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab3
61
- 2025-02-04 20:26:50,569 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
62
- 2025-02-04 20:26:50,569 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_fin_lab2
63
- 2025-02-04 20:27:00,137 - __main__ - ERROR - Error in process_data: Cannot set a DataFrame with multiple columns to the single column pred_score
64
- 2025-02-04 20:27:00,138 - __main__ - ERROR - Error processing file: Cannot set a DataFrame with multiple columns to the single column pred_score
65
- 2025-02-04 20:31:19,620 - datasets - INFO - PyTorch version 2.5.1 available.
66
- 2025-02-04 20:31:21,779 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
67
- 2025-02-04 20:31:26,273 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
68
- 2025-02-04 20:31:26,432 - __main__ - INFO - File uploaded: MAF_2024_TEST2.xlsx
69
- 2025-02-04 20:31:26,433 - __main__ - INFO - Starting data processing...
70
- 2025-02-04 20:31:26,439 - modules.utils - INFO - T1 df import
71
- 2025-02-04 20:31:26,440 - modules.utils - INFO - T2 columns renamed
72
- 2025-02-04 20:31:26,563 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
73
- 2025-02-04 20:31:26,563 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab1
74
- 2025-02-04 20:31:30,934 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
75
- 2025-02-04 20:31:30,935 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab2
76
- 2025-02-04 20:31:33,839 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
77
- 2025-02-04 20:31:33,839 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab1
78
- 2025-02-04 20:31:37,942 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
79
- 2025-02-04 20:31:37,942 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab3
80
- 2025-02-04 20:31:41,885 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
81
- 2025-02-04 20:31:41,886 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_fin_lab2
82
- 2025-02-04 20:31:51,715 - __main__ - INFO - Data processing completed successfully
83
- 2025-02-04 20:31:51,715 - __main__ - INFO - DataFrame columns: ['id', 'scope_txt', 'tech_txt', 'fin_txt', 'maf_funding', 'cont_public', 'cont_private', 'cont_other', 'scope_lab1', 'scope_lab2', 'tech_lab1', 'tech_lab3', 'fin_lab2', 'ADAPMIT', 'SECTOR1', 'SECTOR2', 'LANG', 'lev_total', 'lev_gt_0', 'lev_maf_%', 'lev_maf_scale', 'pred_score', 'pred_action']
84
- 2025-02-04 20:31:51,715 - __main__ - INFO - DataFrame shape: (14, 23)
85
- 2025-02-04 20:31:51,718 - __main__ - INFO - CSV buffer created successfully
86
- 2025-02-04 20:31:56,984 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
87
- 2025-02-04 20:31:57,147 - __main__ - INFO - File uploaded: MAF_2024_TEST2.xlsx
88
- 2025-02-04 20:31:57,148 - __main__ - INFO - DataFrame columns: ['id', 'scope_txt', 'tech_txt', 'fin_txt', 'maf_funding', 'cont_public', 'cont_private', 'cont_other', 'scope_lab1', 'scope_lab2', 'tech_lab1', 'tech_lab3', 'fin_lab2', 'ADAPMIT', 'SECTOR1', 'SECTOR2', 'LANG', 'lev_total', 'lev_gt_0', 'lev_maf_%', 'lev_maf_scale', 'pred_score', 'pred_action']
89
- 2025-02-04 20:31:57,148 - __main__ - INFO - DataFrame shape: (14, 23)
90
- 2025-02-04 20:31:57,150 - __main__ - INFO - CSV buffer created successfully
91
- 2025-02-04 20:33:58,763 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
92
- 2025-02-04 20:34:06,910 - huggingface_hub._login - WARNING - Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
93
- 2025-02-04 20:34:07,067 - __main__ - INFO - File uploaded: MAF_2024_FULL.xlsx
94
- 2025-02-04 20:34:07,068 - __main__ - INFO - Starting data processing
95
- 2025-02-04 20:34:07,121 - modules.utils - INFO - data import successful
96
- 2025-02-04 20:34:07,673 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
97
- 2025-02-04 20:34:07,675 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab1
98
- 2025-02-04 20:34:59,028 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
99
- 2025-02-04 20:34:59,028 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_scope_lab2
100
- 2025-02-04 20:35:25,589 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
101
- 2025-02-04 20:35:25,589 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab1
102
- 2025-02-04 20:36:08,932 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: mps
103
- 2025-02-04 20:36:08,933 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: mtyrrell/classifier_SF_tech_lab3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
modules/utils.py CHANGED
@@ -176,9 +176,14 @@ def process_data(uploaded_file, sens_level):
176
  steps_remaining = total_steps - step_count
177
  if step_count > 1:
178
  estimated_time_remaining = (elapsed_time / step_count) * steps_remaining
179
- estimated_time_remaining_text.write(f'Estimated Time Remaining: {estimated_time_remaining:.0f} seconds (step {step_count+1} of 9)')
 
 
 
 
 
180
  else:
181
- estimated_time_remaining_text.write(f'Calculating time remaining... (step {step_count+1} of 9)')
182
 
183
  model_processing_text.empty()
184
 
 
176
  steps_remaining = total_steps - step_count
177
  if step_count > 1:
178
  estimated_time_remaining = (elapsed_time / step_count) * steps_remaining
179
+ estimated_time_remaining_text.markdown(
180
+ f"Elapsed time: {elapsed_time:.1f}s. "
181
+ f"Estimated time remaining: {estimated_time_remaining:.1f}s"
182
+ f" (step {step_count+1} of {len(model_names)})"
183
+ )
184
+ # estimated_time_remaining_text.write(f'Estimated Time Remaining: {estimated_time_remaining:.0f} seconds (step {step_count+1} of 9)')
185
  else:
186
+ estimated_time_remaining_text.write(f'Calculating time remaining... (step {step_count+1} of {len(model_names)})')
187
 
188
  model_processing_text.empty()
189