mtyrrell committed on
Commit
ef2deda
·
1 Parent(s): 6829fd5

clean up barrier analysis + new metrics

Browse files
Files changed (1) hide show
  1. modules/utils.py +15 -16
modules/utils.py CHANGED
@@ -17,11 +17,18 @@ def create_excel():
17
  wb = Workbook()
18
  sheet = wb.active
19
  sheet.title = "template"
20
- columns = ['id','scope','technology','financial','barrier']
 
 
 
 
 
 
 
21
  sheet.append(columns) # Appending columns to the first row
22
-
23
  # formatting
24
- for c in sheet['A1:E4'][0]:
25
  c.fill = PatternFill('solid', fgColor = 'bad8e1')
26
  c.font = Font(bold=True)
27
 
@@ -74,7 +81,6 @@ def extract_predicted_labels(output, ordinal_selection=1, threshold=0.5):
74
  # Function to call model and run inference for varying classification tasks/models
75
  def predict_category(df, model_name, progress_bar, repo, profile, multilabel=False):
76
  device = torch.device("cuda") if torch.cuda.is_available() else (torch.device("mps") if torch.has_mps else torch.device("cpu"))
77
- # model_names_sf = ['scope_lab1', 'scope_lab2', 'tech_lab1', 'tech_lab3', 'fin_lab2','bar_lab2']
78
  model_names_sf = ['scope_lab1', 'scope_lab2', 'tech_lab1', 'tech_lab3', 'fin_lab2']
79
  if model_name in model_names_sf:
80
  col_name = re.sub(r'_(.*)', r'_txt', model_name)
@@ -118,9 +124,6 @@ def process_data(uploaded_file, sens_level):
118
  'contributions_private_sector':'cont_private',
119
  'contributions_other':'cont_other'}, inplace=True)
120
 
121
- # df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt', 'bar_txt'])
122
- # df.fillna('', inplace=True)
123
- # df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']] = df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']].applymap(clean_text)
124
 
125
  df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt','maf_funding','cont_public','cont_private','cont_other'])
126
  df.fillna('', inplace=True)
@@ -184,7 +187,6 @@ def process_data(uploaded_file, sens_level):
184
 
185
  st.write(f'Processing complete. Total time: {elapsed_time:.1f} seconds')
186
 
187
-
188
  # Convert funding columns to numeric, replacing any non-numeric values with NaN
189
  df['maf_funding'] = pd.to_numeric(df['maf_funding'], errors='coerce')
190
  df['cont_public'] = pd.to_numeric(df['cont_public'], errors='coerce')
@@ -194,23 +196,20 @@ def process_data(uploaded_file, sens_level):
194
  # Fill any NaN values with 0
195
  df[['maf_funding', 'cont_public', 'cont_private', 'cont_other']] = df[['maf_funding', 'cont_public', 'cont_private', 'cont_other']].fillna(0)
196
 
197
-
198
  df['lev_total'] = df.apply(lambda x: x['cont_public'] + x['cont_private'] + x['cont_other'], axis=1)
199
-
200
- df['lev_gt_maf'] = df.apply(lambda x: 'True' if x['lev_total'] > x['maf_funding'] else 'False', axis=1)
201
-
202
  df['lev_gt_0'] = (df['lev_total'] > 0).astype(int)
203
-
204
  # Calculate leverage as percentage of MAF funding
205
  df['lev_maf_%'] = df.apply(lambda x: round(x['lev_total']/x['maf_funding']*100,2) if x['maf_funding'] != 0 else 0, axis=1)
206
-
207
  # Create normalized leverage scale (0-1) where 300% leverage = 1
208
  df['lev_maf_scale'] = df['lev_maf_%'].apply(lambda x: min(x/300, 1) if x > 0 else 0)
209
 
210
  # Further data processing and actions
211
  sector_classes = ['Energy','Transport','Industries']
212
- # df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3'] + x['bar_lab2'])/9*10,0), axis=1)
213
- df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3']+ x['lev_gt_0'])/9*10,0), axis=1)
214
  df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] <4 or x['LANG'] != 'en-US' or x['ADAPMIT'] == 'Adaptation' or not ((x['SECTOR1'] in sector_classes) or (x['SECTOR2'] in sector_classes))) else 'REVIEW', axis=1)
215
 
216
  return df
 
17
  wb = Workbook()
18
  sheet = wb.active
19
  sheet.title = "template"
20
+ columns = ['id',
21
+ 'scope',
22
+ 'technology',
23
+ 'financial',
24
+ 'maf_funding_requested',
25
+ 'contributions_public_sector',
26
+ 'contributions_private_sector',
27
+ 'contributions_other']
28
  sheet.append(columns) # Appending columns to the first row
29
+
30
  # formatting
31
+ for c in sheet['A1:H4'][0]:
32
  c.fill = PatternFill('solid', fgColor = 'bad8e1')
33
  c.font = Font(bold=True)
34
 
 
81
  # Function to call model and run inference for varying classification tasks/models
82
  def predict_category(df, model_name, progress_bar, repo, profile, multilabel=False):
83
  device = torch.device("cuda") if torch.cuda.is_available() else (torch.device("mps") if torch.has_mps else torch.device("cpu"))
 
84
  model_names_sf = ['scope_lab1', 'scope_lab2', 'tech_lab1', 'tech_lab3', 'fin_lab2']
85
  if model_name in model_names_sf:
86
  col_name = re.sub(r'_(.*)', r'_txt', model_name)
 
124
  'contributions_private_sector':'cont_private',
125
  'contributions_other':'cont_other'}, inplace=True)
126
 
 
 
 
127
 
128
  df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt','maf_funding','cont_public','cont_private','cont_other'])
129
  df.fillna('', inplace=True)
 
187
 
188
  st.write(f'Processing complete. Total time: {elapsed_time:.1f} seconds')
189
 
 
190
  # Convert funding columns to numeric, replacing any non-numeric values with NaN
191
  df['maf_funding'] = pd.to_numeric(df['maf_funding'], errors='coerce')
192
  df['cont_public'] = pd.to_numeric(df['cont_public'], errors='coerce')
 
196
  # Fill any NaN values with 0
197
  df[['maf_funding', 'cont_public', 'cont_private', 'cont_other']] = df[['maf_funding', 'cont_public', 'cont_private', 'cont_other']].fillna(0)
198
 
199
+ # Get total of all leverage
200
  df['lev_total'] = df.apply(lambda x: x['cont_public'] + x['cont_private'] + x['cont_other'], axis=1)
201
+ # Leverage > MAF request
202
+ # df['lev_gt_maf'] = df.apply(lambda x: 'True' if x['lev_total'] > x['maf_funding'] else 'False', axis=1) # not used
203
+ # Leverage > 0 ?
204
  df['lev_gt_0'] = (df['lev_total'] > 0).astype(int)
 
205
  # Calculate leverage as percentage of MAF funding
206
  df['lev_maf_%'] = df.apply(lambda x: round(x['lev_total']/x['maf_funding']*100,2) if x['maf_funding'] != 0 else 0, axis=1)
 
207
  # Create normalized leverage scale (0-1) where 300% leverage = 1
208
  df['lev_maf_scale'] = df['lev_maf_%'].apply(lambda x: min(x/300, 1) if x > 0 else 0)
209
 
210
  # Further data processing and actions
211
  sector_classes = ['Energy','Transport','Industries']
212
+ df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3']+ x['lev_gt_0']+df['lev_maf_scale'])/10*10,0), axis=1)
 
213
  df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] <4 or x['LANG'] != 'en-US' or x['ADAPMIT'] == 'Adaptation' or not ((x['SECTOR1'] in sector_classes) or (x['SECTOR2'] in sector_classes))) else 'REVIEW', axis=1)
214
 
215
  return df