Spaces:

mtyrrell
/

maf_prefilter_app

Running

App Files Files Community

mtyrrell committed on Feb 4

Commit

ef2deda

1 Parent(s): 6829fd5

clean up barrier analysis + new metrics

Browse files

Files changed (1) hide show

modules/utils.py +15 -16

modules/utils.py CHANGED Viewed

@@ -17,11 +17,18 @@ def create_excel():
     wb = Workbook()
     sheet = wb.active
     sheet.title = "template"
-    columns = ['id','scope','technology','financial','barrier']
     sheet.append(columns)  # Appending columns to the first row
     # formatting
-    for c in sheet['A1:E4'][0]:
         c.fill = PatternFill('solid', fgColor = 'bad8e1')
         c.font = Font(bold=True)
@@ -74,7 +81,6 @@ def extract_predicted_labels(output, ordinal_selection=1, threshold=0.5):
 # Function to call model and run inference for varying classification tasks/models
 def predict_category(df, model_name, progress_bar, repo, profile, multilabel=False):
     device = torch.device("cuda") if torch.cuda.is_available() else (torch.device("mps") if torch.has_mps else torch.device("cpu"))
-    # model_names_sf = ['scope_lab1', 'scope_lab2', 'tech_lab1', 'tech_lab3', 'fin_lab2','bar_lab2']
     model_names_sf = ['scope_lab1', 'scope_lab2', 'tech_lab1', 'tech_lab3', 'fin_lab2']
     if model_name in model_names_sf:
         col_name = re.sub(r'_(.*)', r'_txt', model_name)
@@ -118,9 +124,6 @@ def process_data(uploaded_file, sens_level):
         'contributions_private_sector':'cont_private',
         'contributions_other':'cont_other'}, inplace=True)
-    # df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt', 'bar_txt'])
-    # df.fillna('', inplace=True)
-    # df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']] = df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']].applymap(clean_text)
     df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt','maf_funding','cont_public','cont_private','cont_other'])
     df.fillna('', inplace=True)
@@ -184,7 +187,6 @@ def process_data(uploaded_file, sens_level):
     st.write(f'Processing complete. Total time: {elapsed_time:.1f} seconds')
     # Convert funding columns to numeric, replacing any non-numeric values with NaN
     df['maf_funding'] = pd.to_numeric(df['maf_funding'], errors='coerce')
     df['cont_public'] = pd.to_numeric(df['cont_public'], errors='coerce')
@@ -194,23 +196,20 @@ def process_data(uploaded_file, sens_level):
     # Fill any NaN values with 0
     df[['maf_funding', 'cont_public', 'cont_private', 'cont_other']] = df[['maf_funding', 'cont_public', 'cont_private', 'cont_other']].fillna(0)
     df['lev_total'] = df.apply(lambda x: x['cont_public'] + x['cont_private'] + x['cont_other'], axis=1)
-    df['lev_gt_maf'] = df.apply(lambda x: 'True' if x['lev_total'] > x['maf_funding'] else 'False', axis=1)
     df['lev_gt_0'] = (df['lev_total'] > 0).astype(int)
     # Calculate leverage as percentage of MAF funding
     df['lev_maf_%'] = df.apply(lambda x: round(x['lev_total']/x['maf_funding']*100,2) if x['maf_funding'] != 0 else 0, axis=1)
     # Create normalized leverage scale (0-1) where 300% leverage = 1
     df['lev_maf_scale'] = df['lev_maf_%'].apply(lambda x: min(x/300, 1) if x > 0 else 0)
     # Further data processing and actions
     sector_classes = ['Energy','Transport','Industries']
-    # df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3'] + x['bar_lab2'])/9*10,0), axis=1)
-    df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3']+ x['lev_gt_0'])/9*10,0), axis=1)
     df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] <4 or x['LANG'] != 'en-US' or x['ADAPMIT'] == 'Adaptation' or not ((x['SECTOR1'] in sector_classes) or (x['SECTOR2'] in sector_classes))) else 'REVIEW', axis=1)
     return df

     wb = Workbook()
     sheet = wb.active
     sheet.title = "template"
+    columns = ['id',
+               'scope',
+               'technology',
+               'financial',
+               'maf_funding_requested',
+               'contributions_public_sector',
+               'contributions_private_sector',
+               'contributions_other']
     sheet.append(columns)  # Appending columns to the first row
     # formatting
+    for c in sheet['A1:H4'][0]:
         c.fill = PatternFill('solid', fgColor = 'bad8e1')
         c.font = Font(bold=True)
 # Function to call model and run inference for varying classification tasks/models
 def predict_category(df, model_name, progress_bar, repo, profile, multilabel=False):
     device = torch.device("cuda") if torch.cuda.is_available() else (torch.device("mps") if torch.has_mps else torch.device("cpu"))
     model_names_sf = ['scope_lab1', 'scope_lab2', 'tech_lab1', 'tech_lab3', 'fin_lab2']
     if model_name in model_names_sf:
         col_name = re.sub(r'_(.*)', r'_txt', model_name)
         'contributions_private_sector':'cont_private',
         'contributions_other':'cont_other'}, inplace=True)
     df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt','maf_funding','cont_public','cont_private','cont_other'])
     df.fillna('', inplace=True)
     st.write(f'Processing complete. Total time: {elapsed_time:.1f} seconds')
     # Convert funding columns to numeric, replacing any non-numeric values with NaN
     df['maf_funding'] = pd.to_numeric(df['maf_funding'], errors='coerce')
     df['cont_public'] = pd.to_numeric(df['cont_public'], errors='coerce')
     # Fill any NaN values with 0
     df[['maf_funding', 'cont_public', 'cont_private', 'cont_other']] = df[['maf_funding', 'cont_public', 'cont_private', 'cont_other']].fillna(0)
+    # Get total of all leverage
     df['lev_total'] = df.apply(lambda x: x['cont_public'] + x['cont_private'] + x['cont_other'], axis=1)
+    # Leverage > MAF request
+    # df['lev_gt_maf'] = df.apply(lambda x: 'True' if x['lev_total'] > x['maf_funding'] else 'False', axis=1) # not used
+    # Leverage > 0 ?
     df['lev_gt_0'] = (df['lev_total'] > 0).astype(int)
     # Calculate leverage as percentage of MAF funding
     df['lev_maf_%'] = df.apply(lambda x: round(x['lev_total']/x['maf_funding']*100,2) if x['maf_funding'] != 0 else 0, axis=1)
     # Create normalized leverage scale (0-1) where 300% leverage = 1
     df['lev_maf_scale'] = df['lev_maf_%'].apply(lambda x: min(x/300, 1) if x > 0 else 0)
     # Further data processing and actions
     sector_classes = ['Energy','Transport','Industries']
+    df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3']+ x['lev_gt_0']+df['lev_maf_scale'])/10*10,0), axis=1)
     df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] <4 or x['LANG'] != 'en-US' or x['ADAPMIT'] == 'Adaptation' or not ((x['SECTOR1'] in sector_classes) or (x['SECTOR2'] in sector_classes))) else 'REVIEW', axis=1)
     return df