Spaces:
Sleeping
Sleeping
clean up barrier analysis + new metrics
Browse files- modules/utils.py +15 -16
modules/utils.py
CHANGED
@@ -17,11 +17,18 @@ def create_excel():
|
|
17 |
wb = Workbook()
|
18 |
sheet = wb.active
|
19 |
sheet.title = "template"
|
20 |
-
columns = ['id',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
sheet.append(columns) # Appending columns to the first row
|
22 |
-
|
23 |
# formatting
|
24 |
-
for c in sheet['A1:
|
25 |
c.fill = PatternFill('solid', fgColor = 'bad8e1')
|
26 |
c.font = Font(bold=True)
|
27 |
|
@@ -74,7 +81,6 @@ def extract_predicted_labels(output, ordinal_selection=1, threshold=0.5):
|
|
74 |
# Function to call model and run inference for varying classification tasks/models
|
75 |
def predict_category(df, model_name, progress_bar, repo, profile, multilabel=False):
|
76 |
device = torch.device("cuda") if torch.cuda.is_available() else (torch.device("mps") if torch.has_mps else torch.device("cpu"))
|
77 |
-
# model_names_sf = ['scope_lab1', 'scope_lab2', 'tech_lab1', 'tech_lab3', 'fin_lab2','bar_lab2']
|
78 |
model_names_sf = ['scope_lab1', 'scope_lab2', 'tech_lab1', 'tech_lab3', 'fin_lab2']
|
79 |
if model_name in model_names_sf:
|
80 |
col_name = re.sub(r'_(.*)', r'_txt', model_name)
|
@@ -118,9 +124,6 @@ def process_data(uploaded_file, sens_level):
|
|
118 |
'contributions_private_sector':'cont_private',
|
119 |
'contributions_other':'cont_other'}, inplace=True)
|
120 |
|
121 |
-
# df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt', 'bar_txt'])
|
122 |
-
# df.fillna('', inplace=True)
|
123 |
-
# df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']] = df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']].applymap(clean_text)
|
124 |
|
125 |
df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt','maf_funding','cont_public','cont_private','cont_other'])
|
126 |
df.fillna('', inplace=True)
|
@@ -184,7 +187,6 @@ def process_data(uploaded_file, sens_level):
|
|
184 |
|
185 |
st.write(f'Processing complete. Total time: {elapsed_time:.1f} seconds')
|
186 |
|
187 |
-
|
188 |
# Convert funding columns to numeric, replacing any non-numeric values with NaN
|
189 |
df['maf_funding'] = pd.to_numeric(df['maf_funding'], errors='coerce')
|
190 |
df['cont_public'] = pd.to_numeric(df['cont_public'], errors='coerce')
|
@@ -194,23 +196,20 @@ def process_data(uploaded_file, sens_level):
|
|
194 |
# Fill any NaN values with 0
|
195 |
df[['maf_funding', 'cont_public', 'cont_private', 'cont_other']] = df[['maf_funding', 'cont_public', 'cont_private', 'cont_other']].fillna(0)
|
196 |
|
197 |
-
|
198 |
df['lev_total'] = df.apply(lambda x: x['cont_public'] + x['cont_private'] + x['cont_other'], axis=1)
|
199 |
-
|
200 |
-
df['lev_gt_maf'] = df.apply(lambda x: 'True' if x['lev_total'] > x['maf_funding'] else 'False', axis=1)
|
201 |
-
|
202 |
df['lev_gt_0'] = (df['lev_total'] > 0).astype(int)
|
203 |
-
|
204 |
# Calculate leverage as percentage of MAF funding
|
205 |
df['lev_maf_%'] = df.apply(lambda x: round(x['lev_total']/x['maf_funding']*100,2) if x['maf_funding'] != 0 else 0, axis=1)
|
206 |
-
|
207 |
# Create normalized leverage scale (0-1) where 300% leverage = 1
|
208 |
df['lev_maf_scale'] = df['lev_maf_%'].apply(lambda x: min(x/300, 1) if x > 0 else 0)
|
209 |
|
210 |
# Further data processing and actions
|
211 |
sector_classes = ['Energy','Transport','Industries']
|
212 |
-
|
213 |
-
df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3']+ x['lev_gt_0'])/9*10,0), axis=1)
|
214 |
df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] <4 or x['LANG'] != 'en-US' or x['ADAPMIT'] == 'Adaptation' or not ((x['SECTOR1'] in sector_classes) or (x['SECTOR2'] in sector_classes))) else 'REVIEW', axis=1)
|
215 |
|
216 |
return df
|
|
|
17 |
wb = Workbook()
|
18 |
sheet = wb.active
|
19 |
sheet.title = "template"
|
20 |
+
columns = ['id',
|
21 |
+
'scope',
|
22 |
+
'technology',
|
23 |
+
'financial',
|
24 |
+
'maf_funding_requested',
|
25 |
+
'contributions_public_sector',
|
26 |
+
'contributions_private_sector',
|
27 |
+
'contributions_other']
|
28 |
sheet.append(columns) # Appending columns to the first row
|
29 |
+
|
30 |
# formatting
|
31 |
+
for c in sheet['A1:H4'][0]:
|
32 |
c.fill = PatternFill('solid', fgColor = 'bad8e1')
|
33 |
c.font = Font(bold=True)
|
34 |
|
|
|
81 |
# Function to call model and run inference for varying classification tasks/models
|
82 |
def predict_category(df, model_name, progress_bar, repo, profile, multilabel=False):
|
83 |
device = torch.device("cuda") if torch.cuda.is_available() else (torch.device("mps") if torch.has_mps else torch.device("cpu"))
|
|
|
84 |
model_names_sf = ['scope_lab1', 'scope_lab2', 'tech_lab1', 'tech_lab3', 'fin_lab2']
|
85 |
if model_name in model_names_sf:
|
86 |
col_name = re.sub(r'_(.*)', r'_txt', model_name)
|
|
|
124 |
'contributions_private_sector':'cont_private',
|
125 |
'contributions_other':'cont_other'}, inplace=True)
|
126 |
|
|
|
|
|
|
|
127 |
|
128 |
df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt','maf_funding','cont_public','cont_private','cont_other'])
|
129 |
df.fillna('', inplace=True)
|
|
|
187 |
|
188 |
st.write(f'Processing complete. Total time: {elapsed_time:.1f} seconds')
|
189 |
|
|
|
190 |
# Convert funding columns to numeric, replacing any non-numeric values with NaN
|
191 |
df['maf_funding'] = pd.to_numeric(df['maf_funding'], errors='coerce')
|
192 |
df['cont_public'] = pd.to_numeric(df['cont_public'], errors='coerce')
|
|
|
196 |
# Fill any NaN values with 0
|
197 |
df[['maf_funding', 'cont_public', 'cont_private', 'cont_other']] = df[['maf_funding', 'cont_public', 'cont_private', 'cont_other']].fillna(0)
|
198 |
|
199 |
+
# Get total of all leverage
|
200 |
df['lev_total'] = df.apply(lambda x: x['cont_public'] + x['cont_private'] + x['cont_other'], axis=1)
|
201 |
+
# Leverage > MAF request
|
202 |
+
# df['lev_gt_maf'] = df.apply(lambda x: 'True' if x['lev_total'] > x['maf_funding'] else 'False', axis=1) # not used
|
203 |
+
# Leverage > 0 ?
|
204 |
df['lev_gt_0'] = (df['lev_total'] > 0).astype(int)
|
|
|
205 |
# Calculate leverage as percentage of MAF funding
|
206 |
df['lev_maf_%'] = df.apply(lambda x: round(x['lev_total']/x['maf_funding']*100,2) if x['maf_funding'] != 0 else 0, axis=1)
|
|
|
207 |
# Create normalized leverage scale (0-1) where 300% leverage = 1
|
208 |
df['lev_maf_scale'] = df['lev_maf_%'].apply(lambda x: min(x/300, 1) if x > 0 else 0)
|
209 |
|
210 |
# Further data processing and actions
|
211 |
sector_classes = ['Energy','Transport','Industries']
|
212 |
+
# Composite prediction score: weighted sum of the classifier label columns plus
# the two leverage terms, divided by 10 and rescaled to a 0-10 range
# (presumably each *_lab column is 0/1, giving a maximum raw sum of 10 — confirm).
# lev_maf_scale must be read from the current row (x), not from the whole frame:
# df['lev_maf_scale'] is an entire column, which would make the lambda return a
# Series per row instead of a single scalar score.
df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3'] + x['lev_gt_0'] + x['lev_maf_scale'])/10*10, 0), axis=1)
|
|
|
213 |
df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] <4 or x['LANG'] != 'en-US' or x['ADAPMIT'] == 'Adaptation' or not ((x['SECTOR1'] in sector_classes) or (x['SECTOR2'] in sector_classes))) else 'REVIEW', axis=1)
|
214 |
|
215 |
return df
|