ppsingh committed on
Commit
8024e2c
·
1 Parent(s): acf37a0

Update utils/ghg_classifier.py

Browse files
Files changed (1) hide show
  1. utils/ghg_classifier.py +10 -6
utils/ghg_classifier.py CHANGED
@@ -10,10 +10,9 @@ from transformers import pipeline
10
 
11
  # Labels dictionary ###
12
  _lab_dict = {
13
- 'LABEL_0':'NEGATIVE',
14
- 'LABEL_1':'NOT GHG',
15
- 'LABEL_2':'GHG',
16
- 'NA':'NA',
17
  }
18
 
19
 
@@ -74,9 +73,12 @@ def ghg_classification(haystack_doc:pd.DataFrame,
74
  """
75
  logging.info("Working on GHG Extraction")
76
  haystack_doc['GHG Label'] = 'NA'
77
- haystack_doc['GHG Score'] = 'NA'
 
78
  temp = haystack_doc[haystack_doc['Target Label'] == 'TARGET']
 
79
  df = haystack_doc[haystack_doc['Target Label'] == 'NEGATIVE']
 
80
 
81
  if not classifier_model:
82
  classifier_model = st.session_state['ghg_classifier']
@@ -84,9 +86,11 @@ def ghg_classification(haystack_doc:pd.DataFrame,
84
  results = classifier_model(list(temp.text))
85
  labels_= [(l[0]['label'],l[0]['score']) for l in results]
86
  temp['GHG Label'],temp['GHG Score'] = zip(*labels_)
 
 
87
  df = pd.concat([df,temp])
88
- df['GHG Label'] = df['GHG Label'].apply(lambda i: _lab_dict[i])
89
  df = df.reset_index(drop =True)
 
90
  df.index += 1
91
 
92
  return df
 
10
 
11
  # Labels dictionary ###
12
  _lab_dict = {
13
+ 'GHG':'GHG',
14
+ 'NOT_GHG':'NON GHG TRANSPORT TARGET',
15
+ 'NEGATIVE':'OTHERS',
 
16
  }
17
 
18
 
 
73
  """
74
  logging.info("Working on GHG Extraction")
75
  haystack_doc['GHG Label'] = 'NA'
76
+ haystack_doc['GHG Score'] = 0.0
77
+ # applying GHG Identifier to only 'Target' paragraphs.
78
  temp = haystack_doc[haystack_doc['Target Label'] == 'TARGET']
79
+ temp = temp.reset_index(drop=True)
80
  df = haystack_doc[haystack_doc['Target Label'] == 'NEGATIVE']
81
+ df = df.reset_index(drop=True)
82
 
83
  if not classifier_model:
84
  classifier_model = st.session_state['ghg_classifier']
 
86
  results = classifier_model(list(temp.text))
87
  labels_= [(l[0]['label'],l[0]['score']) for l in results]
88
  temp['GHG Label'],temp['GHG Score'] = zip(*labels_)
89
+ temp['GHG Label'] = temp['GHG Label'].apply(lambda x: _lab_dict[x])
90
+ # merge back Target and non-Target dataframe
91
  df = pd.concat([df,temp])
 
92
  df = df.reset_index(drop =True)
93
+ df['GHG Score'] = df['GHG Score'].round(2)
94
  df.index += 1
95
 
96
  return df