cpv_3.1_eval_pipeline

Sleeping

App Files Files Community

leavoigt committed on Aug 16, 2023

Commit

4382380

1 Parent(s): 02ce349

Rename appStore/groups.py to appStore/classifier.py

Browse files

Files changed (2) hide show

appStore/classifier.py +116 -0
appStore/groups.py +0 -114

appStore/classifier.py ADDED Viewed

	@@ -0,0 +1,116 @@

+# Add ../utils to the module search path
+import glob, os, sys;
+sys.path.append('../utils')
+from setfit import SetFitModel
+#import needed libraries
+#import seaborn as sns
+#import matplotlib.pyplot as plt
+#import numpy as np
+#import pandas as pd
+#import streamlit as st
+from utils.groups_classifier import load_groupsClassifier, groups_classification
+#import logging
+#logger = logging.getLogger(__name__)
+from utils.config import get_classifier_params
+#from utils.preprocessing import paraLengthCheck
+#from io import BytesIO
+#import xlsxwriter
+#import plotly.express as px
+vg_model = SetFitModel.from_pretrained("leavoigt/vulnerable_groups")
+# Classifier settings; get_classifier_params comes from the utils.config import above
+classifier_identifier = 'group_classification'
+params  = get_classifier_params(classifier_identifier)
+# def app():
+#     ### Main app code ###
+#     with st.container():
+#         # Classify groups
+#         df = group_classification(haystack_doc=df, threshold= params['threshold'])
+# def groups_display():
+#     if  'key1' in st.session_state:
+#         df = st.session_state.key1
+#         df['Action_check']  = df['Policy-Action Label'].apply(lambda x: True if 'Action' in x else False)
+#         hits  = df[df['Action_check'] == True]
+#         # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
+#         range_val = min(5,len(hits))
+#         if range_val !=0:
+#             count_action = len(hits)
+#             st.write("")
+#             st.markdown("###### Top few Action Classified paragraph/text results from list of {} classified paragraphs ######".format(count_action))
+#             st.markdown("""<hr style="height:10px;border:none;color:#097969;background-color:#097969;" /> """, unsafe_allow_html=True)
+#             range_val = min(5,len(hits))
+#             for i in range(range_val):
+#                 # the page number reflects the page that contains the main paragraph
+#                 # according to split limit, the overlapping part can be on a separate page
+#                 st.write('**Result {}** : `page {}`, `Sector: {}`,\
+#                             `Indicators: {}`, `Adapt-Mitig :{}`'\
+#                     .format(i+1,
+#                             hits.iloc[i]['page'], hits.iloc[i]['Sector Label'],
+#                             hits.iloc[i]['Indicator Label'],hits.iloc[i]['Adapt-Mitig Label']))
+#                 st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
+#             hits = hits.reset_index(drop =True)
+#             st.write('----------------')
+#             st.write('Explore the data')
+#             st.write(hits)
+#             df.drop(columns = ['Action_check'],inplace=True)
+#             df_xlsx = to_excel(df)
+#             with st.sidebar:
+#                 st.write('-------------')
+#                 st.download_button(label='📥 Download Result',
+#                             data=df_xlsx ,
+#                             file_name= 'cpu_analysis.xlsx')
+#         else:
+#             st.info("🤔 No Actions found")
+# def groups_display():
+#     if  'key1' in st.session_state:
+#         df = st.session_state.key1
+#         df['Policy_check']  = df['Policy-Action Label'].apply(lambda x: True if 'Policies & Plans' in x else False)
+#         hits  = df[df['Policy_check'] == True]
+#         # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
+#         range_val = min(5,len(hits))
+#         if range_val !=0:
+#             count_policy = len(hits)
+#             st.write("")
+#             st.markdown("###### Top few Policy/Plans Classified paragraph/text results from list of {} classified paragraphs ######".format(count_policy))
+#             st.markdown("""<hr style="height:10px;border:none;color:#097969;background-color:#097969;" /> """, unsafe_allow_html=True)
+#             range_val = min(5,len(hits))
+#             for i in range(range_val):
+#                 # the page number reflects the page that contains the main paragraph
+#                 # according to split limit, the overlapping part can be on a separate page
+#                 st.write('**Result {}** : `page {}`, `Sector: {}`,\
+#                             `Indicators: {}`, `Adapt-Mitig :{}`'\
+#                     .format(i+1,
+#                             hits.iloc[i]['page'], hits.iloc[i]['Sector Label'],
+#                             hits.iloc[i]['Indicator Label'],hits.iloc[i]['Adapt-Mitig Label']))
+#                 st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
+#             hits = hits.reset_index(drop =True)
+#             st.write('----------------')
+#             st.write('Explore the data')
+#             st.write(hits)
+#             df.drop(columns = ['Policy_check'],inplace=True)
+#             df_xlsx = to_excel(df)
+#             with st.sidebar:
+#                 st.write('-------------')
+#                 st.download_button(label='📥 Download Result',
+#                             data=df_xlsx ,
+#                             file_name= 'vulnerable_groups.xlsx')
+#         else:
+#             st.info("🤔 No Groups found")

appStore/groups.py DELETED Viewed

@@ -1,114 +0,0 @@
-# set path
-import glob, os, sys;
-sys.path.append('../utils')
-#import needed libraries
-import seaborn as sns
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import streamlit as st
-from utils.groups_classifier import load_groupsClassifier, groups_classification
-import logging
-logger = logging.getLogger(__name__)
-from utils.config import get_classifier_params
-from utils.preprocessing import paraLengthCheck
-from io import BytesIO
-import xlsxwriter
-import plotly.express as px
-# Declare all the necessary variables
-classifier_identifier = 'group_classification'
-params  = get_classifier_params(classifier_identifier)
-def app():
-    ### Main app code ###
-    with st.container():
-        # Classify groups
-        df = group_classification(haystack_doc=df, threshold= params['threshold'])
-def groups_display():
-    if  'key1' in st.session_state:
-        df = st.session_state.key1
-        df['Action_check']  = df['Policy-Action Label'].apply(lambda x: True if 'Action' in x else False)
-        hits  = df[df['Action_check'] == True]
-        # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
-        range_val = min(5,len(hits))
-        if range_val !=0:
-            count_action = len(hits)
-            st.write("")
-            st.markdown("###### Top few Action Classified paragraph/text results from list of {} classified paragraphs ######".format(count_action))
-            st.markdown("""<hr style="height:10px;border:none;color:#097969;background-color:#097969;" /> """, unsafe_allow_html=True)
-            range_val = min(5,len(hits))
-            for i in range(range_val):
-                # the page number reflects the page that contains the main paragraph
-                # according to split limit, the overlapping part can be on a separate page
-                st.write('**Result {}** : `page {}`, `Sector: {}`,\
-                            `Indicators: {}`, `Adapt-Mitig :{}`'\
-                    .format(i+1,
-                            hits.iloc[i]['page'], hits.iloc[i]['Sector Label'],
-                            hits.iloc[i]['Indicator Label'],hits.iloc[i]['Adapt-Mitig Label']))
-                st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
-            hits = hits.reset_index(drop =True)
-            st.write('----------------')
-            st.write('Explore the data')
-            st.write(hits)
-            df.drop(columns = ['Action_check'],inplace=True)
-            df_xlsx = to_excel(df)
-            with st.sidebar:
-                st.write('-------------')
-                st.download_button(label='📥 Download Result',
-                            data=df_xlsx ,
-                            file_name= 'cpu_analysis.xlsx')
-        else:
-            st.info("🤔 No Actions found")
-def groups_display():
-    if  'key1' in st.session_state:
-        df = st.session_state.key1
-        df['Policy_check']  = df['Policy-Action Label'].apply(lambda x: True if 'Policies & Plans' in x else False)
-        hits  = df[df['Policy_check'] == True]
-        # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
-        range_val = min(5,len(hits))
-        if range_val !=0:
-            count_policy = len(hits)
-            st.write("")
-            st.markdown("###### Top few Policy/Plans Classified paragraph/text results from list of {} classified paragraphs ######".format(count_policy))
-            st.markdown("""<hr style="height:10px;border:none;color:#097969;background-color:#097969;" /> """, unsafe_allow_html=True)
-            range_val = min(5,len(hits))
-            for i in range(range_val):
-                # the page number reflects the page that contains the main paragraph
-                # according to split limit, the overlapping part can be on a separate page
-                st.write('**Result {}** : `page {}`, `Sector: {}`,\
-                            `Indicators: {}`, `Adapt-Mitig :{}`'\
-                    .format(i+1,
-                            hits.iloc[i]['page'], hits.iloc[i]['Sector Label'],
-                            hits.iloc[i]['Indicator Label'],hits.iloc[i]['Adapt-Mitig Label']))
-                st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
-            hits = hits.reset_index(drop =True)
-            st.write('----------------')
-            st.write('Explore the data')
-            st.write(hits)
-            df.drop(columns = ['Policy_check'],inplace=True)
-            df_xlsx = to_excel(df)
-            with st.sidebar:
-                st.write('-------------')
-                st.download_button(label='📥 Download Result',
-                            data=df_xlsx ,
-                            file_name= 'vulnerable_groups.xlsx')
-        else:
-            st.info("🤔 No Groups found")