leavoigt committed on
Commit
4382380
1 Parent(s): 02ce349

Rename appStore/groups.py to appStore/classifier.py

Browse files
Files changed (2) hide show
  1. appStore/classifier.py +116 -0
  2. appStore/groups.py +0 -114
appStore/classifier.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # set path
2
+ import glob, os, sys;
3
+ sys.path.append('../utils')
4
+ from setfit import SetFitModel
5
+ #import needed libraries
6
+ #import seaborn as sns
7
+ #import matplotlib.pyplot as plt
8
+ #import numpy as np
9
+ #import pandas as pd
10
+ #import streamlit as st
11
+ from utils.groups_classifier import load_groupsClassifier, groups_classification
12
+ #import logging
13
+ #logger = logging.getLogger(__name__)
14
+ #from utils.config import get_classifier_params
15
+ #from utils.preprocessing import paraLengthCheck
16
+ #from io import BytesIO
17
+ #import xlsxwriter
18
+ #import plotly.express as px
19
+
20
+ vg_model = SetFitModel.from_pretrained("leavoigt/vulnerable_groups")
21
+
22
+
23
+ # Declare all the necessary variables
24
+ classifier_identifier = 'group_classification'
25
+ params = get_classifier_params(classifier_identifier)
26
+
27
+ # def app():
28
+
29
+ # ### Main app code ###
30
+ # with st.container():
31
+
32
+ # # Classify groups
33
+ # df = group_classification(haystack_doc=df, threshold= params['threshold'])
34
+
35
+ # def groups_display():
36
+ # if 'key1' in st.session_state:
37
+ # df = st.session_state.key1
38
+
39
+
40
+ # df['Action_check'] = df['Policy-Action Label'].apply(lambda x: True if 'Action' in x else False)
41
+ # hits = df[df['Action_check'] == True]
42
+ # # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
43
+ # range_val = min(5,len(hits))
44
+ # if range_val !=0:
45
+ # count_action = len(hits)
46
+
47
+ # st.write("")
48
+ # st.markdown("###### Top few Action Classified paragraph/text results from list of {} classified paragraphs ######".format(count_action))
49
+ # st.markdown("""<hr style="height:10px;border:none;color:#097969;background-color:#097969;" /> """, unsafe_allow_html=True)
50
+ # range_val = min(5,len(hits))
51
+ # for i in range(range_val):
52
+ # # the page number reflects the page that contains the main paragraph
53
+ # # according to split limit, the overlapping part can be on a separate page
54
+ # st.write('**Result {}** : `page {}`, `Sector: {}`,\
55
+ # `Indicators: {}`, `Adapt-Mitig :{}`'\
56
+ # .format(i+1,
57
+ # hits.iloc[i]['page'], hits.iloc[i]['Sector Label'],
58
+ # hits.iloc[i]['Indicator Label'],hits.iloc[i]['Adapt-Mitig Label']))
59
+ # st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
60
+ # hits = hits.reset_index(drop =True)
61
+ # st.write('----------------')
62
+ # st.write('Explore the data')
63
+ # st.write(hits)
64
+ # df.drop(columns = ['Action_check'],inplace=True)
65
+ # df_xlsx = to_excel(df)
66
+
67
+ # with st.sidebar:
68
+ # st.write('-------------')
69
+ # st.download_button(label='📥 Download Result',
70
+ # data=df_xlsx ,
71
+ # file_name= 'cpu_analysis.xlsx')
72
+
73
+ # else:
74
+ # st.info("🤔 No Actions found")
75
+
76
+
77
+ # def groups_display():
78
+ # if 'key1' in st.session_state:
79
+ # df = st.session_state.key1
80
+
81
+
82
+ # df['Policy_check'] = df['Policy-Action Label'].apply(lambda x: True if 'Policies & Plans' in x else False)
83
+ # hits = df[df['Policy_check'] == True]
84
+ # # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
85
+ # range_val = min(5,len(hits))
86
+ # if range_val !=0:
87
+ # count_policy = len(hits)
88
+
89
+ # st.write("")
90
+ # st.markdown("###### Top few Policy/Plans Classified paragraph/text results from list of {} classified paragraphs ######".format(count_policy))
91
+ # st.markdown("""<hr style="height:10px;border:none;color:#097969;background-color:#097969;" /> """, unsafe_allow_html=True)
92
+ # range_val = min(5,len(hits))
93
+ # for i in range(range_val):
94
+ # # the page number reflects the page that contains the main paragraph
95
+ # # according to split limit, the overlapping part can be on a separate page
96
+ # st.write('**Result {}** : `page {}`, `Sector: {}`,\
97
+ # `Indicators: {}`, `Adapt-Mitig :{}`'\
98
+ # .format(i+1,
99
+ # hits.iloc[i]['page'], hits.iloc[i]['Sector Label'],
100
+ # hits.iloc[i]['Indicator Label'],hits.iloc[i]['Adapt-Mitig Label']))
101
+ # st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
102
+ # hits = hits.reset_index(drop =True)
103
+ # st.write('----------------')
104
+ # st.write('Explore the data')
105
+ # st.write(hits)
106
+ # df.drop(columns = ['Policy_check'],inplace=True)
107
+ # df_xlsx = to_excel(df)
108
+
109
+ # with st.sidebar:
110
+ # st.write('-------------')
111
+ # st.download_button(label='📥 Download Result',
112
+ # data=df_xlsx ,
113
+ # file_name= 'vulnerable_groups.xlsx')
114
+
115
+ # else:
116
+ # st.info("🤔 No Groups found")
appStore/groups.py DELETED
@@ -1,114 +0,0 @@
1
- # set path
2
- import glob, os, sys;
3
- sys.path.append('../utils')
4
-
5
- #import needed libraries
6
- import seaborn as sns
7
- import matplotlib.pyplot as plt
8
- import numpy as np
9
- import pandas as pd
10
- import streamlit as st
11
- from utils.groups_classifier import load_groupsClassifier, groups_classification
12
- import logging
13
- logger = logging.getLogger(__name__)
14
- from utils.config import get_classifier_params
15
- from utils.preprocessing import paraLengthCheck
16
- from io import BytesIO
17
- import xlsxwriter
18
- import plotly.express as px
19
-
20
-
21
- # Declare all the necessary variables
22
- classifier_identifier = 'group_classification'
23
- params = get_classifier_params(classifier_identifier)
24
-
25
- def app():
26
-
27
- ### Main app code ###
28
- with st.container():
29
-
30
- # Classify groups
31
- df = group_classification(haystack_doc=df, threshold= params['threshold'])
32
-
33
- def groups_display():
34
- if 'key1' in st.session_state:
35
- df = st.session_state.key1
36
-
37
-
38
- df['Action_check'] = df['Policy-Action Label'].apply(lambda x: True if 'Action' in x else False)
39
- hits = df[df['Action_check'] == True]
40
- # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
41
- range_val = min(5,len(hits))
42
- if range_val !=0:
43
- count_action = len(hits)
44
-
45
- st.write("")
46
- st.markdown("###### Top few Action Classified paragraph/text results from list of {} classified paragraphs ######".format(count_action))
47
- st.markdown("""<hr style="height:10px;border:none;color:#097969;background-color:#097969;" /> """, unsafe_allow_html=True)
48
- range_val = min(5,len(hits))
49
- for i in range(range_val):
50
- # the page number reflects the page that contains the main paragraph
51
- # according to split limit, the overlapping part can be on a separate page
52
- st.write('**Result {}** : `page {}`, `Sector: {}`,\
53
- `Indicators: {}`, `Adapt-Mitig :{}`'\
54
- .format(i+1,
55
- hits.iloc[i]['page'], hits.iloc[i]['Sector Label'],
56
- hits.iloc[i]['Indicator Label'],hits.iloc[i]['Adapt-Mitig Label']))
57
- st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
58
- hits = hits.reset_index(drop =True)
59
- st.write('----------------')
60
- st.write('Explore the data')
61
- st.write(hits)
62
- df.drop(columns = ['Action_check'],inplace=True)
63
- df_xlsx = to_excel(df)
64
-
65
- with st.sidebar:
66
- st.write('-------------')
67
- st.download_button(label='📥 Download Result',
68
- data=df_xlsx ,
69
- file_name= 'cpu_analysis.xlsx')
70
-
71
- else:
72
- st.info("🤔 No Actions found")
73
-
74
-
75
- def groups_display():
76
- if 'key1' in st.session_state:
77
- df = st.session_state.key1
78
-
79
-
80
- df['Policy_check'] = df['Policy-Action Label'].apply(lambda x: True if 'Policies & Plans' in x else False)
81
- hits = df[df['Policy_check'] == True]
82
- # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
83
- range_val = min(5,len(hits))
84
- if range_val !=0:
85
- count_policy = len(hits)
86
-
87
- st.write("")
88
- st.markdown("###### Top few Policy/Plans Classified paragraph/text results from list of {} classified paragraphs ######".format(count_policy))
89
- st.markdown("""<hr style="height:10px;border:none;color:#097969;background-color:#097969;" /> """, unsafe_allow_html=True)
90
- range_val = min(5,len(hits))
91
- for i in range(range_val):
92
- # the page number reflects the page that contains the main paragraph
93
- # according to split limit, the overlapping part can be on a separate page
94
- st.write('**Result {}** : `page {}`, `Sector: {}`,\
95
- `Indicators: {}`, `Adapt-Mitig :{}`'\
96
- .format(i+1,
97
- hits.iloc[i]['page'], hits.iloc[i]['Sector Label'],
98
- hits.iloc[i]['Indicator Label'],hits.iloc[i]['Adapt-Mitig Label']))
99
- st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
100
- hits = hits.reset_index(drop =True)
101
- st.write('----------------')
102
- st.write('Explore the data')
103
- st.write(hits)
104
- df.drop(columns = ['Policy_check'],inplace=True)
105
- df_xlsx = to_excel(df)
106
-
107
- with st.sidebar:
108
- st.write('-------------')
109
- st.download_button(label='📥 Download Result',
110
- data=df_xlsx ,
111
- file_name= 'vulnerable_groups.xlsx')
112
-
113
- else:
114
- st.info("🤔 No Groups found")