|
|
|
import glob, os, sys; |
|
sys.path.append('../utils') |
|
|
|
|
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
import pandas as pd |
|
import streamlit as st |
|
from st_aggrid import AgGrid |
|
import logging |
|
logger = logging.getLogger(__name__) |
|
from io import BytesIO |
|
import xlsxwriter |
|
import plotly.express as px |
|
from pandas.api.types import ( |
|
is_categorical_dtype, |
|
is_datetime64_any_dtype, |
|
is_numeric_dtype, |
|
is_object_dtype, |
|
is_list_like) |
|
|
|
|
|
|
|
|
|
def targets():
    """Prepare the target-related paragraphs and store them in session state.

    Does nothing unless ``st.session_state`` contains ``'key1'``. Otherwise a
    copy of that DataFrame is used to:

    * look up the ``'text'`` of the row with the highest
      ``'NetzeroLabel_Score'`` and store it under ``'netzero'``;
    * drop every column whose name matches ``'Score'``, keep only the rows
      where ``TargetLabel`` is True, add a ``'keep'`` column set to True,
      drop the ``ActionLabel``/``PolicyLabel``/``PlansLabel`` columns and
      store the result under ``'target_hits'``.
    """
    if 'key1' not in st.session_state:
        return

    classified = st.session_state['key1'].copy()

    # The score columns are still present here; they are removed below.
    best_row = classified['NetzeroLabel_Score'].idxmax()
    netzero_text = classified.loc[best_row, 'text']

    score_columns = classified.filter(regex='Score').columns
    classified = classified.drop(score_columns, axis=1)

    is_target = classified.TargetLabel == True  # noqa: E712 (element-wise)
    target_rows = classified[is_target].reset_index(drop=True)
    target_rows['keep'] = True
    target_rows = target_rows.drop(
        columns=['ActionLabel', 'PolicyLabel', 'PlansLabel'])

    st.session_state['target_hits'] = target_rows
    st.session_state['netzero'] = netzero_text
|
|
|
def target_display():
    """Render the target summary for the processed document.

    Does nothing unless ``st.session_state`` contains ``'key1'``. Otherwise it
    shows a caption built from ``'filename'`` and the length of ``'key0'``,
    then reads the DataFrame stored under ``'target_hits'`` and displays:

    * the number of rows flagged True for each target-related label,
    * a bar chart with the number of flagged rows per sector column,
    * a table of the paragraphs with their labels.

    When the DataFrame has no rows an info message is shown instead.

    Side effect: the list-valued columns ``'Sector'`` and ``'Sub-Target'``
    are written to the DataFrame held in session state.
    """
    if 'key1' not in st.session_state:
        return

    st.caption(""" **{}** is splitted into **{}** paragraphs/text chunks."""
               .format(os.path.basename(st.session_state['filename']),
                       len(st.session_state['key0'])))

    hits = st.session_state['target_hits']
    if len(hits) == 0:
        st.info("🤔 No Targets Found")
        return

    def count_true(column):
        # Element-wise comparison with True, summed by pandas rather than
        # iterating the Series with the builtin ``sum``.
        return int((hits[column] == True).sum())  # noqa: E712

    count_target = count_true('TargetLabel')
    count_ghg = count_true('GHGLabel')
    count_netzero = count_true('NetzeroLabel')
    count_nonghg = count_true('NonGHGLabel')
    count_mitigation = count_true('MitigationLabel')
    count_adaptation = count_true('AdaptationLabel')

    c1, c2 = st.columns([1, 1])
    with c1:
        st.write('**Target Related Paragraphs**: `{}`'.format(count_target))
        st.write('**Netzero Related Paragraphs**: `{}`'.format(count_netzero))
        st.write('**Mitigation Related Paragraphs**: `{}`'.format(count_mitigation))
    with c2:
        st.write('**GHG Target Related Paragraphs**: `{}`'.format(count_ghg))
        st.write('**NonGHG Target Related Paragraphs**: `{}`'.format(count_nonghg))
        st.write('**Adaptation Related Paragraphs**: `{}`'.format(count_adaptation))
    st.write('----------------')

    st.markdown("<h4 style='text-align: left; color: black;'> Sectoral Target Related Paragraphs Count </h4>", unsafe_allow_html=True)

    # Every column that is not a known label/metadata column is treated as a
    # sector. 'Sector' and 'Sub-Target' are excluded as well: this function
    # adds them to the stored DataFrame, so on a later call they would
    # otherwise be picked up as (always empty) sectors.
    non_sector_cols = {'TargetLabel', 'MitigationLabel', 'AdaptationLabel',
                       'GHGLabel', 'NetzeroLabel', 'NonGHGLabel',
                       'text', 'keep', 'page', 'Sector', 'Sub-Target'}
    sector_cols = sorted(set(hits.columns) - non_sector_cols)
    sub_target_cols = ['GHGLabel', 'NetzeroLabel', 'NonGHGLabel']

    hits['Sector'] = hits.apply(
        lambda row: [col for col in sector_cols if row[col] == True],  # noqa: E712
        axis=1)
    hits['Sub-Target'] = hits.apply(
        lambda row: [col for col in sub_target_cols if row[col] == True],  # noqa: E712
        axis=1)

    sector_counts = [{'Sector': col, 'Count': count_true(col)}
                     for col in sector_cols]
    # Explicit columns so the frame has 'Sector'/'Count' even when there are
    # no sector columns at all.
    sector_df = pd.DataFrame(sector_counts, columns=['Sector', 'Count'])
    fig = px.bar(sector_df, x='Sector', y='Count')
    st.plotly_chart(fig, use_container_width=True)

    st.dataframe(hits[['text', 'page', 'keep', 'MitigationLabel',
                       'AdaptationLabel', 'Sector', 'Sub-Target']])
|
|
|
|
|
|
|
def actions():
    """Prepare the action-related paragraphs and store them in session state.

    Does nothing unless ``st.session_state`` contains ``'key1'``. Otherwise a
    copy of that DataFrame loses every column whose name matches ``'Score'``,
    is reduced to the rows where ``ActionLabel`` is True, gets a ``'keep'``
    column set to True, loses the label columns listed below and is stored
    under ``'action_hits'``.
    """
    if 'key1' not in st.session_state:
        return

    frame = st.session_state['key1'].copy()
    frame = frame.drop(columns=frame.filter(regex='Score').columns)

    action_rows = frame[frame.ActionLabel == True]  # noqa: E712 (element-wise)
    action_rows = action_rows.reset_index(drop=True)
    action_rows['keep'] = True

    dropped_labels = ['TargetLabel', 'PolicyLabel', 'PlansLabel',
                      'GHGLabel', 'NetzeroLabel', 'NonGHGLabel']
    action_rows = action_rows.drop(columns=dropped_labels)

    st.session_state['action_hits'] = action_rows
|
|
|
def action_display():
    """Render the action summary for the processed document.

    Does nothing unless ``st.session_state`` contains ``'key1'``. Otherwise it
    shows a caption built from ``'filename'`` and the length of ``'key0'``,
    then reads the DataFrame stored under ``'action_hits'`` and displays the
    number of rows flagged True for the action/mitigation/adaptation labels,
    a bar chart of flagged rows per sector column and a table of the
    paragraphs. When the DataFrame has no rows an info message is shown.

    Side effect: the list-valued column ``'Sector'`` is written to the
    DataFrame held in session state.
    """
    if 'key1' not in st.session_state:
        return

    st.caption(""" **{}** is splitted into **{}** paragraphs/text chunks."""
               .format(os.path.basename(st.session_state['filename']),
                       len(st.session_state['key0'])))

    hits = st.session_state['action_hits']
    if len(hits) == 0:
        st.info("🤔 No Actions Found")
        return

    def count_true(column):
        # Element-wise comparison with True, summed by pandas rather than
        # iterating the Series with the builtin ``sum``.
        return int((hits[column] == True).sum())  # noqa: E712

    count_action = count_true('ActionLabel')
    count_mitigation = count_true('MitigationLabel')
    count_adaptation = count_true('AdaptationLabel')

    c1, c2 = st.columns([1, 1])
    with c1:
        st.write('**Action Related Paragraphs**: `{}`'.format(count_action))
        st.write('**Mitigation Related Paragraphs**: `{}`'.format(count_mitigation))
    with c2:
        st.write('**Adaptation Related Paragraphs**: `{}`'.format(count_adaptation))
    st.write('----------------')
    st.markdown("<h4 style='text-align: left; color: black;'> Sectoral Action Related Paragraphs Count </h4>", unsafe_allow_html=True)

    # Every column that is not a known label/metadata column is treated as a
    # sector. 'Sector' is excluded as well: this function adds it to the
    # stored DataFrame, so on a later call it would otherwise be picked up as
    # an (always empty) sector.
    non_sector_cols = {'ActionLabel', 'MitigationLabel', 'AdaptationLabel',
                       'GHGLabel', 'NetzeroLabel', 'NonGHGLabel',
                       'text', 'keep', 'page', 'Sector'}
    sector_cols = sorted(set(hits.columns) - non_sector_cols)

    hits['Sector'] = hits.apply(
        lambda row: [col for col in sector_cols if row[col] == True],  # noqa: E712
        axis=1)

    sector_counts = [{'Sector': col, 'Count': count_true(col)}
                     for col in sector_cols]
    # Explicit columns so the frame has 'Sector'/'Count' even when there are
    # no sector columns at all.
    sector_df = pd.DataFrame(sector_counts, columns=['Sector', 'Count'])
    fig = px.bar(sector_df, x='Sector', y='Count')
    st.plotly_chart(fig, use_container_width=True)

    st.dataframe(hits[['text', 'page', 'keep', 'MitigationLabel',
                       'AdaptationLabel', 'Sector']])
|
|
|
|
|
|
|
def policy():
    """Prepare the policy-related paragraphs and store them in session state.

    Does nothing unless ``st.session_state`` contains ``'key1'``. Otherwise a
    copy of that DataFrame is stripped of every column whose name matches
    ``'Score'``, filtered to the rows where ``PolicyLabel`` is True, given a
    ``'keep'`` column set to True, stripped of the other label columns and
    stored under ``'policy_hits'``.
    """
    if 'key1' not in st.session_state:
        return

    source = st.session_state['key1'].copy()
    without_scores = source.drop(source.filter(regex='Score').columns, axis=1)

    policy_mask = without_scores.PolicyLabel == True  # noqa: E712 (element-wise)
    policy_rows = without_scores[policy_mask].reset_index(drop=True)
    policy_rows['keep'] = True
    policy_rows = policy_rows.drop(columns=[
        'TargetLabel', 'ActionLabel', 'PlansLabel',
        'GHGLabel', 'NetzeroLabel', 'NonGHGLabel',
    ])

    st.session_state['policy_hits'] = policy_rows
|
|
|
def policy_display():
    """Render the policy summary for the processed document.

    Does nothing unless ``st.session_state`` contains ``'key1'``. Otherwise it
    shows a caption built from ``'filename'`` and the length of ``'key0'``,
    then reads the DataFrame stored under ``'policy_hits'`` and displays the
    number of rows flagged True for the policy/mitigation/adaptation labels,
    a bar chart of flagged rows per sector column and a table of the
    paragraphs. When the DataFrame has no rows an info message is shown.

    Side effect: the list-valued column ``'Sector'`` is written to the
    DataFrame held in session state.
    """
    if 'key1' not in st.session_state:
        return

    st.caption(""" **{}** is splitted into **{}** paragraphs/text chunks."""
               .format(os.path.basename(st.session_state['filename']),
                       len(st.session_state['key0'])))

    hits = st.session_state['policy_hits']
    if len(hits) == 0:
        st.info("🤔 No Policy Found")
        return

    def count_true(column):
        # Element-wise comparison with True, summed by pandas rather than
        # iterating the Series with the builtin ``sum``.
        return int((hits[column] == True).sum())  # noqa: E712

    count_policy = count_true('PolicyLabel')
    count_mitigation = count_true('MitigationLabel')
    count_adaptation = count_true('AdaptationLabel')

    c1, c2 = st.columns([1, 1])
    with c1:
        st.write('**Policy Related Paragraphs**: `{}`'.format(count_policy))
        st.write('**Mitigation Related Paragraphs**: `{}`'.format(count_mitigation))
    with c2:
        st.write('**Adaptation Related Paragraphs**: `{}`'.format(count_adaptation))
    st.write('----------------')
    st.markdown("<h4 style='text-align: left; color: black;'> Sectoral Policy Related Paragraphs Count </h4>", unsafe_allow_html=True)

    # Every column that is not a known label/metadata column is treated as a
    # sector. 'Sector' is excluded as well: this function adds it to the
    # stored DataFrame, so on a later call it would otherwise be picked up as
    # an (always empty) sector.
    non_sector_cols = {'PolicyLabel', 'MitigationLabel', 'AdaptationLabel',
                       'GHGLabel', 'NetzeroLabel', 'NonGHGLabel',
                       'text', 'keep', 'page', 'Sector'}
    sector_cols = sorted(set(hits.columns) - non_sector_cols)

    hits['Sector'] = hits.apply(
        lambda row: [col for col in sector_cols if row[col] == True],  # noqa: E712
        axis=1)

    sector_counts = [{'Sector': col, 'Count': count_true(col)}
                     for col in sector_cols]
    # Explicit columns so the frame has 'Sector'/'Count' even when there are
    # no sector columns at all.
    sector_df = pd.DataFrame(sector_counts, columns=['Sector', 'Count'])
    fig = px.bar(sector_df, x='Sector', y='Count')
    st.plotly_chart(fig, use_container_width=True)

    st.dataframe(hits[['text', 'page', 'keep', 'MitigationLabel',
                       'AdaptationLabel', 'Sector']])
|
|
|
def plans():
    """Prepare the plans-related paragraphs and store them in session state.

    Does nothing unless ``st.session_state`` contains ``'key1'``. Otherwise a
    copy of that DataFrame has its ``'Score'``-matching columns removed, is
    narrowed to the rows where ``PlansLabel`` is True, receives a ``'keep'``
    column set to True, has the other label columns removed and is stored
    under ``'plan_hits'``.
    """
    if 'key1' not in st.session_state:
        return

    data = st.session_state['key1'].copy()
    score_cols = data.filter(regex='Score').columns
    data = data.drop(score_cols, axis=1)

    plan_rows = data[data.PlansLabel == True].reset_index(drop=True)  # noqa: E712
    plan_rows['keep'] = True

    other_labels = ['TargetLabel', 'PolicyLabel', 'ActionLabel',
                    'GHGLabel', 'NetzeroLabel', 'NonGHGLabel']
    plan_rows = plan_rows.drop(columns=other_labels)

    st.session_state['plan_hits'] = plan_rows
|
|
|
def plans_display():
    """Render the plans summary for the processed document.

    Does nothing unless ``st.session_state`` contains ``'key1'``. Otherwise it
    shows a caption built from ``'filename'`` and the length of ``'key0'``,
    then reads the DataFrame stored under ``'plan_hits'`` and displays the
    number of rows flagged True for the plans/mitigation/adaptation labels,
    a bar chart of flagged rows per sector column and a table of the
    paragraphs. When the DataFrame has no rows an info message is shown.

    Side effect: the list-valued column ``'Sector'`` is written to the
    DataFrame held in session state.
    """
    if 'key1' not in st.session_state:
        return

    st.caption(""" **{}** is splitted into **{}** paragraphs/text chunks."""
               .format(os.path.basename(st.session_state['filename']),
                       len(st.session_state['key0'])))

    hits = st.session_state['plan_hits']
    if len(hits) == 0:
        st.info("🤔 No Plans Found")
        return

    def count_true(column):
        # Element-wise comparison with True, summed by pandas rather than
        # iterating the Series with the builtin ``sum``.
        return int((hits[column] == True).sum())  # noqa: E712

    count_plans = count_true('PlansLabel')
    count_mitigation = count_true('MitigationLabel')
    count_adaptation = count_true('AdaptationLabel')

    c1, c2 = st.columns([1, 1])
    with c1:
        st.write('**Plans Related Paragraphs**: `{}`'.format(count_plans))
        st.write('**Mitigation Related Paragraphs**: `{}`'.format(count_mitigation))
    with c2:
        st.write('**Adaptation Related Paragraphs**: `{}`'.format(count_adaptation))
    st.write('----------------')
    st.markdown("<h4 style='text-align: left; color: black;'> Sectoral Plans Related Paragraphs Count </h4>", unsafe_allow_html=True)

    # Every column that is not a known label/metadata column is treated as a
    # sector. 'Sector' is excluded as well: this function adds it to the
    # stored DataFrame, so on a later call it would otherwise be picked up as
    # an (always empty) sector.
    non_sector_cols = {'PlansLabel', 'MitigationLabel', 'AdaptationLabel',
                       'GHGLabel', 'NetzeroLabel', 'NonGHGLabel',
                       'text', 'keep', 'page', 'Sector'}
    sector_cols = sorted(set(hits.columns) - non_sector_cols)

    hits['Sector'] = hits.apply(
        lambda row: [col for col in sector_cols if row[col] == True],  # noqa: E712
        axis=1)

    sector_counts = [{'Sector': col, 'Count': count_true(col)}
                     for col in sector_cols]
    # Explicit columns so the frame has 'Sector'/'Count' even when there are
    # no sector columns at all.
    sector_df = pd.DataFrame(sector_counts, columns=['Sector', 'Count'])
    fig = px.bar(sector_df, x='Sector', y='Count')
    st.plotly_chart(fig, use_container_width=True)

    st.dataframe(hits[['text', 'page', 'keep', 'MitigationLabel',
                       'AdaptationLabel', 'Sector']])
|
|
|
|
|
|