"""Streamlit front end for the SDSN x GIZ Policy Action Tracking app.

The script renders a sidebar (logo, PDF upload, chapter selector), an
"About" section, and a "Step One" section in which the user uploads a PDF
whose text is extracted with pdfplumber.
"""
import matplotlib.pyplot as plt
import numpy as np
import pdfplumber
import seaborn as sns
import streamlit as st
from keybert import KeyBERT
from pandas import DataFrame

# Page configuration is issued before any other Streamlit call in the script.
st.set_page_config('SDSN x GIZ Policy Tracing', layout="wide")


# NOTE(review): the original carried a disabled
# ``@st.cache(allow_output_mutation=True)`` decorator here. Caching is left
# off; re-enable it deliberately if model construction proves too slow.
def load_model():
    """Return a new ``KeyBERT`` instance built with the library defaults."""
    return KeyBERT()


def _extract_pages(file):
    """Return the extracted text of each page of the PDF *file*, in page order.

    Args:
        file: Anything ``pdfplumber.open`` accepts (a path or a binary
            file-like object such as a Streamlit upload).

    Returns:
        A list with one string per page; pages without extractable text
        contribute an empty string.
    """
    with pdfplumber.open(file) as pdf:
        # extract_text() may yield None for a page with no text layer
        # (observed with older pdfplumber releases); '' keeps join() safe.
        return [page.extract_text() or '' for page in pdf.pages]


def read_(file):
    """Return the text of the PDF *file* with pages joined by single spaces.

    Args:
        file: A PDF path or binary file-like object, or ``None``.

    Returns:
        The concatenated page text, or ``None`` when *file* is ``None``.
    """
    if file is None:
        return None
    return ' '.join(_extract_pages(file))


# --- Sidebar ---------------------------------------------------------------

st.sidebar.image(
    "https://github.com/gizdatalab/policy_tracing/blob/main/img/sdsn.png?raw=true",
    use_column_width=True,
)

with st.sidebar.container():
    # The same uploader also appears on the main page; each widget needs its
    # own key, otherwise Streamlit rejects the two identical widgets.
    sidebar_file = st.file_uploader(
        'Upload PDF File', type=['pdf'], key='sidebar_pdf_upload'
    )

st.sidebar.title("Options:")

st.sidebar.markdown(
    "You can freely browse the different chapters - ie example prompts from "
    "different people - and see the results."
)

selected_date = st.sidebar.selectbox(
    "Please select the chapter you want to read:",
    ['c1', 'c2'],
)

# --- Main page -------------------------------------------------------------

with st.container():
    # NOTE(review): the HTML tags of this heading were lost in the source;
    # a centred <h1> is assumed - confirm against the intended design.
    st.markdown(
        "<h1 style='text-align: center;'>"
        "SDSN X GIZ - Policy Action Tracking"
        "</h1>",
        unsafe_allow_html=True,
    )
    st.write(' ')
    st.write(' ')

with st.expander("ℹ️ - About this app", expanded=True):
    # Built from adjacent literals (no leading indentation) so Markdown does
    # not render the paragraph as a code block.
    st.write(
        "The *Policy Action Tracker* app is an easy-to-use interface built "
        "with Streamlit for analyzing policy documents - developed by GIZ "
        "Data and the Sustainable Development Solution Network. It uses a "
        "minimal keyword extraction technique that leverages multiple NLP "
        "embeddings and relies on "
        "[Transformers](https://huggingface.co/transformers/) 🤗 to create "
        "keywords/keyphrases that are most similar to a document."
    )
    st.markdown("")

st.markdown("")
st.markdown("## 📌 Step One: Upload document ")

with st.container():
    main_file = st.file_uploader(
        'Upload PDF File', type=['pdf'], key='main_pdf_upload'
    )
    # Prefer the main-page upload; fall back to the sidebar one.
    file = main_file if main_file is not None else sidebar_file

    text_str = None
    if file is not None:
        # Parse once and reuse the pages for both the joined text and the
        # page count (equivalent to read_(file) without a second pass).
        pages = _extract_pages(file)
        text_str = ' '.join(pages)
        st.write('Number of pages:', len(pages))