# Page header captured together with the source (hosting "Spaces" status at
# capture time: Runtime error).
import streamlit as st | |
# Page-level configuration (browser-tab title and wide layout). Streamlit
# expects this to be the first Streamlit command the script runs, which is why
# it sits directly below the `streamlit` import instead of after the other
# imports.
st.set_page_config(page_title='SDSN x GIZ Policy Tracing', layout="wide")
import seaborn as sns | |
import pdfplumber | |
from pandas import DataFrame | |
from keybert import KeyBERT | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import streamlit as st | |
# NOTE: the Streamlit cache decorator that used to wrap this loader
# (`@st.cache(allow_output_mutation=True)`) is disabled, so every call
# constructs a new model instance.
def load_model():
    """Build and return a KeyBERT model using its default configuration."""
    keyword_model = KeyBERT()
    return keyword_model
def read_(file):
    """Extract the text of every page of a PDF and join it into one string.

    Args:
        file: Anything ``pdfplumber.open`` accepts (for example the value
            returned by ``st.file_uploader``), or ``None`` when no document
            is available.

    Returns:
        The text of all pages joined with single spaces, or ``None`` when
        ``file`` is ``None``.
    """
    if file is None:
        # No document supplied: return explicitly instead of falling through
        # to a result variable that was never assigned.
        return None

    with pdfplumber.open(file) as pdf:
        # extract_text() can return None for a page without extractable text
        # (older pdfplumber releases do this); substitute "" so that
        # str.join does not raise TypeError.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return ' '.join(page_texts)
# --- Sidebar: logo, short explanation and chapter selector -------------------
st.sidebar.image(
    "https://github.com/gizdatalab/policy_tracing/blob/main/img/sdsn.png?raw=true",
    use_column_width=True,
)

# A `st.sidebar.container(file=st.file_uploader(...))` call used to live here.
# It was removed because `container()` has no `file` parameter (the call
# raised TypeError), the uploader it created had the same label and type as
# the one in the main page's upload step (identical widgets without a `key`
# collide), and its result was overwritten by that later uploader anyway.

st.sidebar.title("Options:")
st.sidebar.markdown(
    "You can freely browse the different chapters - ie example prompts from different people - and see the results."
)

# Chapter chosen by the user; kept under its original module-level name.
selected_date = st.sidebar.selectbox(
    "Please select the chapter you want to read:",
    ['c1', 'c2'],
)
# --- Main page: centred title -------------------------------------------------
with st.container():
    st.markdown("<h1 style='text-align: center; color: black;'> SDSN X GIZ - Policy Action Tracking</h1>", unsafe_allow_html=True)
    # Two blank writes act as vertical spacing below the title.
    st.write(' ')
    st.write(' ')

# --- Main page: collapsible description of the app ----------------------------
with st.expander("ℹ️ - About this app", expanded=True):
    # The text is kept flush-left inside the literal: markdown treats lines
    # indented by four or more spaces as a code block.
    st.write(
        """
The *Policy Action Tracker* app is an easy-to-use interface built with Streamlit for analyzing policy documents - developed by GIZ Data and the Sustainable Development Solution Network.
It uses a minimal keyword extraction technique that leverages multiple NLP embeddings and relies on [Transformers](https://huggingface.co/transformers/) 🤗 to create keywords/keyphrases that are most similar to a document.
"""
    )
    st.markdown("")

st.markdown("")
# NOTE(review): the emoji in this heading was mis-encoded beyond recovery in
# the captured source; the pin emoji is a best guess - confirm against the
# intended design.
st.markdown("## 📌 Step One: Upload document ")
# --- Step one: upload a PDF and report how many pages it has ------------------
with st.container():
    file = st.file_uploader('Upload PDF File', type=['pdf'])

    # The uploader yields None until the user has chosen a document, so the
    # text extraction and the page count only run once a file exists.
    text_str = None
    if file is not None:
        text_str = read_(file)

        # `pdf` is local to read_ and not visible here, so the upload is
        # opened again just to count its pages. Rewind first in case the
        # earlier parse moved the stream position.
        file.seek(0)
        with pdfplumber.open(file) as pdf:
            page_count = len(pdf.pages)
        st.write('Number of pages:', page_count)