File size: 2,094 Bytes
41460de
 
 
 
 
 
 
 
 
 
 
 
3e0a87b
 
 
 
402e9be
de5cd4d
 
 
 
 
 
 
 
 
 
402e9be
d4a926a
402e9be
 
3e0a87b
de5cd4d
 
0bd3e3f
0110fa1
0bd3e3f
 
 
 
 
 
 
 
 
 
3e0a87b
c5118ce
0110fa1
c5118ce
 
41460de
c5118ce
41460de
c5118ce
 
68739a8
41460de
c5118ce
 
 
41460de
3e0a87b
c5118ce
 
c6338e6
 
 
 
 
de5cd4d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import streamlit as st
# Runs before any other Streamlit command, which set_page_config has
# historically required. The title is a plain literal (the original f-string
# had no placeholders) and is passed by keyword for readability.
st.set_page_config(page_title='SDSN x GIZ Policy Tracing', layout="wide")

import io

import matplotlib.pyplot as plt
import numpy as np
import pdfplumber
import seaborn as sns
import streamlit as st
from keybert import KeyBERT
from pandas import DataFrame



# Disabled decorator, kept for reference: @st.cache(allow_output_mutation=True)
def load_model():
    """Create and return a new ``KeyBERT`` keyword-extraction model.

    Every call constructs a fresh instance; nothing is cached here.
    """
    return KeyBERT()


def read_(file):
    """Extract the text of every page of a PDF and join it into one string.

    Args:
        file: Whatever ``pdfplumber.open`` accepts (the caller in this script
            passes the object returned by ``st.file_uploader``), or ``None``
            when no document is available.

    Returns:
        The per-page texts joined with single spaces, or ``None`` when
        *file* is ``None``.
    """
    if file is None:
        return None

    with pdfplumber.open(file) as pdf:
        # extract_text() may yield None for a page without extractable text
        # (older pdfplumber releases do this); substitute an empty string so
        # the join below cannot fail on a non-string item.
        page_texts = [page.extract_text() or '' for page in pdf.pages]

    return ' '.join(page_texts)

            
            
# ---- Sidebar: logo, PDF uploader and chapter selector ----
st.sidebar.image(
    "https://github.com/gizdatalab/policy_tracing/blob/main/img/sdsn.png?raw=true",
    use_column_width=True
)

# `with` is required to enter the container; a bare
# `st.sidebar.container():` is a syntax error.
with st.sidebar.container():
    # The explicit key keeps this widget distinct from the main-area uploader,
    # which is created with the same label and type filter.
    # NOTE(review): `file` is reassigned by the main-area uploader further
    # down, so this sidebar upload is not read by the code visible here.
    file = st.file_uploader(
        'Upload PDF File', type=['pdf'], key='sidebar_pdf_upload'
    )

st.sidebar.title(
    "Options:"
)

st.sidebar.markdown(
    "You can freely browse the different chapters - ie example prompts from different people - and see the results."
)

# NOTE(review): the variable is named `selected_date` but holds the chosen
# chapter label ('c1' or 'c2'); name kept in case other code refers to it.
selected_date = st.sidebar.selectbox(
    "Please select the chapter you want to read:",
    ['c1','c2']
)

# ---- Main area: centred page heading followed by two spacer lines ----
with st.container():
    st.markdown("<h1 style='text-align: center; color: black;'> SDSN X GIZ - Policy Action Tracking</h1>", unsafe_allow_html=True)
    st.write(' ')
    st.write(' ')

# ---- "About" panel, expanded by default ----
with st.expander("ℹ️ - About this app", expanded=True):

    # The Markdown link must have no space between `]` and `(`; with the
    # space it is rendered as literal text instead of a hyperlink.
    st.write(
        """     
        The *Policy Action Tracker* app is an easy-to-use interface built with Streamlit for analyzing policy documents - developed by GIZ Data and the Sustainable Development Solution Network.

        It uses a minimal keyword extraction technique that leverages multiple NLP embeddings and relies on [Transformers](https://huggingface.co/transformers/) 🤗 to create keywords/keyphrases that are most similar to a document.
        """
    )

# ---- Step one: upload a PDF, extract its text and report the page count ----
st.markdown("")
st.markdown("")
st.markdown("##  📌 Step One: Upload document ")


with st.container():

    # The explicit key keeps this widget distinct from the sidebar uploader,
    # which is created with the same label and type filter.
    file = st.file_uploader('Upload PDF File', type=['pdf'], key='main_pdf_upload')

    # Snapshot the uploaded bytes before read_() touches the stream: the
    # uploaded file is a BytesIO-like object, and depending on the pdfplumber
    # version, read_() leaving its `with` block may close it.
    pdf_bytes = file.getvalue() if file is not None else None

    text_str = read_(file)

    # `pdf` only exists inside read_(), so the page count is taken from a
    # separate in-memory copy. It is skipped entirely until a file has been
    # uploaded (file_uploader returns None before that).
    if pdf_bytes is not None:
        with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
            st.write('Number of pages:', len(pdf.pages))