Spaces:

peter2000
/

policy_test

Runtime error

App Files Files Community

peter2000 committed on Jul 28, 2022

Commit

529d899

1 Parent(s): 5a34641

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -183

app.py CHANGED Viewed

@@ -1,188 +1,15 @@
-import streamlit as st
-st.set_page_config(f'SDSN x GIZ Policy Tracing', layout="wide")
-import seaborn as sns
-import pdfplumber
-from pandas import DataFrame
-import matplotlib.pyplot as plt
-import numpy as np
-import streamlit as st
-import sentence-transformers
-   ##@st.cache(allow_output_mutation=True)
-def load_model():
-    return KeyBERT()
-def read_(file):
-     if file is not None:
-        text = []
-        with pdfplumber.open(file) as pdf:
-            for page in pdf.pages:
-                text.append(page.extract_text())
-            text_str = ' '.join([page for page in text])
-            st.write('Document:', pdf.metadata)
-            st.write('Number of pages:',len(pdf.pages))
-            pdf.close()
-            return text_str
-st.sidebar.image(
-    "https://github.com/gizdatalab/policy_tracing/blob/main/img/sdsn.png?raw=true",
-    use_column_width=True
-)
-st.sidebar.markdown("##  📌 Step One: Upload document ")
-with st.sidebar:
-    file = st.file_uploader('Upload PDF File', type=['pdf'])
-st.sidebar.title(
-    "Options:"
-)
-st.sidebar.markdown(
-    "You can freely browse the different chapters - ie example prompts from different people - and see the results."
-)
-selected_date = st.sidebar.selectbox(
-    "Please select the chapter you want to read:",
-    ['c1','c2']
-)
-with st.container():
-    st.markdown("<h1 style='text-align: center; color: black;'> SDSN X GIZ - Policy Action Tracking</h1>", unsafe_allow_html=True)
-    st.write(' ')
-    st.write(' ')
-with st.expander("ℹ️ - About this app", expanded=True):
-    st.write(
-        """
-        The *Policy Action Tracker* app is an easy-to-use interface built with Streamlit for analyzing policy documents - developed by GIZ Data and the Sustainable Development Solution Network.
-        It uses a minimal keyword extraction technique that leverages multiple NLP embeddings and relies on [Transformers] (https://huggingface.co/transformers/) 🤗 to create keywords/keyphrases that are most similar to a document.
-        """
-    )
-st.markdown("")
-st.markdown("")
-#st.markdown("##  📌 Step One: Upload document ")
-with st.container():
-    st.markdown("##  📌 Step One: Upload document ")
-    ##file = st.file_uploader('Upload PDF File', type=['pdf'])
-    text_str = read_(file)
-import seaborn as sns
-import pdfplumber
-from pandas import DataFrame
-from keybert import KeyBERT
-import matplotlib.pyplot as plt
-import numpy as np
 import streamlit as st
-    @st.cache(allow_output_mutation=True)
-    def load_model():
-        return KeyBERT()
-    kw_model = load_model()
-    keywords = kw_model.extract_keywords(
-    text_str,
-    keyphrase_ngram_range=(1, 2),
-    use_mmr=True,
-    stop_words="english",
-    top_n=10,
-    diversity=0.7,
-    )
-    st.markdown("## 🎈 What is my document about?")
-    df = (
-        DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
-        .sort_values(by="Relevancy", ascending=False)
-        .reset_index(drop=True)
-    )
-    df.index += 1
-    # Add styling
-    cmGreen = sns.light_palette("green", as_cmap=True)
-    cmRed = sns.light_palette("red", as_cmap=True)
-    df = df.style.background_gradient(
-        cmap=cmGreen,
-        subset=[
-            "Relevancy",
-        ],
-    )
-    c1, c2, c3 = st.columns([1, 3, 1])
-    format_dictionary = {
-        "Relevancy": "{:.1%}",
-    }
-    df = df.format(format_dictionary)
-    with c2:
-        st.table(df)
-    ######## SDG!
-    from transformers import pipeline
-    finetuned_checkpoint = "jonas/sdg_classifier_osdg"
-    classifier = pipeline("text-classification", model=finetuned_checkpoint)
-    word_list = text_str.split()
-    len_word_list = len(word_list)
-    par_list = []
-    par_len = 130
-    for i in range(0,len_word_list // par_len):
-        string_part = ' '.join(word_list[i*par_len:(i+1)*par_len])
-        par_list.append(string_part)
-    labels = classifier(par_list)
-    labels_= [(l['label'],l['score']) for l in labels]
-    df = DataFrame(labels_, columns=["SDG", "Relevancy"])
-    df['text'] = ['... '+par+' ...' for par in par_list]
-    df = df.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
-    df.index += 1
-    df =df[df['Relevancy']>.9]
-    x = df['SDG'].value_counts()
-    plt.rcParams['font.size'] = 25
-    colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))
-    # plot
-    fig, ax = plt.subplots()
-    ax.pie(x, colors=colors, radius=2, center=(4, 4),
-         wedgeprops={"linewidth": 1, "edgecolor": "white"}, frame=False,labels =list(x.index))
-    st.markdown("## 🎈 Anything related to SDGs?")
-    c4, c5, c6 = st.columns([5, 7, 1])
-    # Add styling
-    cmGreen = sns.light_palette("green", as_cmap=True)
-    cmRed = sns.light_palette("red", as_cmap=True)
-    df = df.style.background_gradient(
-        cmap=cmGreen,
-        subset=[
-            "Relevancy",
-        ],
-    )
-    format_dictionary = {
-        "Relevancy": "{:.1%}",
-    }
-    df = df.format(format_dictionary)
-    with c4:
-        st.pyplot(fig)
-    with c5:
-        st.table(df)

+import appStore.keyword_search as keyword_search
+import appStore.sdg_analysis as sdg_analysis
+# import appStore.check_site as check_site
+from appStore.multiapp import MultiApp
 import streamlit as st
+st.set_page_config(f'SDSN x GIZ Policy Tracing', layout="wide")
+app = MultiApp()
+app.add_app("Analyse Policy Document", sdg_analysis.app)
+app.add_app("KeyWord Search", keyword_search.app)
+# app.add_app("Check Coherence", check_site.app)
+app.run()