# Source: "policy analyser app" by Sakshi (revision 0106d5f, 4.89 kB).
"""
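Streamlit front end for the Policy Analyser: view saved analyses, run an
analysis on an uploaded PDF, and QC the extracted entities.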
"""
# Imports
import os
import glob
import json
from datetime import datetime
import base64
import openai
import streamlit as st
import pandas as pd
from policy_analyser.analyse import analyse
# Page-level Streamlit configuration: 'Policy Analyser' and the 🧐 emoji are
# passed positionally (presumably page title and icon — confirm against the
# Streamlit version in use), with the wide layout.
st.set_page_config('Policy Analyser', '🧐', layout = 'wide')
# def call_openai(system_prompt, document, seed = 42):
# messages = [{'role' : 'system', 'content' : system_prompt},
# {'role' : 'user', 'content' : document}]
# response = openai.ChatCompletion.create(
# engine = 'AskoGPT4-1106',
# messages = messages,
# seed = seed,
# temperature = 0.3,
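# api_key = os.environ['AZURE_OPENAI_API_KEY'],  # secret redacted: load from the environment, never hard-code (rotate the previously committed key)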
# api_version = '2023-07-01-preview',
# api_type = 'azure',
# api_base = 'https://asko-v1.openai.azure.com/'
# )
# return response.choices[0].message.content
def displayPDF(file):
    """Embed a PDF in the Streamlit page.

    Args:
        file: Either a filesystem path (``str``) to a PDF file, or the raw
            PDF content as a bytes-like object.

    Raises:
        OSError: If ``file`` is a path that cannot be opened for reading.
    """
    if isinstance(file, str):
        # Read through a context manager so the handle is closed right away
        # instead of being left for the garbage collector.
        with open(file, 'rb') as pdf_file:
            file_bytes = pdf_file.read()
    else:
        file_bytes = file
    base64_pdf = base64.b64encode(file_bytes).decode('utf-8')
    # The PDF is inlined as a base64 data URI inside an <embed> tag.
    pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf">'
    # Raw HTML has to be explicitly allowed for the <embed> tag to be rendered.
    st.markdown(pdf_display, unsafe_allow_html=True)
def view_saved_results():
    """Show a saved analysis next to its source PDF.

    Lists the ``*.analysis.json`` files found in the results directory in a
    sidebar select box. For the selected file, the sibling ``.pdf`` is
    embedded in the page and every stage of the analysis is rendered in the
    sidebar: EXTRACTION (its ``processed`` entry) and POST_PROCESS responses
    as JSON, and ANALYSE records as one table per verdict.
    """
    dirpath = '/Users/sakshi.tantak/Downloads/Porting Documents/testing-data/sample/poc'
    files = [file for file in os.listdir(dirpath) if file.endswith('.analysis.json')]
    file = st.sidebar.selectbox('Select Result to view', options = files)
    if file is None:
        # Nothing to show when the directory holds no analysis files.
        return

    file = os.path.join(dirpath, file)
    # The PDF is expected to sit next to the analysis with the same stem.
    pdf_path = file.replace('.analysis.json', '.pdf')
    displayPDF(pdf_path)

    # Context manager closes the handle; the original left it open.
    with open(file) as analysis_file:
        analysis = json.load(analysis_file)

    for stage in analysis:
        stage_name = stage['stage']
        if stage_name == 'EXTRACTION':
            st.sidebar.json(stage['response']['processed'])
        if stage_name == 'POST_PROCESS':
            st.sidebar.json(stage['response'])
        if stage_name == 'ANALYSE':
            df = pd.DataFrame.from_records(stage['response'])
            if 'verdict' not in df.columns:
                # An empty ANALYSE response yields a frame without columns;
                # skip it instead of failing on the 'verdict' lookup.
                continue
            for verdict in ['Good', 'Average', 'Bad']:
                df_tmp = df.loc[df['verdict'] == verdict]
                if len(df_tmp) > 0:
                    st.sidebar.markdown(f'**{verdict}**')
                    st.sidebar.table(df_tmp)
def run():
    """Analyse an uploaded PDF and display the results.

    Takes a PDF from the sidebar uploader, embeds it in the page, passes its
    bytes to ``analyse`` and shows the full result as JSON. Records of the
    ANALYSE stage are additionally shown in the sidebar as one table per
    verdict.
    """
    file = st.sidebar.file_uploader('Upload PDF')
    if file is None:
        # Nothing uploaded yet.
        return

    file_bytes = file.getvalue()
    displayPDF(file_bytes)
    analysis = analyse(file_bytes)
    st.json(analysis)

    for stage in analysis:
        # The stage name is compared, not indexed: the other pages in this
        # file treat stage['stage'] as a name string, and subscripting a
        # string with 'ANALYSE' raises TypeError.
        if stage['stage'] != 'ANALYSE':
            continue
        df = pd.DataFrame(stage['response'])
        if 'verdict' not in df.columns:
            # An empty ANALYSE response yields a frame without columns;
            # skip it instead of failing on the 'verdict' lookup.
            continue
        for verdict in ['Good', 'Average', 'Bad']:
            df_tmp = df.loc[df['verdict'] == verdict]
            if len(df_tmp) > 0:
                st.sidebar.markdown(f'**{verdict}**')
                st.sidebar.table(df_tmp)
def validate_results():
    """QC page for the entities extracted in a saved analysis.

    Lets the user pick a ``*.analysis.json`` file from the results directory,
    loads the ``processed`` entities of its EXTRACTION stage into an editable
    table (with ``isRight``, ``entityValueQC`` and ``QCRemarks`` columns added
    for the reviewer), embeds the sibling PDF for reference, and writes the
    edited table to ``<name>.qc-entities.csv`` when 'Save?' is pressed.
    """
    dirpath = '/Users/sakshi.tantak/Downloads/Porting Documents/testing-data/sample/poc'
    file = st.sidebar.selectbox('Select file to validation', options = [file for file in os.listdir(dirpath) if file.endswith('.analysis.json')])
    if file is None:
        # Nothing to validate when the directory holds no analysis files.
        return

    filepath = os.path.join(dirpath, file)
    # Context manager closes the handle; the original left it open.
    with open(filepath) as analysis_file:
        json_data = json.load(analysis_file)

    entities = None
    for stage in json_data:
        if stage['stage'] == 'EXTRACTION':
            entities = stage['response']['processed']

    if not entities:
        # Without an EXTRACTION stage (or with an empty one) there is no table
        # to build; previously this path failed on an unbound/column-less df.
        st.warning('No extracted entities found in the selected analysis.')
        return

    for entity in entities:
        # Seed the QC columns: the corrected value starts as the extracted one.
        entity.update(
            {'entityValueQC' : entity['entityValue'], 'isRight' : False, 'QCRemarks' : ''}
        )
    df = pd.DataFrame.from_records(entities)
    df = df[['isRight', 'entityName', 'entityValue', 'entityValueQC', 'QCRemarks']]
    # Cast to str so the QC value column is handled as text in the editor.
    df = df.astype({'entityValueQC' : str})
    df = st.data_editor(df)

    displayPDF(filepath.replace('.analysis.json', '.pdf'))

    if st.button('Save?'):
        df.to_csv(filepath.replace('.analysis.json', '.qc-entities.csv'), index = False)
def main():
    """Set up the app's navigation and run the page it returns.

    Three pages are registered: the saved-results viewer and the QC page
    under the 'View' section, and the upload-and-analyse page under
    'Run & Analyse'.
    """
    # Pages are created in the same order as before: viewer, runner, QC.
    saved_page, upload_page, qc_page = (
        st.Page(view_saved_results, title = 'View Save Results'),
        st.Page(run, title = 'Run your PDF'),
        st.Page(validate_results, title = 'QC'),
    )
    sections = {
        'View' : [saved_page, qc_page],
        'Run & Analyse' : [upload_page],
    }
    st.navigation(sections).run()
# Entry point: only start the app when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main()