Spaces:
Running
Running
""" | |
Rules for Policy Analyser | |
""" | |
# Imports | |
import os | |
import glob | |
import json | |
from datetime import datetime | |
import base64 | |
import openai | |
import streamlit as st | |
import pandas as pd | |
from policy_analyser.analyse import analyse | |
# Page-level configuration: browser tab title, tab icon and a full-width layout.
st.set_page_config(
    page_title='Policy Analyser',
    page_icon='🧐',
    layout='wide',
)
# def call_openai(system_prompt, document, seed = 42): | |
# messages = [{'role' : 'system', 'content' : system_prompt}, | |
# {'role' : 'user', 'content' : document}] | |
# response = openai.ChatCompletion.create( | |
# engine = 'AskoGPT4-1106', | |
# messages = messages, | |
# seed = seed, | |
# temperature = 0.3, | |
# api_key = os.environ['AZURE_OPENAI_API_KEY'],  # hard-coded key removed: load secrets from the environment and rotate the exposed key
# api_version = '2023-07-01-preview', | |
# api_type = 'azure', | |
# api_base = 'https://asko-v1.openai.azure.com/' | |
# ) | |
# return response.choices[0].message.content | |
def displayPDF(file):
    """Render a PDF inline in the current Streamlit page.

    Args:
        file: Either a filesystem path (``str``) to the PDF, or the PDF
            content itself as a bytes-like object.
    """
    if isinstance(file, str):
        # Read through a context manager so the file handle is closed
        # immediately instead of being left for the garbage collector.
        with open(file, 'rb') as pdf_file:
            file_bytes = pdf_file.read()
    else:
        file_bytes = file
    base64_pdf = base64.b64encode(file_bytes).decode('utf-8')
    # The document is embedded as a base64 data URI inside an <embed> tag.
    pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf">'
    # The markup is raw HTML, so it is passed with unsafe_allow_html=True.
    st.markdown(pdf_display, unsafe_allow_html=True)
def view_saved_results():
    """Show a saved analysis alongside the PDF it was produced from.

    Lists the ``*.analysis.json`` files in the results directory in a sidebar
    select box. For the chosen file, the PDF with the same base name is
    displayed and each stage of the analysis is rendered in the sidebar:
    EXTRACTION and POST_PROCESS responses as JSON, ANALYSE responses as one
    table per verdict.
    """
    dirpath = '/Users/sakshi.tantak/Downloads/Porting Documents/testing-data/sample/poc'
    suffix = '.analysis.json'
    files = [name for name in os.listdir(dirpath) if name.endswith(suffix)]
    file = st.sidebar.selectbox('Select Result to view', options = files)
    if file is None:
        # Nothing to select (e.g. no saved results in the directory).
        return

    file = os.path.join(dirpath, file)
    # Swap only the trailing suffix; str.replace would also rewrite any
    # earlier occurrence of '.analysis.json' in the path.
    pdf_path = file[:-len(suffix)] + '.pdf'
    displayPDF(pdf_path)

    # Context manager closes the handle instead of leaking it.
    with open(file) as analysis_file:
        analysis = json.load(analysis_file)

    for stage in analysis:
        stage_name = stage['stage']
        if stage_name == 'EXTRACTION':
            st.sidebar.json(stage['response']['processed'])
        elif stage_name == 'POST_PROCESS':
            st.sidebar.json(stage['response'])
        elif stage_name == 'ANALYSE':
            df = pd.DataFrame.from_records(stage['response'])
            # One table per verdict, skipping verdicts with no rows.
            for verdict in ['Good', 'Average', 'Bad']:
                df_tmp = df.loc[df['verdict'] == verdict]
                if len(df_tmp) > 0:
                    st.sidebar.markdown(f'**{verdict}**')
                    st.sidebar.table(df_tmp)
def run():
    """Analyse an uploaded PDF and display the outcome.

    The uploaded file is shown inline, passed to ``analyse`` as raw bytes,
    and the full result is rendered as JSON. The response of the ANALYSE
    stage is additionally shown in the sidebar as one table per verdict.
    """
    file = st.sidebar.file_uploader('Upload PDF')
    if file is None:
        # Nothing uploaded yet.
        return

    file_bytes = file.getvalue()
    displayPDF(file_bytes)
    analysis = analyse(file_bytes)
    st.json(analysis)

    for stage in analysis:
        # Compare the stage name rather than indexing into it: the original
        # `stage['stage']['ANALYSE']` raises TypeError when the name is a
        # string, which is how view_saved_results treats the same field.
        if stage['stage'] == 'ANALYSE':
            df = pd.DataFrame(stage['response'])
            # One table per verdict, skipping verdicts with no rows.
            for verdict in ['Good', 'Average', 'Bad']:
                df_tmp = df.loc[df['verdict'] == verdict]
                if len(df_tmp) > 0:
                    st.sidebar.markdown(f'**{verdict}**')
                    st.sidebar.table(df_tmp)
def validate_results():
    """QC page for reviewing the extracted entities of a saved analysis.

    The entities of the EXTRACTION stage are shown in an editable table with
    three added review columns: ``entityValueQC`` (pre-filled with the
    extracted value), ``isRight`` (initially False) and ``QCRemarks``
    (initially empty). The source PDF is displayed below the table, and the
    'Save?' button writes the edited table to a ``.qc-entities.csv`` file
    next to the analysis file.
    """
    dirpath = '/Users/sakshi.tantak/Downloads/Porting Documents/testing-data/sample/poc'
    suffix = '.analysis.json'
    file = st.sidebar.selectbox('Select file to validation', options = [name for name in os.listdir(dirpath) if name.endswith(suffix)])
    if file is None:
        # Nothing to select (e.g. no saved results in the directory).
        return

    filepath = os.path.join(dirpath, file)
    # Strip only the trailing suffix; str.replace would also rewrite any
    # earlier occurrence of '.analysis.json' in the path.
    base_path = filepath[:-len(suffix)]

    # Context manager closes the handle instead of leaking it.
    with open(filepath) as analysis_file:
        json_data = json.load(analysis_file)

    for stage in json_data:
        if stage['stage'] != 'EXTRACTION':
            continue
        entities = stage['response']['processed']
        for entity in entities:
            entity.update(
                {'entityValueQC' : entity['entityValue'], 'isRight' : False, 'QCRemarks' : ''}
            )
        df = pd.DataFrame.from_records(entities)
        df = df[['isRight', 'entityName', 'entityValue', 'entityValueQC', 'QCRemarks']]
        df = df.astype({'entityValueQC' : str})
        # st.data_editor returns the table including the reviewer's edits.
        df = st.data_editor(df)
        displayPDF(base_path + '.pdf')
        if st.button('Save?'):
            df.to_csv(base_path + '.qc-entities.csv', index = False)
def main():
    """Register the app's pages, grouped into sections, and run the selected one."""
    saved_results = st.Page(view_saved_results, title = 'View Save Results')
    upload_and_run = st.Page(run, title = 'Run your PDF')
    quality_check = st.Page(validate_results, title = 'QC')

    # Section heading -> pages listed under it in the navigation menu.
    sections = {
        'View' : [saved_results, quality_check],
        'Run & Analyse' : [upload_and_run],
    }
    selected_page = st.navigation(sections)
    selected_page.run()
# Script entry point: build and run the app only when executed as a script.
if __name__ == "__main__":
    main()