Spaces:

pikaduck
/

policy-analyser

Running

policy-analyser / policy_analyser /analyse_.py

Sakshi

arch lob agnostic

bef8e94 about 1 month ago

9.56 kB

	"""
	Run analysis
	@author : Sakshi Tantak
	"""

	# Imports
	from time import time
	from datetime import datetime

	from policy_analyser import ACKO_POLICY, ANALYSIS_PROMPT, SUGGESTION_PROMPT
	from policy_analyser.ocr import PyMuPDF4LLMOCR
	from policy_analyser.extraction import extract
	from policy_analyser.rules import prepare_payload, rules
	from policy_analyser.llm import call_openai

	# OCR = AzureLayoutOCR()
	OCR = PyMuPDF4LLMOCR()

	def _stage(stage, response, elapsed):
	    """Build one pipeline stage record: stage name, its output and time taken."""
	    return {'stage' : stage, 'response' : response, 'time' : elapsed}


	def _empty_rule_based_response(text):
	    """Return the rule-based result with every stage after OCR left empty.

	    Used when there is no text to analyse or extraction found no entities.
	    All timings, including OCR, are reported as 0 in this fallback.
	    """
	    return [
	        _stage('OCR', text, 0),
	        _stage('EXTRACTION', {'raw' : '', 'processed' : []}, 0),
	        _stage('POST_PROCESS', {}, 0),
	        _stage('ANALYSE', [], 0),
	        _stage('ANALYSIS_SUMMARY', {'Good' : '', 'Average' : '', 'Bad' : ''}, 0),
	    ]


	def _summarise(analysis):
	    """Ask the LLM for one customer-facing summary per verdict.

	    ``analysis`` is iterated as records with ``'verdict'`` and ``'reason'``
	    keys. A verdict with no matching reasons gets an empty string and no LLM
	    call is made for it.
	    """
	    summary = {}
	    for verdict in ['Good', 'Average', 'Bad']:
	        descriptions = '\n'.join(factor['reason'] for factor in analysis if factor['verdict'] == verdict)
	        if descriptions:
	            prompt = f"""Given the following analysis on the {verdict} factors of a customer's policy that they have bought, generate a crisp and catchy summary of the factors for a customer. Try to make it factor-wise with bullet points
	NOTE : THE POLICY WAS NOT SOLD BY US
	analysis : {descriptions}
	summary : """
	            verdict_summary = call_openai(prompt)
	            print(verdict_summary)
	        else:
	            verdict_summary = ''
	        summary[verdict] = verdict_summary
	    return summary


	def _analyse_with_rules(text, ocr_time):
	    """Rule-based path: extract entities, apply the rules, summarise per verdict.

	    This path emits no SUGGEST stage.
	    """
	    if not text:
	        # Nothing to extract from; skip the extraction call altogether.
	        return _empty_rule_based_response(text)

	    print('Extraction Started ...')
	    ext_start = time()
	    raw_response, entities = extract(text)
	    ext_end = time()
	    print(f'Extraction done [{ext_end - ext_start}]')
	    if len(entities) == 0:
	        return _empty_rule_based_response(text)

	    print('Preparing payload for analysis ...')
	    payload = prepare_payload(entities)
	    print('Payload prepared for analysis')

	    print('Analysing ...')
	    analysis_start = time()
	    analysis = rules(payload)
	    analysis_end = time()
	    print(f'Analysed [{analysis_end - analysis_start}]')

	    print('Summarising ...')
	    summary_start = time()
	    summary = _summarise(analysis)
	    summary_end = time()

	    return [
	        _stage('OCR', text, ocr_time),
	        _stage('EXTRACTION', {'raw' : raw_response, 'processed' : entities}, ext_end - ext_start),
	        _stage('POST_PROCESS', payload, 0),
	        _stage('ANALYSE', analysis, analysis_end - analysis_start),
	        _stage('ANALYSIS_SUMMARY', summary, summary_end - summary_start),
	    ]


	def _analyse_end2end(text, ocr_time):
	    """End-to-end path: one LLM call to analyse, one to suggest Acko's policy.

	    The result always holds exactly one OCR, one ANALYSE and one SUGGEST stage.
	    A stage that could not be produced (empty text, an LLM call returning
	    None, or an exception) is filled with an empty placeholder.
	    """
	    response = [_stage('OCR', text, ocr_time)]
	    if text:
	        try:
	            print('Analysing ...')
	            analysis_start = time()
	            # Read the clock once so day, month and year belong to the same instant.
	            today = datetime.today()
	            raw_response = call_openai(ANALYSIS_PROMPT + 'Policy : ' + text + f"\n\nConsider today's date as {today.day}/{today.month}/{today.year} for your analysis on waiting periods and dates")
	            analysis_end = time()
	            print('Analysis : ', raw_response)
	            print(f'Analysed [{analysis_end - analysis_start}]')
	            if raw_response is not None:
	                response.append(_stage('ANALYSE', raw_response, analysis_end - analysis_start))
	                print('Suggesting our policy ...')
	                suggestion_start = time()
	                suggestion = call_openai(SUGGESTION_PROMPT + "\nCustomer Policy Analysis : " + raw_response + "\nAcko's Policy : " + ACKO_POLICY)
	                suggestion_end = time()
	                print(f'Suggested [{suggestion_end - suggestion_start}]')
	                if suggestion is not None:
	                    response.append(_stage('SUGGEST', suggestion, suggestion_end - suggestion_start))
	        except Exception as e:
	            # Best-effort: report the failure and fall through to the placeholders.
	            print(e)

	    # Pad only the stages that are missing, so a failure during SUGGEST does
	    # not add a second, empty ANALYSE entry after the real one.
	    produced = {entry['stage'] for entry in response}
	    for stage_name in ('ANALYSE', 'SUGGEST'):
	        if stage_name not in produced:
	            response.append(_stage(stage_name, '', 0))
	    return response


	def analyse(file_bytes, end2end = False):
	    """Run OCR on a policy document and analyse the resulting text.

	    Parameters
	    ----------
	    file_bytes : str | bytes | bytearray
	        A ``str`` is used directly as the policy text. ``bytes`` / ``bytearray``
	        are passed to the module-level ``OCR`` callable, whose first return
	        value is taken as the text.
	    end2end : bool
	        ``False`` (default) runs extraction, rules and per-verdict LLM
	        summaries. ``True`` sends the whole text to the LLM for analysis and
	        then for a suggestion.

	    Returns
	    -------
	    list[dict]
	        One ``{'stage', 'response', 'time'}`` record per stage. Rule-based
	        path: OCR, EXTRACTION, POST_PROCESS, ANALYSE, ANALYSIS_SUMMARY.
	        End-to-end path: OCR, ANALYSE, SUGGEST. Stages that could not be
	        produced carry empty responses and a time of 0.

	    Raises
	    ------
	    TypeError
	        If ``file_bytes`` is not a ``str``, ``bytes`` or ``bytearray``.
	    """
	    print('OCR Started ...')
	    ocr_start = time()
	    if isinstance(file_bytes, str):
	        text = file_bytes
	    elif isinstance(file_bytes, (bytearray, bytes)):
	        text, _ = OCR(file_bytes)
	    else:
	        # Fail with a clear message instead of an unbound-variable error later on.
	        raise TypeError(f'file_bytes must be str, bytes or bytearray, got {type(file_bytes).__name__}')
	    ocr_end = time()
	    print(f'OCR done [{ocr_end - ocr_start}]')

	    if end2end:
	        return _analyse_end2end(text, ocr_end - ocr_start)
	    return _analyse_with_rules(text, ocr_end - ocr_start)

	if __name__ == '__main__':
	    # Batch mode: analyse every file path given on the command line and write
	    # the result next to the input as JSON (plus Markdown for end-to-end runs).
	    import os
	    import json
	    import sys
	    from tqdm import tqdm
	    filepaths = sys.argv[1:]
	    # Single switch for both the analyse() call and the output file names.
	    end2end = True

	    for filepath in tqdm(filepaths):
	        # Skip files produced by an earlier run of this script.
	        if '.analysis' in filepath or '.e2e-analysis' in filepath:
	            continue
	        print(filepath)
	        if filepath.endswith('.pdf'):
	            with open(filepath, 'rb') as f:
	                file_bytes = f.read()
	        elif filepath.endswith(('.txt', '.md')):
	            with open(filepath, encoding = 'utf-8') as f:
	                file_bytes = f.read()
	        else:
	            # Without this branch the previous file's contents would be
	            # analysed again (or the name would be unbound on the first file).
	            print(f'Skipping unsupported file type : {filepath}')
	            continue
	        analysis = analyse(file_bytes, end2end)
	        basepath = os.path.splitext(filepath)[0]
	        if not end2end:
	            with open(basepath + '.analysis.json', 'w') as f:
	                json.dump(analysis, f, indent = 4)
	        else:
	            with open(basepath + '.e2e-analysis.json', 'w') as f:
	                json.dump(analysis, f, indent = 4)
	            # Find the ANALYSE stage by name instead of by position, so a
	            # short or missing result yields an empty file and not a crash.
	            llm_analysis = next(
	                (stage['response'] for stage in (analysis or []) if stage['stage'] == 'ANALYSE'),
	                ''
	            )
	            with open(basepath + '.e2e-analysis.md', 'w', encoding = 'utf-8') as f:
	                f.write(llm_analysis or '')