# Sakshi
# default prompt for suggest added
# 555dd1d
"""
Run analysis
@author : Sakshi Tantak
"""
# Imports
import os
from time import time
from datetime import datetime
from policy_analyser import PROMPTS_DIR, DATA_DIR
from policy_analyser.ocr import PyMuPDF4LLMOCR, AzureDocumentIntelligenceOCR
from policy_analyser.llm import call_openai
from policy_analyser.utils import markdown_table_to_json
class LOB:
    """Base three-stage document pipeline: OCR -> ANALYSE -> SUGGEST.

    (LOB presumably stands for "line of business" - confirm with the author.)
    Calling an instance runs the stages in order and returns a per-stage
    report. Subclasses provide the stage logic by overriding ``_analyse``
    and ``_suggest``.
    """

    def __init__(self, ocr_engine = 'open-source/pymupdf4llm'):
        """Pick the OCR engine and load the shared analysis prompt template.

        Args:
            ocr_engine: ``'open-source/pymupdf4llm'`` or ``'azure/layout'``.
        """
        if ocr_engine == 'open-source/pymupdf4llm':
            self.engine = PyMuPDF4LLMOCR()
        elif ocr_engine == 'azure/layout':
            self.engine = AzureDocumentIntelligenceOCR()
        # NOTE(review): any other value leaves ``self.engine`` unset; OCR of
        # raw bytes then fails inside ``__call__`` (reported, not raised).
        self.file_type = 'pdf'
        with open(os.path.join(PROMPTS_DIR, 'analysis.txt'), 'r') as f:
            self.analysis_prompt = f.read()

    def __call__(self, file_bytes):
        """Run OCR, analysis and suggestion on one document.

        Args:
            file_bytes: already-extracted text (``str``), or raw document
                content (``bytes`` / ``bytearray``) that is handed to the
                OCR engine.

        Returns:
            A list of three dicts, one per stage ('OCR', 'ANALYSE',
            'SUGGEST'), each with the keys 'stage', 'response' and 'time'.
            A stage that failed, was not reached, or produced nothing keeps
            the defaults ``''`` and ``0``. Exceptions are printed, never
            propagated.
        """
        response = [
            {'stage' : stage, 'response' : '', 'time' : 0}
            for stage in ('OCR', 'ANALYSE', 'SUGGEST')
        ]

        # Stage 1 : OCR (plain strings are treated as already-extracted text)
        try:
            print('OCR Started ...')
            ocr_start = time()
            if isinstance(file_bytes, str):
                text = file_bytes
            elif isinstance(file_bytes, (bytearray, bytes)):
                text, _ = self.engine(file_bytes)
            else:
                # Explicit error instead of an UnboundLocalError on ``text``.
                raise TypeError(f'Unsupported input type : {type(file_bytes).__name__}')
            ocr_end = time()
            print(f'OCR done [{ocr_end - ocr_start}]')
        except Exception as ocr_e:
            print(f'Exception while OCR : {ocr_e}')
            return response
        if not text:
            # Nothing to analyse; later stages keep their defaults.
            return response
        response[0].update({'response' : text, 'time' : ocr_end - ocr_start})

        # Stage 2 : analysis of the extracted text
        try:
            print('Analysing ...')
            analysis_start = time()
            raw_response = self._analyse(text = text)
            analysis_end = time()
            print('Analysis : ', raw_response)
            print(f'Analysed [{analysis_end - analysis_start}]')
        except Exception as analysis_e:
            print(f'Exception while analysing : {analysis_e}')
            return response
        if not raw_response:
            return response
        response[1].update({'response' : raw_response, 'time' : analysis_end - analysis_start})

        # Stage 3 : suggestion based on the analysis
        try:
            print('Suggesting our policy ...')
            suggestion_start = time()
            suggestion = self._suggest(analysis = raw_response)
            suggestion_end = time()
            print(f'Suggested [{suggestion_end - suggestion_start}]')
        except Exception as sugg_e:
            print(f'Exception while suggesting : {sugg_e}')
            return response
        if suggestion:
            response[2].update({'response' : suggestion, 'time' : suggestion_end - suggestion_start})
        return response

    def _analyse(self, **kwargs):
        """Analyse the extracted text (keyword ``text``); subclasses must override."""
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # raising it produces an unrelated TypeError.
        raise NotImplementedError

    def _suggest(self, **kwargs):
        """Build a suggestion from the analysis (keyword ``analysis``); subclasses must override."""
        raise NotImplementedError
class Health(LOB):
    """Health-insurance implementation of the ``LOB`` pipeline.

    Analyses a customer's health policy with an LLM prompt and then builds a
    second prompt that recommends either the regular policy or the super
    top-up, depending on the factors the analysis flagged.
    """

    # Appended whenever the regular policy document is offered.
    _SUPER_TOPUP_NOTE = '\nNote : Super Top up is an additional policy the customer can buy to enhance the benefits along with their current policy'

    def __init__(self, ocr_engine = 'open-source/pymupdf4llm'):
        """Load the health prompts and the two product documents.

        Args:
            ocr_engine: forwarded unchanged to ``LOB.__init__``.
        """
        super().__init__(ocr_engine)
        with open(os.path.join(PROMPTS_DIR, 'health', 'analysis_output_format.txt'), 'r') as f:
            self.analysis_output_format = f.read()
        with open(os.path.join(PROMPTS_DIR, 'health', 'rules.txt'), 'r') as f:
            self.rules = f.read()
        with open(os.path.join(PROMPTS_DIR, 'health', 'suggest.txt'), 'r') as f:
            self.suggest_prompt = f.read()
        with open(os.path.join(DATA_DIR, 'health_policy.md'), 'r') as f:
            self.acko_policy = f.read()
        with open(os.path.join(DATA_DIR, 'health_super_topup.md'), 'r') as f:
            self.acko_super_topup = f.read()

    def _analyse(self, **kwargs):
        """Ask the LLM to analyse the policy text.

        Args:
            **kwargs: expects ``text``, the extracted policy text.

        Returns:
            The LLM response, or ``''`` when ``text`` or the response is
            missing or empty.
        """
        text = kwargs.get('text')
        if not text:
            return ''
        prompt = (
            self.analysis_prompt
            .replace('{{lob}}', 'health')
            .replace('{{rules}}', self.rules)
            .replace('{{output_format}}', self.analysis_output_format)
        )
        # Read the clock once so day, month and year come from the same instant.
        today = datetime.today()
        prompt += 'Policy : ' + text + f"\n\nConsider today's date as {today.day}/{today.month}/{today.year} for your analysis on waiting periods and dates"
        response = call_openai(prompt)
        return response if response else ''

    @staticmethod
    def _extract_factors(analysis, tag, heading):
        """Parse the markdown table found between ``<tag>`` and ``</tag>``."""
        # presumably returns a list of row dicts keyed by column header - the
        # caller indexes each row with 'Factor'; verify against the helper.
        section = analysis.split(f'<{tag}>')[-1].split(f'</{tag}>')[0]
        return markdown_table_to_json(section.replace(heading, ''))

    @staticmethod
    def _mentions_sum_insured(factor_names):
        """Return True if any factor name contains 'sum insured' (case-insensitive)."""
        return any('sum insured' in name.lower() for name in factor_names)

    def _policy_prompt(self, analysis):
        """Suggestion prompt offering the regular policy document."""
        return self.suggest_prompt + "\nCustomer Policy Analysis : " + analysis + "\nAcko's Policy : " + self.acko_policy + self._SUPER_TOPUP_NOTE

    def _super_topup_prompt(self, analysis):
        """Suggestion prompt offering the super top-up document."""
        return self.suggest_prompt + "\nCustomer Policy Analysis : " + analysis + "\nAcko's Super Top-up Policy : " + self.acko_super_topup

    def _suggest(self, **kwargs):
        """Ask the LLM for a product suggestion based on the analysis.

        The regular policy is offered when more than three bad or more than
        three average factors were found. Otherwise the super top-up is
        offered when a table with fewer than three factors names the sum
        insured. In every other case the regular policy is offered.

        Args:
            **kwargs: expects ``analysis``, the text returned by ``_analyse``
                containing ``<BAD>`` and ``<AVERAGE>`` sections.

        Returns:
            The LLM response, or ``''`` when ``analysis`` or the response is
            missing or empty.
        """
        analysis = kwargs.get('analysis')
        if not analysis:
            return ''

        bad_factors = self._extract_factors(analysis, 'BAD', '## Bad Factors')
        bad_factor_names = [factor['Factor'] for factor in bad_factors]
        avg_factors = self._extract_factors(analysis, 'AVERAGE', '## Average Factors')
        # Names come from the average table itself, not the bad-factor table.
        avg_factor_names = [factor['Factor'] for factor in avg_factors]

        if len(bad_factors) > 3 or len(avg_factors) > 3:
            prompt = self._policy_prompt(analysis)
            print('selected policy')
        elif (len(bad_factors) < 3 and self._mentions_sum_insured(bad_factor_names)) \
                or (len(avg_factors) < 3 and self._mentions_sum_insured(avg_factor_names)):
            prompt = self._super_topup_prompt(analysis)
            print('selected super topup')
        else:
            prompt = self._policy_prompt(analysis)

        response = call_openai(prompt)
        return response if response else ''

    def __call__(self, file_bytes):
        """Run the full pipeline; delegates unchanged to ``LOB.__call__``."""
        return super().__call__(file_bytes)
if __name__ == '__main__':
    # Command-line driver: run the health pipeline on every path given as an
    # argument. '.pdf' files are read as bytes (and OCR'd by the pipeline);
    # '.txt' / '.md' files are read as already-extracted text.
    import sys
    from tqdm import tqdm

    filepaths = sys.argv[1:]
    health = Health()
    for filepath in tqdm(filepaths):
        print(filepath)
        if filepath.endswith('.pdf'):
            with open(filepath, 'rb') as f:
                file_bytes = f.read()
        elif filepath.endswith(('.txt', '.md')):
            with open(filepath) as f:
                file_bytes = f.read()
        else:
            # Skip rather than reuse the previous file's content (or crash on
            # the first file because ``file_bytes`` was never assigned).
            print(f'Skipping unsupported file type : {filepath}')
            continue
        # NOTE(review): the stage report is not persisted anywhere; progress
        # and results are only visible through the pipeline's own prints.
        analysis = health(file_bytes)