Sakshi commited on
Commit
0106d5f
·
1 Parent(s): 3586943

policy analyser app

Browse files
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__
2
+ *.pycache
3
+ *.pyc
4
+ *.env
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import streamlit as st

# The policy_analyser package reads GPT_KEY from the environment while it is
# being imported, so the Streamlit-secrets fallback must run before any import
# that can pull that package in; otherwise the key set here is never seen.
if not os.environ.get('GPT_KEY'):
    os.environ['GPT_KEY'] = st.secrets['GPT_KEY']

from utils import validate_pdf  # noqa: E402
from styles import apply_custom_styles  # noqa: E402
from policy_analyser.analyse import analyse  # noqa: E402
11
+
12
def _stage_output(stages, stage_name):
    """Return the non-empty text produced by ``stage_name``.

    ``stages`` is the list of ``{'stage': ..., 'response': ...}`` records
    returned by ``analyse``. A ``ValueError`` is raised when the stage is
    absent or produced no text, so the caller can report a readable error
    instead of failing on ``None['response']`` or rendering an empty panel.
    """
    for item in stages or []:
        if item.get("stage") == stage_name:
            output = item.get("response")
            if isinstance(output, str) and output.strip():
                return output
            break
    raise ValueError(f"no output was produced by the {stage_name} stage")


def _between_tags(text, tag):
    """Return the part of ``text`` between ``[tag]`` and ``[/tag]``.

    A missing opening or closing marker is tolerated: the text is then taken
    from the start, or up to the end, respectively.
    """
    return text.split(f'[{tag}]')[-1].split(f'[/{tag}]')[0]


def main():
    """Render the page: upload policy PDFs, analyse each one and show the results."""
    # Apply custom styles
    apply_custom_styles()

    # Header
    st.markdown("""
        <div class="header-container">
            <img src="https://acko-brand.ackoassets.com/brand/vector-svg/gradient/horizontal-reverse.svg" height=50 width=100>
            <h1>Insurance Policy Analyzer</h1>
            <p>Upload and compare insurance policies</p>
        </div>
    """, unsafe_allow_html=True)

    # File upload section
    st.markdown('<div class="upload-container">', unsafe_allow_html=True)
    uploaded_files = st.file_uploader("Choose policy PDF files", type="pdf", accept_multiple_files=True)
    st.markdown('</div>', unsafe_allow_html=True)

    if uploaded_files and st.button('Analyse'):
        # Create tabs for different views
        tab1, tab2 = st.tabs(["Summary View", "Detailed Comparison"])

        # One entry per successfully analysed file:
        # {'name': ..., 'analysis': ..., 'suggestion': ...}
        all_analyses = []

        for uploaded_file in uploaded_files:
            pdf_bytes = uploaded_file.read()

            if not validate_pdf(pdf_bytes):
                st.error(f"Invalid PDF file: {uploaded_file.name}")
                continue

            with st.spinner(f"Analyzing {uploaded_file.name}..."):
                try:
                    # End-to-end mode: the stages of interest are ANALYSE and SUGGEST
                    response = analyse(pdf_bytes, True)
                    analysis = _between_tags(
                        _stage_output(response, "ANALYSE"), 'CUSTOMER_RESPONSE'
                    )
                    suggestion = _between_tags(
                        _stage_output(response, "SUGGEST"), 'POLICY_PITCH'
                    )
                    all_analyses.append({
                        'name': uploaded_file.name,
                        'analysis': analysis,
                        'suggestion': suggestion,
                    })
                except Exception as e:
                    # Top-level UI boundary: report the failure and keep
                    # processing the remaining files.
                    st.error(f"Error analyzing {uploaded_file.name}: {str(e)}")

        # Summary View Tab
        with tab1:
            for idx, analysis in enumerate(all_analyses):
                with st.expander(f"### Policy {idx + 1}: {analysis['name']}"):
                    with st.container():
                        st.markdown(analysis['analysis'])
                    with st.container():
                        st.markdown('# Why Acko? 🚀')
                        st.markdown(analysis['suggestion'])

        # Detailed Comparison Tab
        with tab2:
            if len(all_analyses) > 1:
                # The entries built above hold free-text analyses only (no
                # per-factor verdict lists), so the policies are compared by
                # showing their analyses side by side.
                st.markdown("### Policy Comparison")
                columns = st.columns(len(all_analyses))
                for idx, (column, analysis) in enumerate(zip(columns, all_analyses)):
                    with column:
                        st.markdown(f"#### Policy {idx + 1}: {analysis['name']}")
                        st.markdown(analysis['analysis'])
            else:
                st.info("Upload multiple policies to see comparison")

    # Footer
    st.markdown("""
        <div style="margin-top: 50px; text-align: center; color: #666;">
            <p>Upload one or more insurance policy PDFs to get detailed analysis and comparison.</p>
            <p>We support all major insurance providers.</p>
        </div>
    """, unsafe_allow_html=True)


if __name__ == "__main__":
    st.set_page_config(
        page_title="Insurance Policy Analyzer",
        page_icon="📋",
        layout="wide"
    )
    main()
main.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import io
3
+ from utils import mock_api_call, parse_analysis_response, validate_pdf, displayPDF
4
+ from styles import apply_custom_styles, show_factor_section, show_detailed_factors, show_factor_summary
5
+
6
+ from policy_analyser.analyse import analyse
7
+
8
_FACTOR_CATEGORIES = ('good_factors', 'average_factors', 'bad_factors')

# verdict -> (emoji appended to the section title, sentiment passed to the summary card)
_VERDICT_DECORATION = {
    'Good': ('✅', 'Yay!'),
    'Average': ('⚠️', 'Hmmmm'),
    'Bad': ('❌', 'Meh'),
}


def _factor_name(entry):
    """Return the factor name, i.e. the text before the first ':' of ``entry``."""
    return entry.split(':')[0]


def _verdict_for(analysis, factor):
    """Return 'Good'/'Average'/'Bad' for ``factor`` in ``analysis``, else 'Not Found'.

    The first category that lists the factor wins.
    """
    for category in _FACTOR_CATEGORIES:
        if any(_factor_name(item) == factor for item in analysis[category]):
            return category.split('_')[0].title()
    return 'Not Found'


def main():
    """Render the page: upload policy PDFs, run the rule-based analysis and show it."""
    # Apply custom styles
    apply_custom_styles()

    # Header
    st.markdown("""
        <div class="header-container">
            <img src="https://acko-brand.ackoassets.com/brand/vector-svg/gradient/horizontal-reverse.svg" height=50 width=100>
            <h1>Insurance Policy Analyzer</h1>
            <p>Upload and compare insurance policies</p>
        </div>
    """, unsafe_allow_html=True)

    # File upload section
    st.markdown('<div class="upload-container">', unsafe_allow_html=True)
    uploaded_files = st.file_uploader("Choose policy PDF files", type="pdf", accept_multiple_files=True)
    st.markdown('</div>', unsafe_allow_html=True)

    if uploaded_files:
        # Create tabs for different views
        tab1, tab2 = st.tabs(["Summary View", "Detailed Comparison"])

        # One entry per successfully analysed file
        all_analyses = []

        for uploaded_file in uploaded_files:
            pdf_bytes = uploaded_file.read()

            if not validate_pdf(pdf_bytes):
                st.error(f"Invalid PDF file: {uploaded_file.name}")
                continue

            with st.spinner(f"Analyzing {uploaded_file.name}..."):
                try:
                    response = analyse(pdf_bytes)
                    summary_stage = next(
                        (item for item in response if item.get("stage") == "ANALYSIS_SUMMARY"), None
                    )
                    if summary_stage is None:
                        raise ValueError("the analysis returned no ANALYSIS_SUMMARY stage")

                    good_factors, average_factors, bad_factors = parse_analysis_response(response)

                    # The summary is stored with its own policy so that each
                    # expander shows the text generated for that file.
                    all_analyses.append({
                        'name': uploaded_file.name,
                        'good_factors': good_factors,
                        'average_factors': average_factors,
                        'bad_factors': bad_factors,
                        'summary': summary_stage['response'],
                    })
                except Exception as e:
                    # Top-level UI boundary: report the failure and keep
                    # processing the remaining files.
                    st.error(f"Error analyzing {uploaded_file.name}: {str(e)}")

        # Summary View Tab
        with tab1:
            for idx, analysis in enumerate(all_analyses):
                with st.expander(f"### Policy {idx + 1}: {analysis['name']}"):
                    with st.container():
                        displayed = 0
                        for verdict in ['Good', 'Average', 'Bad']:
                            lst = [_factor_name(f) for f in analysis[f'{verdict.lower()}_factors']]
                            if not lst:
                                continue
                            emoji, sentiment = _VERDICT_DECORATION[verdict]
                            title = f'{verdict} Factors' + emoji
                            verdict_summary = analysis['summary'].get(verdict, '')
                            cols = st.columns(2)
                            # The first rendered section puts the factor list on
                            # the left; later sections swap the two columns.
                            list_first = displayed % 3 == 0
                            with st.container():
                                with cols[0]:
                                    if list_first:
                                        show_factor_section(title, lst, verdict.lower())
                                    else:
                                        show_factor_summary(verdict_summary, verdict.lower(), sentiment)
                                with cols[1]:
                                    if list_first:
                                        show_factor_summary(verdict_summary, verdict.lower(), sentiment)
                                    else:
                                        show_factor_section(title, lst, verdict.lower())
                            displayed += 1

                    st.markdown('-----')

        # Detailed Comparison Tab
        with tab2:
            if len(all_analyses) > 1:
                # Union of every factor name seen in any policy
                factors_to_compare = set()
                for analysis in all_analyses:
                    factors_to_compare.update(
                        _factor_name(f)
                        for category in _FACTOR_CATEGORIES
                        for f in analysis[category]
                    )

                st.markdown("### Policy Comparison Matrix")

                comparison_data = []
                for factor in sorted(factors_to_compare):
                    row = {'Factor': factor}
                    for idx, analysis in enumerate(all_analyses):
                        row[f"Policy {idx + 1}"] = _verdict_for(analysis, factor)
                    comparison_data.append(row)

                st.table(comparison_data)
            else:
                st.info("Upload multiple policies to see comparison")

    # Footer
    st.markdown("""
        <div style="margin-top: 50px; text-align: center; color: #666;">
            <p>Upload one or more insurance policy PDFs to get detailed analysis and comparison.</p>
            <p>We support all major insurance providers.</p>
        </div>
    """, unsafe_allow_html=True)


if __name__ == "__main__":
    st.set_page_config(
        page_title="Insurance Policy Analyzer",
        page_icon="📋",
        layout="wide"
    )
    main()
policy_analyser/__init__.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from dotenv import load_dotenv
4
+
5
try:
    load_dotenv('.env')
except Exception as exc:
    # Best effort: a broken or unreadable .env must not stop the package from
    # importing, because the values can still come from the real environment.
    print(f'Could not load .env: {exc}')

PACKAGE = 'policy_analyser'
PROJECT_DIR = os.getcwd()
# Bundled prompts/data are located relative to this file rather than the
# current working directory, so the package imports from any directory.
PACKAGE_PATH = os.path.dirname(os.path.abspath(__file__))
PROMPTS_DIR = os.path.join(PACKAGE_PATH, 'prompts')
DATA_DIR = os.path.join(PACKAGE_PATH, 'data')

CREDENTIALS = {
    'azure' : {
        'plain-text' : {
            'endpoint' : os.environ.get('AZURE_PLAIN_TEXT_ENDPOINT', ''),
            'key' : os.environ.get('AZURE_PLAIN_TEXT_KEY', '')
        },
        'layout' : {
            'endpoint' : os.environ.get('AZURE_LAYOUT_ENDPOINT', ''),
            'key' : os.environ.get('AZURE_LAYOUT_KEY', ''),
            'model' : os.environ.get('AZURE_LAYOUT_MODEL', '')
        }
    }
}

GPT_ENGINE = 'o3-mini'
GPT_KEY = os.environ.get('GPT_KEY', '')
GPT_VERSION = '2024-12-01-preview'
GPT_API_BASE = 'https://ai-ackods910341544474.openai.azure.com/'


def _read_text(path):
    """Return the UTF-8 text stored at ``path``, closing the file afterwards."""
    with open(path, encoding = 'utf-8') as f:
        return f.read()


EXTRACTION_PROMPT = _read_text(os.path.join(PROMPTS_DIR, 'extraction.txt'))
with open(os.path.join(DATA_DIR, 'policy_analyser_entities.json'), encoding = 'utf-8') as _entities_file:
    entities = json.load(_entities_file)
for entity in entities:
    # The numeric id is not included in the prompt; tolerate entries without one.
    entity.pop('entityId', None)
# One "key : value" line per field, entities separated by a '---' line.
entities_str = '\n---\n'.join(['\n'.join([f'{k} : {v}' for k, v in entity.items()]) for entity in entities])
EXTRACTION_PROMPT += entities_str
ANALYSIS_PROMPT = _read_text(os.path.join(PROMPTS_DIR, 'analysis.txt')).strip()
SUGGESTION_PROMPT = _read_text(os.path.join(PROMPTS_DIR, 'suggest.txt')).strip()
ACKO_POLICY = _read_text(os.path.join(DATA_DIR, 'Policy_Wordings_Acko_Personal_Health_Policy_Applicable_for_the_policies_sold_post_1_10_2024_64ea02eb51_ab3c8eefa2.md'))
policy_analyser/analyse.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Run analysis
3
+ @author : Sakshi Tantak
4
+ """
5
+
6
+ # Imports
7
+ from time import time
8
+ from datetime import datetime
9
+
10
+ from policy_analyser import ACKO_POLICY, ANALYSIS_PROMPT, SUGGESTION_PROMPT
11
+ from policy_analyser.ocr import AzureLayoutOCR, PyMuPDF4LLMOCR
12
+ from policy_analyser.extraction import extract
13
+ from policy_analyser.rules import prepare_payload, rules
14
+ from policy_analyser.llm import call_openai
15
+
16
# OCR = AzureLayoutOCR()
OCR = PyMuPDF4LLMOCR()

_VERDICTS = ('Good', 'Average', 'Bad')


def _stage(name, response, elapsed = 0):
    """Build one pipeline record; callers look records up by their 'stage' key."""
    return {'stage' : name, 'response' : response, 'time' : elapsed}


def _empty_rule_based_stages(text):
    """Return the rule-based stage list with empty results for every stage after OCR."""
    return [
        _stage('OCR', text),
        _stage('EXTRACTION', {'raw' : '', 'processed' : []}),
        _stage('POST_PROCESS', {}),
        _stage('ANALYSE', []),
        _stage('ANALYSIS_SUMMARY', {verdict : '' for verdict in _VERDICTS}),
    ]


def _summarise(analysis):
    """Ask the LLM for one customer-facing summary per verdict.

    Returns a dict keyed by 'Good', 'Average' and 'Bad'; a verdict with no
    factors maps to ''.
    """
    summary = {}
    for verdict in _VERDICTS:
        descriptions = '\n'.join(factor['reason'] for factor in analysis if factor['verdict'] == verdict)
        if len(descriptions) > 0:
            prompt = (
                f"Given the following analysis on the {verdict} factors of a customer's policy that they have bought, generate a crisp and catchy summary of the factors for a customer. Try to make it factor-wise with bullet points\n"
                "NOTE : THE POLICY WAS NOT SOLD BY US\n"
                f"analysis : {descriptions}\n"
                "summary : "
            )
            response = call_openai(prompt)
            print(response)
        else:
            response = ''
        summary[verdict] = response
    return summary


def _analyse_with_rules(text, ocr_time):
    """Run extraction -> payload -> rules -> summary and return the stage list."""
    if not text:
        return _empty_rule_based_stages(text)

    print('Extraction Started ...')
    ext_start = time()
    raw_response, entities = extract(text)
    ext_end = time()
    print(f'Extraction done [{ext_end - ext_start}]')
    if len(entities) == 0:
        return _empty_rule_based_stages(text)

    print('Preparing payload for analysis ...')
    payload = prepare_payload(entities)
    print('Payload prepared for analysis')

    print('Analysing ...')
    analysis_start = time()
    analysis = rules(payload)
    analysis_end = time()
    print(f'Analysed [{analysis_end - analysis_start}]')

    print('Summarising ...')
    summary_start = time()
    summary = _summarise(analysis)
    summary_end = time()

    return [
        _stage('OCR', text, ocr_time),
        _stage('EXTRACTION', {'raw' : raw_response, 'processed' : entities}, ext_end - ext_start),
        _stage('POST_PROCESS', payload),
        _stage('ANALYSE', analysis, analysis_end - analysis_start),
        _stage('ANALYSIS_SUMMARY', summary, summary_end - summary_start),
    ]


def _analyse_end2end(text, ocr_time):
    """Run the prompt-only analysis and the follow-up suggestion.

    The returned list always holds exactly one OCR, one ANALYSE and one
    SUGGEST record; a step that failed or returned nothing has '' as its
    response and 0 as its time.
    """
    analysis_text, analysis_time = '', 0
    suggestion_text, suggestion_time = '', 0
    if text:
        try:
            print('Analysing ...')
            # Read the date once so day/month/year cannot straddle midnight.
            today = datetime.today()
            analysis_start = time()
            raw_response = call_openai(ANALYSIS_PROMPT + 'Policy : ' + text + f"\n\nConsider today's date as {today.day}/{today.month}/{today.year} for your analysis on waiting periods and dates")
            analysis_end = time()
            print(f'Analysed [{analysis_end - analysis_start}]')
            if raw_response is not None:
                analysis_text, analysis_time = raw_response, analysis_end - analysis_start
                print('Suggesting our policy ...')
                suggestion_start = time()
                suggestion = call_openai(SUGGESTION_PROMPT + "\nCustomer Policy Analysis : " + raw_response + "\nAcko's Policy : " + ACKO_POLICY)
                suggestion_end = time()
                print(f'Suggested [{suggestion_end - suggestion_start}]')
                if suggestion is not None:
                    suggestion_text, suggestion_time = suggestion, suggestion_end - suggestion_start
        except Exception as e:
            # Best effort: whatever was produced before the failure is kept.
            print(e)
    return [
        _stage('OCR', text, ocr_time),
        _stage('ANALYSE', analysis_text, analysis_time),
        _stage('SUGGEST', suggestion_text, suggestion_time),
    ]


def analyse(file_bytes, end2end = False):
    """Analyse one policy document and return a list of per-stage records.

    Args:
        file_bytes: the document, either as already-extracted text (``str``)
            or as raw bytes that are passed through ``OCR``.
        end2end: when true, the text is analysed with the analysis and
            suggestion prompts (stages OCR, ANALYSE, SUGGEST); otherwise the
            extraction + rules pipeline runs (stages OCR, EXTRACTION,
            POST_PROCESS, ANALYSE, ANALYSIS_SUMMARY).

    Returns:
        A list of ``{'stage', 'response', 'time'}`` dicts. The list is always
        returned, with empty results when the text is empty or nothing could
        be extracted.

    Raises:
        TypeError: if ``file_bytes`` is neither ``str`` nor bytes-like.
    """
    print('OCR Started ...')
    ocr_start = time()
    if isinstance(file_bytes, str):
        text = file_bytes
    elif isinstance(file_bytes, (bytearray, bytes)):
        text, _ = OCR(file_bytes)
    else:
        raise TypeError(
            f'file_bytes must be str, bytes or bytearray, not {type(file_bytes).__name__}'
        )
    ocr_time = time() - ocr_start
    print(f'OCR done [{ocr_time}]')

    if end2end:
        return _analyse_end2end(text, ocr_time)
    return _analyse_with_rules(text, ocr_time)
203
+
204
if __name__ == '__main__':
    # Command-line use: analyse every file path given as an argument and write
    # the result next to the input file.
    import os
    import json
    import sys
    from tqdm import tqdm

    end2end = True

    for filepath in tqdm(sys.argv[1:]):
        # Skip outputs written by an earlier run
        if '.analysis' in filepath or '.e2e-analysis' in filepath:
            continue
        print(filepath)
        if filepath.endswith('.pdf'):
            with open(filepath, 'rb') as f:
                file_bytes = f.read()
        elif filepath.endswith(('.txt', '.md')):
            with open(filepath) as f:
                file_bytes = f.read()
        else:
            # Without this branch an unsupported file would be analysed with
            # the previous file's content (or fail on an undefined name).
            print(f'Skipping unsupported file type: {filepath}')
            continue

        analysis = analyse(file_bytes, end2end) or []
        basepath = os.path.splitext(filepath)[0]
        if not end2end:
            with open(basepath + '.analysis.json', 'w') as f:
                json.dump(analysis, f, indent = 4)
        else:
            with open(basepath + '.e2e-analysis.json', 'w') as f:
                json.dump(analysis, f, indent = 4)
            # Look the ANALYSE stage up by name instead of by list position
            markdown = next(
                (stage['response'] for stage in analysis if stage.get('stage') == 'ANALYSE'), ''
            )
            with open(basepath + '.e2e-analysis.md', 'w') as f:
                f.write(markdown or '')
policy_analyser/data/Policy_Wordings_Acko_Personal_Health_Policy_Applicable_for_the_policies_sold_post_1_10_2024_64ea02eb51_ab3c8eefa2.md ADDED
The diff for this file is too large to render. See raw diff
 
policy_analyser/data/__init__.py ADDED
File without changes
policy_analyser/data/policy_analyser_entities.json ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "entityId": 0,
4
+ "entityName": "Sum Insured (SI)",
5
+ "entityDesc": "Total sum insured according to selected insurance plan",
6
+ "expectedOutputFormat": "float"
7
+ },
8
+ {
9
+ "entityId": 1,
10
+ "entityName": "Pre-existing diseases (PED) Waiting period",
11
+ "entityDesc": "Waiting period on any pre-existing diseases after the policy start date for the selected insurance plan",
12
+ "expectedOutputFormat": "number of months in float"
13
+ },
14
+ {
15
+ "entityId": 2,
16
+ "entityName": "30-Day Waiting Period",
17
+ "entityDesc": "Is there waiting period on first 30 days after the policy start date for selected insurance plan?",
18
+ "expectedOutputFormat": "true/false"
19
+ },
20
+ {
21
+ "entityId": 3,
22
+ "entityName": "Specific Illness Waiting Period",
23
+ "entityDesc": "Waiting period on any specific illnesses after the policy start date for selected insurance plan",
24
+ "expectedOutputFormat": "number of months in float"
25
+ },
26
+ {
27
+ "entityId": 4,
28
+ "entityName": "Maternity waiting period",
29
+ "entityDesc": "Waiting period on maternity treatments and causes after policy start date for selected insurance plan",
30
+ "expectedOutputFormat": "number of months in float"
31
+ },
32
+ {
33
+ "entityId": 5,
34
+ "entityName": "Exclusions",
35
+ "entityDesc": "Diseases/procedures/treatments etc that are excluded from coverage of the policy under selected insurance plan",
36
+ "expectedOutputFormat": "stringified JSON list"
37
+ },
38
+ {
39
+ "entityId": 6,
40
+ "entityName": "Maternity benefits",
41
+ "entityDesc": "Are there any additional benefits for maternity illnesses, treatments or procedures under selected insurance plan?",
42
+ "expectedOutputFormat": "true/false"
43
+ },
44
+ {
45
+ "entityId": 7,
46
+ "entityName": "OPD",
47
+ "entityDesc": "Amount covering outpatient consultations like physician visits, medicines, etc for the customer under selected insurance plan",
48
+ "expectedOutputFormat": "float"
49
+ },
50
+ {
51
+ "entityId": 8,
52
+ "entityName": "Copay",
53
+ "entityDesc": "% of amount that a customer must bear themselves and remainder is paid by the insurer under selected insurance plan",
54
+ "expectedOutputFormat": "float"
55
+ },
56
+ {
57
+ "entityId": 9,
58
+ "entityName": "Deductible",
59
+ "entityDesc": "Amount above which the Sum Insured can be claimed in case of super topup for selected insurance plan",
60
+ "expectedOutputFormat": "float"
61
+ },
62
+ {
63
+ "entityId": 10,
64
+ "entityName": "Daycare treatment",
65
+ "entityDesc": "Daycare treatments and procedures where hospitalization may be required for < 24 hours under selected insurance plan",
66
+ "expectedOutputFormat": "stringified JSON list"
67
+ },
68
+ {
69
+ "entityId": 11,
70
+ "entityName": "Free Health checkup",
71
+ "entityDesc": "Are free full body health checkups offered under selected insurance plan?",
72
+ "expectedOutputFormat": "true/false"
73
+ },
74
+ {
75
+ "entityId": 12,
76
+ "entityName": "Restoration benefit",
77
+ "entityDesc": "Does the sum insured get restored after it gets exhausted for selected insurance plan?",
78
+ "expectedOutputFormat": "true/false"
79
+ },
80
+ {
81
+ "entityId": 13,
82
+ "entityName": "Sublimits",
83
+ "entityDesc": "Coverage amount on a claim for specific diseases and treatments under selected insurance plan. Extract all sublimits as JSON list of dictionaries of following schema: [{\"sublimit_name\": \"Name of sublimit\", \"sublimit_value\": Value of sublimit in float}]",
84
+ "expectedOutputFormat": "stringified JSON list"
85
+ },
86
+ {
87
+ "entityId": 14,
88
+ "entityName": "Room rent limit (proportionate deduction)",
89
+ "entityDesc": "% of total sum insured or amount of total sum insured that insurer shall pay for hospitalisation room rent for selected insurance plan. If given upto SI, find the amount or percentage.",
90
+ "expectedOutputFormat": "float or string"
91
+ },
92
+ {
93
+ "entityId": 15,
94
+ "entityName": "Pre & Post Hospitalization",
95
+ "entityDesc": "Are medical expenses leading to and after hospitalization covered under selected insurance plan?",
96
+ "expectedOutputFormat": "true/false"
97
+ },
98
+ {
99
+ "entityId": 16,
100
+ "entityName": "Domiciliary Cover",
101
+ "entityDesc": "Is home treatment covered under selected insurance plan?",
102
+ "expectedOutputFormat": "true/false"
103
+ },
104
+ {
105
+ "entityId": 17,
106
+ "entityName": "No claim bonus",
107
+ "entityDesc": "Reward offered to customer for selected insurance plan for not raising any claims in the past",
108
+ "expectedOutputFormat": "float"
109
+ },
110
+ {
111
+ "entityId": 18,
112
+ "entityName": "Ambulance cover",
113
+ "entityDesc": "Coverage for ambulance charges under selected insurance plan",
114
+ "expectedOutputFormat": "float"
115
+ },
116
+ {
117
+ "entityId": 19,
118
+ "entityName": "International coverage",
119
+ "entityDesc": "Are treatments administered outside India covered under selected insurance plan?",
120
+ "expectedOutputFormat": "true/false"
121
+ },
122
+ {
123
+ "entityId": 20,
124
+ "entityName": "Dental treatment",
125
+ "entityDesc": "Coverage for dental treatments and procedures under selected insurance plan",
126
+ "expectedOutputFormat": "float"
127
+ },
128
+ {
129
+ "entityId": 21,
130
+ "entityName": "AYUSH treatment",
131
+ "entityDesc": "Are Ayurvedic, Homeopathic and other alternative treatments covered under selected insurance plan?",
132
+ "expectedOutputFormat": "true/false"
133
+ },
134
+ {
135
+ "entityId": 22,
136
+ "entityName": "Health incentives",
137
+ "entityDesc": "Are any benefits for healthy habits offered under selected insurance plan?",
138
+ "expectedOutputFormat": "true/false"
139
+ },
140
+ {
141
+ "entityId": 23,
142
+ "entityName": "Wellness Services",
143
+ "entityDesc": "Are any complementary services and benefits offered under selected insurance plan?",
144
+ "expectedOutputFormat": "true/false"
145
+ },
146
+ {
147
+ "entityId": 24,
148
+ "entityName": "Consumables/ Non medical expenses",
149
+ "entityDesc": "Are expenses related to consumables like syringes, bandages, etc in the course of treatment covered under selected insurance plan?",
150
+ "expectedOutputFormat": "true/false"
151
+ },
152
+ {
153
+ "entityId": 25,
154
+ "entityName": "Hospital Cash",
155
+ "entityDesc": "Are amounts paid in hospital bills reimbursed under selected insurance plan?",
156
+ "expectedOutputFormat": "true/false"
157
+ },
158
+ {
159
+ "entityId": 26,
160
+ "entityName": "Policy Holder's Details",
161
+ "entityDesc": "Details of primary policy holder as a JSON dictionary of the following schema : {\"name\": \"Name of primary policy holder\", \"age\": \"Age of primary policy holder in float number\", \"date_of_birth\": \"Date of birth of primary policy holder in dd/mm/yyyy format\", \"address\": \"Residential address of primary policy holder\", \"city\": \"City of residence of primary policy holder\", \"email_address\": \"Email address of primary policy holder\", \"phone_number\": \"Phone number of primary policy holder in integer or string\", \"pan\": \"Permanent Account Number (PAN) of primary policy holder\"}",
162
+ "expectedOutputFormat": "stringified JSON dictionary"
163
+ },
164
+ {
165
+ "entityId": 27,
166
+ "entityName": "Insured Persons details",
167
+ "entityDesc": "Details of insured persons as a JSON list of dictionaries of the following schema : [{\"name\": \"Name of Insured person\", \"age\": \"Age of Insured person in float\", \"date_of_birth\": \"Date of birth of Insured person in dd/mm/yyyy format\", \"gender\": \"Gender of Insured person, one of [Male, Female, Other]\", \"relation\": \"Relationship of insured person with policy holder, one of [Self, Spouse, Mother, Father, Son, Daughter, Mother-in-law, Father-in-law]\", \"pre_existing_diseases\": \"Pre-existing diseases that Insured person has. Separate several diseases with comma in JSON list\"}]",
168
+ "expectedOutputFormat": "stringified JSON list"
169
+ },
170
+ {
171
+ "entityId": 28,
172
+ "entityName": "Policy Details",
173
+ "entityDesc": "Details of the selected insurance plan policy issued to customer as a JSON dictionary of the following schema : {\"insurance_company_name\": \"Name of the insurance company that has issued the given insurance policy\", \"policy_name\": \"Name of the given insurance policy as string\", \"policy_number\": \"Policy number of the given insurance policy\", \"date_of_issue\": \"Date on which the policy was issued to the policy holder in dd/mm/yyyy format\", \"policy_start_date\": \"Date on which policy activated in dd/mm/yyyy format\", \"policy_end_date\": \"Date on which policy expires in dd/mm/yyyy format\", \"renewal_date\": \"Date on which policy was renewed in dd/mm/yyyy format\"}",
174
+ "expectedOutputFormat": "stringified JSON dictionary"
175
+ }
176
+ ]
policy_analyser/extraction.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Extraction
3
+ @author : Sakshi Tantak
4
+ """
5
+
6
+ # Imports
7
+ import os
8
+ import re
9
+ import json
10
+
11
+ from policy_analyser import EXTRACTION_PROMPT
12
+ from policy_analyser.llm import call_openai
13
+
14
+
15
def _strip_code_fence(raw):
    """Remove a surrounding markdown code fence (``` or ```json) from an LLM reply.

    Only the fence at the very start and end is removed, so backticks or the
    word "json" inside the extracted values are left untouched.
    """
    cleaned = raw.strip()
    cleaned = re.sub(r'^```(?:json)?\s*', '', cleaned, flags = re.IGNORECASE)
    cleaned = re.sub(r'\s*```$', '', cleaned)
    return cleaned


def extract(text):
    """Extract the configured entities from ``text`` with the LLM.

    Returns:
        ``(raw_response, entities)`` where ``raw_response`` is the reply text
        ('' when the call failed or returned nothing) and ``entities`` is a
        list of ``{'entityName': ..., 'entityValue': ...}`` dicts, empty when
        the reply is not a JSON object.
    """
    raw_response = ''
    try:
        raw_response = call_openai(EXTRACTION_PROMPT + '\nPolicy Document : ' + text)
        print(raw_response)
    except Exception as e:
        # Best effort: a failed LLM call yields an empty extraction.
        print(e)
        return '', []

    if not raw_response:
        return '', []

    try:
        parsed = json.loads(_strip_code_fence(raw_response))
    except json.JSONDecodeError:
        return raw_response, []
    if not isinstance(parsed, dict):
        return raw_response, []
    return raw_response, [{'entityName' : k, 'entityValue' : v} for k, v in parsed.items()]
policy_analyser/llm.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Call OpenAI
3
+ @author : Sakshi Tantak
4
+ """
5
+
6
+ # Imports
7
+ import os
8
+
9
+ from openai import AzureOpenAI
10
+
11
+ from policy_analyser import GPT_ENGINE, GPT_API_BASE, GPT_KEY, GPT_VERSION
12
+
13
# Shared Azure OpenAI client, configured from the package-level settings.
CLIENT = AzureOpenAI(
    api_version = GPT_VERSION,
    api_key = GPT_KEY,
    azure_endpoint = GPT_API_BASE,
)


def call_openai(system_prompt, seed = 42):
    """Send ``system_prompt`` as a single user message and return the reply text.

    ``seed`` is accepted but is not forwarded to the API by this function.
    """
    print('Calling openai')
    completion = CLIENT.chat.completions.create(
        model = GPT_ENGINE,
        messages = [{'role' : 'user', 'content' : system_prompt}],
        reasoning_effort = 'low',
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
policy_analyser/ocr.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OCR
3
+ @author : Sakshi Tantak
4
+ """
5
+
6
+ # Imports
7
+ import json
8
+
9
+ from azure.core.credentials import AzureKeyCredential
10
+ from azure.ai.formrecognizer import DocumentAnalysisClient
11
+ import pymupdf4llm, pymupdf
12
+
13
+ from policy_analyser import CREDENTIALS
14
+
15
def convert_nested_complex_obj_to_json(result):
    """Convert an arbitrarily nested object into plain JSON-compatible data.

    The object is round-tripped through ``json``; anything the encoder cannot
    serialise natively is replaced by its ``__dict__``.
    """
    def _attributes(obj):
        return obj.__dict__

    serialised = json.dumps(result, default = _attributes)
    return json.loads(serialised)
18
+
19
class AzureLayoutOCR:
    """OCR engine that runs the Azure layout model and renders the result as markdown.

    Calling an instance with raw file bytes returns ``(markdown_text, raw_response)``.
    """

    def __init__(self):
        self.client = self._authenticate()
        self.engine = 'azure/layout'

    def _authenticate(self):
        """Build the analysis client from ``CREDENTIALS['azure']['layout']``."""
        # NOTE(review): connection_verify=False is kept as in the original; it
        # appears to turn off TLS certificate verification - confirm this is
        # intended outside local testing.
        client = DocumentAnalysisClient(
            endpoint=CREDENTIALS['azure']['layout']['endpoint'],
            credential=AzureKeyCredential(CREDENTIALS['azure']['layout']['key']),
            connection_verify=False
        )
        return client

    def _table2md(self, table, **kwargs):
        """Render one table dict as a markdown table.

        Args:
            table: Dict with ``row_count``, ``column_count``, ``cells`` and ``spans``.

        Returns:
            ``(markdown, (start_offset, end_offset))`` where the offsets come
            from the first and last entries of ``table['spans']``.
        """
        row_count, column_count = table['row_count'], table['column_count']
        spans = table['spans']
        table_offsets = (spans[0]['offset'], spans[-1]['offset'] + spans[-1]['length'])

        # Row 0 is a blank leading row; every source row is shifted down by one.
        markdown_table = [[''] * column_count for _ in range(row_count + 1)]

        header_rows = {0}
        for cell in table['cells']:
            target_row = cell['row_index'] + 1
            content = cell['content'].replace('|', '')
            if cell['kind'] == 'columnHeader':
                content = '**' + content + '**'
                header_rows.add(target_row)
            markdown_table[target_row][cell['column_index']] = content

        separator = '| ' + ' | '.join(['---'] * column_count) + ' |\n'
        lines = []
        # Track rows by position: looking rows up by value would treat any
        # later row equal to an earlier one (e.g. a blank row) as a header.
        for position, row in enumerate(markdown_table):
            lines.append('| ' + ' | '.join(row) + ' |\n')
            if position in header_rows:
                lines.append(separator)

        return ''.join(lines), table_offsets

    def _paragraphs2md(self, paragraph, element_offsets, **kwargs):
        """Render one paragraph dict as markdown.

        Args:
            paragraph: Dict with ``content``, ``spans`` and an optional ``role``.
            element_offsets: ``(start, end)`` offset pairs of already rendered
                elements (tables); a paragraph starting inside one is skipped.

        Returns:
            ``(markdown, (start_offset, end_offset))`` or ``(None, None)`` when
            the paragraph starts inside one of ``element_offsets``.
        """
        spans = paragraph['spans']
        paragraph_offsets = (spans[0]['offset'], spans[-1]['offset'] + spans[-1]['length'])
        start = paragraph_offsets[0]
        for offset in element_offsets:
            if offset[0] <= start <= offset[1]:
                return None, None

        role = paragraph.get('role')
        if role == 'title':
            markdown_text = f'# {paragraph["content"]}'
        elif role == 'sectionHeading':
            # The role must be compared, not the paragraph dict itself.
            markdown_text = f'## {paragraph["content"]}'
        else:
            markdown_text = f'{paragraph["content"]}'
        return markdown_text, paragraph_offsets

    def _stitch_paragraphs_elements(self, paragraphs, elements, **kwargs):
        """Merge paragraph and element records, ordered by their start offset."""
        new_list = paragraphs + elements
        sorted_new_list = sorted(new_list, key=lambda x: x['offset'][0])
        return sorted_new_list

    def _convert2md(self, result, **kwargs):
        """Convert the JSON-converted analysis result into one markdown string."""
        paragraphs, tables = result['paragraphs'], result['tables']
        md_tables = []
        for table in tables:
            md, offset = self._table2md(table, requestId=kwargs.get('requestId'))
            md_tables.append({'content': md, 'offset': offset})

        table_offsets = [element['offset'] for element in md_tables]
        md_paragraphs = []

        for para in paragraphs:
            md, offset = self._paragraphs2md(para, table_offsets, requestId=kwargs.get('requestId'))
            if md is not None:
                md_paragraphs.append({'content': md, 'offset': offset})

        all_md_elements = self._stitch_paragraphs_elements(md_paragraphs, md_tables, requestId=kwargs.get('requestId'))
        full_md = '\n\n'.join([record['content'] for record in all_md_elements])
        return full_md

    def _call_engine(self, image_reader, **kwargs):
        """Run the configured model on ``image_reader`` and return ``(markdown, raw_result)``."""
        poller = self.client.begin_analyze_document(
            CREDENTIALS['azure']['layout']['model'],
            image_reader
        )
        result = poller.result()

        result = convert_nested_complex_obj_to_json(result)
        md_text = self._convert2md(result, requestId=kwargs.get('requestId'))

        return md_text, result

    def __call__(self, file_bytes):
        text, raw_response = self._call_engine(file_bytes)
        return text, raw_response
120
+
121
class PyMuPDF4LLMOCR:
    """Text extractor that converts a document to markdown with pymupdf4llm.

    Calling an instance returns ``(markdown_text, None)``; the second slot
    mirrors the ``(text, raw_response)`` shape of the other OCR engine.
    """

    def __init__(self):
        self.engine = 'open-source/pymupdf4llm'
        self.file_type = 'pdf'

    def _create_document(self, file_bytes, file_type = None):
        """Open ``file_bytes`` as a pymupdf document, defaulting to ``self.file_type``."""
        chosen_type = file_type
        if chosen_type is None:
            chosen_type = self.file_type
        return pymupdf.open(stream = file_bytes, filetype = chosen_type)

    def __call__(self, file_bytes, file_type = None):
        markdown = pymupdf4llm.to_markdown(self._create_document(file_bytes, file_type))
        return markdown, None
133
+
134
if __name__ == '__main__':
    # Manual smoke test: OCR the file given as the first CLI argument and print the markdown.
    import sys
    filepath = sys.argv[1]
    # Read through a context manager so the file handle is closed promptly.
    with open(filepath, 'rb') as input_file:
        file_bytes = input_file.read()
    ocr = AzureLayoutOCR()
    text, raw_response = ocr(file_bytes)
    print(text)
policy_analyser/prompts/__init__.py ADDED
File without changes
policy_analyser/prompts/analysis.txt ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Given the markdown content of a customer's health insurance policy, analyse the insurance policy for the customer by applying given rules for specific factors of the policy.
2
+
3
+ Apply the following rules enclosed in triple backticks on the policy to analyse it.
4
+ Make sure you consider the values for analysis factors on the basis of the customer's selected insurance plan when multiple plans are described in the policy terms.
5
+ Make sure all factors appear in one of Good, Average or Bad only. No factor should be repeated in more than 1 verdict table
6
+ Note : Top cities = [Mumbai, Delhi, Bangalore, Chennai, Hyderabad, Gurgaon, Pune]
7
+ ```
8
+ IF Adults == 1:
9
+ IF Is_Top_City:
10
+ IF Sum_Insured >= 2500000:
11
+ Verdict = "Good"
12
+ ELSE IF Sum_Insured >= 1000000 AND Sum_Insured < 2500000:
13
+ Verdict = "Average"
14
+ ELSE:
15
+ Verdict = "Bad"
16
+ ELSE:
17
+ IF Sum_Insured >= 1000000:
18
+ Verdict = "Good"
19
+ ELSE IF Sum_Insured >= 500000 AND Sum_Insured < 1000000:
20
+ Verdict = "Average"
21
+ ELSE:
22
+ Verdict = "Bad"
23
+
24
+ IF Adults >= 2:
25
+ IF Children == 0:
26
+ IF Is_Top_City:
27
+ IF Sum_Insured >= 5000000:
28
+ Verdict = "Good"
29
+ ELSE IF Sum_Insured >= 2500000 AND Sum_Insured < 5000000:
30
+ Verdict = "Average"
31
+ ELSE:
32
+ Verdict = "Bad"
33
+ ELSE:
34
+ IF Sum_Insured >= 2500000:
35
+ Verdict = "Good"
36
+ ELSE IF Sum_Insured >= 1000000 AND Sum_Insured < 2500000:
37
+ Verdict = "Average"
38
+ ELSE:
39
+ Verdict = "Bad"
40
+
41
+ IF Children >= 1:
42
+ IF Children > 1 OR Is_Top_City:
43
+ IF Sum_Insured >= 10000000:
44
+ Verdict = "Good"
45
+ ELSE IF Sum_Insured >= 5000000 AND Sum_Insured < 10000000:
46
+ Verdict = "Average"
47
+ ELSE:
48
+ Verdict = "Bad"
49
+ ELSE:
50
+ IF Sum_Insured >= 5000000:
51
+ Verdict = "Good"
52
+ ELSE IF Sum_Insured >= 2500000 AND Sum_Insured < 5000000:
53
+ Verdict = "Average"
54
+ ELSE:
55
+ Verdict = "Bad"
56
+
57
+ # Room Rent Limit
58
+ IF Room_Rent_Limit > 0:
59
+ Verdict = "Bad"
60
+ ELSE:
61
+ Verdict = "Good"
62
+
63
+ # Deductibles
64
+ IF Deductible > 0:
65
+ Verdict = "Bad"
66
+ ELSE:
67
+ Verdict = "Good"
68
+
69
+ # Sublimits
70
+ IF Sublimits == EMPTY:
71
+ Verdict = "Good"
72
+ ELSE:
73
+ Verdict = "Bad"
74
+
75
+ # Copayment
76
+ IF Copay <= 5:
77
+ Verdict = "Good"
78
+ ELSE IF Copay > 5 AND Copay <= 10:
79
+ Verdict = "Average"
80
+ ELSE:
81
+ Verdict = "Bad"
82
+
83
+ # Pre-existing Diseases (PED) Waiting Period
84
+ IF PED_Waiting_Period > 0:
85
+ IF Policy_Age > PED_Waiting_Period:
86
+ Verdict = "Good"
87
+ ELSE:
88
+ Verdict = "Bad"
89
+ ELSE:
90
+ Verdict = "Good"
91
+
92
+ # 30-Day Waiting Period
93
+ IF Thirty_Day_Waiting_Period:
94
+ IF Policy_Age > 1:
95
+ Verdict = "Good"
96
+ ELSE:
97
+ Verdict = "Bad"
98
+ ELSE:
99
+ Verdict = "Good"
100
+
101
+ # Specific Illness Waiting Period
102
+ IF Specific_Illness_Waiting_Period > 0:
103
+ IF Policy_Age > Specific_Illness_Waiting_Period:
104
+ Verdict = "Good"
105
+ ELSE:
106
+ Verdict = "Bad"
107
+ ELSE:
108
+ Verdict = "Good"
109
+
110
+ # Maternity Benefits
111
+ IF Maternity_Benefits:
112
+ Verdict = "Good"
113
+ IF Maternity_Waiting_Period > 0:
114
+ IF Policy_Age > Maternity_Waiting_Period:
115
+ Verdict = "Good"
116
+ ELSE:
117
+ Verdict = "Bad"
118
+ ELSE:
119
+ Verdict = "Good"
120
+ ELSE:
121
+ Verdict = "Bad"
122
+ ```
123
+
124
+ Format your response in the following way, to present the analysis to the customer. Use appropriate language and emojis to portray the analysis and verdicts to the customer. Be discreet about the rules: do not expose the rules to the customer, but use them to explain the reasoning and analysis:
125
+
126
+ [CUSTOMER_RESPONSE]
127
+ # Our Analysis of your policy [Name of policy] by [Name of insurance company]
128
+ ## Good Factors
129
+ | Factor | Your policy | Our Analysis |
130
+ | --- | --- | --- |
131
+ | Sum Insured | Value of sum insured in the policy | Analysis of why sum insured is good based on the given rules |
132
+
133
+ ## Average Factors
134
+ | Factor | Your policy | Our Analysis |
135
+ | --- | --- | --- |
136
+ | Copay | Value of copayment in the policy | Analysis of why copay is average based on the given rules |
137
+
138
+ ## Bad Factors
139
+ | Factor | Your policy | Our Analysis |
140
+ | --- | --- | --- |
141
+ | Deductible | Value of deductible in the policy | Analysis of why deductible is bad based on the given rules |
142
+
143
+ [/CUSTOMER_RESPONSE]
policy_analyser/prompts/extraction.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Given markdown content of a policy document, extract given list of entities.
2
+
3
+ Format your response as a JSON dictionary exactly in the following schema:
4
+ ```
5
+ {
6
+ "Name of entity from entities list" : "Entity value for entity name in `expectedOutputFormat` based on entity description given in `entityDesc`"
7
+ }
8
+ ```
9
+ If any numerical or quantitative entities are absent in the document, default entityValue to 0.
10
+
11
+ Entities:
policy_analyser/prompts/suggest.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ A customer's insurance policy was analysed by you. Using the analysis and Acko's health insurance policy, pitch Acko's policy to the customer by comparing it with their policy on the basis of the analysis and reasoning on the advantages and disadvantages of their policy.
2
+ Format your response in the following format in less than 150 words.
3
+ Format in bullet points with respect to comparing factors and include catchy emojis wherever required:
4
+
5
+ [POLICY_PITCH] Policy pitch [/POLICY_PITCH]
policy_analyser/rules.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Rules for policy analyser
3
+ @author : Sakshi Tantak
4
+ """
5
+
6
+ # Imports
7
+ import json
8
+ from datetime import datetime
9
+
10
+ from policy_analyser.llm import call_openai
11
+
12
# Entity names grouped by how their extracted value is interpreted.
_LIST_ENTITIES = ('Exclusions', 'Daycare treatment', 'Sublimits')
_NUMERIC_ENTITIES = (
    'Sum Insured (SI)', 'Pre-existing diseases (PED) Waiting period', 'Specific Illness Waiting Period',
    'Maternity waiting period', 'OPD', 'Copay', 'Deductible', 'No claim bonus', 'Ambulance cover',
    'Dental treatment', 'Room rent limit (proportionate deduction)',
)
_BOOLEAN_ENTITIES = (
    '30-Day Waiting Period', 'Maternity benefits', 'Free Health checkup',
    'Restoration benefit', 'Pre & Post Hospitalization', 'Domiciliary Cover',
    'International coverage', 'AYUSH treatment', 'Health incentives', 'Wellness Services',
    'Consumables/ Non medical expenses', 'Hospital Cash',
)


def _parse_json_value(value):
    """Decode *value* from JSON when it is text; return it unchanged otherwise."""
    if isinstance(value, (str, bytes, bytearray)):
        return json.loads(value)
    return value


def _person_age(person, today):
    """Return an insured person's age in years, or None when it cannot be determined."""
    if not isinstance(person, dict):
        return None
    dob = person.get('date_of_birth')
    if dob:
        try:
            return (today - datetime.strptime(dob, '%d/%m/%Y')).days / 365
        except (TypeError, ValueError):
            pass  # fall back to an explicit 'age' field below
    try:
        return float(person['age'])
    except (KeyError, TypeError, ValueError):
        return None


def _is_top_city(city):
    """Ask the LLM whether *city* is one of the top cities; True when the call fails."""
    prompt = (
        'Does a given city string belong to set of given cities : '
        '[Mumbai, Delhi, Bangalore, Chennai, Hyderabad, Gurgaon, Pune]. '
        'Answer in true/false only'
        f'\nCity : {city}'
    )
    try:
        # The city must be part of the prompt: a second positional argument
        # to call_openai is its `seed`, not extra prompt text.
        response = call_openai(prompt)
    except Exception as e:
        print(e)
        return True  # same default the payload starts with
    return str(response).strip().lower().startswith('true')


def prepare_payload(extraction):
    """Turn extracted entities into the flat payload consumed by ``rules``.

    Args:
        extraction: Iterable of ``{'entityName': ..., 'entityValue': ...}``
            dicts. Values may be JSON strings or already-decoded objects.

    Returns:
        A dict holding one entry per known factor. Any entity that is missing
        or cannot be interpreted keeps the default set below.
    """
    payload = {
        'Sum Insured (SI)' : 0,
        'Pre-existing diseases (PED) Waiting period' : 0,
        '30-Day Waiting Period' : False,
        'Specific Illness Waiting Period' : 0,
        'Maternity waiting period' : 0,
        'Exclusions' : [],
        'Maternity benefits' : False,
        'OPD' : 0,
        'Copay' : 0,
        'Deductible' : 0,
        'Daycare treatment' : [],
        'Free Health checkup' : False,
        'Restoration benefit' : False,
        'Sublimits' : [],
        'Room rent limit (proportionate deduction)' : 100,
        'Pre & Post Hospitalization' : False,
        'Domiciliary Cover' : False,
        'No claim bonus' : 0,
        'Ambulance cover' : 0,
        'International coverage' : False,
        'Dental treatment' : 0,
        'AYUSH treatment' : False,
        'Health incentives' : False,
        'Wellness Services' : False,
        'Consumables/ Non medical expenses' : False,
        'Hospital Cash' : False,
        'Adults' : 0,
        'Children' : 0,
        'City' : '',
        'Is Top City' : True,
        'Policy Age' : 0
    }
    today = datetime.today()

    for entity in extraction:
        name, value = entity['entityName'], entity['entityValue']

        if name in _LIST_ENTITIES:
            try:
                parsed = _parse_json_value(value)
            except (TypeError, ValueError):
                parsed = None
            if isinstance(parsed, dict):
                parsed = [parsed]
            # Only lists are stored; a placeholder such as "0" keeps the [] default.
            if isinstance(parsed, list):
                payload[name] = parsed

        elif name == "Policy Holder's Details":
            city, is_top_city = '', True
            try:
                details = _parse_json_value(value)
            except (TypeError, ValueError):
                details = None
            if isinstance(details, dict) and 'city' in details:
                city = details['city']
                if city:
                    is_top_city = _is_top_city(city)
            payload['Is Top City'] = is_top_city
            payload['City'] = city

        elif name == 'Insured Persons details':
            # Fallback when the value cannot be read at all: assume one adult.
            num_adults, num_children = 1, 0
            try:
                persons = _parse_json_value(value)
            except (TypeError, ValueError):
                persons = None
            if isinstance(persons, dict):
                persons = [persons]
            if isinstance(persons, list):
                num_adults = num_children = 0
                for person in persons:
                    age = _person_age(person, today)
                    # A person whose age is unknown is counted as an adult.
                    if age is None or age >= 18:
                        num_adults += 1
                    else:
                        num_children += 1
            payload['Adults'] = num_adults
            payload['Children'] = num_children

        elif name == 'Policy Details':
            try:
                details = _parse_json_value(value)
                start = datetime.strptime(details['policy_start_date'], '%d/%m/%Y')
            except (KeyError, TypeError, ValueError):
                start = None
            if start is not None:
                # Policy age in months.
                payload['Policy Age'] = ((today - start).days / 365) * 12

        elif name in _NUMERIC_ENTITIES:
            if isinstance(value, (float, int)):
                payload[name] = value
            else:
                try:
                    payload[name] = float(value)
                except (TypeError, ValueError):
                    pass  # keep the default for unparseable numbers

        elif name in _BOOLEAN_ENTITIES:
            if isinstance(value, bool):
                payload[name] = value
            else:
                # str() + lower() so that None, numbers and 'True' are handled.
                payload[name] = 'true' in str(value).lower()

    return payload
126
+
127
# Sum-insured tiers: (good_minimum, average_minimum, good_text, average_text, bad_text).
_SI_SINGLE_TOP = (2500000, 1000000, '> 25L', '< 25L but > 10L', '< 10L')
_SI_SINGLE_OTHER = (1000000, 500000, '> 10L', '> 5L but < 10L', '< 5L')
_SI_COUPLE_TOP = (5000000, 2500000, '> 50L', '> 25L but < 50L', '< 25L')
_SI_COUPLE_OTHER = (2500000, 1000000, '> 25L', '> 10L but < 25L', '< 10L')
_SI_FAMILY_HIGH = (10000000, 5000000, '> 1 CR', '> 50L but < 1 CR', '< 50L')
_SI_FAMILY_LOW = (5000000, 2500000, '> 50L', '> 25L but < 50L', '< 25L')


def _sum_insured_verdict(payload):
    """Return ``(verdict, reason)`` for the sum insured given the family composition."""
    sum_insured = payload['Sum Insured (SI)']
    adults, children = payload['Adults'], payload['Children']
    top_city = payload['Is Top City']

    # NOTE(review): the relative nesting of the "children" rule and the
    # "adults" rules could not be recovered with certainty; here a policy
    # covering any child uses the family tiers - confirm against the spec.
    if children >= 1:
        tier = _SI_FAMILY_HIGH if (children > 1 or top_city) else _SI_FAMILY_LOW
        audience = f'{adults} adults & {children} children'
    elif adults >= 2:
        tier = _SI_COUPLE_TOP if top_city else _SI_COUPLE_OTHER
        audience = f'{adults} adults'
    else:
        # Also covers an unknown head count (Adults == 0), which previously
        # left the verdict undefined.
        tier = _SI_SINGLE_TOP if top_city else _SI_SINGLE_OTHER
        audience = 'an adult'

    good_min, average_min, good_text, average_text, bad_text = tier
    if sum_insured >= good_min:
        verdict, text = 'Good', good_text
    elif sum_insured >= average_min:
        verdict, text = 'Average', average_text
    else:
        verdict, text = 'Bad', bad_text
    return verdict, f'Sum Insured (SI) ({sum_insured}) {text} for {audience} in {payload["City"]}'


def _format_sublimit(sublimit):
    """Render one sublimit entry as ``name: value`` text."""
    if isinstance(sublimit, dict):
        return f'{sublimit.get("sublimit_name")}: {sublimit.get("sublimit_value")}'
    return str(sublimit)


def rules(payload):
    """Apply the policy rules to a prepared payload.

    Args:
        payload: Dict with the factor keys read below (sum insured, head
            count, waiting periods, copay, ...).

    Returns:
        A list of ``{'factor': ..., 'verdict': 'Good'|'Average'|'Bad',
        'reason': ...}`` dicts, one per evaluated factor.
    """
    analysis = []

    verdict, reason = _sum_insured_verdict(payload)
    analysis.append(
        {
            'factor' : 'Sum Insured (SI)',
            'verdict' : verdict,
            'reason' : reason
        }
    )

    if payload['Room rent limit (proportionate deduction)'] > 0:
        verdict, reason = 'Bad', f'There is cap of {payload["Room rent limit (proportionate deduction)"]} on room rent'
    else:
        verdict, reason = 'Good', 'There is no cap on room rent'
    analysis.append({'factor' : 'Room rent limit (proportionate deduction)', 'verdict' : verdict, 'reason' : reason})

    if payload['Deductible'] > 0:
        verdict, reason = 'Bad', f'There is a deductible of {payload["Deductible"]}'
    else:
        verdict, reason = 'Good', 'No deductible'
    analysis.append({'factor' : 'Deductible', 'verdict' : verdict, 'reason' : reason})

    sublimits = payload['Sublimits']
    if not sublimits:
        verdict, reason = 'Good', 'There are no sublimits on any treatments or diseases'
    else:
        if not isinstance(sublimits, (list, tuple)):
            sublimits = [sublimits]
        verdict = 'Bad'
        sublimits_str = '\n'.join([_format_sublimit(sublimit) for sublimit in sublimits])
        reason = f'Following sublimits were found in your policy:\n{sublimits_str}'
    analysis.append({'factor' :'Sublimits', 'verdict' : verdict, 'reason' : reason})

    # Copay up to and including 5% is good; the earlier `== 0` test left
    # values between 0 and 5 without any verdict of their own.
    if payload['Copay'] <= 5:
        verdict, reason = 'Good', f'Copayment ({payload["Copay"]}) < 5%'
    elif payload['Copay'] <= 10:
        verdict, reason = 'Average', f'Copayment ({payload["Copay"]}) > 5% but < 10%'
    else:
        verdict, reason = 'Bad', f'Copayment (({payload["Copay"]})) > 10%'
    analysis.append({'factor' : 'Copay', 'verdict' : verdict, 'reason' : reason})

    if payload['Pre-existing diseases (PED) Waiting period'] > 0:
        if payload['Policy Age'] > payload['Pre-existing diseases (PED) Waiting period']:
            verdict, reason = 'Good', f'Your policy has a waiting period of {payload["Pre-existing diseases (PED) Waiting period"]} months on pre-existing diseases but the waiting period has expired as of today'
        else:
            verdict, reason = 'Bad', f'Your policy has a waiting period of {payload["Pre-existing diseases (PED) Waiting period"]} months on pre-existing diseases which is yet to expire'
    else:
        verdict, reason = 'Good', f'Your policy has no waiting period on pre-existing diseases'
    analysis.append({'factor' : 'Pre-existing diseases (PED) Waiting period', 'verdict' : verdict, 'reason' : reason})

    if payload['30-Day Waiting Period']:
        # Policy age is compared against 1 (month).
        if payload['Policy Age'] > 1:
            verdict, reason = 'Good', f'Your policy has a 30 day waiting period but it has expired as of today'
        else:
            verdict, reason = 'Bad', f'Your policy has a 30 day waiting period which is yet to expire'
    else:
        verdict, reason = 'Good', f'Your policy has no 30 day waiting period'
    analysis.append({'factor' : '30-Day Waiting Period', 'verdict' : verdict, 'reason' : reason})

    if payload['Specific Illness Waiting Period'] > 0:
        if payload['Policy Age'] > payload['Specific Illness Waiting Period']:
            verdict, reason = 'Good', f'Your policy has a waiting period of {payload["Specific Illness Waiting Period"]} on specific illnesses but the waiting period has expired as of today'
        else:
            verdict, reason = 'Bad', f'Your policy has a waiting period of {payload["Specific Illness Waiting Period"]} on specific illnesses which is yet to expire'
    else:
        verdict, reason = 'Good', f'Your policy has no waiting period any on specific illnesses'
    analysis.append({'factor' : 'Specific Illness Waiting Period', 'verdict' : verdict, 'reason' : reason})

    if payload['Maternity benefits']:
        analysis.append(
            {
                'factor' : 'Maternity benefits',
                'verdict' : 'Good',
                'reason' : 'Maternity benefits present, check waiting period'
            }
        )
        if payload['Maternity waiting period'] > 0:
            if payload['Policy Age'] > payload['Maternity waiting period']:
                verdict, reason = 'Good', f'Your policy has a waiting period of {payload["Maternity waiting period"]} for maternity cases but it has expired as of today'
            else:
                verdict, reason = 'Bad', f'Your policy has a waiting period of {payload["Maternity waiting period"]} for maternity cases which is yet to expire'
        else:
            verdict, reason = 'Good', f'Your policy has a no waiting period for maternity cases'
        analysis.append({'factor' : 'Maternity waiting period', 'verdict' : verdict, 'reason' : reason})
    else:
        analysis.append(
            {
                'factor' : 'Maternity benefits',
                'verdict' : 'Bad',
                'reason' : 'No maternity benefits'
            }
        )

    return analysis
266
+
267
if __name__ == '__main__':
    # Batch helper: append a POST_PROCESS stage to every saved *.analysis.json file.
    import json
    import glob
    import sys

    # The directory can be given as the first CLI argument; the original
    # hard-coded location remains the default.
    default_dirpath = '/Users/sakshi.tantak/Downloads/Porting Documents/testing-data/sample/poc'
    dirpath = sys.argv[1] if len(sys.argv) > 1 else default_dirpath
    for file in glob.glob(f'{dirpath}/*.analysis.json'):
        with open(file) as f:
            json_data = json.load(f)
        # presumably index 1 is the EXTRACTION stage - verify against the saved file layout
        payload = prepare_payload(json_data[1]['response']['processed'])
        json_data.append({
            'stage' : 'POST_PROCESS',
            'response' : payload,
            'time' : 0
        })
        with open(file, 'w') as f:
            json.dump(json_data, f, indent = 4)
styles.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def apply_custom_styles():
    """Inject the app's custom CSS into the page via ``st.markdown``.

    The stylesheet defines the classes used by the other helpers in this
    module: ``upload-container``, ``factor-card`` (with ``good-factor`` /
    ``average-factor`` / ``bad-factor``), ``header-container``,
    ``detailed-factor`` (with ``good`` / ``average`` / ``bad``) and
    ``comparison-table``.
    """
    st.markdown("""
        <style>
        .stApp {
            max-width: 1200px;
            margin: 0 auto;
            background: linear-gradient(135deg, #f5f7fa 0%, #e4e9f2 100%);
            background-attachment: fixed;
            min-height: 100vh;
        }
        .upload-container {
            border: 2px dashed #0066cc;
            border-radius: 10px;
            padding: 20px;
            text-align: center;
            margin: 20px 0;
            background: rgba(255, 255, 255, 0.9);
            backdrop-filter: blur(5px);
        }
        .factor-card {
            background-color: rgba(255, 255, 255, 0.95);
            padding: 20px;
            border-radius: 10px;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            margin: 10px 0;
            backdrop-filter: blur(5px);
            height: 100%;
        }
        .good-factor {
            border-left: 4px solid #28a745;
        }
        .average-factor {
            border-left: 4px solid #ffc107;
        }
        .bad-factor {
            border-left: 4px solid #dc3545;
        }
        .header-container {
            padding: 2rem 0;
            margin-bottom: 2rem;
            background: linear-gradient(90deg, #0066cc 0%, #0099ff 100%);
            color: white;
            border-radius: 10px;
            text-align: center;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        }
        .detailed-factor {
            padding: 15px;
            border-radius: 8px;
            margin: 10px 0;
            background: rgba(255, 255, 255, 0.9);
            border-left: 4px solid #666;
        }
        .detailed-factor.good {
            border-left-color: #28a745;
            background: rgba(40, 167, 69, 0.1);
        }
        .detailed-factor.average {
            border-left-color: #ffc107;
            background: rgba(255, 193, 7, 0.1);
        }
        .detailed-factor.bad {
            border-left-color: #dc3545;
            background: rgba(220, 53, 69, 0.1);
        }
        .comparison-table {
            background: white;
            border-radius: 10px;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            margin: 20px 0;
        }
        </style>
    """, unsafe_allow_html=True)
76
+
77
def show_factor_section(title, factors, color):
    """Render a titled card listing ``factors``; nothing is rendered for an empty list.

    ``color`` selects the ``{color}-factor`` CSS class of the card.
    """
    if not factors:
        return

    # NOTE(review): factor text is interpolated into raw HTML without escaping.
    list_items = "".join(
        f'<li style="margin: 10px 0; padding: 10px; background: rgba(248, 249, 250, 0.8); border-radius: 5px;">{factor}</li>'
        for factor in factors
    )
    card_html = f"""
        <div class="factor-card {color}-factor">
            <h3 style="color: #333;">{title}</h3>
            <ul style="list-style-type: none; padding-left: 0;">
                {list_items}
            </ul>
        </div>
    """
    st.markdown(card_html, unsafe_allow_html=True)
87
+
88
def show_detailed_factors(good_factors, average_factors, bad_factors):
    """Render each factor as a coloured ``detailed-factor`` box.

    Each factor is a ``"name: explanation"`` string. Text before the first
    colon becomes the bold heading, the rest the explanation. A factor with
    no colon is rendered with an empty explanation instead of raising.

    Args:
        good_factors: Factors rendered with the ``good`` style.
        average_factors: Factors rendered with the ``average`` style.
        bad_factors: Factors rendered with the ``bad`` style.
    """
    groups = (
        ('good', good_factors),
        ('average', average_factors),
        ('bad', bad_factors),
    )
    for css_class, factors in groups:
        for factor in factors:
            # partition() never raises, unlike unpacking split(':', 1).
            name, _, explanation = factor.partition(':')
            # NOTE(review): factor text is interpolated into raw HTML without escaping.
            st.markdown(f"""
                <div class="detailed-factor {css_class}">
                    <strong>{name}</strong>
                    <p style="margin: 5px 0 0 0; color: #666;">{explanation}</p>
                </div>
            """, unsafe_allow_html=True)
115
+
116
def show_factor_summary(summary, verdict, sentiment_title):
    """Render a summary box styled by ``verdict``; an empty summary renders nothing."""
    if len(summary) == 0:
        return

    box_html = f"""
        <div class="detailed-factor {verdict}">
            <strong>{sentiment_title}</strong>
            <p style="margin: 5px 0 0 0; color: #666;">{summary}</p>
        </div>
    """
    st.markdown(box_html, unsafe_allow_html=True)
ui.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Streamlit UI for Policy Analyser
3
+ """
4
+
5
+ # Imports
6
+ import os
7
+ import glob
8
+ import json
9
+ from datetime import datetime
10
+ import base64
11
+
12
+ import openai
13
+ import streamlit as st
14
+ import pandas as pd
15
+
16
+ from policy_analyser.analyse import analyse
17
+
18
# Page-wide Streamlit configuration: title, icon and wide layout.
st.set_page_config('Policy Analyser', '🧐', layout = 'wide')

# Legacy, commented-out OpenAI helper kept for reference.
# NOTE(review): this comment block previously contained a hard-coded API key.
# It is redacted here, but it remains in version-control history and should be
# treated as compromised and rotated.
# def call_openai(system_prompt, document, seed = 42):
#     messages = [{'role' : 'system', 'content' : system_prompt},
#                 {'role' : 'user', 'content' : document}]
#     response = openai.ChatCompletion.create(
#         engine = 'AskoGPT4-1106',
#         messages = messages,
#         seed = seed,
#         temperature = 0.3,
#         api_key = '<REDACTED>',
#         api_version = '2023-07-01-preview',
#         api_type = 'azure',
#         api_base = 'https://asko-v1.openai.azure.com/'
#     )
#     return response.choices[0].message.content
34
+
35
+
36
def displayPDF(file):
    """Embed a PDF in the page.

    Args:
        file: Either a filesystem path (``str``) or the raw PDF bytes.
    """
    if isinstance(file, str):
        # Context manager so the handle is closed as soon as the bytes are read.
        with open(file, 'rb') as pdf_file:
            file_bytes = pdf_file.read()
    else:
        file_bytes = file
    base64_pdf = base64.b64encode(file_bytes).decode('utf-8')

    # Embedding PDF in HTML as a base64 data URI
    pdf_display = F'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf">'

    # Displaying File
    st.markdown(pdf_display, unsafe_allow_html=True)
50
+
51
def view_saved_results():
    """Page: pick a saved ``*.analysis.json`` result and show it next to its PDF.

    The PDF is expected beside the JSON file with the same base name.
    Extraction and post-processing output are shown as JSON in the sidebar,
    and analysis verdicts as one table per verdict.
    """
    dirpath = '/Users/sakshi.tantak/Downloads/Porting Documents/testing-data/sample/poc'
    if not os.path.isdir(dirpath):
        # The directory is a developer-local path; fail softly elsewhere.
        st.sidebar.warning(f'Results directory not found: {dirpath}')
        return
    files = [file for file in os.listdir(dirpath) if file.endswith('.analysis.json')]
    file = st.sidebar.selectbox('Select Result to view', options = files)
    if file is None:
        return

    file = os.path.join(dirpath, file)
    pdf_path = file.replace('.analysis.json', '.pdf')
    displayPDF(pdf_path)
    with open(file) as f:
        analysis = json.load(f)

    for stage in analysis:
        if stage['stage'] == 'EXTRACTION':
            st.sidebar.json(stage['response']['processed'])
        if stage['stage'] == 'POST_PROCESS':
            st.sidebar.json(stage['response'])
        if stage['stage'] == 'ANALYSE':
            df = pd.DataFrame.from_records(stage['response'])
            for verdict in ['Good', 'Average', 'Bad']:
                df_tmp = df.loc[df['verdict'] == verdict]
                if len(df_tmp) > 0:
                    st.sidebar.markdown(f'**{verdict}**')
                    st.sidebar.table(df_tmp)
73
+
74
def run():
    """Page: upload a PDF, run the analysis and show the verdict tables."""
    file = st.sidebar.file_uploader('Upload PDF')
    if file is None:
        return

    file_bytes = file.getvalue()
    displayPDF(file_bytes)
    analysis = analyse(file_bytes)
    st.json(analysis)
    for stage in analysis:
        # `stage['stage']` is the stage name, so it is compared rather than
        # indexed; indexing a string with 'ANALYSE' raises TypeError.
        if stage['stage'] == 'ANALYSE':
            df = pd.DataFrame(stage['response'])
            for verdict in ['Good', 'Average', 'Bad']:
                df_tmp = df.loc[df['verdict'] == verdict]
                if len(df_tmp) > 0:
                    st.sidebar.markdown(f'**{verdict}**')
                    st.sidebar.table(df_tmp)
89
+
90
def validate_results():
    """Page: review the extracted entities of a saved result and save the QC edits.

    For the selected ``*.analysis.json`` file, the EXTRACTION entities are
    shown in an editable table beside the PDF. Pressing the button writes the
    edited table to ``<name>.qc-entities.csv`` beside the JSON file.
    """
    dirpath = '/Users/sakshi.tantak/Downloads/Porting Documents/testing-data/sample/poc'
    file = st.sidebar.selectbox('Select file to validation', options = [file for file in os.listdir(dirpath) if file.endswith('.analysis.json')])
    if file is None:
        return

    filepath = os.path.join(dirpath, file)
    with open(filepath) as f:
        json_data = json.load(f)

    for stage in json_data:
        if stage['stage'] != 'EXTRACTION':
            continue
        entities = stage['response']['processed']
        for entity in entities:
            # QC columns start as a copy of the extracted value, marked unchecked.
            entity.update(
                {'entityValueQC' : entity['entityValue'], 'isRight' : False, 'QCRemarks' : ''}
            )
        df = pd.DataFrame.from_records(entities)
        df = df[['isRight', 'entityName', 'entityValue', 'entityValueQC', 'QCRemarks']]
        df = df.astype({'entityValueQC' : str})
        df = st.data_editor(df)
        displayPDF(filepath.replace('.analysis.json', '.pdf'))
        print(df)
        # Kept inside the EXTRACTION branch so `df` is always defined here.
        if st.button('Save?'):
            df.to_csv(filepath.replace('.analysis.json', '.qc-entities.csv'), index = False)
112
+
113
+
114
def main():
    """Register the three pages with Streamlit navigation and run the selected one."""
    saved_results = st.Page(view_saved_results, title = 'View Save Results')
    analyse_pdf = st.Page(run, title = 'Run your PDF')
    quality_check = st.Page(validate_results, title = 'QC')

    sections = {
        'View' : [saved_results, quality_check],
        'Run & Analyse' : [analyse_pdf]
    }
    selected = st.navigation(sections)
    selected.run()

if __name__ == '__main__':
    main()
utils.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import json
3
+ import requests
4
+ from typing import Dict, List, Tuple
5
+ import streamlit as st
6
+
7
def mock_api_call(pdf_bytes: bytes) -> List:
    """Return a canned policy-analysis payload in place of a real API call.

    ``pdf_bytes`` is accepted for signature compatibility but is not read;
    the same two-stage response is returned for every input: an ``ANALYSE``
    stage listing per-factor verdicts and an ``ANALYSIS_SUMMARY`` stage with
    one summary string per verdict (no "Average" summary is included).
    """
    # (factor, verdict, reason) rows for the ANALYSE stage.
    factor_rows = (
        ("Room rent limit", "Bad", "There is cap of 100 on room rent"),
        ("Deductible", "Good", "No deductible"),
        ("Copay", "Good", "Copayment (0.0) < 5%"),
    )
    analyse_stage = {
        "stage": "ANALYSE",
        "response": [
            {"factor": factor, "verdict": verdict, "reason": reason}
            for factor, verdict, reason in factor_rows
        ],
    }
    summary_stage = {
        "stage": "ANALYSIS_SUMMARY",
        "response": {
            "Good": "This was great!",
            "Bad": "This was meh :/",
        },
    }
    return [analyse_stage, summary_stage]
42
+
43
def parse_analysis_response(response: List) -> Tuple[List[str], List[str], List[str]]:
    """Split the ANALYSE stage of an analysis response into verdict buckets.

    Args:
        response: List of stage dicts; the first one whose ``"stage"`` equals
            ``"ANALYSE"`` is used. Its ``"response"`` must be a non-empty list
            of dicts carrying ``factor``, ``verdict`` and ``reason`` keys.

    Returns:
        ``(good, average, bad)`` - three lists of ``"<factor>: <reason>"``
        strings, bucketed by case-insensitive verdict. Entries with any other
        verdict are left out. All three lists are empty (and an error is shown
        through ``st.error``) when the stage is missing or malformed.
    """
    try:
        # Find the analysis stage in the response list
        analysis_item = next(
            (item for item in response if item.get("stage") == "ANALYSE"),
            None,
        )
        if not analysis_item:
            st.error("No analysis data found in the response")
            return [], [], []

        analysis_list = analysis_item.get("response", [])
        if not analysis_list or not isinstance(analysis_list, list):
            st.error("Invalid analysis response format")
            return [], [], []

        good_factors: List[str] = []
        average_factors: List[str] = []
        bad_factors: List[str] = []
        buckets = {
            'good': good_factors,
            'average': average_factors,
            'bad': bad_factors,
        }

        for item in analysis_list:
            # A single malformed entry must not throw away the factors that
            # were already categorised, so skip it like any unknown verdict.
            if not isinstance(item, dict):
                continue
            raw_verdict = item.get('verdict')
            # A missing/None/non-string verdict would crash on .lower();
            # treat it as "no recognised verdict" instead.
            verdict = raw_verdict.lower() if isinstance(raw_verdict, str) else ''
            bucket = buckets.get(verdict)
            if bucket is not None:
                bucket.append(f"{item.get('factor')}: {item.get('reason')}")

        return good_factors, average_factors, bad_factors
    except (AttributeError, TypeError) as e:
        # Raised when `response` is not iterable or a stage entry is not
        # dict-like; report it in the UI rather than crashing the page.
        st.error(f"Error parsing analysis response: {str(e)}")
        return [], [], []
87
+
88
def validate_pdf(pdf_bytes: bytes) -> bool:
    """Tell whether ``pdf_bytes`` looks like a PDF document.

    Empty or missing content is rejected outright; anything else is accepted
    only if it begins with the PDF file signature ``%PDF-``.
    """
    has_content = bool(pdf_bytes)
    return has_content and pdf_bytes.startswith(b'%PDF-')
97
+
98
def displayPDF(file):
    """Embed a PDF in the Streamlit page.

    Args:
        file: Either a filesystem path (``str``), which is read in binary
            mode, or the raw PDF content itself.
    """
    if isinstance(file, str):
        # Read through a context manager so the handle is closed right away
        # rather than leaked until garbage collection.
        with open(file, 'rb') as f:
            file_bytes = f.read()
    else:
        file_bytes = file
    base64_pdf = base64.b64encode(file_bytes).decode('utf-8')

    # Embedding PDF in HTML as a base64 data URI
    pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf">'

    # Raw HTML must be explicitly allowed for the <embed> tag to render
    st.markdown(pdf_display, unsafe_allow_html=True)