Spaces:
Sleeping
Sleeping
Sakshi
committed on
Commit
·
0106d5f
1
Parent(s):
3586943
policy analyser app
Browse files- .gitignore +4 -0
- __init__.py +0 -0
- app.py +129 -0
- main.py +154 -0
- policy_analyser/__init__.py +43 -0
- policy_analyser/analyse.py +232 -0
- policy_analyser/data/Policy_Wordings_Acko_Personal_Health_Policy_Applicable_for_the_policies_sold_post_1_10_2024_64ea02eb51_ab3c8eefa2.md +0 -0
- policy_analyser/data/__init__.py +0 -0
- policy_analyser/data/policy_analyser_entities.json +176 -0
- policy_analyser/extraction.py +29 -0
- policy_analyser/llm.py +31 -0
- policy_analyser/ocr.py +140 -0
- policy_analyser/prompts/__init__.py +0 -0
- policy_analyser/prompts/analysis.txt +143 -0
- policy_analyser/prompts/extraction.txt +11 -0
- policy_analyser/prompts/suggest.txt +5 -0
- policy_analyser/rules.py +281 -0
- styles.py +123 -0
- ui.py +127 -0
- utils.py +111 -0
.gitignore
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__pycache__
|
2 |
+
*.pycache
|
3 |
+
*.pyc
|
4 |
+
*.env
|
__init__.py
ADDED
File without changes
|
app.py
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import streamlit as st

from utils import validate_pdf
from styles import apply_custom_styles
from policy_analyser.analyse import analyse

# Fall back to Streamlit secrets when the key is missing or blank in the
# environment. os.environ values are always strings, so one truthiness check
# replaces the original two-step "not in / in [None, '']" test.
if not os.environ.get('GPT_KEY'):
    os.environ['GPT_KEY'] = st.secrets['GPT_KEY']
11 |
+
|
12 |
+
def main():
    """Streamlit entry point: upload policy PDFs, analyse them, show results.

    Renders an upload widget, runs the end-to-end ``analyse`` pipeline on each
    valid PDF, then shows a per-policy summary tab and (with more than one
    policy) a factor comparison tab.
    """
    # Apply custom styles
    apply_custom_styles()

    # Header
    st.markdown("""
    <div class="header-container">
        <img src="https://acko-brand.ackoassets.com/brand/vector-svg/gradient/horizontal-reverse.svg" height=50 width=100>
        <h1>Insurance Policy Analyzer</h1>
        <p>Upload and compare insurance policies</p>
    </div>
    """, unsafe_allow_html=True)

    # File upload section
    st.markdown('<div class="upload-container">', unsafe_allow_html=True)
    uploaded_files = st.file_uploader("Choose policy PDF files", type="pdf", accept_multiple_files=True)
    st.markdown('</div>', unsafe_allow_html=True)

    if uploaded_files and st.button('Analyse'):
        # Create tabs for different views
        tab1, tab2 = st.tabs(["Summary View", "Detailed Comparison"])

        # Store analysis results
        all_analyses = []

        # Process each uploaded file
        for uploaded_file in uploaded_files:
            # Read PDF content
            pdf_bytes = uploaded_file.read()

            # Validate PDF
            if not validate_pdf(pdf_bytes):
                st.error(f"Invalid PDF file: {uploaded_file.name}")
                continue

            # Show loading state
            with st.spinner(f"Analyzing {uploaded_file.name}..."):
                try:
                    # Run the end-to-end pipeline (OCR -> analyse -> suggest)
                    response = analyse(pdf_bytes, True)

                    # next(..., None) legitimately returns None when a stage
                    # is absent; report that instead of crashing with a
                    # TypeError on the ['response'] subscript.
                    analysis_stage = next(
                        (item for item in response if item.get("stage") == "ANALYSE"), None
                    )
                    suggest_stage = next(
                        (item for item in response if item.get("stage") == "SUGGEST"), None
                    )
                    if analysis_stage is None or suggest_stage is None:
                        st.error(f"Analysis incomplete for {uploaded_file.name}")
                        continue

                    # Keep only the customer-facing sections of each response.
                    analysis = analysis_stage['response']
                    analysis = analysis.split('[CUSTOMER_RESPONSE]')[-1].split('[/CUSTOMER_RESPONSE]')[0]
                    suggestion = suggest_stage['response']
                    suggestion = suggestion.split('[POLICY_PITCH]')[-1].split('[/POLICY_PITCH]')[0]

                    # Store results
                    all_analyses.append({
                        'name': uploaded_file.name,
                        'analysis': analysis,
                        'suggestion': suggestion
                    })

                except Exception as e:
                    st.error(f"Error analyzing {uploaded_file.name}: {str(e)}")

        # Summary View Tab
        with tab1:
            for idx, analysis in enumerate(all_analyses):
                with st.expander(f"### Policy {idx + 1}: {analysis['name']}"):
                    with st.container():
                        st.markdown(analysis['analysis'])
                    with st.container():
                        st.markdown('# Why Acko? 🚀')
                        st.markdown(analysis['suggestion'])

        # Detailed Comparison Tab
        with tab2:
            if len(all_analyses) > 1:
                # NOTE: entries built above only carry free-text
                # 'analysis'/'suggestion'; the '*_factors' keys come from the
                # structured pipeline (see main.py). Use .get(...) so this tab
                # degrades to an empty matrix / "Not Found" instead of raising
                # KeyError, which previously crashed the whole page whenever
                # more than one policy was uploaded.
                factors_to_compare = set()
                for analysis in all_analyses:
                    factors_to_compare.update(
                        [f.split(':')[0] for f in analysis.get('good_factors', []) +
                         analysis.get('average_factors', []) + analysis.get('bad_factors', [])]
                    )

                # Create comparison table
                st.markdown("### Policy Comparison Matrix")

                comparison_data = []
                for factor in sorted(factors_to_compare):
                    row = {'Factor': factor}
                    for idx, analysis in enumerate(all_analyses):
                        policy_name = f"Policy {idx + 1}"
                        verdict = 'Not Found'
                        for category in ['good_factors', 'average_factors', 'bad_factors']:
                            for item in analysis.get(category, []):
                                if item.split(':')[0] == factor:
                                    verdict = category.split('_')[0].title()
                                    break
                        row[policy_name] = verdict
                    comparison_data.append(row)

                # Display comparison table
                st.table(comparison_data)
            else:
                st.info("Upload multiple policies to see comparison")

    # Footer
    st.markdown("""
    <div style="margin-top: 50px; text-align: center; color: #666;">
        <p>Upload one or more insurance policy PDFs to get detailed analysis and comparison.</p>
        <p>We support all major insurance providers.</p>
    </div>
    """, unsafe_allow_html=True)
|
122 |
+
|
123 |
+
if __name__ == "__main__":
    # Page configuration must be issued before any other Streamlit call,
    # so it lives in the entry guard rather than inside main().
    st.set_page_config(
        layout="wide",
        page_icon="📋",
        page_title="Insurance Policy Analyzer",
    )
    main()
|
main.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import io
|
3 |
+
from utils import mock_api_call, parse_analysis_response, validate_pdf, displayPDF
|
4 |
+
from styles import apply_custom_styles, show_factor_section, show_detailed_factors, show_factor_summary
|
5 |
+
|
6 |
+
from policy_analyser.analyse import analyse
|
7 |
+
|
8 |
+
def main():
    """Streamlit entry point for the structured (rule-based) policy analyser.

    Uploads PDFs, runs the extraction/rules pipeline, and renders per-policy
    factor sections plus a cross-policy comparison matrix.
    """
    # Apply custom styles
    apply_custom_styles()

    # Header
    st.markdown("""
    <div class="header-container">
        <img src="https://acko-brand.ackoassets.com/brand/vector-svg/gradient/horizontal-reverse.svg" height=50 width=100>
        <h1>Insurance Policy Analyzer</h1>
        <p>Upload and compare insurance policies</p>
    </div>
    """, unsafe_allow_html=True)

    # File upload section
    st.markdown('<div class="upload-container">', unsafe_allow_html=True)
    uploaded_files = st.file_uploader("Choose policy PDF files", type="pdf", accept_multiple_files=True)
    st.markdown('</div>', unsafe_allow_html=True)

    if uploaded_files:
        # Create tabs for different views
        tab1, tab2 = st.tabs(["Summary View", "Detailed Comparison"])

        # Store analysis results
        all_analyses = []

        # Process each uploaded file
        for uploaded_file in uploaded_files:
            # Read PDF content
            pdf_bytes = uploaded_file.read()

            # Validate PDF
            if not validate_pdf(pdf_bytes):
                st.error(f"Invalid PDF file: {uploaded_file.name}")
                continue

            # Show loading state
            with st.spinner(f"Analyzing {uploaded_file.name}..."):
                try:
                    # Run the structured pipeline
                    response = analyse(pdf_bytes)

                    # Guard against a missing stage instead of subscripting
                    # None (previously a TypeError).
                    summary_stage = next(
                        (item for item in response if item.get("stage") == "ANALYSIS_SUMMARY"), None
                    )
                    if summary_stage is None:
                        st.error(f"Analysis incomplete for {uploaded_file.name}")
                        continue

                    # Parse response
                    good_factors, average_factors, bad_factors = parse_analysis_response(response)

                    # Store results. The per-verdict summary is kept on the
                    # entry itself: previously `summary` leaked out of this
                    # loop, so every policy was rendered with the LAST file's
                    # summary (and the render loop raised NameError when the
                    # first file failed).
                    all_analyses.append({
                        'name': uploaded_file.name,
                        'summary': summary_stage['response'],
                        'good_factors': good_factors,
                        'average_factors': average_factors,
                        'bad_factors': bad_factors
                    })

                except Exception as e:
                    st.error(f"Error analyzing {uploaded_file.name}: {str(e)}")

        # Summary View Tab
        with tab1:
            for idx, analysis in enumerate(all_analyses):
                with st.expander(f"### Policy {idx + 1}: {analysis['name']}"):
                    with st.container():
                        summary = analysis['summary']
                        displayed = 0
                        for verdict in ['Good', 'Average', 'Bad']:
                            lst = [f.split(':')[0] for f in analysis[f'{verdict.lower()}_factors']]
                            if len(lst) > 0:
                                title = f'{verdict} Factors'
                                if verdict == 'Good':
                                    title += '✅'
                                    sentiment = 'Yay!'
                                elif verdict == 'Average':
                                    title += '⚠️'
                                    sentiment = 'Hmmmm'
                                else:
                                    title += '❌'
                                    sentiment = 'Meh'
                                cols = st.columns(2)
                                with st.container():
                                    # Alternate which column carries the factor
                                    # list vs. the summary on each row.
                                    with cols[0]:
                                        if displayed % 3 == 0:
                                            show_factor_section(title, lst, verdict.lower())
                                        else:
                                            show_factor_summary(summary[verdict], verdict.lower(), sentiment)
                                    with cols[1]:
                                        if displayed % 3 == 0:
                                            show_factor_summary(summary[verdict], verdict.lower(), sentiment)
                                        else:
                                            show_factor_section(title, lst, verdict.lower())
                                displayed += 1

                    st.markdown('-----')

        # Detailed Comparison Tab
        with tab2:
            if len(all_analyses) > 1:
                # Create comparison matrix
                factors_to_compare = set()
                for analysis in all_analyses:
                    factors_to_compare.update(
                        [f.split(':')[0] for f in analysis['good_factors'] +
                         analysis['average_factors'] + analysis['bad_factors']]
                    )

                # Create comparison table
                st.markdown("### Policy Comparison Matrix")

                comparison_data = []
                for factor in sorted(factors_to_compare):
                    row = {'Factor': factor}
                    for idx, analysis in enumerate(all_analyses):
                        policy_name = f"Policy {idx + 1}"
                        verdict = 'Not Found'
                        for category in ['good_factors', 'average_factors', 'bad_factors']:
                            for item in analysis[category]:
                                if item.split(':')[0] == factor:
                                    verdict = category.split('_')[0].title()
                                    break
                        row[policy_name] = verdict
                    comparison_data.append(row)

                # Display comparison table
                st.table(comparison_data)
            else:
                st.info("Upload multiple policies to see comparison")

    # Footer
    st.markdown("""
    <div style="margin-top: 50px; text-align: center; color: #666;">
        <p>Upload one or more insurance policy PDFs to get detailed analysis and comparison.</p>
        <p>We support all major insurance providers.</p>
    </div>
    """, unsafe_allow_html=True)
|
147 |
+
|
148 |
+
if __name__ == "__main__":
    # Configure the page first (Streamlit requires this before other calls),
    # then hand off to the UI entry point.
    st.set_page_config(
        layout="wide",
        page_icon="📋",
        page_title="Insurance Policy Analyzer",
    )
    main()
|
policy_analyser/__init__.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import json

from dotenv import load_dotenv

try:
    load_dotenv('.env')
except Exception:
    # A missing/unreadable .env is fine; fall back to the process environment.
    pass

PACKAGE = 'policy_analyser'
# NOTE(review): paths resolve against the current working directory, so the
# package must be imported from the project root — confirm this is intended.
PROJECT_DIR = os.getcwd()
PACKAGE_PATH = os.path.join(PROJECT_DIR, PACKAGE)
PROMPTS_DIR = os.path.join(PACKAGE_PATH, 'prompts')
DATA_DIR = os.path.join(PACKAGE_PATH, 'data')

# Azure OCR credentials, read once at import time.
CREDENTIALS = {
    'azure' : {
        'plain-text' : {
            'endpoint' : os.environ.get('AZURE_PLAIN_TEXT_ENDPOINT', ''),
            'key' : os.environ.get('AZURE_PLAIN_TEXT_KEY')
        },
        'layout' : {
            'endpoint' : os.environ.get('AZURE_LAYOUT_ENDPOINT', ''),
            'key' : os.environ.get('AZURE_LAYOUT_KEY', ''),
            'model' : os.environ.get('AZURE_LAYOUT_MODEL', '')
        }
    }
}

GPT_ENGINE = 'o3-mini'
GPT_KEY = os.environ.get('GPT_KEY', '')
GPT_VERSION = '2024-12-01-preview'
GPT_API_BASE = 'https://ai-ackods910341544474.openai.azure.com/'


def _read_text(path):
    """Read a UTF-8 text file, closing the handle deterministically.

    The previous module body used bare ``open(...).read()`` five times and
    relied on garbage collection to close the file handles.
    """
    with open(path, encoding = 'utf-8') as f:
        return f.read()


# Build the extraction prompt: static instructions plus a rendered list of
# the entities to extract (entityId is internal and dropped).
EXTRACTION_PROMPT = _read_text(os.path.join(PROMPTS_DIR, 'extraction.txt'))
with open(os.path.join(DATA_DIR, 'policy_analyser_entities.json'), encoding = 'utf-8') as _f:
    entities = json.load(_f)
for entity in entities:
    del entity['entityId']
entities_str = '\n---\n'.join(['\n'.join([f'{k} : {v}' for k, v in entity.items()]) for entity in entities])
EXTRACTION_PROMPT += entities_str
ANALYSIS_PROMPT = _read_text(os.path.join(PROMPTS_DIR, 'analysis.txt')).strip()
SUGGESTION_PROMPT = _read_text(os.path.join(PROMPTS_DIR, 'suggest.txt')).strip()
# Acko's own policy wording, used as comparison material by the suggester.
ACKO_POLICY = _read_text(os.path.join(DATA_DIR, 'Policy_Wordings_Acko_Personal_Health_Policy_Applicable_for_the_policies_sold_post_1_10_2024_64ea02eb51_ab3c8eefa2.md'))
|
policy_analyser/analyse.py
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Run analysis
|
3 |
+
@author : Sakshi Tantak
|
4 |
+
"""
|
5 |
+
|
6 |
+
# Imports
|
7 |
+
from time import time
|
8 |
+
from datetime import datetime
|
9 |
+
|
10 |
+
from policy_analyser import ACKO_POLICY, ANALYSIS_PROMPT, SUGGESTION_PROMPT
|
11 |
+
from policy_analyser.ocr import AzureLayoutOCR, PyMuPDF4LLMOCR
|
12 |
+
from policy_analyser.extraction import extract
|
13 |
+
from policy_analyser.rules import prepare_payload, rules
|
14 |
+
from policy_analyser.llm import call_openai
|
15 |
+
|
16 |
+
# Module-level OCR singleton: constructed once at import time and reused by
# every call to `analyse`. The Azure engine is the disabled alternative.
# OCR = AzureLayoutOCR()
OCR = PyMuPDF4LLMOCR()
|
18 |
+
|
19 |
+
def analyse(file_bytes, end2end = False):
    """Run the policy-analysis pipeline over a single document.

    Parameters
    ----------
    file_bytes : str | bytes | bytearray
        Already-extracted policy text (str), or raw PDF bytes which are
        OCR'd first.
    end2end : bool, optional
        False -> structured pipeline (entity extraction -> rules ->
        per-verdict LLM summaries). True -> single-shot LLM analysis
        followed by a policy-pitch suggestion.

    Returns
    -------
    list[dict]
        One ``{'stage', 'response', 'time'}`` record per pipeline stage.

    Raises
    ------
    TypeError
        If ``file_bytes`` is neither text nor bytes.
    """
    print('OCR Started ...')
    ocr_start = time()
    if isinstance(file_bytes, str):
        # Caller already supplied extracted text; skip OCR.
        text = file_bytes
    elif isinstance(file_bytes, (bytearray, bytes)):
        text, _ = OCR(file_bytes)
    else:
        # Previously any other type left `text` unbound and crashed below
        # with a NameError; fail fast with a clear message instead.
        raise TypeError(f'Unsupported input type : {type(file_bytes).__name__}')
    ocr_end = time()
    print(f'OCR done [{ocr_end - ocr_start}]')

    if len(text) == 0:
        # Nothing to analyse. Previously this fell through and implicitly
        # returned None, breaking callers that iterate the stage list.
        return [
            {
                'stage' : 'OCR',
                'response' : text,
                'time' : ocr_end - ocr_start
            }
        ]

    if not end2end:
        print('Extraction Started ...')
        ext_start = time()
        raw_response, entities = extract(text)
        ext_end = time()
        print(f'Extraction done [{ext_end - ext_start}]')
        if len(entities) > 0:
            print('Preparing payload for analysis ...')
            payload = prepare_payload(entities)
            print('Payload prepared for analysis')
            print('Analysing ...')
            analysis_start = time()
            analysis = rules(payload)
            analysis_end = time()
            print(f'Analysed [{analysis_end - analysis_start}]')
            print('Summarising ...')
            # One customer-facing summary per verdict bucket; empty when the
            # rules produced no reasons for that verdict.
            summary = {}
            summary_start = time()
            for verdict in ['Good', 'Average', 'Bad']:
                descriptions = '\n'.join([factor['reason'] for factor in analysis if factor['verdict'] == verdict])
                if len(descriptions) > 0:
                    prompt = f"""Given the following analysis on the {verdict} factors of a customer's policy that they have bought, generate a crisp and catchy summary of the factors for a customer. Try to make it factor-wise with bullet points
NOTE : THE POLICY WAS NOT SOLD BY US
analysis : {descriptions}
summary : """
                    response = call_openai(prompt)
                    print(response)
                else:
                    response = ''
                summary[verdict] = response
            summary_end = time()
            return [
                {
                    'stage' : 'OCR',
                    'response' : text,
                    'time' : ocr_end - ocr_start
                },
                {
                    'stage' : 'EXTRACTION',
                    'response' : {
                        'raw' : raw_response,
                        'processed' : entities
                    },
                    'time' : ext_end - ext_start
                },
                {
                    'stage' : 'POST_PROCESS',
                    'response' : payload,
                    'time' : 0
                },
                {
                    'stage' : 'ANALYSE',
                    'response' : analysis,
                    'time' : analysis_end - analysis_start
                },
                {
                    'stage' : 'ANALYSIS_SUMMARY',
                    'response' : summary,
                    'time' : summary_end - summary_start
                }
            ]

        # Extraction produced no entities: return empty stage placeholders so
        # callers can still look every stage up by name.
        return [
            {
                'stage' : 'OCR',
                'response' : text,
                'time' : 0
            },
            {
                'stage' : 'EXTRACTION',
                'response' : {
                    'raw' : '',
                    'processed' : []
                },
                'time' : 0
            },
            {
                'stage' : 'POST_PROCESS',
                'response' : {},
                'time' : 0
            },
            {
                'stage' : 'ANALYSE',
                'response' : [],
                'time' : 0
            },
            {
                'stage' : 'ANALYSIS_SUMMARY',
                'response' : {'Good' : '', 'Average' : '', 'Bad' : ''},
                'time' : 0
            }
        ]

    # end2end path: single-shot LLM analysis followed by a policy pitch.
    response = [
        {
            'stage' : 'OCR',
            'response' : text,
            'time' : ocr_end - ocr_start
        }
    ]
    try:
        print('Analysing ...')
        analysis_start = time()
        raw_response = call_openai(ANALYSIS_PROMPT + 'Policy : ' + text + f"\n\nConsider today's date as {datetime.today().day}/{datetime.today().month}/{datetime.today().year} for your analysis on waiting periods and dates")
        analysis_end = time()
        print(f'Analysed [{analysis_end - analysis_start}]')
        if raw_response is not None:
            response.append(
                {
                    'stage' : 'ANALYSE',
                    'response' : raw_response,
                    'time' : analysis_end - analysis_start
                }
            )
            print('Suggesting our policy ...')
            suggestion_start = time()
            suggestion = call_openai(SUGGESTION_PROMPT + "\nCustomer Policy Analysis : " + raw_response + "\nAcko's Policy : " + ACKO_POLICY)
            suggestion_end = time()
            print(f'Suggested [{suggestion_end - suggestion_start}]')
            if suggestion is not None:
                response.append({
                    'stage' : 'SUGGEST',
                    'response' : suggestion,
                    'time' : suggestion_end - suggestion_start
                })
        return response
    except Exception as e:
        print(e)
        # Keep the stage contract stable even on failure: callers look up
        # stages by name and expect ANALYSE/SUGGEST to exist.
        response.extend(
            [
                {
                    'stage' : 'ANALYSE',
                    'response' : '',
                    'time' : 0
                },
                {
                    'stage' : 'SUGGEST',
                    'response' : '',
                    'time' : 0
                }
            ]
        )
        return response
|
203 |
+
|
204 |
+
if __name__ == '__main__':
    # CLI driver: run the end-to-end analysis over each file path given on
    # the command line and write the results next to the inputs.
    import os
    import json
    import sys
    from tqdm import tqdm
    filepaths = sys.argv[1:]

    for filepath in tqdm(filepaths):
        # Skip artifacts this script itself produced on a previous run.
        if '.analysis' in filepath or '.e2e-analysis' in filepath:
            continue
        print(filepath)
        if filepath.endswith('.pdf'):
            with open(filepath, 'rb') as f:
                file_bytes = f.read()
        elif filepath.endswith(('.txt', '.md')):
            with open(filepath) as f:
                file_bytes = f.read()
        else:
            # Previously an unsupported extension fell through and re-used a
            # stale `file_bytes` from the prior iteration (or crashed).
            print(f'Skipping unsupported file : {filepath}')
            continue
        # The pipeline is always invoked end-to-end here. The flag used to be
        # set only on the .txt/.md branch, leaving it unbound for PDFs and
        # raising NameError at the `if not end2end` check below.
        end2end = True
        analysis = analyse(file_bytes, end2end)
        basepath = os.path.splitext(filepath)[0]
        if not end2end:
            with open(basepath + '.analysis.json', 'w') as f:
                json.dump(analysis, f, indent = 4)
        else:
            with open(basepath + '.e2e-analysis.json', 'w') as f:
                json.dump(analysis, f, indent = 4)
            with open(basepath + '.e2e-analysis.md', 'w') as f:
                # Stage 1 is ANALYSE when the LLM call succeeded.
                f.write(analysis[1]['response'])
|
policy_analyser/data/Policy_Wordings_Acko_Personal_Health_Policy_Applicable_for_the_policies_sold_post_1_10_2024_64ea02eb51_ab3c8eefa2.md
ADDED
The diff for this file is too large to render.
See raw diff
|
|
policy_analyser/data/__init__.py
ADDED
File without changes
|
policy_analyser/data/policy_analyser_entities.json
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"entityId": 0,
|
4 |
+
"entityName": "Sum Insured (SI)",
|
5 |
+
"entityDesc": "Total sum insured according to selected insurance plan",
|
6 |
+
"expectedOutputFormat": "float"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"entityId": 1,
|
10 |
+
"entityName": "Pre-existing diseases (PED) Waiting period",
|
11 |
+
"entityDesc": "Waiting period on any pre-existing diseases after the policy start date for the selected insurance plan",
|
12 |
+
"expectedOutputFormat": "number of months in float"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"entityId": 2,
|
16 |
+
"entityName": "30-Day Waiting Period",
|
17 |
+
"entityDesc": "Is there waiting period on first 30 days after the policy start date for selected insurance plan?",
|
18 |
+
"expectedOutputFormat": "true/false"
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"entityId": 3,
|
22 |
+
"entityName": "Specific Illness Waiting Period",
|
23 |
+
"entityDesc": "Waiting period on any specific illnesses after the policy start date for selected insurance plan",
|
24 |
+
"expectedOutputFormat": "number of months in float"
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"entityId": 4,
|
28 |
+
"entityName": "Maternity waiting period",
|
29 |
+
"entityDesc": "Waiting period on maternity treatments and causes after policy start date for selected insurance plan",
|
30 |
+
"expectedOutputFormat": "number of months in float"
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"entityId": 5,
|
34 |
+
"entityName": "Exclusions",
|
35 |
+
"entityDesc": "Diseases/procedures/treatments etc that are excluded from coverage of the policy under selected insurance plan",
|
36 |
+
"expectedOutputFormat": "stringified JSON list"
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"entityId": 6,
|
40 |
+
"entityName": "Maternity benefits",
|
41 |
+
"entityDesc": "Are there any additional benefits for maternity illnesses, treatments or procedures under selected insurance plan?",
|
42 |
+
"expectedOutputFormat": "true/false"
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"entityId": 7,
|
46 |
+
"entityName": "OPD",
|
47 |
+
"entityDesc": "Amount covering outpatient consultations like physician visits, medicines, etc for the customer under selected insurance plan",
|
48 |
+
"expectedOutputFormat": "float"
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"entityId": 8,
|
52 |
+
"entityName": "Copay",
|
53 |
+
"entityDesc": "% of amount that a customer must bear themselves and remainder is paid by the insurer under selected insurance plan",
|
54 |
+
"expectedOutputFormat": "float"
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"entityId": 9,
|
58 |
+
"entityName": "Deductible",
|
59 |
+
"entityDesc": "Amount above which the Sum Insured can be claimed in case of super topup for selected insurance plan",
|
60 |
+
"expectedOutputFormat": "float"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"entityId": 10,
|
64 |
+
"entityName": "Daycare treatment",
|
65 |
+
"entityDesc": "Daycare treatments and procedures where hospitalization may be required for < 24 hours under selected insurance plan",
|
66 |
+
"expectedOutputFormat": "stringified JSON list"
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"entityId": 11,
|
70 |
+
"entityName": "Free Health checkup",
|
71 |
+
"entityDesc": "Are free full body health checkups offered under selected insurance plan?",
|
72 |
+
"expectedOutputFormat": "true/false"
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"entityId": 12,
|
76 |
+
"entityName": "Restoration benefit",
|
77 |
+
"entityDesc": "Does insured sum get restored after if it gets exhausted for selected insurance plan?",
|
78 |
+
"expectedOutputFormat": "true/false"
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"entityId": 13,
|
82 |
+
"entityName": "Sublimits",
|
83 |
+
"entityDesc": "Coverage amount on a claim for specific diseases and treatments under selected insurance plan. Extract all sublimits as JSON list of dictionaries of following schema: [{\"sublimit_name\": \"Name of sublimit\", \"sublimit_value\": Value of sublimit in float}]",
|
84 |
+
"expectedOutputFormat": "stringified JSON list"
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"entityId": 14,
|
88 |
+
"entityName": "Room rent limit (proportionate deduction)",
|
89 |
+
"entityDesc": "% of total sum insured or amount of total sum insured that insurer shall pay for hospitalisation room rent for selected insurance plan. If given upto SI, find the amount or percentage.",
|
90 |
+
"expectedOutputFormat": "float or string"
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"entityId": 15,
|
94 |
+
"entityName": "Pre & Post Hospitalization",
|
95 |
+
"entityDesc": "Are medical expenses leading to and after hospitalization covered under selected insurance plan?",
|
96 |
+
"expectedOutputFormat": "true/false"
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"entityId": 16,
|
100 |
+
"entityName": "Domiciliary Cover",
|
101 |
+
"entityDesc": "Is home treatment covered under selected insurance plan?",
|
102 |
+
"expectedOutputFormat": "true/false"
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"entityId": 17,
|
106 |
+
"entityName": "No claim bonus",
|
107 |
+
"entityDesc": "Reward offered to customer for selected insurance plan for not raising any claims in the past",
|
108 |
+
"expectedOutputFormat": "float"
|
109 |
+
},
|
110 |
+
{
|
111 |
+
"entityId": 18,
|
112 |
+
"entityName": "Ambulance cover",
|
113 |
+
"entityDesc": "Coverage for ambulance charges under selected insurance plan",
|
114 |
+
"expectedOutputFormat": "float"
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"entityId": 19,
|
118 |
+
"entityName": "International coverage",
|
119 |
+
"entityDesc": "Are treatments administered outside India covered under selected insurance plan?",
|
120 |
+
"expectedOutputFormat": "true/false"
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"entityId": 20,
|
124 |
+
"entityName": "Dental treatment",
|
125 |
+
"entityDesc": "Coverage for dental treatments and procedures under selected insurance plan",
|
126 |
+
"expectedOutputFormat": "float"
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"entityId": 21,
|
130 |
+
"entityName": "AYUSH treatment",
|
131 |
+
"entityDesc": "Are Ayurvedic, Homeopathic and other alternative treatments covered under selected insurance plan?",
|
132 |
+
"expectedOutputFormat": "true/false"
|
133 |
+
},
|
134 |
+
{
|
135 |
+
"entityId": 22,
|
136 |
+
"entityName": "Health incentives",
|
137 |
+
"entityDesc": "Are any benefits for healthy habits offered under selected insurance plan?",
|
138 |
+
"expectedOutputFormat": "true/false"
|
139 |
+
},
|
140 |
+
{
|
141 |
+
"entityId": 23,
|
142 |
+
"entityName": "Wellness Services",
|
143 |
+
"entityDesc": "Are any complementary services and benefits offered under selected insurance plan?",
|
144 |
+
"expectedOutputFormat": "true/false"
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"entityId": 24,
|
148 |
+
"entityName": "Consumables/ Non medical expenses",
|
149 |
+
"entityDesc": "Are expenses related to consumables like syringes, bandages, etc in the course of treatment covered under selected insurance plan?",
|
150 |
+
"expectedOutputFormat": "true/false"
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"entityId": 25,
|
154 |
+
"entityName": "Hospital Cash",
|
155 |
+
"entityDesc": "Are amounts paid in hospital bills reimbursed under selected insurance plan?",
|
156 |
+
"expectedOutputFormat": "true/false"
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"entityId": 26,
|
160 |
+
"entityName": "Policy Holder's Details",
|
161 |
+
"entityDesc": "Details of primary policy holder as a JSON dictionary of the following schema : {\"name\": \"Name of primary policy holder\", \"age\": \"Age of primary policy holder in float number\", \"date_of_birth\": \"Date of birth of primary policy holder in dd/mm/yyyy format\", \"address\": \"Residential address of primary policy holder\", \"city\": \"City of residence of primary policy holder\", \"email_address\": \"Email address of primary policy holder\", \"phone_number\": \"Phone number of primary policy holder in integer or string\", \"pan\": \"Permanent Account Number (PAN) of primary policy holder\"}",
|
162 |
+
"expectedOutputFormat": "stringified JSON dictionary"
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"entityId": 27,
|
166 |
+
"entityName": "Insured Persons details",
|
167 |
+
"entityDesc": "Details of insured persons as a JSON list of dictionaries of the following schema : [{\"name\": \"Name of Insured person\", \"age\": \"Age of Insured person in float\", \"date_of_birth\": \"Date of birth of Insured person in dd/mm/yyyy format\", \"gender\": \"Gender of Insured person, one of [Male, Female, Other]\", \"relation\": \"Relationship of insured person with policy holder, one of [Self, Spouse, Mother, Father, Son, Daughter, Mother-in-law, Father-in-law]\", \"pre_existing_diseases\": \"Pre-existing diseases that Insured person has. Separate several diseases with comma in JSON list\"}]",
|
168 |
+
"expectedOutputFormat": "stringified JSON list"
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"entityId": 28,
|
172 |
+
"entityName": "Policy Details",
|
173 |
+
"entityDesc": "Details of the selected insurance plan policy issued to customer as a JSON dictionary of the following schema : {\"insurance_company_name\": \"Name of the insurance company that has issued the given insurance policy\", \"policy_name\": \"Name of the given insurance policy as string\", \"policy_number\": \"Policy number of the given insurance policy\", \"date_of_issue\": \"Date on which the policy was issued to the policy holder in dd/mm/yyy format\", \"policy_start_date\": \"Date on which policy activated in dd/mm/yyyy format\", \"policy_end_date\": \"Date on which policy expires in dd/mm/yyyy format\", \"renewal_date\": \"Date on which policy was renewed in dd/mm/yyyy format\"}",
|
174 |
+
"expectedOutputFormat": "stringified JSON dictionary"
|
175 |
+
}
|
176 |
+
]
|
policy_analyser/extraction.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Extraction
|
3 |
+
@author : Sakshi Tantak
|
4 |
+
"""
|
5 |
+
|
6 |
+
# Imports
|
7 |
+
import os
|
8 |
+
import re
|
9 |
+
import json
|
10 |
+
|
11 |
+
from policy_analyser import EXTRACTION_PROMPT
|
12 |
+
from policy_analyser.llm import call_openai
|
13 |
+
|
14 |
+
|
15 |
+
def extract(text):
    """Extract policy entities from OCR'd markdown text via the LLM.

    Sends the extraction prompt plus the document text to OpenAI, strips any
    markdown code fences from the reply, and parses the remainder as a JSON
    object of {entity name: entity value}.

    Parameters
    ----------
    text : str
        Markdown content of the policy document.

    Returns
    -------
    tuple[str, list[dict]]
        (raw LLM response, list of {'entityName', 'entityValue'} records).
        The list is empty when the API call or the JSON parsing fails.
    """
    raw_response = ''
    response = []
    try:
        raw_response = call_openai(EXTRACTION_PROMPT + '\nPolicy Document : ' + text)
        print(raw_response)
        # Strip only the surrounding markdown code fences (``` / ```json).
        # The previous pattern r'`|json' also deleted the literal substring
        # "json" everywhere, corrupting entity values that contained it.
        cleaned = re.sub(r'^\s*```(?:json)?\s*|\s*```\s*$', '', raw_response.strip())
        try:
            parsed = json.loads(cleaned)
            response = [{'entityName': k, 'entityValue': v} for k, v in parsed.items()]
        except (json.JSONDecodeError, AttributeError):
            # Model returned something that is not a JSON object.
            response = []
    except Exception as e:
        print(e)
        response = []
    return raw_response, response
|
policy_analyser/llm.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Call OpenAI
|
3 |
+
@author : Sakshi Tantak
|
4 |
+
"""
|
5 |
+
|
6 |
+
# Imports
|
7 |
+
import os
|
8 |
+
|
9 |
+
from openai import AzureOpenAI
|
10 |
+
|
11 |
+
from policy_analyser import GPT_ENGINE, GPT_API_BASE, GPT_KEY, GPT_VERSION
|
12 |
+
|
13 |
+
# Module-level Azure OpenAI client, configured once at import time from the
# package-level constants (endpoint / key / API version are read from the
# policy_analyser package, which in turn reads them from the environment).
CLIENT = AzureOpenAI(
    azure_endpoint = GPT_API_BASE,
    api_key = GPT_KEY,
    api_version = GPT_VERSION
)
|
18 |
+
|
19 |
+
def call_openai(system_prompt, seed = 42):
    """Send a single-message chat completion request to Azure OpenAI.

    Parameters
    ----------
    system_prompt : str
        Full prompt text; sent as a single user-role message.
    seed : int, optional
        Sampling seed forwarded to the API for best-effort deterministic
        outputs. Previously this parameter was accepted but silently ignored.

    Returns
    -------
    str
        Content of the first completion choice.
    """
    print('Calling openai')
    messages = [{'role' : 'user', 'content' : system_prompt}]
    response = CLIENT.chat.completions.create(
        model = GPT_ENGINE,
        messages = messages,
        seed = seed,  # BUGFIX: `seed` was never passed through before
        reasoning_effort = 'low'
    )

    return response.choices[0].message.content
|
policy_analyser/ocr.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
OCR
|
3 |
+
@author : Sakshi Tantak
|
4 |
+
"""
|
5 |
+
|
6 |
+
# Imports
|
7 |
+
import json
|
8 |
+
|
9 |
+
from azure.core.credentials import AzureKeyCredential
|
10 |
+
from azure.ai.formrecognizer import DocumentAnalysisClient
|
11 |
+
import pymupdf4llm, pymupdf
|
12 |
+
|
13 |
+
from policy_analyser import CREDENTIALS
|
14 |
+
|
15 |
+
def convert_nested_complex_obj_to_json(result):
    """Round-trip an arbitrary (possibly nested) object through JSON so that
    every non-serialisable node is flattened via its ``__dict__``, yielding
    plain dicts / lists / scalars."""
    def _fallback(obj):
        # Serialise unknown objects by their attribute dictionary.
        return obj.__dict__

    serialised = json.dumps(result, default = _fallback)
    return json.loads(serialised)
|
18 |
+
|
19 |
+
class AzureLayoutOCR:
    """OCR engine backed by Azure Form Recognizer's layout model.

    Converts the layout analysis result (paragraphs + tables) into a single
    markdown string, keeping elements in document (character-offset) order.
    """

    def __init__(self):
        self.client = self._authenticate()
        self.engine = 'azure/layout'

    def _authenticate(self):
        # Credentials come from the package-level CREDENTIALS mapping.
        client = DocumentAnalysisClient(
            endpoint=CREDENTIALS['azure']['layout']['endpoint'],
            credential=AzureKeyCredential(CREDENTIALS['azure']['layout']['key']),
            connection_verify=False
        )
        return client

    def _table2md(self, table, **kwargs):
        """Render one layout table as a markdown table.

        Returns (markdown string, (start_offset, end_offset)) where the
        offsets span the table's source text in the original document.
        """
        row_count, column_count = table['row_count'], table['column_count']
        cells = table['cells']

        markdown_table = []
        table_offsets = (table['spans'][0]['offset'], table['spans'][-1]['offset'] + table['spans'][-1]['length'])

        # One extra row: cells are written at row_index + 1 below, leaving
        # row 0 available when the table's header carries row_index 0.
        for _ in range(row_count + 1):
            row = [''] * column_count
            markdown_table.append(row)

        header_row_idx = [0]
        for cell in cells:
            row_index = cell['row_index']
            # '|' would break the markdown table syntax, so strip it.
            if cell['kind'] == 'columnHeader':
                markdown_table[row_index + 1][cell['column_index']] = '**' + cell['content'].replace('|', '') + '**'
                header_row_idx.append(row_index + 1)
            else:
                markdown_table[row_index + 1][cell['column_index']] = cell['content'].replace('|', '')

        markdown_output = ''
        # BUGFIX: enumerate() instead of markdown_table.index(row) —
        # index() returns the FIRST matching row, so duplicate rows used to
        # misplace the header separators (and made the loop O(n^2)).
        for row_idx, row in enumerate(markdown_table):
            markdown_output += '| ' + ' | '.join(row) + ' |\n'
            if row_idx in header_row_idx:
                # Add a separator after each header row
                markdown_output += '| ' + ' | '.join(['---'] * column_count) + ' |\n'

        return markdown_output, table_offsets

    def _paragraphs2md(self, paragraph, element_offsets, **kwargs):
        """Render one paragraph as markdown.

        Returns (None, None) when the paragraph's start offset falls inside
        an already-rendered element (e.g. a table), so it is not duplicated.
        """
        paragraph_offsets = (
            paragraph['spans'][0]['offset'], paragraph['spans'][-1]['offset'] + paragraph['spans'][-1]['length'])
        for offset in element_offsets:
            if paragraph_offsets[0] >= offset[0] and paragraph['spans'][0]['offset'] <= offset[1]:
                return None, None

        markdown_text = ''

        if paragraph['role'] == 'title':
            markdown_text += f'# {paragraph["content"]}'
        elif paragraph['role'] == 'sectionHeading':
            # BUGFIX: the old code compared the whole paragraph dict against
            # the string "sectionHeading", which is always False, so section
            # headings were never rendered as '##'.
            markdown_text += f'## {paragraph["content"]}'
        else:
            markdown_text += f'{paragraph["content"]}'
        return markdown_text, paragraph_offsets

    def _stitch_paragraphs_elements(self, paragraphs, elements, **kwargs):
        """Merge paragraph and table renderings back into document order by
        their starting character offset."""
        new_list = paragraphs + elements
        sorted_new_list = sorted(new_list, key=lambda x: x['offset'][0])
        return sorted_new_list

    def _convert2md(self, result, **kwargs):
        """Convert the JSON-ified layout result into one markdown string."""
        paragraphs, tables = result['paragraphs'], result['tables']
        md_tables = []
        for table in tables:
            md, offset = self._table2md(table, requestId=kwargs.get('requestId'))
            md_tables.append({'content': md, 'offset': offset})

        table_offsets = [element['offset'] for element in md_tables]
        md_paragraphs = []

        for para in paragraphs:
            md, offset = self._paragraphs2md(para, table_offsets, requestId=kwargs.get('requestId'))
            if md is not None:
                md_paragraphs.append({'content': md, 'offset': offset})

        all_md_elements = self._stitch_paragraphs_elements(md_paragraphs, md_tables, requestId=kwargs.get('requestId'))
        full_md = '\n\n'.join([record['content'] for record in all_md_elements])
        return full_md

    def _call_engine(self, image_reader, **kwargs):
        """Run the Azure layout analysis and post-process to markdown."""
        poller = self.client.begin_analyze_document(
            CREDENTIALS['azure']['layout']['model'],
            image_reader
        )
        result = poller.result()

        result = convert_nested_complex_obj_to_json(result)
        md_text = self._convert2md(result, requestId=kwargs.get('requestId'))

        return md_text, result

    def __call__(self, file_bytes):
        """OCR raw file bytes; returns (markdown text, raw engine result)."""
        text, raw_response = self._call_engine(file_bytes)
        return text, raw_response
|
120 |
+
|
121 |
+
class PyMuPDF4LLMOCR:
    """Local (no-network) OCR fallback that converts a PDF to markdown
    using the pymupdf4llm library."""

    def __init__(self):
        self.engine = 'open-source/pymupdf4llm'
        self.file_type = 'pdf'

    def _create_document(self, file_bytes, file_type = None):
        # Open the in-memory bytes as a PyMuPDF document; defaults to PDF.
        effective_type = self.file_type if file_type is None else file_type
        return pymupdf.open(stream = file_bytes, filetype = effective_type)

    def __call__(self, file_bytes, file_type = None):
        # The second tuple element mirrors AzureLayoutOCR's
        # (text, raw_response) contract; there is no raw engine result here.
        document = self._create_document(file_bytes, file_type)
        markdown = pymupdf4llm.to_markdown(document)
        return markdown, None
|
133 |
+
|
134 |
+
if __name__ == '__main__':
    # Manual smoke test: `python ocr.py <path-to-file>` OCRs the file with
    # the Azure layout engine and prints the extracted markdown.
    import sys
    filepath = sys.argv[1]
    file_bytes = open(filepath, 'rb').read()
    ocr = AzureLayoutOCR()
    text, raw_response = ocr(file_bytes)
    print(text)
|
policy_analyser/prompts/__init__.py
ADDED
File without changes
|
policy_analyser/prompts/analysis.txt
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Given the markdown content of a customer's health insurance policy, analyse the insurance policy for the customer by applying given rules for specific factors of the policy.
|
2 |
+
|
3 |
+
Apply the following rules enclosed in triple backticks on the policy to analyse it.
|
4 |
+
Make sure you consider values for analysis factors on the basis of the customer's selected insurance plan when multiple plans are described in the policy terms.
|
5 |
+
Make sure all factors appear in one of Good, Average or Bad only. No factor should be repeated in more than 1 verdict table
|
6 |
+
Note : Top cities = [Mumbai, Delhi, Bangalore, Chennai, Hyderabad, Gurgaon, Pune]
|
7 |
+
```
|
8 |
+
IF Adults == 1:
|
9 |
+
IF Is_Top_City:
|
10 |
+
IF Sum_Insured >= 2500000:
|
11 |
+
Verdict = "Good"
|
12 |
+
ELSE IF Sum_Insured >= 1000000 AND Sum_Insured < 2500000:
|
13 |
+
Verdict = "Average"
|
14 |
+
ELSE:
|
15 |
+
Verdict = "Bad"
|
16 |
+
ELSE:
|
17 |
+
IF Sum_Insured >= 1000000:
|
18 |
+
Verdict = "Good"
|
19 |
+
ELSE IF Sum_Insured >= 500000 AND Sum_Insured < 1000000:
|
20 |
+
Verdict = "Average"
|
21 |
+
ELSE:
|
22 |
+
Verdict = "Bad"
|
23 |
+
|
24 |
+
IF Adults >= 2:
|
25 |
+
IF Children == 0:
|
26 |
+
IF Is_Top_City:
|
27 |
+
IF Sum_Insured >= 5000000:
|
28 |
+
Verdict = "Good"
|
29 |
+
ELSE IF Sum_Insured >= 2500000 AND Sum_Insured < 5000000:
|
30 |
+
Verdict = "Average"
|
31 |
+
ELSE:
|
32 |
+
Verdict = "Bad"
|
33 |
+
ELSE:
|
34 |
+
IF Sum_Insured >= 2500000:
|
35 |
+
Verdict = "Good"
|
36 |
+
ELSE IF Sum_Insured >= 1000000 AND Sum_Insured < 2500000:
|
37 |
+
Verdict = "Average"
|
38 |
+
ELSE:
|
39 |
+
Verdict = "Bad"
|
40 |
+
|
41 |
+
IF Children >= 1:
|
42 |
+
IF Children > 1 OR Is_Top_City:
|
43 |
+
IF Sum_Insured >= 10000000:
|
44 |
+
Verdict = "Good"
|
45 |
+
ELSE IF Sum_Insured >= 5000000 AND Sum_Insured < 10000000:
|
46 |
+
Verdict = "Average"
|
47 |
+
ELSE:
|
48 |
+
Verdict = "Bad"
|
49 |
+
ELSE:
|
50 |
+
IF Sum_Insured >= 5000000:
|
51 |
+
Verdict = "Good"
|
52 |
+
ELSE IF Sum_Insured >= 2500000 AND Sum_Insured < 5000000:
|
53 |
+
Verdict = "Average"
|
54 |
+
ELSE:
|
55 |
+
Verdict = "Bad"
|
56 |
+
|
57 |
+
# Room Rent Limit
|
58 |
+
IF Room_Rent_Limit > 0:
|
59 |
+
Verdict = "Bad"
|
60 |
+
ELSE:
|
61 |
+
Verdict = "Good"
|
62 |
+
|
63 |
+
# Deductibles
|
64 |
+
IF Deductible > 0:
|
65 |
+
Verdict = "Bad"
|
66 |
+
ELSE:
|
67 |
+
Verdict = "Good"
|
68 |
+
|
69 |
+
# Sublimits
|
70 |
+
IF Sublimits == EMPTY:
|
71 |
+
Verdict = "Good"
|
72 |
+
ELSE:
|
73 |
+
Verdict = "Bad"
|
74 |
+
|
75 |
+
# Copayment
|
76 |
+
IF Copay <= 5:
|
77 |
+
Verdict = "Good"
|
78 |
+
ELSE IF Copay > 5 AND Copay <= 10:
|
79 |
+
Verdict = "Average"
|
80 |
+
ELSE:
|
81 |
+
Verdict = "Bad"
|
82 |
+
|
83 |
+
# Pre-existing Diseases (PED) Waiting Period
|
84 |
+
IF PED_Waiting_Period > 0:
|
85 |
+
IF Policy_Age > PED_Waiting_Period:
|
86 |
+
Verdict = "Good"
|
87 |
+
ELSE:
|
88 |
+
Verdict = "Bad"
|
89 |
+
ELSE:
|
90 |
+
Verdict = "Good"
|
91 |
+
|
92 |
+
# 30-Day Waiting Period
|
93 |
+
IF Thirty_Day_Waiting_Period:
|
94 |
+
IF Policy_Age > 1:
|
95 |
+
Verdict = "Good"
|
96 |
+
ELSE:
|
97 |
+
Verdict = "Bad"
|
98 |
+
ELSE:
|
99 |
+
Verdict = "Good"
|
100 |
+
|
101 |
+
# Specific Illness Waiting Period
|
102 |
+
IF Specific_Illness_Waiting_Period > 0:
|
103 |
+
IF Policy_Age > Specific_Illness_Waiting_Period:
|
104 |
+
Verdict = "Good"
|
105 |
+
ELSE:
|
106 |
+
Verdict = "Bad"
|
107 |
+
ELSE:
|
108 |
+
Verdict = "Good"
|
109 |
+
|
110 |
+
# Maternity Benefits
|
111 |
+
IF Maternity_Benefits:
|
112 |
+
Verdict = "Good"
|
113 |
+
IF Maternity_Waiting_Period > 0:
|
114 |
+
IF Policy_Age > Maternity_Waiting_Period:
|
115 |
+
Verdict = "Good"
|
116 |
+
ELSE:
|
117 |
+
Verdict = "Bad"
|
118 |
+
ELSE:
|
119 |
+
Verdict = "Good"
|
120 |
+
ELSE:
|
121 |
+
Verdict = "Bad"
|
122 |
+
```
|
123 |
+
|
124 |
+
Format your response in the following way, to present analysis to customer. Use appropriate language and emojis to portray analysis and verdicts to the customer. Be discrete about rules, do not expose rules to customer but use them to explain reasoning and analysis:
|
125 |
+
|
126 |
+
[CUSTOMER_RESPONSE]
|
127 |
+
# Our Analysis of your policy [Name of policy] by [Name of insurance company]
|
128 |
+
## Good Factors
|
129 |
+
| Factor | Your policy | Our Analysis |
|
130 |
+
| --- | --- | --- |
|
131 |
+
| Sum Insured | Value of sum insured in the policy | Analysis of why sum insured is good based on the given rules |
|
132 |
+
|
133 |
+
## Average Factors
|
134 |
+
| Factor | Your policy | Our Analysis |
|
135 |
+
| --- | --- | --- |
|
136 |
+
| Copay | Value of copayment in the policy | Analysis of why copay is average based on the given rules |
|
137 |
+
|
138 |
+
## Bad Factors
|
139 |
+
| Factor | Your policy | Our Analysis |
|
140 |
+
| --- | --- | --- |
|
141 |
+
| Deductible | Value of deductible in the policy | Analysis of why deductible is bad based on the given rules |
|
142 |
+
|
143 |
+
[/CUSTOMER_RESPONSE]
|
policy_analyser/prompts/extraction.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Given markdown content of a policy document, extract given list of entities.
|
2 |
+
|
3 |
+
Format your response as a JSON dictionary exactly in the following schema:
|
4 |
+
```
|
5 |
+
{
|
6 |
+
"Name of entity from entities list" : "Entity value for entity name in `expectedOutputFormat` based on entity description given in `entityDesc`"
|
7 |
+
}
|
8 |
+
```
|
9 |
+
If any numerical or quantitative entities are absent in the document, default entityValue to 0.
|
10 |
+
|
11 |
+
Entities:
|
policy_analyser/prompts/suggest.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
A customer's insurance policy was analysed by you. Using the analysis and Acko's health insurance policy, pitch Acko's policy to the customer by comparing it with their policy on the basis of the analysis and reasoning on the advantages and disadvantages of their policy.
|
2 |
+
Format your response in the following format in less than 150 words.
|
3 |
+
Format in bullet points with respect to comparing factors and include catchy emojis wherever required:
|
4 |
+
|
5 |
+
[POLICY_PITCH] Policy pitch [/POLICY_PITCH]
|
policy_analyser/rules.py
ADDED
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Rules for policy analyser
|
3 |
+
@author : Sakshi Tantak
|
4 |
+
"""
|
5 |
+
|
6 |
+
# Imports
|
7 |
+
import json
|
8 |
+
from datetime import datetime
|
9 |
+
|
10 |
+
from policy_analyser.llm import call_openai
|
11 |
+
|
12 |
+
def prepare_payload(extraction):
    """Convert the raw LLM extraction into the flat payload used by rules().

    Parameters
    ----------
    extraction : list[dict]
        Records of {'entityName': str, 'entityValue': Any} as produced by
        the extraction stage.

    Returns
    -------
    dict
        Typed payload with one key per analysis factor. Missing or
        unparseable entities keep their defaults; all parsing errors are
        swallowed so a partial extraction still yields a usable payload.
    """
    payload = {
        'Sum Insured (SI)' : 0,
        'Pre-existing diseases (PED) Waiting period' : 0,
        '30-Day Waiting Period' : False,
        'Specific Illness Waiting Period' : 0,
        'Maternity waiting period' : 0,
        'Exclusions' : [],
        'Maternity benefits' : False,
        'OPD' : 0,
        'Copay' : 0,
        'Deductible' : 0,
        'Daycare treatment' : [],
        'Free Health checkup' : False,
        'Restoration benefit' : False,
        'Sublimits' : [],
        'Room rent limit (proportionate deduction)' : 100,
        'Pre & Post Hospitalization' : False,
        'Domiciliary Cover' : False,
        'No claim bonus' : 0,
        'Ambulance cover' : 0,
        'International coverage' : False,
        'Dental treatment' : 0,
        'AYUSH treatment' : False,
        'Health incentives' : False,
        'Wellness Services' : False,
        'Consumables/ Non medical expenses' : False,
        'Hospital Cash' : False,
        'Adults' : 0,
        'Children' : 0,
        'City' : '',
        'Is Top City' : True,
        'Policy Age' : 0
    }
    num_adults, num_children, is_top_city = 0, 0, True
    today = datetime.today()
    for entity in extraction:
        # List-valued entities arrive as stringified JSON.
        if entity['entityName'] in ['Exclusions', 'Daycare treatment', 'Sublimits']:
            try:
                value = json.loads(entity['entityValue'])
                payload[entity['entityName']] = value
            except Exception:
                pass

        if entity['entityName'] == "Policy Holder's Details":
            value = entity['entityValue']
            city = ''
            try:
                value = json.loads(value)
                if 'city' in value:
                    city = value['city']
                    try:
                        # BUGFIX: the city used to be passed as call_openai's
                        # `seed` argument and never reached the model, so the
                        # classification always ran on an empty question.
                        # Embed the city in the prompt instead.
                        response = call_openai(
                            'Does a given city string belong to set of given cities : '
                            '[Mumbai, Delhi, Bangalore, Chennai, Hyderabad, Gurgaon, Pune]. '
                            'Answer in true/false only\nCity : ' + str(city))
                        is_top_city = response.strip().lower() == 'true'
                    except Exception:
                        pass
            except Exception:
                pass
            payload['Is Top City'] = is_top_city
            payload['City'] = city

        if entity['entityName'] == 'Insured Persons details':
            value = entity['entityValue']
            try:
                value = json.loads(value)
                for person in value:
                    # BUGFIX: `age` used to be unbound (or stale from the
                    # previous person) when neither field was present.
                    age = None
                    if 'date_of_birth' in person:
                        dob = datetime.strptime(person['date_of_birth'], '%d/%m/%Y')
                        age = (today - dob).days / 365
                    elif 'age' in person:
                        age = person['age']
                    if age is None:
                        continue
                    if age >= 18:
                        num_adults += 1
                    else:
                        num_children += 1
            except Exception:
                # Fall back to a single-adult household on any parse failure.
                num_adults = 1
            payload['Adults'] = num_adults
            payload['Children'] = num_children

        if entity['entityName'] == 'Policy Details':
            try:
                value = json.loads(entity['entityValue'])
                if 'policy_start_date' in value:
                    # Policy age is tracked in months.
                    payload['Policy Age'] = ((today - datetime.strptime(value['policy_start_date'], '%d/%m/%Y')).days / 365) * 12
            except Exception:
                pass

        # Numeric factors: accept numbers directly or coerce from string.
        if entity['entityName'] in ['Sum Insured (SI)', 'Pre-existing diseases (PED) Waiting period', 'Specific Illness Waiting Period',
                                    'Maternity waiting period', 'OPD', 'Copay', 'Deductible', 'No claim bonus', 'Ambulance cover',
                                    'Dental treatment', 'Room rent limit (proportionate deduction)']:
            value = entity['entityValue']
            if isinstance(value, (float, int)):
                payload[entity['entityName']] = value
            else:
                try:
                    payload[entity['entityName']] = float(value)
                except Exception:
                    pass

        # Boolean factors: accept booleans directly or parse from text.
        if entity['entityName'] in ['30-Day Waiting Period', 'Maternity benefits', 'Free Health checkup',
                                    'Restoration benefit', 'Pre & Post Hospitalization', 'Domiciliary Cover',
                                    'International coverage', 'AYUSH treatment', 'Health incentives', 'Wellness Services',
                                    'Consumables/ Non medical expenses', 'Hospital Cash']:
            value = entity['entityValue']
            if isinstance(value, bool):
                payload[entity['entityName']] = value
            else:
                # BUGFIX: `'true' in value` raised TypeError on non-string
                # values and missed capitalised "True".
                payload[entity['entityName']] = 'true' in str(value).lower()

    return payload
|
126 |
+
|
127 |
+
def rules(payload):
    """Apply the deterministic verdict rules to a prepared payload.

    Parameters
    ----------
    payload : dict
        Output of prepare_payload().

    Returns
    -------
    list[dict]
        One {'factor', 'verdict', 'reason'} record per analysed factor,
        verdict being one of 'Good' / 'Average' / 'Bad'. Sum-insured
        thresholds depend on household size and on whether the policy
        holder lives in a top-tier city.
    """
    analysis = []
    si = payload['Sum Insured (SI)']
    # BUGFIX: default verdict so Adults == 0 (failed extraction) no longer
    # raises NameError when the Sum Insured record is appended below.
    verdict, reason = 'Average', f'Could not determine insured members; Sum Insured (SI) ({si}) not fully assessed'

    if payload['Adults'] == 1:
        if payload['Is Top City']:
            if si >= 2500000:
                verdict, reason = 'Good', f'Sum Insured (SI) ({si}) > 25L for an adult in {payload["City"]}'
            elif si >= 1000000:
                verdict, reason = 'Average', f'Sum Insured (SI) ({si}) < 25L but > 10L for an adult in {payload["City"]}'
            else:
                verdict, reason = 'Bad', f'Sum Insured (SI) ({si}) < 10L for an adult in {payload["City"]}'
        else:
            if si >= 1000000:
                verdict, reason = 'Good', f'Sum Insured (SI) ({si}) > 10L for an adult in {payload["City"]}'
            elif si >= 500000:
                verdict, reason = 'Average', f'Sum Insured (SI) ({si}) > 5L but < 10L for an adult in {payload["City"]}'
            else:
                verdict, reason = 'Bad', f'Sum Insured (SI) ({si}) < 5L for an adult in {payload["City"]}'

    if payload['Adults'] >= 2:
        if payload['Children'] == 0:
            if payload['Is Top City']:
                if si >= 5000000:
                    verdict, reason = 'Good', f'Sum Insured (SI) ({si}) > 50L for {payload["Adults"]} adults in {payload["City"]}'
                elif si >= 2500000:
                    verdict, reason = 'Average', f'Sum Insured (SI) ({si}) > 25L but < 50L for {payload["Adults"]} adults in {payload["City"]}'
                else:
                    verdict, reason = 'Bad', f'Sum Insured (SI) ({si}) < 25L for {payload["Adults"]} adults in {payload["City"]}'
            else:
                if si >= 2500000:
                    verdict, reason = 'Good', f'Sum Insured (SI) ({si}) > 25L for {payload["Adults"]} adults in {payload["City"]}'
                elif si >= 1000000:
                    verdict, reason = 'Average', f'Sum Insured (SI) ({si}) > 10L but < 25L for {payload["Adults"]} adults in {payload["City"]}'
                else:
                    verdict, reason = 'Bad', f'Sum Insured (SI) ({si}) < 10L for {payload["Adults"]} adults in {payload["City"]}'

        if payload['Children'] >= 1:
            if payload['Children'] > 1 or payload['Is Top City']:
                if si >= 10000000:
                    verdict, reason = 'Good', f'Sum Insured (SI) ({si}) > 1 CR for {payload["Adults"]} adults & {payload["Children"]} children in {payload["City"]}'
                elif si >= 5000000:
                    verdict, reason = 'Average', f'Sum Insured (SI) ({si}) > 50L but < 1 CR for {payload["Adults"]} adults & {payload["Children"]} children in {payload["City"]}'
                else:
                    verdict, reason = 'Bad', f'Sum Insured (SI) ({si}) < 50L for {payload["Adults"]} adults & {payload["Children"]} children in {payload["City"]}'
            else:
                if si >= 5000000:
                    verdict, reason = 'Good', f'Sum Insured (SI) ({si}) > 50L for {payload["Adults"]} adults & {payload["Children"]} children in {payload["City"]}'
                elif si >= 2500000:
                    verdict, reason = 'Average', f'Sum Insured (SI) ({si}) > 25L but < 50L for {payload["Adults"]} adults & {payload["Children"]} children in {payload["City"]}'
                else:
                    verdict, reason = 'Bad', f'Sum Insured (SI) ({si}) < 25L for {payload["Adults"]} adults & {payload["Children"]} children in {payload["City"]}'
    analysis.append(
        {
            'factor' : 'Sum Insured (SI)',
            'verdict' : verdict,
            'reason' : reason
        }
    )

    if payload['Room rent limit (proportionate deduction)'] > 0:
        verdict, reason = 'Bad', f'There is cap of {payload["Room rent limit (proportionate deduction)"]} on room rent'
    else:
        verdict, reason = 'Good', 'There is no cap on room rent'
    analysis.append({'factor' : 'Room rent limit (proportionate deduction)', 'verdict' : verdict, 'reason' : reason})

    if payload['Deductible'] > 0:
        verdict, reason = 'Bad', f'There is a deductible of {payload["Deductible"]}'
    else:
        verdict, reason = 'Good', 'No deductible'
    analysis.append({'factor' : 'Deductible', 'verdict' : verdict, 'reason' : reason})

    if payload['Sublimits'] == []:
        verdict, reason = 'Good', 'There are no sublimits on any treatments or diseases'
    else:
        verdict = 'Bad'
        sublimits_str = '\n'.join([f'{sublimit["sublimit_name"]}: {sublimit["sublimit_value"]}' for sublimit in payload['Sublimits']])
        reason = f'Following sublimits were found in your policy:\n{sublimits_str}'
    analysis.append({'factor' :'Sublimits', 'verdict' : verdict, 'reason' : reason})

    # BUGFIX: the old condition `Copay == 0 and Copay <= 5` skipped copays
    # in (0, 5], leaking the previous factor's verdict/reason into Copay.
    if payload['Copay'] <= 5:
        verdict, reason = 'Good', f'Copayment ({payload["Copay"]}) < 5%'
    elif payload['Copay'] <= 10:
        verdict, reason = 'Average', f'Copayment ({payload["Copay"]}) > 5% but < 10%'
    else:
        verdict, reason = 'Bad', f'Copayment ({payload["Copay"]}) > 10%'
    analysis.append({'factor' : 'Copay', 'verdict' : verdict, 'reason' : reason})

    if payload['Pre-existing diseases (PED) Waiting period'] > 0:
        if payload['Policy Age'] > payload['Pre-existing diseases (PED) Waiting period']:
            verdict, reason = 'Good', f'Your policy has a waiting period of {payload["Pre-existing diseases (PED) Waiting period"]} months on pre-existing diseases but the waiting period has expired as of today'
        else:
            verdict, reason = 'Bad', f'Your policy has a waiting period of {payload["Pre-existing diseases (PED) Waiting period"]} months on pre-existing diseases which is yet to expire'
    else:
        verdict, reason = 'Good', 'Your policy has no waiting period on pre-existing diseases'
    analysis.append({'factor' : 'Pre-existing diseases (PED) Waiting period', 'verdict' : verdict, 'reason' : reason})

    if payload['30-Day Waiting Period']:
        # Policy Age is in months, so > 1 means the initial month has passed.
        if payload['Policy Age'] > 1:
            verdict, reason = 'Good', 'Your policy has a 30 day waiting period but it has expired as of today'
        else:
            verdict, reason = 'Bad', 'Your policy has a 30 day waiting period which is yet to expire'
    else:
        verdict, reason = 'Good', 'Your policy has no 30 day waiting period'
    analysis.append({'factor' : '30-Day Waiting Period', 'verdict' : verdict, 'reason' : reason})

    if payload['Specific Illness Waiting Period'] > 0:
        if payload['Policy Age'] > payload['Specific Illness Waiting Period']:
            verdict, reason = 'Good', f'Your policy has a waiting period of {payload["Specific Illness Waiting Period"]} on specific illnesses but the waiting period has expired as of today'
        else:
            verdict, reason = 'Bad', f'Your policy has a waiting period of {payload["Specific Illness Waiting Period"]} on specific illnesses which is yet to expire'
    else:
        verdict, reason = 'Good', 'Your policy has no waiting period on any specific illnesses'
    analysis.append({'factor' : 'Specific Illness Waiting Period', 'verdict' : verdict, 'reason' : reason})

    if payload['Maternity benefits']:
        analysis.append(
            {
                'factor' : 'Maternity benefits',
                'verdict' : 'Good',
                'reason' : 'Maternity benefits present, check waiting period'
            }
        )
        if payload['Maternity waiting period'] > 0:
            if payload['Policy Age'] > payload['Maternity waiting period']:
                verdict, reason = 'Good', f'Your policy has a waiting period of {payload["Maternity waiting period"]} for maternity cases but it has expired as of today'
            else:
                verdict, reason = 'Bad', f'Your policy has a waiting period of {payload["Maternity waiting period"]} for maternity cases which is yet to expire'
        else:
            verdict, reason = 'Good', 'Your policy has a no waiting period for maternity cases'
        analysis.append({'factor' : 'Maternity waiting period', 'verdict' : verdict, 'reason' : reason})
    else:
        analysis.append(
            {
                'factor' : 'Maternity benefits',
                'verdict' : 'Bad',
                'reason' : 'No maternity benefits'
            }
        )

    return analysis
|
266 |
+
|
267 |
+
if __name__ == '__main__':
    # Offline batch helper: re-runs payload preparation over previously saved
    # *.analysis.json files and appends a POST_PROCESS stage to each one.
    # NOTE(review): dirpath is a hardcoded developer-local path.
    import json
    import glob
    dirpath = '/Users/sakshi.tantak/Downloads/Porting Documents/testing-data/sample/poc'
    for file in glob.glob(f'{dirpath}/*.analysis.json'):
        json_data = json.load(open(file))
        # json_data[1] is assumed to be the extraction stage record —
        # TODO confirm against the pipeline's stage ordering.
        payload = prepare_payload(json_data[1]['response']['processed'])
        json_data.append({
            'stage' : 'POST_PROCESS',
            'response' : payload,
            'time' : 0
        })
        # print(json_data)
        with open(file, 'w') as f:
            json.dump(json_data, f, indent = 4)
|
styles.py
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
def apply_custom_styles():
    """Inject the app-wide CSS theme into the Streamlit page.

    Defines the gradient page background, the dashed upload container, the
    verdict-coloured factor cards (good/average/bad via left border colour),
    the gradient page header, and the comparison-table card. Must be called
    once near the top of each page render.
    """
    # unsafe_allow_html is required for raw <style> injection in Streamlit.
    st.markdown("""
    <style>
    .stApp {
        max-width: 1200px;
        margin: 0 auto;
        background: linear-gradient(135deg, #f5f7fa 0%, #e4e9f2 100%);
        background-attachment: fixed;
        min-height: 100vh;
    }
    .upload-container {
        border: 2px dashed #0066cc;
        border-radius: 10px;
        padding: 20px;
        text-align: center;
        margin: 20px 0;
        background: rgba(255, 255, 255, 0.9);
        backdrop-filter: blur(5px);
    }
    .factor-card {
        background-color: rgba(255, 255, 255, 0.95);
        padding: 20px;
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        margin: 10px 0;
        backdrop-filter: blur(5px);
        height: 100%;
    }
    .good-factor {
        border-left: 4px solid #28a745;
    }
    .average-factor {
        border-left: 4px solid #ffc107;
    }
    .bad-factor {
        border-left: 4px solid #dc3545;
    }
    .header-container {
        padding: 2rem 0;
        margin-bottom: 2rem;
        background: linear-gradient(90deg, #0066cc 0%, #0099ff 100%);
        color: white;
        border-radius: 10px;
        text-align: center;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }
    .detailed-factor {
        padding: 15px;
        border-radius: 8px;
        margin: 10px 0;
        background: rgba(255, 255, 255, 0.9);
        border-left: 4px solid #666;
    }
    .detailed-factor.good {
        border-left-color: #28a745;
        background: rgba(40, 167, 69, 0.1);
    }
    .detailed-factor.average {
        border-left-color: #ffc107;
        background: rgba(255, 193, 7, 0.1);
    }
    .detailed-factor.bad {
        border-left-color: #dc3545;
        background: rgba(220, 53, 69, 0.1);
    }
    .comparison-table {
        background: white;
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        margin: 20px 0;
    }
    </style>
    """, unsafe_allow_html=True)
|
76 |
+
|
77 |
+
def show_factor_section(title, factors, color):
    """Render a titled card listing ``factors``, tinted by the ``color`` CSS class.

    Does nothing when ``factors`` is empty/falsy, matching the original contract.
    """
    if not factors:
        return
    # Build the <li> items up front so the card template stays readable.
    items_html = "".join(
        f'<li style="margin: 10px 0; padding: 10px; background: rgba(248, 249, 250, 0.8); border-radius: 5px;">{factor}</li>'
        for factor in factors
    )
    st.markdown(f"""
    <div class="factor-card {color}-factor">
        <h3 style="color: #333;">{title}</h3>
        <ul style="list-style-type: none; padding-left: 0;">
            {items_html}
        </ul>
    </div>
    """, unsafe_allow_html=True)
|
87 |
+
|
88 |
+
def _render_detailed_factor(factor, css_class):
    """Render one 'Name: explanation' factor string as a verdict-coloured card."""
    # partition (not split) tolerates a factor string with no colon instead of
    # raising ValueError; the explanation is simply empty in that case.
    name, _, explanation = factor.partition(':')
    st.markdown(f"""
    <div class="detailed-factor {css_class}">
        <strong>{name}</strong>
        <p style="margin: 5px 0 0 0; color: #666;">{explanation}</p>
    </div>
    """, unsafe_allow_html=True)

def show_detailed_factors(good_factors, average_factors, bad_factors):
    """Render each factor list as detailed cards, one verdict colour per list.

    Each element is a 'Name: explanation' string; all three lists share the
    same card markup, so rendering is delegated to one helper (the original
    duplicated the markup three times and crashed on strings without ':').
    """
    for factor in good_factors:
        _render_detailed_factor(factor, 'good')
    for factor in average_factors:
        _render_detailed_factor(factor, 'average')
    for factor in bad_factors:
        _render_detailed_factor(factor, 'bad')
|
115 |
+
|
116 |
+
def show_factor_summary(summary, verdict, sentiment_title):
    """Render a single summary card titled ``sentiment_title``, coloured by ``verdict``.

    Skips rendering entirely when ``summary`` has zero length.
    """
    if len(summary) == 0:
        return
    card_html = f"""
    <div class="detailed-factor {verdict}">
        <strong>{sentiment_title}</strong>
        <p style="margin: 5px 0 0 0; color: #666;">{summary}</p>
    </div>
    """
    st.markdown(card_html, unsafe_allow_html=True)
|
ui.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Rules for Policy Analyser
|
3 |
+
"""
|
4 |
+
|
5 |
+
# Imports
|
6 |
+
import os
|
7 |
+
import glob
|
8 |
+
import json
|
9 |
+
from datetime import datetime
|
10 |
+
import base64
|
11 |
+
|
12 |
+
import openai
|
13 |
+
import streamlit as st
|
14 |
+
import pandas as pd
|
15 |
+
|
16 |
+
from policy_analyser.analyse import analyse
|
17 |
+
|
18 |
+
st.set_page_config('Policy Analyser', '🧐', layout = 'wide')
|
19 |
+
|
20 |
+
# def call_openai(system_prompt, document, seed = 42):
|
21 |
+
# messages = [{'role' : 'system', 'content' : system_prompt},
|
22 |
+
# {'role' : 'user', 'content' : document}]
|
23 |
+
# response = openai.ChatCompletion.create(
|
24 |
+
# engine = 'AskoGPT4-1106',
|
25 |
+
# messages = messages,
|
26 |
+
# seed = seed,
|
27 |
+
# temperature = 0.3,
|
28 |
+
# api_key = '<REDACTED>',  # SECURITY: never commit API keys — load from env vars / st.secrets
|
29 |
+
# api_version = '2023-07-01-preview',
|
30 |
+
# api_type = 'azure',
|
31 |
+
# api_base = 'https://asko-v1.openai.azure.com/'
|
32 |
+
# )
|
33 |
+
# return response.choices[0].message.content
|
34 |
+
|
35 |
+
|
36 |
+
def displayPDF(file):
    """Embed a PDF inline in the Streamlit page.

    Parameters
    ----------
    file : str | bytes
        Filesystem path to a PDF, or the raw PDF bytes themselves.
    """
    if isinstance(file, str):
        # FIX: use a context manager — the original `open(file, 'rb').read()`
        # never closed the file handle.
        with open(file, 'rb') as f:
            file_bytes = f.read()
    else:
        file_bytes = file

    # Browsers render the PDF from a base64 data URI inside an <embed> tag.
    base64_pdf = base64.b64encode(file_bytes).decode('utf-8')
    pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf">'

    # Displaying File
    st.markdown(pdf_display, unsafe_allow_html=True)
|
50 |
+
|
51 |
+
def view_saved_results():
    """Streamlit page: browse previously saved analysis results beside their PDFs.

    Lists ``*.analysis.json`` files from a fixed directory, previews the
    matching PDF, and renders each saved pipeline stage in the sidebar.
    """
    # NOTE(review): hardcoded developer-local path — parameterise before deployment.
    dirpath = '/Users/sakshi.tantak/Downloads/Porting Documents/testing-data/sample/poc'
    files = [file for file in os.listdir(dirpath) if file.endswith('.analysis.json')]
    file = st.sidebar.selectbox('Select Result to view', options = files)
    if file is not None:
        file = os.path.join(dirpath, file)
        # Convention: the source PDF sits beside the analysis JSON, same stem.
        pdf_path = file.replace('.analysis.json', '.pdf')
        displayPDF(pdf_path)
        analysis = json.load(open(file))
        if len(analysis) > 0:
            # Each saved stage is a dict with 'stage' and 'response' keys.
            for stage in analysis:
                if stage['stage'] == 'EXTRACTION':
                    st.sidebar.json(stage['response']['processed'])
                if stage['stage'] == 'POST_PROCESS':
                    st.sidebar.json(stage['response'])
                if stage['stage'] == 'ANALYSE':
                    df = pd.DataFrame.from_records(stage['response'])
                    # Split the factor table by verdict for readability.
                    for verdict in ['Good', 'Average', 'Bad']:
                        df_tmp = df.loc[df['verdict'] == verdict]
                        if len(df_tmp) > 0:
                            st.sidebar.markdown(f'**{verdict}**')
                            st.sidebar.table(df_tmp)
|
73 |
+
|
74 |
+
def run():
    """Streamlit page: upload a policy PDF, analyse it, and render the verdicts.

    The uploaded bytes are previewed inline, passed through ``analyse``, and
    the ANALYSE stage's factor table is shown in the sidebar grouped by verdict.
    """
    file = st.sidebar.file_uploader('Upload PDF')
    if file is not None:
        file_bytes = file.getvalue()
        displayPDF(file_bytes)
        analysis = analyse(file_bytes)
        st.json(analysis)
        for stage in analysis:
            # BUG FIX: the original read `stage['stage']['ANALYSE']`, which
            # indexes a string with a string key and raises TypeError; the
            # intended check (matching view_saved_results) is equality.
            if stage['stage'] == 'ANALYSE':
                df = pd.DataFrame(stage['response'])
                for verdict in ['Good', 'Average', 'Bad']:
                    df_tmp = df.loc[df['verdict'] == verdict]
                    if len(df_tmp) > 0:
                        st.sidebar.markdown(f'**{verdict}**')
                        st.sidebar.table(df_tmp)
|
89 |
+
|
90 |
+
def validate_results():
    """Streamlit page: manual QC of extracted entities against the source PDF.

    Loads a saved analysis JSON, exposes the EXTRACTION entities in an
    editable table alongside the PDF, and saves reviewer corrections to a
    ``.qc-entities.csv`` next to the source file.
    """
    # NOTE(review): hardcoded developer-local path — parameterise before deployment.
    dirpath = '/Users/sakshi.tantak/Downloads/Porting Documents/testing-data/sample/poc'
    file = st.sidebar.selectbox('Select file to validation', options = [file for file in os.listdir(dirpath) if file.endswith('.analysis.json')])
    if file is not None:
        filepath = os.path.join(dirpath, file)
        json_data = json.load(open(filepath))
        if len(json_data) > 0:
            for stage in json_data:
                if stage['stage'] == 'EXTRACTION':
                    entities = stage['response']['processed']
                    # Seed each entity with QC columns for the reviewer to edit.
                    for entity in entities:
                        entity.update(
                            {'entityValueQC' : entity['entityValue'], 'isRight' : False, 'QCRemarks' : ''}
                        )
                    df = pd.DataFrame.from_records(entities)
                    df = df[['isRight', 'entityName', 'entityValue', 'entityValueQC', 'QCRemarks']]
                    # Cast to str so mixed-type values render in the editor.
                    df = df.astype({'entityValueQC' : str})
                    # data_editor returns the (possibly edited) frame.
                    df = st.data_editor(df)
                    displayPDF(filepath.replace('.analysis.json', '.pdf'))
                    print(df)
                    if st.button('Save?'):
                        df.to_csv(filepath.replace('.analysis.json', '.qc-entities.csv'), index = False)
|
112 |
+
|
113 |
+
|
114 |
+
def main():
    """Wire up the Streamlit multipage navigation and run the selected page."""
    navigation_spec = {
        'View' : [
            st.Page(view_saved_results, title = 'View Save Results'),
            st.Page(validate_results, title = 'QC'),
        ],
        'Run & Analyse' : [
            st.Page(run, title = 'Run your PDF'),
        ],
    }
    st.navigation(navigation_spec).run()

if __name__ == '__main__':
    main()
|
utils.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import json
|
3 |
+
import requests
|
4 |
+
from typing import Dict, List, Tuple
|
5 |
+
import streamlit as st
|
6 |
+
|
7 |
+
def mock_api_call(pdf_bytes: bytes) -> List:
    """
    Simulates an API call for policy analysis.
    In production, this would make a real API call.

    The ``pdf_bytes`` argument is accepted for interface parity but ignored;
    a fixed two-stage response (ANALYSE + ANALYSIS_SUMMARY) is returned.
    """
    # Canned per-factor verdicts for the ANALYSE stage.
    canned_factors = [
        ("Room rent limit", "Bad", "There is cap of 100 on room rent"),
        ("Deductible", "Good", "No deductible"),
        ("Copay", "Good", "Copayment (0.0) < 5%"),
    ]
    analyse_stage = {
        "stage": "ANALYSE",
        "response": [
            {"factor": factor, "verdict": verdict, "reason": reason}
            for factor, verdict, reason in canned_factors
        ],
    }
    summary_stage = {
        "stage" : "ANALYSIS_SUMMARY",
        "response" : {
            "Good" : "This was great!",
            # "Average" : "This was okay",
            "Bad" : "This was meh :/"
        }
    }
    return [analyse_stage, summary_stage]
|
42 |
+
|
43 |
+
def parse_analysis_response(response: List) -> Tuple[List[str], List[str], List[str]]:
    """
    Parses the API response and extracts categorized factors with their reasons.
    Handles the verdict-based factor format.

    Returns a (good, average, bad) triple of "factor: reason" strings; on any
    error a Streamlit error is shown and three empty lists are returned.
    """
    try:
        # Locate the ANALYSE stage among the response items.
        analysis_item = None
        for candidate in response:
            if candidate.get("stage") == "ANALYSE":
                analysis_item = candidate
                break

        if not analysis_item:
            st.error("No analysis data found in the response")
            return [], [], []

        analysis_list = analysis_item.get("response", [])
        if not analysis_list or not isinstance(analysis_list, list):
            st.error("Invalid analysis response format")
            return [], [], []

        # Bucket each factor description by its (lower-cased) verdict.
        buckets = {'good': [], 'average': [], 'bad': []}
        for entry in analysis_list:
            description = f"{entry.get('factor')}: {entry.get('reason')}"
            verdict = entry.get('verdict', '').lower()
            if verdict in buckets:
                buckets[verdict].append(description)

        return buckets['good'], buckets['average'], buckets['bad']
    except Exception as e:
        st.error(f"Error parsing analysis response: {str(e)}")
        return [], [], []
|
87 |
+
|
88 |
+
def validate_pdf(pdf_bytes: bytes) -> bool:
    """
    Validates the uploaded PDF file.

    Returns True only for non-empty byte strings that begin with the
    standard PDF magic header (``%PDF-``).
    """
    return bool(pdf_bytes) and pdf_bytes.startswith(b'%PDF-')
|
97 |
+
|
98 |
+
def displayPDF(file):
    """Embed a PDF inline in the Streamlit page.

    Parameters
    ----------
    file : str | bytes
        Filesystem path to a PDF, or the raw PDF bytes themselves.
    """
    if isinstance(file, str):
        # FIX: use a context manager — the original `open(file, 'rb').read()`
        # never closed the file handle.
        with open(file, 'rb') as f:
            file_bytes = f.read()
    else:
        file_bytes = file

    # Browsers render the PDF from a base64 data URI inside an <embed> tag.
    base64_pdf = base64.b64encode(file_bytes).decode('utf-8')
    pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf">'

    # Displaying File
    st.markdown(pdf_display, unsafe_allow_html=True)
|