Spaces:

pikaduck
/

policy-analyser

Sleeping

App Files Files Community

Sakshi committed on Feb 26

Commit

bef8e94

1 Parent(s): 7d4da57

arch lob agnostic

Browse files

Files changed (14) hide show

app.py +81 -34
policy_analyser/__init__.py +6 -9
policy_analyser/analyse.py +100 -186
policy_analyser/analyse_.py +233 -0
policy_analyser/data/{Policy_Wordings_Acko_Personal_Health_Policy_Applicable_for_the_policies_sold_post_1_10_2024_64ea02eb51_ab3c8eefa2.md → health.md} +0 -0
policy_analyser/llm.py +1 -1
policy_analyser/prompts/analysis.txt +3 -143
policy_analyser/prompts/auto/__init__.py +0 -0
policy_analyser/prompts/health/__init__.py +0 -0
policy_analyser/prompts/health/analysis_output_format.txt +24 -0
policy_analyser/prompts/{extraction.txt → health/extraction.txt} +0 -0
policy_analyser/prompts/health/rules.txt +114 -0
policy_analyser/prompts/{suggest.txt → health/suggest.txt} +0 -0
policy_analyser/prompts/life/__init__.py +0 -0

app.py CHANGED Viewed

@@ -1,15 +1,55 @@
 import os
 import re
 import streamlit as st
 from utils import validate_pdf
 from styles import apply_custom_styles
-from policy_analyser.analyse import analyse
 if 'GPT_KEY' not in os.environ or os.environ.get('GPT_KEY') in [None, '']:
     os.environ['GPT_KEY'] = st.secrets['GPT_KEY']
 def main():
     # Apply custom styles
     apply_custom_styles()
@@ -27,6 +67,11 @@ def main():
     st.markdown('<div class="upload-container">', unsafe_allow_html=True)
     uploaded_files = st.file_uploader("Choose policy PDF files", type="pdf", accept_multiple_files=True)
     print(uploaded_files)
     st.markdown('</div>', unsafe_allow_html=True)
     if uploaded_files and st.button('Analyse'):
@@ -51,7 +96,7 @@ def main():
             with st.spinner(f"Analyzing {uploaded_file.name}..."):
                 try:
                     # Make API call
-                    response = analyse(pdf_bytes, True)
                     analysis = next(
                         (item for item in response if item.get("stage") == "ANALYSE"), None
                     )['response']
@@ -63,7 +108,7 @@ def main():
                     # Store results
                     all_analyses.append({
                         'name': uploaded_file.name,
-                        'analysis' : re.sub(r'\<\/?(GOOD|AVERAGE|BAD|FINAL_VERDICT)\>', '', analysis),
                         'suggestion' : suggestion
                     })
@@ -75,43 +120,45 @@ def main():
             for idx, analysis in enumerate(all_analyses):
                 with st.expander(f"### Policy {idx + 1}: {analysis['name']}"):
                     with st.container():
-                        st.markdown(analysis['analysis'])
                     with st.container():
                         st.markdown('# Why Acko? 🚀')
                         st.markdown(analysis['suggestion'])
         # Detailed Comparison Tab
         with tab2:
-            if len(all_analyses) > 1:
-                # Create comparison matrix
-                factors_to_compare = set()
-                for analysis in all_analyses:
-                    factors_to_compare.update(
-                        [f.split(':')[0] for f in analysis['good_factors'] +
-                         analysis['average_factors'] + analysis['bad_factors']]
-                    )
-                # Create comparison table
-                st.markdown("### Policy Comparison Matrix")
-                comparison_data = []
-                for factor in sorted(factors_to_compare):
-                    row = {'Factor': factor}
-                    for idx, analysis in enumerate(all_analyses):
-                        policy_name = f"Policy {idx + 1}"
-                        verdict = 'Not Found'
-                        for category in ['good_factors', 'average_factors', 'bad_factors']:
-                            for item in analysis[category]:
-                                if item.split(':')[0] == factor:
-                                    verdict = category.split('_')[0].title()
-                                    break
-                        row[policy_name] = verdict
-                    comparison_data.append(row)
-                # Display comparison table
-                st.table(comparison_data)
-            else:
-                st.info("Upload multiple policies to see comparison")
     # Footer
     st.markdown("""

 import os
 import re
+import json
 import streamlit as st
+from streamviz import gauge
 from utils import validate_pdf
 from styles import apply_custom_styles
+from policy_analyser.analyse import Health
 if 'GPT_KEY' not in os.environ or os.environ.get('GPT_KEY') in [None, '']:
     os.environ['GPT_KEY'] = st.secrets['GPT_KEY']
+if 'health_analyser' not in st.session_state:
+    st.session_state.health_analyser = Health()
+def markdown_table_to_json(markdown):
+    """Convert a pipe-delimited markdown table into a list of row dicts.
+
+    The first non-blank line is taken as the header row, the next one as the
+    separator row (ignored), and every following non-blank line becomes one
+    dict mapping header text to cell text.
+
+    Args:
+        markdown: Markdown source containing a single table.
+
+    Returns:
+        A list with one dict per data row; empty when there is no data row.
+    """
+    def split_cells(line):
+        # Remove only the optional outer pipes. Empty inner cells are kept so
+        # the remaining values stay aligned with their headers (dropping them
+        # shifted every later value one column to the left).
+        line = line.strip()
+        if line.startswith("|"):
+            line = line[1:]
+        if line.endswith("|"):
+            line = line[:-1]
+        return [cell.strip() for cell in line.split("|")]
+    # Blank lines inside the block would otherwise turn into empty rows and
+    # inflate any count derived from the result.
+    lines = [line for line in markdown.strip().split("\n") if line.strip()]
+    if len(lines) < 3:
+        # Header + separator only (or nothing at all): no data rows.
+        return []
+    headers = split_cells(lines[0])
+    return [dict(zip(headers, split_cells(line))) for line in lines[2:]]
+def visualise_pie_chart(analysis):
+    """Draw a gauge scoring a policy from the verdict tables in its analysis.
+
+    The analysis text is expected to hold markdown tables wrapped in
+    <GOOD>, <AVERAGE> and <BAD> tags. Every table row is one factor, worth
+    5, 3 or 1 point(s) respectively, against a maximum of 5 per factor.
+
+    Args:
+        analysis: Raw analysis text, with the verdict tags still present.
+    """
+    weights = {'GOOD': 5, 'AVERAGE': 3, 'BAD': 1}
+    score = 0
+    total = 0
+    for verdict, weight in weights.items():
+        open_tag, close_tag = f'<{verdict}>', f'</{verdict}>'
+        # When a tag is absent, split() hands back the whole analysis, which
+        # was then mis-parsed as a table; skip the verdict instead.
+        if open_tag not in analysis or close_tag not in analysis:
+            continue
+        table = analysis.split(open_tag)[-1].split(close_tag)[0]
+        rows = markdown_table_to_json(table)
+        score += weight * len(rows)
+        total += 5 * len(rows)
+    if total == 0:
+        # No factor could be parsed, so there is nothing meaningful to plot.
+        return
+    # Plot the achieved score; previously `total` (the maximum) was plotted
+    # and the computed score was never used.
+    # NOTE(review): grLow/grMid are on a 0..total scale - confirm the gauge's
+    # axis maximum is configured to `total` as well.
+    gauge(gVal = score, gTitle = '', gMode = 'gauge+number',
+          grLow = total // 3,
+          grMid = 2 * (total // 3))
 def main():
     # Apply custom styles
     apply_custom_styles()
     st.markdown('<div class="upload-container">', unsafe_allow_html=True)
     uploaded_files = st.file_uploader("Choose policy PDF files", type="pdf", accept_multiple_files=True)
     print(uploaded_files)
+    lob = st.selectbox(
+        'Type of insurance',
+        options = ['Health', 'Life', 'Auto'],
+        index = 0
+    )
     st.markdown('</div>', unsafe_allow_html=True)
     if uploaded_files and st.button('Analyse'):
             with st.spinner(f"Analyzing {uploaded_file.name}..."):
                 try:
                     # Make API call
+                    response = st.session_state.health_analyser(pdf_bytes)
                     analysis = next(
                         (item for item in response if item.get("stage") == "ANALYSE"), None
                     )['response']
                     # Store results
                     all_analyses.append({
                         'name': uploaded_file.name,
+                        'analysis' : analysis,
                         'suggestion' : suggestion
                     })
             for idx, analysis in enumerate(all_analyses):
                 with st.expander(f"### Policy {idx + 1}: {analysis['name']}"):
                     with st.container():
+                        st.markdown(re.sub(r'\<\/?(GOOD|AVERAGE|BAD|FINAL_VERDICT)\>', '', analysis['analysis']))
                     with st.container():
                         st.markdown('# Why Acko? 🚀')
                         st.markdown(analysis['suggestion'])
+                        # visualise_pie_chart(analysis['analysis'])
         # Detailed Comparison Tab
         with tab2:
+            st.warning('Coming Soon')
+            # if len(all_analyses) > 1:
+            #     # Create comparison matrix
+            #     factors_to_compare = set()
+            #     for analysis in all_analyses:
+            #         factors_to_compare.update(
+            #             [f.split(':')[0] for f in analysis['good_factors'] +
+            #              analysis['average_factors'] + analysis['bad_factors']]
+            #         )
+            #     # Create comparison table
+            #     st.markdown("### Policy Comparison Matrix")
+            #     comparison_data = []
+            #     for factor in sorted(factors_to_compare):
+            #         row = {'Factor': factor}
+            #         for idx, analysis in enumerate(all_analyses):
+            #             policy_name = f"Policy {idx + 1}"
+            #             verdict = 'Not Found'
+            #             for category in ['good_factors', 'average_factors', 'bad_factors']:
+            #                 for item in analysis[category]:
+            #                     if item.split(':')[0] == factor:
+            #                         verdict = category.split('_')[0].title()
+            #                         break
+            #             row[policy_name] = verdict
+            #         comparison_data.append(row)
+            #     # Display comparison table
+            #     st.table(comparison_data)
+            # else:
+            #     st.info("Upload multiple policies to see comparison")
     # Footer
     st.markdown("""

policy_analyser/__init__.py CHANGED Viewed

@@ -32,12 +32,9 @@ GPT_KEY = os.environ.get('GPT_KEY', '')
 GPT_VERSION = '2024-12-01-preview'
 GPT_API_BASE = 'https://ai-ackods910341544474.openai.azure.com/'
-EXTRACTION_PROMPT = open(os.path.join(PROMPTS_DIR, 'extraction.txt')).read()
-entities = json.load(open(os.path.join(DATA_DIR, 'policy_analyser_entities.json')))
-for entity in entities:
-    del entity['entityId']
-entities_str = '\n---\n'.join(['\n'.join([f'{k} : {v}' for k, v in entity.items()]) for entity in entities])
-EXTRACTION_PROMPT += entities_str
-ANALYSIS_PROMPT = open(os.path.join(PROMPTS_DIR, 'analysis.txt')).read().strip()
-SUGGESTION_PROMPT = open(os.path.join(PROMPTS_DIR, 'suggest.txt')).read().strip()
-ACKO_POLICY = open(os.path.join(DATA_DIR, 'Policy_Wordings_Acko_Personal_Health_Policy_Applicable_for_the_policies_sold_post_1_10_2024_64ea02eb51_ab3c8eefa2.md')).read()

 GPT_VERSION = '2024-12-01-preview'
 GPT_API_BASE = 'https://ai-ackods910341544474.openai.azure.com/'
+# EXTRACTION_PROMPT = open(os.path.join(PROMPTS_DIR, 'extraction.txt')).read()
+# entities = json.load(open(os.path.join(DATA_DIR, 'policy_analyser_entities.json')))
+# for entity in entities:
+    # del entity['entityId']
+# entities_str = '\n---\n'.join(['\n'.join([f'{k} : {v}' for k, v in entity.items()]) for entity in entities])
+# EXTRACTION_PROMPT += entities_str

policy_analyser/analyse.py CHANGED Viewed

@@ -4,203 +4,116 @@
 """
 # Imports
 from time import time
 from datetime import datetime
-from policy_analyser import ACKO_POLICY, ANALYSIS_PROMPT, SUGGESTION_PROMPT
 from policy_analyser.ocr import PyMuPDF4LLMOCR
-from policy_analyser.extraction import extract
-from policy_analyser.rules import prepare_payload, rules
 from policy_analyser.llm import call_openai
-# OCR = AzureLayoutOCR()
-OCR = PyMuPDF4LLMOCR()
-def analyse(file_bytes, end2end = False):
-    print('OCR Started ...')
-    ocr_start = time()
-    if isinstance(file_bytes, str):
-        text = file_bytes
-    elif isinstance(file_bytes, (bytearray, bytes)):
-        text, _ = OCR(file_bytes)
-    ocr_end = time()
-    print(f'OCR done [{ocr_end - ocr_start}]')
-    if len(text) > 0:
-        if not end2end:
-            print('Extraction Started ...')
-            ext_start = time()
-            raw_response, entities = extract(text)
-            ext_end = time()
-            print(f'Extraction done [{ext_end - ext_start}]')
-            if len(entities) > 0:
-                print('Preparing payload for analysis ...')
-                payload = prepare_payload(entities)
-                print('Payload prepared for analysis')
-                print('Analysing ...')
-                analysis_start = time()
-                analysis = rules(payload)
-                analysis_end = time()
-                print(f'Analysed [{analysis_end - analysis_start}]')
-                print('Summarising ...')
-                summary = {}
-                summary_start = time()
-                for verdict in ['Good', 'Average', 'Bad']:
-                    descriptions = '\n'.join([factor['reason'] for factor in analysis if factor['verdict'] == verdict])
-                    if len(descriptions) > 0:
-                        prompt = f"""Given the following analysis on the {verdict} factors of a customer's policy that they have bought, generate a crisp and catchy summary of the factors for a customer. Try to make it factor-wise with bullet points
-    NOTE : THE POLICY WAS NOT SOLD BY US
-    analysis : {descriptions}
-    summary : """
-                        response = call_openai(prompt)
-                        print(response)
-                    else:
-                        response = ''
-                    summary[verdict] = response
-                summary_end = time()
-    #             print(f'Summarised [{summary_end - summary_start}]')
-    #             factors_str = ''
-    #             for verdict in ['Good', 'Average', 'Bad']:
-    #                 factors_str += verdict + ' Factors:'
-    #                 factors_str += '\n' + '\n'.join([f"{factor['factor']}: {factor['reason']}" for factor in analysis if factor['verdict'] == verdict])
-    #             print('Suggesting ...')
-    #             suggestion_start = time()
-    #             suggestion = call_openai(f"""Given the following main factors and their values of a customer's health insurance policy, use these factors to compare with given Acko's health policy and suggest to the customer how the Average and Bad factors maybe covered better by Acko's policy.
-    # Format response in less than 50 words and make it factor-wise. Try to format in points. Include emojis to make it catchy.
-    # Customer Poliocy Factors:
-    # {factors_str}
-    # Acko Policy : {ACKO_POLICY}
-    # Customer Suggestion : """)
-    #             suggestion_end = time()
-                # print(f'Suggested [{suggestion_end - suggestion_start}]')
-                response = [
-                    {
-                        'stage' : 'OCR',
-                        'response' : text,
-                        'time' : ocr_end - ocr_start
-                    },
-                    {
-                        'stage' : 'EXTRACTION',
-                        'response' : {
-                            'raw' : raw_response,
-                            'processed' : entities
-                        },
-                        'time' : ext_end - ext_start
-                    },
-                    {
-                        'stage' : 'POST_PROCESS',
-                        'response' : payload,
-                        'time' : 0
-                    },
-                    {
-                        'stage' : 'ANALYSE',
-                        'response' : analysis,
-                        'time' : analysis_end - analysis_start
-                    },
-                    {
-                        'stage' : 'ANALYSIS_SUMMARY',
-                        'response' : summary,
-                        'time' : summary_end - summary_start
-                    },
-                    # {
-                    #     'stage' : 'SUGGEST',
-                    #     'response' : suggestion,
-                    #     'time' : suggestion_end - suggestion_start
-                    # }
-                ]
                 return response
-            response = [
-                    {
-                        'stage' : 'OCR',
-                        'response' : text,
-                        'time' : 0
-                    },
-                    {
-                        'stage' : 'EXTRACTION',
-                        'response' : {
-                            'raw' : '',
-                            'processed' : []
-                        },
-                        'time' : 0
-                    },
-                    {
-                        'stage' : 'POST_PROCESS',
-                        'response' : {},
-                        'time' : 0
-                    },
-                    {
-                        'stage' : 'ANALYSE',
-                        'response' : [],
-                        'time' : 0
-                    },
-                    {
-                        'stage' : 'ANALYSIS_SUMMARY',
-                        'response' : {'Good' : '', 'Average' : '', 'Bad' : ''},
-                        'time' : 0
-                    },
-                    # {
-                    #     'stage' : 'SUGGEST',
-                    #     'response' : '',
-                    #     'time' : 0
-                    # }
-                ]
-            return response
-        else:
-            response = [
-                {
-                    'stage' : 'OCR',
-                    'response' : text,
-                    'time' : ocr_end - ocr_start
-                }
-            ]
-            try:
-                print('Analysing ...')
-                analysis_start = time()
-                raw_response = call_openai(ANALYSIS_PROMPT + 'Policy : ' + text + f"\n\nConsider today's date as {datetime.today().day}/{datetime.today().month}/{datetime.today().year} for your analysis on waiting periods and dates")
-                analysis_end = time()
-                print('Analysis : ', raw_response)
-                print(f'Analysed [{analysis_end - analysis_start}]')
-                if raw_response is not None:
-                    response.append(
-                        {
-                            'stage' : 'ANALYSE',
-                            'response' : raw_response,
-                            'time' : analysis_end - analysis_start
-                        }
-                    )
-                    print('Suggesting our policy ...')
-                    suggestion_start = time()
-                    suggestion = call_openai(SUGGESTION_PROMPT + "\nCustomer Policy Analysis : " + raw_response + "\nAcko's Policy : " + ACKO_POLICY)
-                    suggestion_end = time()
-                    print(f'Suggested [{suggestion_end - suggestion_start}]')
-                    if suggestion is not None:
-                        response.append({
-                            'stage' : 'SUGGEST',
-                            'response' : suggestion,
-                            'time' : suggestion_end - suggestion_start
-                            }
-                        )
-                        return response
-            except Exception as e:
-                print(e)
-                response.extend(
-                    [
-                        {
-                            'stage' : 'ANALYSE',
-                            'response' : '',
-                            'time' : 0
-                        },
-                        {
-                            'stage' : 'SUGGEST',
-                            'response' : '',
-                            'time' : 0
-                        }
-                    ]
-                )
-            return response
 if __name__ == '__main__':
     import os
@@ -208,6 +121,7 @@ if __name__ == '__main__':
     import sys
     from tqdm import tqdm
     filepaths = sys.argv[1:]
     for filepath in tqdm(filepaths):
         # if os.path.isfile(filepath.replace('.pdf', '.analysis.json')):
@@ -220,7 +134,7 @@ if __name__ == '__main__':
         elif filepath.endswith(('.txt', '.md')):
             file_bytes = open(filepath).read()
         end2end = True
-        analysis = analyse(file_bytes, True)
         # print(analysis)
         basepath = os.path.splitext(filepath)[0]
         if not end2end:

 """
 # Imports
+import os
 from time import time
 from datetime import datetime
+from policy_analyser import PROMPTS_DIR, DATA_DIR
 from policy_analyser.ocr import PyMuPDF4LLMOCR
 from policy_analyser.llm import call_openai
+class LOB:
+    """Base pipeline for one line of business: OCR -> analyse -> suggest.
+
+    Subclasses supply the line-of-business specific `_analyse` and `_suggest`
+    steps; calling an instance runs the three stages in order and reports
+    each one in the returned list.
+    """
+    def __init__(self, ocr_engine = 'open-source/pymupdf4llm'):
+        """Set up the OCR engine and load the shared analysis prompt.
+
+        Args:
+            ocr_engine: Identifier of the OCR backend to use.
+        """
+        # NOTE(review): any other ocr_engine value leaves self.engine unset,
+        # so OCR of bytes input will fail later - confirm whether more
+        # engines are planned.
+        if ocr_engine == 'open-source/pymupdf4llm':
+            self.engine = PyMuPDF4LLMOCR()
+        self.file_type = 'pdf'
+        with open(os.path.join(PROMPTS_DIR, 'analysis.txt'), 'r') as f:
+            self.analysis_prompt = f.read()
+    def __call__(self, file_bytes):
+        """Run OCR, analysis and suggestion on one document.
+
+        Args:
+            file_bytes: Already extracted text (str) or the raw document
+                (bytes / bytearray) to pass through the OCR engine.
+
+        Returns:
+            A list of three dicts with keys 'stage', 'response' and 'time'
+            for the stages 'OCR', 'ANALYSE' and 'SUGGEST', in that order.
+            A stage that failed or was not reached keeps '' and 0.
+        """
+        response = [
+            {'stage' : stage, 'response' : '', 'time' : 0}
+            for stage in ('OCR', 'ANALYSE', 'SUGGEST')
+        ]
+        # Stage 1 : OCR
+        try:
+            print('OCR Started ...')
+            ocr_start = time()
+            if isinstance(file_bytes, str):
+                text = file_bytes
+            elif isinstance(file_bytes, (bytearray, bytes)):
+                text, _ = self.engine(file_bytes)
+            else:
+                # Previously this case only surfaced as an unbound `text`.
+                raise TypeError(f'Unsupported input type : {type(file_bytes).__name__}')
+            ocr_end = time()
+            print(f'OCR done [{ocr_end - ocr_start}]')
+        except Exception as ocr_e:
+            print(f'Exception while OCR : {ocr_e}')
+            return response
+        if not text:
+            return response
+        response[0].update({'response' : text, 'time' : ocr_end - ocr_start})
+        # Stage 2 : analysis
+        try:
+            print('Analysing ...')
+            analysis_start = time()
+            raw_response = self._analyse(text = text)
+            analysis_end = time()
+            print('Analysis : ', raw_response)
+            print(f'Analysed [{analysis_end - analysis_start}]')
+        except Exception as analysis_e:
+            print(f'Exception while analysing : {analysis_e}')
+            return response
+        if not raw_response:
+            return response
+        response[1].update({'response' : raw_response, 'time' : analysis_end - analysis_start})
+        # Stage 3 : suggestion
+        try:
+            print('Suggesting our policy ...')
+            suggestion_start = time()
+            suggestion = self._suggest(analysis = raw_response)
+            suggestion_end = time()
+            print(f'Suggested [{suggestion_end - suggestion_start}]')
+        except Exception as sugg_e:
+            print(f'Exception while suggesting : {sugg_e}')
+            return response
+        if suggestion:
+            response[2].update({'response' : suggestion, 'time' : suggestion_end - suggestion_start})
+        return response
+    def _analyse(self, **kwargs):
+        """Analyse the OCR text; must be provided by the subclass."""
+        # `NotImplemented` is a comparison sentinel, not an exception, so
+        # raising it produced a TypeError instead of the intended error.
+        raise NotImplementedError
+    def _suggest(self, **kwargs):
+        """Build a suggestion from the analysis; must be provided by the subclass."""
+        raise NotImplementedError
+class Health(LOB):
+    """Health-insurance line of business.
+
+    Fills the shared analysis prompt with the health rules and output format,
+    and compares the customer's policy against Acko's health policy wording.
+    The pipeline entry point (`__call__`) is inherited from LOB unchanged.
+    """
+    def __init__(self, ocr_engine = 'open-source/pymupdf4llm'):
+        """Load the health prompts and Acko's health policy text.
+
+        Args:
+            ocr_engine: Identifier of the OCR backend, forwarded to LOB.
+        """
+        super().__init__(ocr_engine)
+        with open(os.path.join(PROMPTS_DIR, 'health', 'analysis_output_format.txt'), 'r') as f:
+            self.analysis_output_format = f.read()
+        with open(os.path.join(PROMPTS_DIR, 'health', 'rules.txt'), 'r') as f:
+            self.rules = f.read()
+        with open(os.path.join(PROMPTS_DIR, 'health', 'suggest.txt'), 'r') as f:
+            self.suggest_prompt = f.read()
+        with open(os.path.join(DATA_DIR, 'health.md'), 'r') as f:
+            self.acko_policy = f.read()
+    def _analyse(self, **kwargs):
+        """Ask the LLM to analyse the policy text.
+
+        Args:
+            **kwargs: Must carry `text`, the policy content.
+
+        Returns:
+            The LLM's analysis, or '' when there is no text or no answer.
+        """
+        text = kwargs.get('text')
+        if not text:
+            return ''
+        prompt = self.analysis_prompt.replace('{{lob}}', 'health').replace('{{rules}}', self.rules).replace('{{output_format}}', self.analysis_output_format)
+        # Read the clock once so day, month and year come from the same instant.
+        today = datetime.today()
+        prompt += 'Policy : ' + text + f"\n\nConsider today's date as {today.day}/{today.month}/{today.year} for your analysis on waiting periods and dates"
+        # The LLM call may yield None (presumably when the message has no
+        # content); normalise to '' instead of failing on len(None).
+        return call_openai(prompt) or ''
+    def _suggest(self, **kwargs):
+        """Ask the LLM how Acko's policy compares with the analysed one.
+
+        Args:
+            **kwargs: Must carry `analysis`, the output of `_analyse`.
+
+        Returns:
+            The LLM's suggestion, or '' when there is no analysis or no answer.
+        """
+        analysis = kwargs.get('analysis')
+        if not analysis:
+            return ''
+        prompt = self.suggest_prompt + "\nCustomer Policy Analysis : " + analysis + "\nAcko's Policy : " + self.acko_policy
+        return call_openai(prompt) or ''
 if __name__ == '__main__':
     import os
     import sys
     from tqdm import tqdm
     filepaths = sys.argv[1:]
+    health = Health()
     for filepath in tqdm(filepaths):
         # if os.path.isfile(filepath.replace('.pdf', '.analysis.json')):
         elif filepath.endswith(('.txt', '.md')):
             file_bytes = open(filepath).read()
         end2end = True
+        analysis = health(file_bytes)
         # print(analysis)
         basepath = os.path.splitext(filepath)[0]
         if not end2end:

policy_analyser/analyse_.py ADDED Viewed

	@@ -0,0 +1,233 @@

+"""
+    Run analysis
+    @author : Sakshi Tantak
+"""
+# Imports
+from time import time
+from datetime import datetime
+from policy_analyser import ACKO_POLICY, ANALYSIS_PROMPT, SUGGESTION_PROMPT
+from policy_analyser.ocr import PyMuPDF4LLMOCR
+from policy_analyser.extraction import extract
+from policy_analyser.rules import prepare_payload, rules
+from policy_analyser.llm import call_openai
+# OCR = AzureLayoutOCR()
+OCR = PyMuPDF4LLMOCR()
+def analyse(file_bytes, end2end = False):
+    """Run the legacy analysis pipeline on one policy document.
+
+    Args:
+        file_bytes: Policy text (str) or the raw document (bytes / bytearray)
+            to be passed through the module-level OCR engine.
+        end2end: When False, run extraction -> payload -> rules -> summary;
+            when True, send the whole text to the LLM for analysis and then
+            for a suggestion.
+
+    Returns:
+        A list of dicts with keys 'stage', 'response' and 'time'.
+        NOTE(review): nothing is returned (i.e. None) when the OCR text is
+        empty, and `text` is never bound when file_bytes is neither str nor
+        bytes - callers should be checked against both cases.
+    """
+    print('OCR Started ...')
+    ocr_start = time()
+    if isinstance(file_bytes, str):
+        text = file_bytes
+    elif isinstance(file_bytes, (bytearray, bytes)):
+        text, _ = OCR(file_bytes)
+    ocr_end = time()
+    print(f'OCR done [{ocr_end - ocr_start}]')
+    if len(text) > 0:
+        if not end2end:
+            # Rule-based path : extract entities, then apply the rules to them.
+            print('Extraction Started ...')
+            ext_start = time()
+            raw_response, entities = extract(text)
+            ext_end = time()
+            print(f'Extraction done [{ext_end - ext_start}]')
+            if len(entities) > 0:
+                print('Preparing payload for analysis ...')
+                payload = prepare_payload(entities)
+                print('Payload prepared for analysis')
+                print('Analysing ...')
+                analysis_start = time()
+                analysis = rules(payload)
+                analysis_end = time()
+                print(f'Analysed [{analysis_end - analysis_start}]')
+                print('Summarising ...')
+                summary = {}
+                summary_start = time()
+                # One LLM summary per verdict, built from that verdict's reasons.
+                for verdict in ['Good', 'Average', 'Bad']:
+                    descriptions = '\n'.join([factor['reason'] for factor in analysis if factor['verdict'] == verdict])
+                    if len(descriptions) > 0:
+                        prompt = f"""Given the following analysis on the {verdict} factors of a customer's policy that they have bought, generate a crisp and catchy summary of the factors for a customer. Try to make it factor-wise with bullet points
+    NOTE : THE POLICY WAS NOT SOLD BY US
+    analysis : {descriptions}
+    summary : """
+                        response = call_openai(prompt)
+                        print(response)
+                    else:
+                        response = ''
+                    summary[verdict] = response
+                summary_end = time()
+    #             print(f'Summarised [{summary_end - summary_start}]')
+    #             factors_str = ''
+    #             for verdict in ['Good', 'Average', 'Bad']:
+    #                 factors_str += verdict + ' Factors:'
+    #                 factors_str += '\n' + '\n'.join([f"{factor['factor']}: {factor['reason']}" for factor in analysis if factor['verdict'] == verdict])
+    #             print('Suggesting ...')
+    #             suggestion_start = time()
+    #             suggestion = call_openai(f"""Given the following main factors and their values of a customer's health insurance policy, use these factors to compare with given Acko's health policy and suggest to the customer how the Average and Bad factors maybe covered better by Acko's policy.
+    # Format response in less than 50 words and make it factor-wise. Try to format in points. Include emojis to make it catchy.
+    # Customer Poliocy Factors:
+    # {factors_str}
+    # Acko Policy : {ACKO_POLICY}
+    # Customer Suggestion : """)
+    #             suggestion_end = time()
+                # print(f'Suggested [{suggestion_end - suggestion_start}]')
+                response = [
+                    {
+                        'stage' : 'OCR',
+                        'response' : text,
+                        'time' : ocr_end - ocr_start
+                    },
+                    {
+                        'stage' : 'EXTRACTION',
+                        'response' : {
+                            'raw' : raw_response,
+                            'processed' : entities
+                        },
+                        'time' : ext_end - ext_start
+                    },
+                    {
+                        'stage' : 'POST_PROCESS',
+                        'response' : payload,
+                        'time' : 0
+                    },
+                    {
+                        'stage' : 'ANALYSE',
+                        'response' : analysis,
+                        'time' : analysis_end - analysis_start
+                    },
+                    {
+                        'stage' : 'ANALYSIS_SUMMARY',
+                        'response' : summary,
+                        'time' : summary_end - summary_start
+                    },
+                    # {
+                    #     'stage' : 'SUGGEST',
+                    #     'response' : suggestion,
+                    #     'time' : suggestion_end - suggestion_start
+                    # }
+                ]
+                return response
+            # No entities were extracted : report every stage with empty placeholders.
+            response = [
+                    {
+                        'stage' : 'OCR',
+                        'response' : text,
+                        'time' : 0
+                    },
+                    {
+                        'stage' : 'EXTRACTION',
+                        'response' : {
+                            'raw' : '',
+                            'processed' : []
+                        },
+                        'time' : 0
+                    },
+                    {
+                        'stage' : 'POST_PROCESS',
+                        'response' : {},
+                        'time' : 0
+                    },
+                    {
+                        'stage' : 'ANALYSE',
+                        'response' : [],
+                        'time' : 0
+                    },
+                    {
+                        'stage' : 'ANALYSIS_SUMMARY',
+                        'response' : {'Good' : '', 'Average' : '', 'Bad' : ''},
+                        'time' : 0
+                    },
+                    # {
+                    #     'stage' : 'SUGGEST',
+                    #     'response' : '',
+                    #     'time' : 0
+                    # }
+                ]
+            return response
+        else:
+            # End-to-end path : the LLM analyses the raw text, then suggests.
+            response = [
+                {
+                    'stage' : 'OCR',
+                    'response' : text,
+                    'time' : ocr_end - ocr_start
+                }
+            ]
+            try:
+                print('Analysing ...')
+                analysis_start = time()
+                raw_response = call_openai(ANALYSIS_PROMPT + 'Policy : ' + text + f"\n\nConsider today's date as {datetime.today().day}/{datetime.today().month}/{datetime.today().year} for your analysis on waiting periods and dates")
+                analysis_end = time()
+                print('Analysis : ', raw_response)
+                print(f'Analysed [{analysis_end - analysis_start}]')
+                if raw_response is not None:
+                    response.append(
+                        {
+                            'stage' : 'ANALYSE',
+                            'response' : raw_response,
+                            'time' : analysis_end - analysis_start
+                        }
+                    )
+                    print('Suggesting our policy ...')
+                    suggestion_start = time()
+                    suggestion = call_openai(SUGGESTION_PROMPT + "\nCustomer Policy Analysis : " + raw_response + "\nAcko's Policy : " + ACKO_POLICY)
+                    suggestion_end = time()
+                    print(f'Suggested [{suggestion_end - suggestion_start}]')
+                    if suggestion is not None:
+                        response.append({
+                            'stage' : 'SUGGEST',
+                            'response' : suggestion,
+                            'time' : suggestion_end - suggestion_start
+                            }
+                        )
+                        return response
+            except Exception as e:
+                print(e)
+                # NOTE(review): if the failure happened after the ANALYSE stage
+                # was appended above, the list ends up with two ANALYSE entries.
+                response.extend(
+                    [
+                        {
+                            'stage' : 'ANALYSE',
+                            'response' : '',
+                            'time' : 0
+                        },
+                        {
+                            'stage' : 'SUGGEST',
+                            'response' : '',
+                            'time' : 0
+                        }
+                    ]
+                )
+            # Reached after an exception, or when either LLM call returned None.
+            return response
+if __name__ == '__main__':
+    # Command-line driver: analyse every file passed as an argument and write
+    # the results next to it.
+    import os
+    import json
+    import sys
+    from tqdm import tqdm
+    filepaths = sys.argv[1:]
+    end2end = True
+    for filepath in tqdm(filepaths):
+        # Never feed our own output files back into the pipeline.
+        if '.analysis' in filepath or '.e2e-analysis' in filepath:
+            continue
+        print(filepath)
+        if filepath.endswith('.pdf'):
+            with open(filepath, 'rb') as f:
+                file_bytes = f.read()
+        elif filepath.endswith(('.txt', '.md')):
+            with open(filepath) as f:
+                file_bytes = f.read()
+        else:
+            # Without this branch the previous iteration's content was
+            # analysed again and saved under this file's name.
+            print(f'Skipping unsupported file : {filepath}')
+            continue
+        analysis = analyse(file_bytes, end2end)
+        if analysis is None:
+            # analyse() returns nothing when no text could be read.
+            print(f'No analysis produced for : {filepath}')
+            continue
+        basepath = os.path.splitext(filepath)[0]
+        if not end2end:
+            with open(basepath + '.analysis.json', 'w') as f:
+                json.dump(analysis, f, indent = 4)
+        else:
+            with open(basepath + '.e2e-analysis.json', 'w') as f:
+                json.dump(analysis, f, indent = 4)
+            # Look the stage up by name: it is absent when the LLM gave no answer.
+            markdown = next((stage['response'] for stage in analysis if stage['stage'] == 'ANALYSE'), '')
+            with open(basepath + '.e2e-analysis.md', 'w') as f:
+                f.write(markdown)

policy_analyser/data/{Policy_Wordings_Acko_Personal_Health_Policy_Applicable_for_the_policies_sold_post_1_10_2024_64ea02eb51_ab3c8eefa2.md → health.md} RENAMED Viewed

File without changes

policy_analyser/llm.py CHANGED Viewed

@@ -25,5 +25,5 @@ def call_openai(system_prompt, seed = 42):
         # response_format = response_format,
         reasoning_effort = 'low'
     )
     return response.choices[0].message.content

         # response_format = response_format,
         reasoning_effort = 'low'
     )
+    print('LLM response : ', response)
     return response.choices[0].message.content

policy_analyser/prompts/analysis.txt CHANGED Viewed

@@ -1,154 +1,14 @@
-Given the markdown content of a customer's health insurance policy, analyse the insurance policy for the customer by applying given rules for specific factors of the policy.
 Apply the following rules enclosed in triple backticks on the policy to analyse it.
 Make sure you consider the values for analysis factors on the basis of the customer's selected insurance plan when multiple plans are described in the policy terms.
 Make sure all factors appear in one of Good, Average or Bad only. No factor should be repeated in more than 1 verdict table.
 Note : Top cities = [Mumbai, Delhi, Bangalore, Chennai, Hyderabad, Gurgaon, Pune]
 ```
-IF Adults == 1:
-    IF Is_Top_City:
-        IF Sum_Insured >= 2500000:
-            Verdict = "Good"
-        ELSE IF Sum_Insured >= 1000000 AND Sum_Insured < 2500000:
-            Verdict = "Average"
-        ELSE:
-            Verdict = "Bad"
-    ELSE:
-        IF Sum_Insured >= 1000000:
-            Verdict = "Good"
-        ELSE IF Sum_Insured >= 500000 AND Sum_Insured < 1000000:
-            Verdict = "Average"
-        ELSE:
-            Verdict = "Bad"
-IF Adults >= 2:
-    IF Children == 0:
-        IF Is_Top_City:
-            IF Sum_Insured >= 5000000:
-                Verdict = "Good"
-            ELSE IF Sum_Insured >= 2500000 AND Sum_Insured < 5000000:
-                Verdict = "Average"
-            ELSE:
-                Verdict = "Bad"
-        ELSE:
-            IF Sum_Insured >= 2500000:
-                Verdict = "Good"
-            ELSE IF Sum_Insured >= 1000000 AND Sum_Insured < 2500000:
-                Verdict = "Average"
-            ELSE:
-                Verdict = "Bad"
-    IF Children >= 1:
-        IF Children > 1 OR Is_Top_City:
-            IF Sum_Insured >= 10000000:
-                Verdict = "Good"
-            ELSE IF Sum_Insured >= 5000000 AND Sum_Insured < 10000000:
-                Verdict = "Average"
-            ELSE:
-                Verdict = "Bad"
-        ELSE:
-            IF Sum_Insured >= 5000000:
-                Verdict = "Good"
-            ELSE IF Sum_Insured >= 2500000 AND Sum_Insured < 5000000:
-                Verdict = "Average"
-            ELSE:
-                Verdict = "Bad"
-# Room Rent Limit
-IF Room_Rent_Limit > 0:
-    Verdict = "Bad"
-ELSE:
-    Verdict = "Good"
-# Deductibles
-IF Deductible > 0:
-    Verdict = "Bad"
-ELSE:
-    Verdict = "Good"
-# Sublimits
-IF Sublimits == EMPTY:
-    Verdict = "Good"
-ELSE:
-    Verdict = "Bad"
-# Copayment
-IF Copay <= 5:
-    Verdict = "Good"
-ELSE IF Copay > 5 AND Copay <= 10:
-    Verdict = "Average"
-ELSE:
-    Verdict = "Bad"
-# Pre-existing Diseases (PED) Waiting Period
-IF PED_Waiting_Period > 0:
-    IF Policy_Age > PED_Waiting_Period:
-        Verdict = "Good"
-    ELSE:
-        Verdict = "Bad"
-ELSE:
-    Verdict = "Good"
-# 30-Day Waiting Period
-IF Thirty_Day_Waiting_Period:
-    IF Policy_Age > 1:
-        Verdict = "Good"
-    ELSE:
-        Verdict = "Bad"
-ELSE:
-    Verdict = "Good"
-# Specific Illness Waiting Period
-IF Specific_Illness_Waiting_Period > 0:
-    IF Policy_Age > Specific_Illness_Waiting_Period:
-        Verdict = "Good"
-    ELSE:
-        Verdict = "Bad"
-ELSE:
-    Verdict = "Good"
-# Maternity Benefits
-IF Maternity_Benefits:
-    Verdict = "Good"
-    IF Maternity_Waiting_Period > 0:
-        IF Policy_Age > Maternity_Waiting_Period:
-            Verdict = "Good"
-        ELSE:
-            Verdict = "Bad"
-    ELSE:
-        Verdict = "Good"
-ELSE:
-    Verdict = "Bad"
 ```
 Format your response in the following way, to present analysis to customer. Don't keep a table if there are no factors in it.
 Use appropriate language and emojis to portray analysis and verdicts to the customer. Generate short and crisp verdicts and analysis. Be discreet about the rules: do not expose them to the customer, but use them to explain your reasoning and analysis:
-<CUSTOMER_RESPONSE>
-    # Our Analysis of your policy [Name of policy] by [Name of insurance company]
-    <GOOD>
-    ## Good Factors
-    | Factor | Your policy | Our Analysis |
-    | --- | --- | --- |
-    | Sum Insured | Value of sum insured in the policy | Analysis of why sum insured is good based on the given rules |
-    </GOOD>
-    <AVERAGE>
-    ## Average Factors
-    | Factor | Your policy | Our Analysis |
-    | --- | --- | --- |
-    | Copay | Value of copayment in the policy | Analysis of why copay is average based on the given rules |
-    </AVERAGE>
-    <BAD>
-    ## Bad Factors
-    | Factor | Your policy | Our Analysis |
-    | --- | --- | --- |
-    | Deductible | Value of deductible in the policy | Analysis of why deductible is bad based on the given rules |
-    </BAD>
-    <FINAL_VERDICT>
-    Final and short point-wise verdict on the analysis
-    </FINAL_VERDICT>
-</CUSTOMER_RESPONSE>

+Given the markdown content of a customer's {{lob}} insurance policy, analyse the insurance policy for the customer by applying given rules for specific factors of the policy.
 Apply the following rules enclosed in triple backticks on the policy to analyse it.
 Make sure you consider the values for analysis factors on the basis of the customer's selected insurance plan when multiple plans are described in the policy terms.
 Make sure all factors appear in one of Good, Average or Bad only. No factor should be repeated in more than 1 verdict table.
 Note : Top cities = [Mumbai, Delhi, Bangalore, Chennai, Hyderabad, Gurgaon, Pune]
 ```
+{{rules}}
 ```
 Format your response in the following way, to present analysis to customer. Don't keep a table if there are no factors in it.
 Use appropriate language and emojis to portray analysis and verdicts to the customer. Generate short and crisp verdicts and analysis. Be discreet about the rules: do not expose them to the customer, but use them to explain your reasoning and analysis:
+{{output_format}}

policy_analyser/prompts/auto/__init__.py ADDED Viewed

File without changes

policy_analyser/prompts/health/__init__.py ADDED Viewed

File without changes

policy_analyser/prompts/health/analysis_output_format.txt ADDED Viewed

	@@ -0,0 +1,24 @@

+<CUSTOMER_RESPONSE>
+    # Our Analysis of your policy [Name of policy] by [Name of insurance company]
+    <GOOD>
+    ## Good Factors
+    | Factor | Your policy | Our Analysis |
+    | --- | --- | --- |
+    | Sum Insured | Value of sum insured in the policy | Analysis of why sum insured is good based on the given rules |
+    </GOOD>
+    <AVERAGE>
+    ## Average Factors
+    | Factor | Your policy | Our Analysis |
+    | --- | --- | --- |
+    | Copay | Value of copayment in the policy | Analysis of why copay is average based on the given rules |
+    </AVERAGE>
+    <BAD>
+    ## Bad Factors
+    | Factor | Your policy | Our Analysis |
+    | --- | --- | --- |
+    | Deductible | Value of deductible in the policy | Analysis of why deductible is bad based on the given rules |
+    </BAD>
+</CUSTOMER_RESPONSE>

policy_analyser/prompts/{extraction.txt → health/extraction.txt} RENAMED Viewed

File without changes

policy_analyser/prompts/health/rules.txt ADDED Viewed

	@@ -0,0 +1,114 @@

+IF Adults == 1:
+    IF Is_Top_City:
+        IF Sum_Insured >= 2500000:
+            Verdict = "Good"
+        ELSE IF Sum_Insured >= 1000000 AND Sum_Insured < 2500000:
+            Verdict = "Average"
+        ELSE:
+            Verdict = "Bad"
+    ELSE:
+        IF Sum_Insured >= 1000000:
+            Verdict = "Good"
+        ELSE IF Sum_Insured >= 500000 AND Sum_Insured < 1000000:
+            Verdict = "Average"
+        ELSE:
+            Verdict = "Bad"
+IF Adults >= 2:
+    IF Children == 0:
+        IF Is_Top_City:
+            IF Sum_Insured >= 5000000:
+                Verdict = "Good"
+            ELSE IF Sum_Insured >= 2500000 AND Sum_Insured < 5000000:
+                Verdict = "Average"
+            ELSE:
+                Verdict = "Bad"
+        ELSE:
+            IF Sum_Insured >= 2500000:
+                Verdict = "Good"
+            ELSE IF Sum_Insured >= 1000000 AND Sum_Insured < 2500000:
+                Verdict = "Average"
+            ELSE:
+                Verdict = "Bad"
+    IF Children >= 1:
+        IF Children > 1 OR Is_Top_City:
+            IF Sum_Insured >= 10000000:
+                Verdict = "Good"
+            ELSE IF Sum_Insured >= 5000000 AND Sum_Insured < 10000000:
+                Verdict = "Average"
+            ELSE:
+                Verdict = "Bad"
+        ELSE:
+            IF Sum_Insured >= 5000000:
+                Verdict = "Good"
+            ELSE IF Sum_Insured >= 2500000 AND Sum_Insured < 5000000:
+                Verdict = "Average"
+            ELSE:
+                Verdict = "Bad"
+# Room Rent Limit
+IF Room_Rent_Limit > 0:
+    Verdict = "Bad"
+ELSE:
+    Verdict = "Good"
+# Deductibles
+IF Deductible > 0:
+    Verdict = "Bad"
+ELSE:
+    Verdict = "Good"
+# Sublimits
+IF Sublimits == EMPTY:
+    Verdict = "Good"
+ELSE:
+    Verdict = "Bad"
+# Copayment
+IF Copay <= 5:
+    Verdict = "Good"
+ELSE IF Copay > 5 AND Copay <= 10:
+    Verdict = "Average"
+ELSE:
+    Verdict = "Bad"
+# Pre-existing Diseases (PED) Waiting Period
+IF PED_Waiting_Period > 0:
+    IF Policy_Age > PED_Waiting_Period:
+        Verdict = "Good"
+    ELSE:
+        Verdict = "Bad"
+ELSE:
+    Verdict = "Good"
+# 30-Day Waiting Period
+IF Thirty_Day_Waiting_Period:
+    IF Policy_Age > 1:
+        Verdict = "Good"
+    ELSE:
+        Verdict = "Bad"
+ELSE:
+    Verdict = "Good"
+# Specific Illness Waiting Period
+IF Specific_Illness_Waiting_Period > 0:
+    IF Policy_Age > Specific_Illness_Waiting_Period:
+        Verdict = "Good"
+    ELSE:
+        Verdict = "Bad"
+ELSE:
+    Verdict = "Good"
+# Maternity Benefits
+IF Maternity_Benefits:
+    Verdict = "Good"
+    IF Maternity_Waiting_Period > 0:
+        IF Policy_Age > Maternity_Waiting_Period:
+            Verdict = "Good"
+        ELSE:
+            Verdict = "Bad"
+    ELSE:
+        Verdict = "Good"
+ELSE:
+    Verdict = "Bad"

policy_analyser/prompts/{suggest.txt → health/suggest.txt} RENAMED Viewed

File without changes

policy_analyser/prompts/life/__init__.py ADDED Viewed

File without changes