File size: 5,017 Bytes
eef9e83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
import os
import re
import json

from dotenv import load_dotenv


# Populate the process environment from a .env file (python-dotenv) before the
# lookups below run.
load_dotenv()
# Settings read from the environment; os.getenv yields None for any unset variable.
# NOTE(review): the Mongo and Pinecone values are not referenced in the visible
# part of this file — presumably consumed by other modules; confirm before removing.
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")
# API key handed to the Gemini chat model below.
FLASH_API = os.getenv("FLASH_API")
PINECONE_API=os.getenv("PINECONE_API")
PINECONE_INDEX=os.getenv("PINECONE_INDEX")

# Shared chat-model client used by process_image_using_llm: Gemini 1.5 Flash
# with temperature 0.2 and no explicit output-token limit (max_tokens=None).
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash-002", temperature=0.2, max_tokens=None, google_api_key=FLASH_API)
# Prompt sent alongside every page image. It asks the model for the table rows
# as JSON plus a description, per-column summaries and two suggested plot
# columns; the top-level keys named in the sample (table_data, description,
# column_summary, best_column1, best_column2) are the ones that
# process_image_using_llm reads from the reply.
# The string is an f-string without placeholders, which is why every literal
# brace in the sample is doubled.
# NOTE(review): the prompt tells the model to use "None" for empty cells and the
# sample contains a "//" comment; neither is valid JSON for json.loads, so a
# reply that copies them literally will fail to parse — consider "null" and
# dropping the comment.
system_prompt_text = f"""Please extract the table from the image and return the table data in JSON format, with each row represented as an object containing column headers as keys. Ensure that each cell's content corresponds accurately to its column header. If a cell is empty, Keep None as its value.

Go through the data and give a summary of the table, describing what the data is about in description field.

Go through each column and give a column summary telling what each column header means.

Analyze the data to suggest two columns which can be used to plot the best graph for this table.

If a table contains both hindi and english translations for header or cell then only give english translations. 

Remember to give the response in correct JSON Format.



Expected output format : {{

    "table_data": [

        {{

            "column_1": "Value 1-1",

            "column_2": "Value 1-2",

            "column_3": "Value 1-3"

        }},

        {{

            "column_1": "Value 2-1",

            "column_2": "Value 2-2",

            "column_3": "Value 2-3"

        }}

        // Additional rows as needed

    ],

    "description": "Table Description",

    "column_summary":{{

     "column_1" : "column description",

     "column_2" : "column description",

     "column_3" :"column description"

    }},

    "best_column1" : "Column 1 name",

    "best_column2" : "Column 2 name"

    

    

}}



"""


def _empty_page_result(page_number):
    """Build the result returned when no table could be extracted for a page."""
    return {
        "page_number": page_number,
        "table_data": None,
        "description": None,
        "column_summary": None,
        "best_col1": None,
        "best_col2": None,
        "has_table_data": False,
    }


def _strip_markdown_fence(text):
    """Remove a surrounding Markdown code fence (and its ``json`` tag) from *text*."""
    cleaned = text.strip().strip("`").strip()
    # Drop only the fence's language tag. A global replace of "json" would also
    # eat that substring inside keys and cell values. Valid JSON never starts
    # with "json", so this cannot damage a real payload.
    if cleaned[:4].lower() == "json":
        cleaned = cleaned[4:]
    # Literal backslash-n sequences are dropped from the payload — presumably to
    # keep escaped line breaks out of cell values.
    return cleaned.replace("\\n", "").strip()


def _optional_text(data, key):
    """Return ``data[key]`` stripped, or None when it is missing or blank."""
    value = data.get(key)
    return (value.strip() if value else "") or None


def process_image_using_llm(image, page_number, max_retries=3):
    """Ask the LLM to extract a table from one page image and parse its JSON reply.

    The image is sent together with ``system_prompt_text`` in a single human
    message. The reply is stripped of any Markdown code fence and parsed as
    JSON. Only a JSON decoding failure triggers another attempt; any other
    exception ends processing for the page immediately.

    Args:
        image: Base64-encoded image data; it is embedded in a
            ``data:image/jpeg;base64,...`` URL.
        page_number: Identifier echoed back in the result.
        max_retries: Maximum number of LLM calls to make. Values below 1 make
            no call at all and yield the empty result.

    Returns:
        A dict with the keys ``page_number``, ``table_data``, ``description``,
        ``column_summary``, ``best_col1``, ``best_col2`` and ``has_table_data``.
        Missing or empty fields are None; on failure every field except
        ``page_number`` is None and ``has_table_data`` is False.
    """
    for attempt in range(1, max_retries + 1):
        try:
            # Send the image and system prompt to the LLM
            message = HumanMessage(
                content=[
                    {"type": "text", "text": system_prompt_text},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image}"}},
                ],
            )
            response = model.invoke([message])

            # Clean up the response content
            response_content = _strip_markdown_fence(response.content)
            print(response_content)

            try:
                data = json.loads(response_content)
            except json.JSONDecodeError as e:
                print(f"JSON decode error on attempt {attempt} for page {page_number}: {e}")
                # Malformed JSON is the only failure worth another attempt.
                continue

            table_data = data.get("table_data", [])
            column_summary = data.get("column_summary", {})

            # Verify that we have valid table data
            has_table_data = bool(table_data)

            return {
                "page_number": page_number,
                "table_data": table_data if has_table_data else None,
                "description": _optional_text(data, "description"),
                "column_summary": column_summary if column_summary else None,
                "best_col1": _optional_text(data, "best_column1"),
                "best_col2": _optional_text(data, "best_column2"),
                "has_table_data": has_table_data,
            }

        # Handle any other exceptions without retrying
        except Exception as e:
            print(f"Outer exception for page {page_number}: {e}")
            return _empty_page_result(page_number)

    # Reached when every attempt produced unparsable JSON, or when
    # max_retries < 1 so no attempt was made; callers always get a dict.
    return _empty_page_result(page_number)