# MOSPI_analysis_tool / extract_table_from_image.py
# akshansh36's picture
# Upload 10 files
# eef9e83 verified
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
import os
import re
import json
from dotenv import load_dotenv
load_dotenv()
# Settings pulled from the process environment (load_dotenv() has already
# run at this point). Each value is None when its variable is not set.
MONGO_URI = os.environ.get("MONGO_URI")
DB_NAME = os.environ.get("DB_NAME")
COLLECTION_NAME = os.environ.get("COLLECTION_NAME")
FLASH_API = os.environ.get("FLASH_API")
PINECONE_API = os.environ.get("PINECONE_API")
PINECONE_INDEX = os.environ.get("PINECONE_INDEX")

# Gemini chat client, authenticated with the FLASH_API key; it is the
# object process_image_using_llm() invokes for every page image.
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash-002",
    temperature=0.2,
    max_tokens=None,
    google_api_key=FLASH_API,
)
# Natural-language instructions for the table-extraction request.
_EXTRACTION_INSTRUCTIONS = """Please extract the table from the image and return the table data in JSON format, with each row represented as an object containing column headers as keys. Ensure that each cell's content corresponds accurately to its column header. If a cell is empty, Keep None as its value.
Go through the data and give a summary of the table, describing what the data is about in description field.
Go through each column and give a column summary telling what each column header means.
Analyze the data to suggest two columns which can be used to plot the best graph for this table.
If a table contains both hindi and english translations for header or cell then only give english translations.
Remember to give the response in correct JSON Format.
"""

# Worked example of the reply shape. Written as a plain (non-f) string so
# the braces need no escaping; the keys shown here are the ones
# process_image_using_llm() reads back out of the parsed reply.
_EXPECTED_OUTPUT_EXAMPLE = """Expected output format : {
"table_data": [
{
"column_1": "Value 1-1",
"column_2": "Value 1-2",
"column_3": "Value 1-3"
},
{
"column_1": "Value 2-1",
"column_2": "Value 2-2",
"column_3": "Value 2-3"
}
// Additional rows as needed
],
"description": "Table Description",
"column_summary":{
"column_1" : "column description",
"column_2" : "column description",
"column_3" :"column description"
},
"best_column1" : "Column 1 name",
"best_column2" : "Column 2 name"
}
"""

# Full prompt text sent together with the page image.
system_prompt_text = _EXTRACTION_INSTRUCTIONS + _EXPECTED_OUTPUT_EXAMPLE
def _failed_result(page_number):
    """Build the result returned when no table could be extracted for a page."""
    return {
        "page_number": page_number,
        "table_data": None,
        "description": None,
        "column_summary": None,
        "best_col1": None,
        "best_col2": None,
        "has_table_data": False,
    }


def _clean_llm_reply(raw_reply):
    """Remove the Markdown code fence wrapped around the model's JSON reply.

    Only the fence itself is stripped: leading backticks with an optional
    ``json`` language tag, and trailing backticks. The word "json" occurring
    inside the payload (cell values, descriptions) is left untouched.
    """
    text = raw_reply.strip()
    text = re.sub(r"^`*\s*(?:json\b)?", "", text, flags=re.IGNORECASE)
    text = re.sub(r"`+$", "", text)
    # Kept from the previous implementation for backward compatibility:
    # escaped newlines (backslash + "n") are dropped from the raw text, so
    # parsed strings never contain line breaks.
    # NOTE(review): this also joins the words on either side of the break.
    text = text.replace("\\n", "")
    return text.strip()


def _text_or_none(value):
    """Return ``value`` stripped if it is a non-blank string, else ``None``."""
    if isinstance(value, str):
        return value.strip() or None
    return None


def _build_result(data, page_number):
    """Map the parsed reply object onto the result dict handed to callers."""
    table_data = data.get("table_data") or None
    return {
        "page_number": page_number,
        "table_data": table_data,
        "description": _text_or_none(data.get("description")),
        "column_summary": data.get("column_summary") or None,
        "best_col1": _text_or_none(data.get("best_column1")),
        "best_col2": _text_or_none(data.get("best_column2")),
        "has_table_data": table_data is not None,
    }


def process_image_using_llm(image, page_number, max_retries=3):
    """Ask the LLM to extract a table from a page image and parse its reply.

    The image is sent together with ``system_prompt_text``. The reply is
    expected to be a JSON object; a reply that is not valid JSON, or whose
    top-level value is not an object, triggers another attempt until
    ``max_retries`` attempts have been made.

    Args:
        image: Base64-encoded image data; it is embedded as-is in a
            ``data:image/jpeg;base64,`` URL.
        page_number: Identifier of the page, echoed back in the result.
        max_retries: Maximum number of attempts. With a value below 1 no
            request is made and the failure result is returned.

    Returns:
        A dict with the keys ``page_number``, ``table_data``,
        ``description``, ``column_summary``, ``best_col1``, ``best_col2``
        and ``has_table_data``. Missing or empty fields are ``None``;
        ``has_table_data`` is ``True`` only when ``table_data`` is
        non-empty. On failure every field except ``page_number`` is
        ``None`` and ``has_table_data`` is ``False``.
    """
    for attempt in range(1, max_retries + 1):
        try:
            # Send the image and the prompt to the LLM in a single message.
            message = HumanMessage(
                content=[
                    {"type": "text", "text": system_prompt_text},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image}"}},
                ],
            )
            response = model.invoke([message])

            response_content = _clean_llm_reply(response.content)
            print(response_content)

            try:
                data = json.loads(response_content)
            except json.JSONDecodeError as e:
                print(f"JSON decode error on attempt {attempt} for page {page_number}: {e}")
                continue

            if not isinstance(data, dict):
                # A bare list/string is as unusable as malformed JSON, so it
                # is retried instead of being reported as a hard failure.
                print(
                    f"Unexpected JSON type {type(data).__name__} on attempt "
                    f"{attempt} for page {page_number}"
                )
                continue

            return _build_result(data, page_number)
        except Exception as e:
            # Any other failure (request error, unexpected reply type) is
            # not retried; the page is reported as having no table.
            print(f"Outer exception for page {page_number}: {e}")
            return _failed_result(page_number)

    # All attempts were used up (or none were allowed): report failure
    # explicitly rather than falling off the end and returning None.
    return _failed_result(page_number)