csv-generation-img

Sleeping

App Files Files Community

Nechba committed on Apr 8

Commit

dbf5064

verified ·

1 Parent(s): 30582c7

Update utils.py

Browse files

Files changed (1) hide show

utils.py +64 -1

utils.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import google.generativeai as genai
-genai.configure(api_key="AIzaSyAP85jSUKncrIGOAhm3Gvo-TYra_e1wmEA")
 import os
 import pandas as pd
 import io
@@ -32,6 +32,69 @@ def pdf_to_images(pdf_bytes: bytes) -> list[Image.Image]:
         images.append(img)
     return images
 def analyze_single_document(images: list, prompt: str) -> dict:
     """Analyze a single document and return results"""
     model = genai.GenerativeModel('gemini-2.5-pro-exp-03-25')

 import google.generativeai as genai
+# SECURITY: never commit API keys. Called without an api_key argument,
+# configure() falls back to the GOOGLE_API_KEY environment variable, so the
+# deployment must provide that variable. The key that was previously
+# hard-coded on this line is exposed in the repository history and should be
+# revoked.
+genai.configure()
 import os
 import pandas as pd
 import io
         images.append(img)
     return images
+def process_local_pdf(pdf_bytes: bytes) -> pd.DataFrame:
+    """
+    Analyze every page of a PDF with Gemini and merge the per-page CSV answers.
+
+    The PDF is converted to one image per page with ``pdf_to_images``. Each
+    image is sent to the model together with a fixed real-estate review
+    prompt whose ``{}`` placeholder is filled with the 1-based page number.
+    Every response is passed through ``extract_csv_from_response`` and
+    ``csv_to_dataframe``. A page that raises is reported on stdout and
+    skipped, so a single failing page does not abort the whole document.
+
+    Args:
+        pdf_bytes: Raw bytes of the PDF document.
+
+    Returns:
+        A DataFrame holding the rows of all successfully processed pages,
+        re-indexed from 0. An empty DataFrame when no page produced rows.
+    """
+    # Prompt template. It is filled with str.format, so the lone {} below is
+    # the only brace pair allowed; any other literal brace must be doubled.
+    prompt = """Please analyze the provided images of the real estate document set and perform the following actions:
+            1.  *Identify Parties:* Determine and list Seller 1, Seller 2 (if applicable), Buyer 1, and Buyer 2.
+            2.  *Identify Missing Items:* Locate and list all instances of missing signatures and missing initials for all parties across all documents.
+            3.  *Identify Checked Boxes:* Locate and list all checkboxes that have been marked or checked.
+            4.  *Generate Secondary Questions:* For checkboxes that indicate significant waivers (e.g., home warranty, inspection rights, lead paint assessment), specific conditions (e.g., cash sale, contingency status), potential conflicts, or reference other documents, formulate a relevant 'Secondary Question' designed to prompt confirmation or clarification from the user/parties involved.
+            5.  *Check for Required Paperwork:* Based only on the checkboxes identified in step 3 that explicitly state or strongly imply a specific addendum or disclosure document should be attached (e.g., "Lead Based Paint Disclosure Addendum attached", "See Counter Offer Addendum", "Seller's Disclosure...Addendum attached", "Retainer Addendum attached", etc.), check if a document matching that description appears to be present within the provided image set. Note whether this implied paperwork is 'Found', 'Missing', or 'Potentially Missing/Ambiguous' within the provided images.
+            6.  *Identify Conflicts:* Specifically look for and note any directly contradictory information or conflicting checked boxes (like the conflicting inspection clauses found previously).
+            7.  *Provide Location:* For every identified item (missing signature/initial, checked box, required paperwork status, party identification, conflict), specify the approximate line number(s) or clear location on the page (e.g., Bottom Right Initials, Seller Signature Block).
+            8.  *Format Output:* Present all findings comprehensively in CSV format. The CSV columns should be:
+                *   Category (e.g., Parties, Missing Item, Checked Box, Required Paperwork, Conflict)
+                *   Location (Document Name/Page, e.g., Sale Contract Pg 2)
+                *   Image number (just make image number {} done)
+                *   Item Type (e.g., Seller Initials, Home Warranty Waiver, Lead Paint Addendum Check, Lead Paint Addendum Document)
+                *   Status (e.g., Identified, Missing, Checked, Found, Potentially Missing, Conflict Detected)
+                *   Details (Specifics like names, text of the checkbox, description of the issue or document status)
+                *   Secondary Question (if applicable) (The question generated in step 4)
+            Please apply this analysis to the entire set of documents provided.
+            """
+    # One image per PDF page.
+    images = pdf_to_images(pdf_bytes)
+    # The model handle does not depend on the page, so build it once.
+    model = genai.GenerativeModel('gemini-2.5-pro-exp-03-25')
+    # Collect per-page frames and concatenate once at the end; concatenating
+    # inside the loop re-copies all earlier rows on every page.
+    page_frames = []
+    for page_num, img in enumerate(images, start=1):
+        try:
+            local_prompt = prompt.format(page_num)
+            # Send both the prompt and the page image to Gemini.
+            response = model.generate_content([local_prompt, img])
+            answer_csv = extract_csv_from_response(response)
+            answer_df = csv_to_dataframe(answer_csv)
+            if not answer_df.empty:
+                page_frames.append(answer_df)
+            print(f"Processed page {page_num}")
+            print("Response:")
+            print(answer_csv)
+            print("\n" + "="*50 + "\n")
+        except Exception as e:
+            # Deliberate best-effort: report the failing page and keep going.
+            print(f"Error processing page {page_num}: {str(e)}")
+    if not page_frames:
+        return pd.DataFrame()
+    return pd.concat(page_frames, ignore_index=True)
 def analyze_single_document(images: list, prompt: str) -> dict:
     """Analyze a single document and return results"""
     model = genai.GenerativeModel('gemini-2.5-pro-exp-03-25')