|
import gradio as gr |
|
import requests |
|
import base64 |
|
import os |
|
import json |
|
import mimetypes |
|
|
|
|
|
# SECURITY: credentials must never be committed to source control. The key is
# read from the environment instead (this is also what the UI warning and the
# start-up message in this file tell the user to configure). Any key that was
# previously hard-coded here must be treated as leaked and revoked.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "").strip()

# Model identifier sent in the "model" field of every request payload.
IMAGE_MODEL = "opengvlab/internvl3-14b:free"

# Endpoint that receives the chat-completion POST requests.
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"


# Documents queued for processing. Each entry is a dict with the keys
# "path", "type" and "filename". This is module-level state, so it is shared
# by every caller of the UI callbacks defined in this module.
current_batch = []
|
|
|
|
|
|
|
def generate_extraction_prompt(doc_type_provided_by_user):
    """Build the instruction text that accompanies a document image.

    Args:
        doc_type_provided_by_user: Document type label chosen by the user. It
            is interpolated verbatim into the prompt in two places.

    Returns:
        The prompt string. It asks the model to answer with a single JSON
        object containing the keys "document_type_provided",
        "document_type_detected", "extracted_fields", "mrz_data",
        "multilingual_info" and "full_text_ocr".
    """
    # Doubled braces below are literal braces in the rendered prompt.
    return f"""You are an advanced OCR and information extraction AI.
The user has provided an image and identified it as a '{doc_type_provided_by_user}'.
Your task is to meticulously analyze this image and extract all relevant information.

Output Format Instructions:
Provide your response as a SINGLE, VALID JSON OBJECT. Do not include any explanatory text before or after the JSON.
The JSON object should have the following top-level keys:
- "document_type_provided": (string) The type provided by the user: "{doc_type_provided_by_user}".
- "document_type_detected": (string) Your best guess of the specific document type (e.g., "Passport", "National ID Card", "Driver's License", "Visa Sticker", "Hotel Confirmation Voucher", "Boarding Pass", "Photograph of a person").
- "extracted_fields": (object) A key-value map of all extracted information. Be comprehensive. Examples:
- For passports/IDs: "Surname", "Given Names", "Document Number", "Nationality", "Date of Birth", "Sex", "Place of Birth", "Date of Issue", "Date of Expiry", "Issuing Authority", "Country Code".
- For hotel reservations: "Guest Name", "Hotel Name", "Booking Reference", "Check-in Date", "Check-out Date", "Room Type".
- For photos: "Description" (e.g., "Portrait of a person", "Image contains text: [text if any]").
- "mrz_data": (object or null) If a Machine Readable Zone (MRZ) is present:
- "raw_mrz_lines": (array of strings) Each line of the MRZ.
- "parsed_mrz": (object) Key-value pairs of parsed MRZ fields (e.g., "passport_type", "issuing_country", "surname", "given_names", "passport_number", "nationality", "dob", "sex", "expiry_date", "personal_number").
If no MRZ, this field should be null.
- "multilingual_info": (array of objects or null) For any text segments not in English:
- Each object: {{"language_detected": "ISO 639-1 code", "original_text": "...", "english_translation_or_transliteration": "..."}}
If no non-English text, this field can be null or an empty array.
- "full_text_ocr": (string) Concatenation of all text found on the document.

Extraction Guidelines:
1. Prioritize accuracy. If unsure about a character or word, indicate uncertainty if possible, or extract the most likely interpretation.
2. Extract all visible text, including small print, stamps, and handwritten annotations if legible.
3. For dates, try to use ISO 8601 format (YYYY-MM-DD) if possible, but retain original format if conversion is ambiguous.
4. If the image is a photo of a person without much text, the "extracted_fields" might contain a description, and "full_text_ocr" might be minimal.
5. If the document is multi-page and only one page is provided, note this if apparent.

Ensure the entire output strictly adheres to the JSON format.
"""
|
|
|
def _guess_image_mime_type(image_path):
    """Return the MIME type for ``image_path``, falling back to JPEG."""
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type:
        return mime_type
    # mimetypes could not tell; fall back to a small extension table.
    ext = os.path.splitext(image_path)[1].lower()
    known = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".webp": "image/webp",
    }
    return known.get(ext, "image/jpeg")


def _strip_code_fence(text):
    """Remove a surrounding Markdown code fence or inline backticks."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Handles both "```json ... ```" and a bare "``` ... ```" fence.
        cleaned = cleaned[3:]
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
        # Cut at the last closing fence so trailing chatter is dropped too.
        closing = cleaned.rfind("```")
        if closing != -1:
            cleaned = cleaned[:closing]
    elif cleaned.startswith("`") and cleaned.endswith("`"):
        cleaned = cleaned[1:-1]
    return cleaned.strip()


def process_single_image_with_openrouter(image_path, doc_type):
    """Send one document image to OpenRouter and return the parsed result.

    The image is read from disk, embedded as a base64 data URL and posted
    together with the extraction prompt for ``doc_type``.

    Args:
        image_path: Path of the image file to upload.
        doc_type: Document type label selected by the user.

    Returns:
        On success, the JSON object produced by the model as a dict; the key
        "document_type_provided" is added when the model omitted it.
        On any failure, a dict with an "error" message and
        "document_type_provided" (plus "raw_content_from_llm" or "details"
        where available). This function does not raise; every exception is
        converted into such an error dict.
    """
    if not OPENROUTER_API_KEY:
        return {"error": "OpenRouter API key not set.", "document_type_provided": doc_type}
    try:
        with open(image_path, "rb") as f:
            encoded_image_string = base64.b64encode(f.read()).decode("utf-8")
        mime_type = _guess_image_mime_type(image_path)
        data_url = f"data:{mime_type};base64,{encoded_image_string}"
        prompt_text = generate_extraction_prompt(doc_type)
        payload = {
            "model": IMAGE_MODEL,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt_text},
                        {"type": "image_url", "image_url": {"url": data_url}},
                    ],
                }
            ],
            "max_tokens": 3000,
            "temperature": 0.1,
        }
        headers = {
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://huggingface.co/spaces/Passport_Extractor",
            "X-Title": "Document Classifier",
        }
        print(f"Sending request to OpenRouter for image: {os.path.basename(image_path)}, type: {doc_type}")
        response = requests.post(OPENROUTER_API_URL, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        result = response.json()
        print(f"Received response from OpenRouter. Status: {response.status_code}")

        if not ("choices" in result and result["choices"]):
            print(f"No 'choices' in API response: {result}")
            return {"error": "No choices in API response.", "details": result, "document_type_provided": doc_type}

        content_text = result["choices"][0]["message"]["content"]
        if not isinstance(content_text, str):
            # A missing/None content would otherwise fail on .strip() and be
            # reported as an opaque "unexpected error".
            return {
                "error": "LLM response contained no text content.",
                "details": result,
                "document_type_provided": doc_type,
            }

        try:
            parsed_json = json.loads(_strip_code_fence(content_text))
        except json.JSONDecodeError as e:
            print(f"JSONDecodeError: {e}. Raw content was:\n{content_text}")
            return {
                "error": "Failed to parse LLM output as JSON.",
                "raw_content_from_llm": content_text,
                "document_type_provided": doc_type,
            }

        if not isinstance(parsed_json, dict):
            # Callers index the result by key, so arrays/scalars are errors.
            return {
                "error": "LLM output was valid JSON but not a JSON object.",
                "raw_content_from_llm": content_text,
                "document_type_provided": doc_type,
            }
        if "document_type_provided" not in parsed_json:
            parsed_json["document_type_provided"] = doc_type
        return parsed_json
    except requests.exceptions.Timeout:
        # Must precede RequestException: Timeout is one of its subclasses.
        print(f"API Request Timeout for {os.path.basename(image_path)}")
        return {"error": "API request timed out.", "document_type_provided": doc_type}
    except requests.exceptions.RequestException as e:
        error_message = f"API Request Error: {str(e)}"
        if e.response is not None:
            error_message += f" Status: {e.response.status_code}, Response: {e.response.text}"
        print(error_message)
        return {"error": error_message, "document_type_provided": doc_type}
    except Exception as e:
        # UI boundary: report the failure (e.g. unreadable file) instead of
        # letting it propagate into the Gradio callback.
        print(f"An unexpected error occurred during processing {os.path.basename(image_path)}: {str(e)}")
        return {"error": f"An unexpected error: {str(e)}", "document_type_provided": doc_type}
|
|
|
def add_document_to_batch_ui(image_filepath, doc_type_selection):
    """Queue an uploaded image and return the refreshed batch table.

    Args:
        image_filepath: Path of the uploaded image; falsy when nothing was
            uploaded.
        doc_type_selection: Document type chosen in the dropdown.

    Returns:
        A ``(rows, status)`` tuple: ``rows`` is a list of
        ``[filename, document type]`` pairs for every queued document and
        ``status`` is a message describing whether the document was added.
    """
    global current_batch
    added_name = None
    if image_filepath and doc_type_selection:
        added_name = os.path.basename(image_filepath)
        current_batch.append(
            {"path": image_filepath, "type": doc_type_selection, "filename": added_name}
        )

    # The table is rebuilt from the whole batch in both outcomes.
    rows = [[entry["filename"], entry["type"]] for entry in current_batch]
    if added_name is None:
        return rows, "Failed to add: Image or document type missing."
    return rows, f"Added '{added_name}' as '{doc_type_selection}'."
|
|
|
def _derive_person_key(record):
    """Return the grouping key for one extraction result, or None."""
    if not isinstance(record, dict):
        return None
    fields = record.get("extracted_fields")
    if not isinstance(fields, dict):
        return None

    def squash(value):
        # Identifiers are compared without embedded spaces.
        return str(value).replace(" ", "")

    doc_number = fields.get("Document Number") or fields.get("Passport Number") or fields.get("passport_number")
    given = fields.get("Given Names") or fields.get("Given Name") or fields.get("Name")
    family = fields.get("Surname") or fields.get("Family Name")
    birth = fields.get("Date of Birth") or fields.get("DOB")

    # A document number wins; otherwise fall back to name (+ birth date).
    if doc_number:
        return f"passport_{squash(doc_number).lower()}"
    if given and family:
        name_part = f"{squash(given).lower()}_{squash(family).lower()}"
        return f"{name_part}_{squash(birth)}" if birth else name_part
    return None


def process_batch_ui():
    """Run extraction on every queued document and group results by person.

    Returns:
        A ``(output, status)`` tuple. ``output`` is a dict with "summary",
        "grouped_by_person" (one entry per derived person key, each holding
        its "documents") and "unidentified_documents_or_errors" (results with
        no derivable key, including error results). ``status`` is a one-line
        summary of the per-document outcomes. When the API key is missing or
        the batch is empty, a short error/message dict and status are
        returned instead.
    """
    global current_batch
    if not OPENROUTER_API_KEY:
        return {"error": "OPENROUTER_API_KEY is not set. Please configure it."}, "API Key Missing."
    if not current_batch:
        return {"message": "Batch is empty. Add documents first."}, "Batch is empty."

    results = []
    notes = []
    for position, entry in enumerate(current_batch, start=1):
        print(f"Processing document {position}/{len(current_batch)}: {entry['filename']} ({entry['type']})...")
        outcome = process_single_image_with_openrouter(entry["path"], entry["type"])
        results.append(outcome)
        if "error" in outcome:
            notes.append(f"Error processing {entry['filename']}: {outcome['error']}")
        else:
            notes.append(f"Successfully processed {entry['filename']}.")

    people = {}
    leftovers = []
    for outcome in results:
        key = _derive_person_key(outcome)
        if not key:
            leftovers.append(outcome)
            continue
        bucket = people.setdefault(key, {"person_identifier": key, "documents": []})
        bucket["documents"].append(outcome)

    final_structured_output = {
        "summary": f"Processed {len(current_batch)} documents.",
        "grouped_by_person": list(people.values()),
        "unidentified_documents_or_errors": leftovers,
    }
    final_status = "Batch processing complete. " + " | ".join(notes)
    print(final_status)
    return final_structured_output, final_status
|
|
|
def clear_batch_ui():
    """Discard every queued document.

    Returns:
        A ``(rows, status)`` tuple: an empty row list for the batch table and
        a confirmation message.
    """
    global current_batch
    # Rebind to a fresh list rather than mutating the existing one.
    current_batch = list()
    empty_rows, status = [], "Batch cleared successfully."
    return empty_rows, status
|
|
|
# Gradio UI: upload documents one at a time into a batch, review the batch,
# then send the whole batch for extraction and show the JSON result.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📄 Document Information Extractor (OpenGVLab/InternVL3-14B via OpenRouter)")
    gr.Markdown(
        "**Instructions:**\n"
        "1. Upload a document image (e.g., passport front/back, photo, hotel reservation).\n"
        "2. Select the correct document type.\n"
        "3. Click 'Add Document to Current Batch'. Repeat for all documents of a person or a related set.\n"
        "4. Review the batch. Click 'Clear Entire Batch' to start over.\n"
        "5. Click 'Process Batch and Extract Information' to send documents to the AI.\n"
        "6. View the extracted information in JSON format below."
    )

    # Shown only when no API key is configured at start-up.
    if not OPENROUTER_API_KEY:
        gr.Markdown(
            "<h3 style='color:red;'>⚠️ Warning: `OPENROUTER_API_KEY` environment variable is not detected. "
            "API calls will fail. Please set it and restart this application.</h3>"
        )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Step 1: Add Document")
            # type="filepath" hands the callback a path on disk, which is what
            # add_document_to_batch_ui stores in the batch.
            image_input = gr.Image(
                label="Upload Document Image",
                type="filepath",
                sources=["upload"],
                height=300,
            )
            doc_type_choices = [
                'passport_front', 'passport_back', 'national_id_front', 'national_id_back',
                'drivers_license_front', 'drivers_license_back', 'visa_sticker',
                'photo', 'hotel_reservation', 'boarding_pass', 'utility_bill', 'other_document',
            ]
            doc_type_input = gr.Dropdown(
                label="Select Document Type",
                choices=doc_type_choices,
                value='passport_front',
                filterable=True,
            )
            add_button = gr.Button("➕ Add Document to Current Batch", variant="secondary")

        with gr.Column(scale=2):
            gr.Markdown("### Step 2: Review Current Batch")
            batch_dataframe = gr.Dataframe(
                headers=["Filename", "Document Type"],
                datatype=["str", "str"],
                row_count=1,
                col_count=2,
                wrap=True,
            )
            clear_batch_button = gr.Button("🗑️ Clear Entire Batch", variant="stop")

    gr.Markdown("### Step 3: Process Batch")
    process_button = gr.Button("🚀 Process Batch and Extract Information", variant="primary")
    status_message_textbox = gr.Textbox(label="Processing Status", interactive=False, lines=2)

    gr.Markdown("### Step 4: View Results")
    output_json_display = gr.JSON(label="Extracted Information (JSON Format)")

    # After a document is added, the follow-up step resets the image widget
    # to None so the next upload starts from an empty input.
    add_button.click(
        fn=add_document_to_batch_ui,
        inputs=[image_input, doc_type_input],
        outputs=[batch_dataframe, status_message_textbox],
    ).then(lambda: None, outputs=image_input)

    clear_batch_button.click(
        fn=clear_batch_ui,
        inputs=[],
        outputs=[batch_dataframe, status_message_textbox],
    )

    process_button.click(
        fn=process_batch_ui,
        inputs=[],
        outputs=[output_json_display, status_message_textbox],
    )
|
|
|
if __name__ == "__main__":
    # Tell the operator on the console when no key is configured; the app is
    # still launched so the UI (and its warning banner) remains reachable.
    if not OPENROUTER_API_KEY:
        startup_warning = (
            "ERROR: The OPENROUTER_API_KEY environment variable is not set.",
            "Please set it before running the application, e.g.:",
            " export OPENROUTER_API_KEY='your_openrouter_key_here'",
            "The application will launch, but API calls will fail.",
        )
        for line in startup_warning:
            print(line)
    # NOTE(review): share=True requests a public Gradio share link — confirm
    # that exposing this document-processing UI publicly is intended.
    demo.launch(share=True)