Sandy2636 committed on
Commit 40edde0 · Parent: d1c0f9b

New Update

Files changed (2)
  1. app.py +272 -8
  2. requirements.txt +6 -2
app.py CHANGED
@@ -1,6 +1,6 @@
- import os
- os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress TensorFlow INFO and WARNING messages
- os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
  import gradio as gr
  import base64
  import requests
@@ -9,13 +9,30 @@ import re
  import os
  import uuid
  from datetime import datetime
  import time # For potential sleeps if needed, or timing

  # Attempt to import deepface and handle import error gracefully
  try:
      from deepface import DeepFace
-     from deepface.commons import functions as deepface_functions
      DEEPFACE_AVAILABLE = True
  except ImportError:
      DEEPFACE_AVAILABLE = False
      print("Warning: deepface library not found. Facial recognition features will be disabled.")
@@ -48,6 +65,99 @@ processed_files_data = []
  person_profiles = {}

  # --- Helper Functions ---
  def extract_json_from_text(text):
      if not text:
          return {"error": "Empty text provided for JSON extraction."}
@@ -343,7 +453,7 @@ def format_dataframe_data(current_files_data):
      df_rows = []
      for f_data in current_files_data:
          entities = f_data.get("entities") or {}
-         face_info = f_data.get("face_analysis_result", {})
          face_detected_status = "Y" if face_info.get("count", 0) > 0 else "N"
          if "error" in face_info : face_detected_status = "Error"
          elif "message" in face_info and "No face detected" in face_info["message"]: face_detected_status = "N"
@@ -388,7 +498,7 @@ def format_persons_markdown(current_persons_data, current_files_data):
      md_parts.append("\n---\n")
      return "\n".join(md_parts)

- def process_uploaded_files(files_list, progress=gr.Progress(track_tqdm=True)):
      global processed_files_data, person_profiles
      processed_files_data = []
      person_profiles = {}
@@ -485,6 +595,160 @@ def process_uploaded_files(files_list, progress=gr.Progress(track_tqdm=True)):
      final_persons_md = format_persons_markdown(person_profiles, processed_files_data)
      yield (final_df_data, final_persons_md, "{}", f"All {len(processed_files_data)} documents processed.")

  with gr.Blocks(theme=gr.themes.Soft()) as demo:
      gr.Markdown("# 📄 Intelligent Document Processor & Classifier v2 (with Face ID)")
@@ -498,7 +762,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
      with gr.Row():
          with gr.Column(scale=1):
              files_input = gr.Files(label="Upload Document Images (Bulk)", file_count="multiple", type="filepath")
-             process_button = gr.Button("🚀 Process Uploaded Documents", variant="primary")
          with gr.Column(scale=2):
              overall_status_textbox = gr.Textbox(label="Current Task & Overall Progress", interactive=False, lines=2)
@@ -508,7 +772,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
      document_status_df = gr.Dataframe(
          headers=dataframe_headers, datatype=["str"] * len(dataframe_headers),
          label="Individual Document Status & Extracted Entities",
-         row_count=(1, "dynamic"), col_count=(len(dataframe_headers), "fixed"), wrap=True, height=400
      )

      with gr.Accordion("Selected Document Full OCR JSON", open=False):
 
+ # import os
+ # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress TensorFlow INFO and WARNING messages
+ # os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
  import gradio as gr
  import base64
  import requests
 
  import os
  import uuid
  from datetime import datetime
+ import tempfile # ✅ Add this
+ import shutil
  import time # For potential sleeps if needed, or timing

  # Attempt to import deepface and handle import error gracefully
+ try:
+     import fitz # PyMuPDF
+     PYMUPDF_AVAILABLE = True
+ except ImportError:
+     PYMUPDF_AVAILABLE = False
+     print("Warning: PyMuPDF not found. PDF processing will be disabled.")
+
+ try:
+     import docx
+     from PIL import Image, ImageDraw, ImageFont
+     DOCX_AVAILABLE = True
+ except ImportError:
+     DOCX_AVAILABLE = False
+     print("Warning: python-docx or Pillow not found. DOCX processing will be disabled.")
  try:
      from deepface import DeepFace
+     # from deepface.commons import functions as deepface_functions
      DEEPFACE_AVAILABLE = True
+     print(f"Got DeepFace")
  except ImportError:
      DEEPFACE_AVAILABLE = False
      print("Warning: deepface library not found. Facial recognition features will be disabled.")
 
  person_profiles = {}

  # --- Helper Functions ---
+
+ def render_text_to_image(text, output_path):
+     """Renders a string of text onto a new image file."""
+     if not DOCX_AVAILABLE:
+         raise ImportError("Pillow or python-docx is not installed.")
+
+     try:
+         # Use a built-in font if available, otherwise this might fail on minimal OS
+         font = ImageFont.truetype("DejaVuSans.ttf", 15)
+     except IOError:
+         print("Default font not found, using basic PIL font.")
+         font = ImageFont.load_default()
+
+     padding = 20
+     image_width = 800
+
+     # Simple text wrapping
+     lines = []
+     for paragraph in text.split('\n'):
+         words = paragraph.split()
+         line = ""
+         for word in words:
+             # Use getbbox for more accurate width calculation if available (Pillow >= 9.2.0)
+             if hasattr(font, 'getbbox'):
+                 box = font.getbbox(line + word)
+                 line_width = box[2] - box[0]
+             else: # Fallback for older Pillow
+                 line_width = font.getsize(line + word)[0]
+
+             if line_width <= image_width - 2 * padding:
+                 line += word + " "
+             else:
+                 lines.append(line.strip())
+                 line = word + " "
+         lines.append(line.strip())
+
+     # Calculate image height
+     _, top, _, bottom = font.getbbox("A")
+     line_height = bottom - top + 5 # Add some line spacing
+     image_height = len(lines) * line_height + 2 * padding
+
+     img = Image.new('RGB', (image_width, int(image_height)), color='white')
+     draw = ImageDraw.Draw(img)
+
+     y = padding
+     for line in lines:
+         draw.text((padding, y), line, font=font, fill='black')
+         y += line_height
+
+     img.save(output_path, format='PNG')
+
+
+ def convert_file_to_images(original_filepath, temp_output_dir):
+     """
+     Converts an uploaded file (PDF, DOCX) into one or more images.
+     If the file is already an image, it returns its own path.
+     Returns a list of dictionaries, each with 'path' and 'page' keys.
+     """
+     filename_lower = original_filepath.lower()
+     output_paths = []
+
+     if filename_lower.endswith('.pdf'):
+         if not PYMUPDF_AVAILABLE:
+             raise RuntimeError("PDF processing is disabled (PyMuPDF not installed).")
+         doc = fitz.open(original_filepath)
+         for i, page in enumerate(doc):
+             pix = page.get_pixmap(dpi=200) # Render page to image
+             output_filepath = os.path.join(temp_output_dir, f"{os.path.basename(original_filepath)}_page_{i+1}.png")
+             pix.save(output_filepath)
+             output_paths.append({"path": output_filepath, "page": i + 1})
+         doc.close()
+
+     elif filename_lower.endswith('.docx'):
+         if not DOCX_AVAILABLE:
+             raise RuntimeError("DOCX processing is disabled (python-docx or Pillow not installed).")
+         doc = docx.Document(original_filepath)
+         full_text = "\n".join([para.text for para in doc.paragraphs])
+         if not full_text.strip():
+             full_text = "--- Document is empty or contains only images/tables ---"
+         output_filepath = os.path.join(temp_output_dir, f"{os.path.basename(original_filepath)}.png")
+         render_text_to_image(full_text, output_filepath)
+         output_paths.append({"path": output_filepath, "page": 1})
+
+     elif filename_lower.endswith(('.png', '.jpg', '.jpeg', '.webp', '.bmp', '.tiff')):
+         # File is already an image, just return its path
+         output_paths.append({"path": original_filepath, "page": 1})
+
+     else:
+         raise TypeError(f"Unsupported file type: {os.path.basename(original_filepath)}")
+
+     return output_paths
+
+
  def extract_json_from_text(text):
      if not text:
          return {"error": "Empty text provided for JSON extraction."}
 
      df_rows = []
      for f_data in current_files_data:
          entities = f_data.get("entities") or {}
+         face_info = f_data.get("face_analysis_result", {}) or {}
          face_detected_status = "Y" if face_info.get("count", 0) > 0 else "N"
          if "error" in face_info : face_detected_status = "Error"
          elif "message" in face_info and "No face detected" in face_info["message"]: face_detected_status = "N"
 
      md_parts.append("\n---\n")
      return "\n".join(md_parts)

+ def process_uploaded_files_old(files_list, progress=gr.Progress(track_tqdm=True)):
      global processed_files_data, person_profiles
      processed_files_data = []
      person_profiles = {}
 
      final_persons_md = format_persons_markdown(person_profiles, processed_files_data)
      yield (final_df_data, final_persons_md, "{}", f"All {len(processed_files_data)} documents processed.")

+ def process_uploaded_files(files_list, progress=gr.Progress(track_tqdm=True)):
+     global processed_files_data, person_profiles
+     processed_files_data = []
+     person_profiles = {}
+     temp_dir = tempfile.mkdtemp() # Create a temporary directory for converted images
+
+     empty_df_row = [["N/A"] * 11] # Match number of headers
+     if not OPENROUTER_API_KEY:
+         yield (empty_df_row, "API Key Missing.", "{}", "Error: API Key not set.")
+         shutil.rmtree(temp_dir)
+         return
+     if not files_list:
+         yield ([], "No files uploaded.", "{}", "Upload files to begin.")
+         shutil.rmtree(temp_dir)
+         return
+
+     # --- Stage 1: Pre-process files into a job queue of images ---
+     job_queue = []
+     for original_file_obj in progress.tqdm(files_list, desc="Pre-processing Files"):
+         try:
+             image_page_list = convert_file_to_images(original_file_obj.name, temp_dir)
+             total_pages = len(image_page_list)
+             for item in image_page_list:
+                 job_queue.append({
+                     "original_filename": os.path.basename(original_file_obj.name),
+                     "page_number": item["page"],
+                     "total_pages": total_pages,
+                     "image_path": item["path"]
+                 })
+         except Exception as e:
+             job_queue.append({"original_filename": os.path.basename(original_file_obj.name), "error": str(e)})
+
+     for job in job_queue:
+         if "error" in job:
+             processed_files_data.append({
+                 "doc_id": str(uuid.uuid4()),
+                 "original_filename": job["original_filename"],
+                 "page_number": 1,
+                 "status": f"Error: {job['error']}"
+             })
+         else:
+             processed_files_data.append({
+                 "doc_id": str(uuid.uuid4()),
+                 "original_filename": job["original_filename"],
+                 "page_number": job["page_number"],
+                 "total_pages": job["total_pages"],
+                 "filepath": job["image_path"],
+                 "status": "Queued",
+                 "ocr_json": None,
+                 "entities": None,
+                 "face_analysis_result": None,
+                 "facial_embeddings": None,
+                 "assigned_person_key": None,
+                 "linking_method": ""
+             })
+
+     initial_df_data = format_dataframe_data(processed_files_data)
+     initial_persons_md = format_persons_markdown(person_profiles, processed_files_data)
+     yield (initial_df_data, initial_persons_md, "{}", f"Pre-processing complete. Analyzing {len(processed_files_data)} pages.")
+
+     # --- Stage 2: Analyze each page ---
+     current_ocr_json_display = "{}"
+     for i, file_data_item in enumerate(progress.tqdm(processed_files_data, desc="Analyzing Pages")):
+         if file_data_item["status"].startswith("Error"):
+             continue
+
+         current_filename = f"{file_data_item['original_filename']} (p.{file_data_item['page_number']})"
+         linking_method_log_for_doc = []
+
+         # 1. OCR
+         file_data_item["status"] = "OCR..."
+         persons_md = format_persons_markdown(person_profiles, processed_files_data)
+         df_data = format_dataframe_data(processed_files_data)
+         yield (df_data, persons_md, current_ocr_json_display, f"OCR: {current_filename}")
+
+         ocr_result = call_openrouter_ocr(file_data_item["filepath"])
+         file_data_item["ocr_json"] = ocr_result
+         current_ocr_json_display = json.dumps(ocr_result, indent=2)
+
+         if "error" in ocr_result:
+             file_data_item["status"] = f"OCR Err: {str(ocr_result['error'])[:30]}.."
+             linking_method_log_for_doc.append("OCR Failed.")
+             file_data_item["linking_method"] = " ".join(linking_method_log_for_doc)
+             persons_md = format_persons_markdown(person_profiles, processed_files_data)
+             df_data = format_dataframe_data(processed_files_data)
+             yield (df_data, persons_md, current_ocr_json_display, f"OCR Err: {current_filename}")
+             continue
+
+         # 2. Entity Extraction
+         file_data_item["status"] = "OCR OK. Entities..."
+         persons_md = format_persons_markdown(person_profiles, processed_files_data)
+         df_data = format_dataframe_data(processed_files_data)
+         yield (df_data, persons_md, current_ocr_json_display, f"Entities: {current_filename}")
+         entities = extract_entities_from_ocr(ocr_result)
+         file_data_item["entities"] = entities
+
+         # 3. Facial Feature Extraction
+         file_data_item["status"] = "Entities OK. Face..."
+         persons_md = format_persons_markdown(person_profiles, processed_files_data)
+         df_data = format_dataframe_data(processed_files_data)
+         yield (df_data, persons_md, current_ocr_json_display, f"Face Detect: {current_filename}")
+         doc_type_lower = (entities.get("doc_type") or "").lower()
+
+         if DEEPFACE_AVAILABLE and (
+             "photo" in doc_type_lower or
+             "passport" in doc_type_lower or
+             "id" in doc_type_lower or
+             "selfie" in doc_type_lower or
+             not doc_type_lower
+         ):
+             face_result = get_facial_embeddings_with_deepface(file_data_item["filepath"])
+             file_data_item["face_analysis_result"] = face_result
+             if "embeddings" in face_result and face_result["embeddings"]:
+                 file_data_item["facial_embeddings"] = face_result["embeddings"]
+                 linking_method_log_for_doc.append(f"{face_result.get('count', 0)} face(s).")
+             elif "error" in face_result:
+                 linking_method_log_for_doc.append("Face Ext. Error.")
+             else:
+                 linking_method_log_for_doc.append("No face det.")
+         else:
+             linking_method_log_for_doc.append("Face Ext. Skipped.")
+
+         file_data_item["status"] = "Face Done. Classify..."
+         persons_md = format_persons_markdown(person_profiles, processed_files_data)
+         df_data = format_dataframe_data(processed_files_data)
+         yield (df_data, persons_md, current_ocr_json_display, f"Classifying: {current_filename}")
+
+         # 4. Person Classification
+         person_key = get_person_id_and_update_profiles(
+             file_data_item["doc_id"],
+             entities,
+             file_data_item.get("facial_embeddings"),
+             person_profiles,
+             linking_method_log_for_doc
+         )
+         file_data_item["assigned_person_key"] = person_key
+         file_data_item["status"] = "Classified"
+         file_data_item["linking_method"] = " ".join(linking_method_log_for_doc)
+
+         persons_md = format_persons_markdown(person_profiles, processed_files_data)
+         df_data = format_dataframe_data(processed_files_data)
+         yield (df_data, persons_md, current_ocr_json_display, f"Done: {current_filename} -> {person_key}")
+
+     # Final Result
+     final_df_data = format_dataframe_data(processed_files_data)
+     final_persons_md = format_persons_markdown(person_profiles, processed_files_data)
+     yield (final_df_data, final_persons_md, "{}", f"All {len(processed_files_data)} pages analyzed.")
+
+     # Cleanup
+     try:
+         shutil.rmtree(temp_dir)
+         print(f"Cleaned up temporary directory: {temp_dir}")
+     except Exception as e:
+         print(f"Error cleaning up temporary directory {temp_dir}: {e}")

  with gr.Blocks(theme=gr.themes.Soft()) as demo:
      gr.Markdown("# 📄 Intelligent Document Processor & Classifier v2 (with Face ID)")
 
      with gr.Row():
          with gr.Column(scale=1):
              files_input = gr.Files(label="Upload Document Images (Bulk)", file_count="multiple", type="filepath")
+             process_button = gr.Button("Process Uploaded Documents", variant="primary")
          with gr.Column(scale=2):
              overall_status_textbox = gr.Textbox(label="Current Task & Overall Progress", interactive=False, lines=2)
 
      document_status_df = gr.Dataframe(
          headers=dataframe_headers, datatype=["str"] * len(dataframe_headers),
          label="Individual Document Status & Extracted Entities",
+         row_count=(1, "dynamic"), col_count=(len(dataframe_headers), "fixed"), wrap=True
      )

      with gr.Accordion("Selected Document Full OCR JSON", open=False):
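The new convert_file_to_images helper is what feeds Stage 1 of the rewritten process_uploaded_files: PDFs are rasterised page by page with PyMuPDF, DOCX files are flattened to plain text and rendered onto a PNG via render_text_to_image, and ordinary images pass through unchanged. A minimal standalone sketch of the same PDF path, outside the Gradio app (it assumes a reasonably recent PyMuPDF for the dpi= keyword, and "sample.pdf" is only a placeholder filename):

# Standalone sketch of the PDF -> page-image conversion used by convert_file_to_images.
import os
import tempfile

import fitz  # PyMuPDF

def pdf_to_page_images(pdf_path, out_dir, dpi=200):
    """Render every page of pdf_path to a PNG in out_dir; return one dict per page."""
    pages = []
    doc = fitz.open(pdf_path)
    try:
        for i, page in enumerate(doc):
            pix = page.get_pixmap(dpi=dpi)  # rasterise the page at the requested resolution
            out_path = os.path.join(out_dir, f"{os.path.basename(pdf_path)}_page_{i + 1}.png")
            pix.save(out_path)
            pages.append({"path": out_path, "page": i + 1})
    finally:
        doc.close()
    return pages

if __name__ == "__main__":
    with tempfile.TemporaryDirectory() as tmp_dir:
        for item in pdf_to_page_images("sample.pdf", tmp_dir):  # placeholder input file
            print(item["page"], item["path"])

Unlike this sketch, the app keeps the rendered pages in a directory created with tempfile.mkdtemp() so they survive across the generator's yields, and only removes it with shutil.rmtree() after the final yield.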
requirements.txt CHANGED
@@ -2,6 +2,10 @@ gradio>=4.0.0
  requests>=2.25.0
  Pillow>=9.0.0
  deepface>=0.0.79
- tensorflow-cpu>=2.13.0,<2.15.0 # Loosen to a broader range if needed
  opencv-python-headless>=4.5.0
- retina-face>=0.0.12
 
  requests>=2.25.0
  Pillow>=9.0.0
  deepface>=0.0.79
+ tensorflow>=2.10.0 # Or tensorflow-cpu if GPU is not available/needed
  opencv-python-headless>=4.5.0
+ # retina-face Pypi package for the detector if deepface doesn't pull it correctly
+ retina-face>=0.0.12
+ tf-keras
+ PyMuPDF
+ python-docx
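Since app.py guards every new dependency behind a try/except import, a fresh environment can be checked by simply mirroring those imports. A minimal sketch (module names are taken from the imports in the app.py diff above; tf-keras is left out because it is not imported directly by the app):

# Sanity check that the optional dependencies from requirements.txt are importable.
import importlib

modules = {
    "fitz": "PyMuPDF (PDF rasterisation)",
    "docx": "python-docx (DOCX text extraction)",
    "PIL": "Pillow (text-to-image rendering)",
    "cv2": "opencv-python-headless",
    "deepface": "deepface (facial embeddings)",
}

for module_name, purpose in modules.items():
    try:
        importlib.import_module(module_name)
        print(f"OK       {module_name:<10} {purpose}")
    except ImportError as exc:
        print(f"MISSING  {module_name:<10} {purpose} -> {exc}")

The tf-keras entry is presumably there because recent TensorFlow releases ship Keras 3 while deepface still expects the legacy Keras API, which also fits the switch from the tightly pinned tensorflow-cpu range to the looser tensorflow>=2.10.0 line.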