Sandy2636 committed
Commit 40edde0 · 1 Parent(s): d1c0f9b
New Update

Files changed:
- app.py (+272 -8)
- requirements.txt (+6 -2)
app.py CHANGED

@@ -1,6 +1,6 @@
-import os
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress TensorFlow INFO and WARNING messages
-os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
+# import os
+# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress TensorFlow INFO and WARNING messages
+# os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
 import gradio as gr
 import base64
 import requests

@@ -9,13 +9,30 @@ import re
 import os
 import uuid
 from datetime import datetime
+import tempfile # ✅ Add this
+import shutil
 import time # For potential sleeps if needed, or timing
 
 # Attempt to import deepface and handle import error gracefully
+try:
+    import fitz  # PyMuPDF
+    PYMUPDF_AVAILABLE = True
+except ImportError:
+    PYMUPDF_AVAILABLE = False
+    print("Warning: PyMuPDF not found. PDF processing will be disabled.")
+
+try:
+    import docx
+    from PIL import Image, ImageDraw, ImageFont
+    DOCX_AVAILABLE = True
+except ImportError:
+    DOCX_AVAILABLE = False
+    print("Warning: python-docx or Pillow not found. DOCX processing will be disabled.")
 try:
     from deepface import DeepFace
-    from deepface.commons import functions as deepface_functions
+    # from deepface.commons import functions as deepface_functions
     DEEPFACE_AVAILABLE = True
+    print(f"Got DeepFace")
 except ImportError:
     DEEPFACE_AVAILABLE = False
     print("Warning: deepface library not found. Facial recognition features will be disabled.")

@@ -48,6 +65,99 @@ processed_files_data = []
 person_profiles = {}
 
 # --- Helper Functions ---
+
+def render_text_to_image(text, output_path):
+    """Renders a string of text onto a new image file."""
+    if not DOCX_AVAILABLE:
+        raise ImportError("Pillow or python-docx is not installed.")
+
+    try:
+        # Use a built-in font if available, otherwise this might fail on minimal OS
+        font = ImageFont.truetype("DejaVuSans.ttf", 15)
+    except IOError:
+        print("Default font not found, using basic PIL font.")
+        font = ImageFont.load_default()
+
+    padding = 20
+    image_width = 800
+
+    # Simple text wrapping
+    lines = []
+    for paragraph in text.split('\n'):
+        words = paragraph.split()
+        line = ""
+        for word in words:
+            # Use getbbox for more accurate width calculation if available (Pillow >= 9.2.0)
+            if hasattr(font, 'getbbox'):
+                box = font.getbbox(line + word)
+                line_width = box[2] - box[0]
+            else:  # Fallback for older Pillow
+                line_width = font.getsize(line + word)[0]
+
+            if line_width <= image_width - 2 * padding:
+                line += word + " "
+            else:
+                lines.append(line.strip())
+                line = word + " "
+        lines.append(line.strip())
+
+    # Calculate image height
+    _, top, _, bottom = font.getbbox("A")
+    line_height = bottom - top + 5  # Add some line spacing
+    image_height = len(lines) * line_height + 2 * padding
+
+    img = Image.new('RGB', (image_width, int(image_height)), color='white')
+    draw = ImageDraw.Draw(img)
+
+    y = padding
+    for line in lines:
+        draw.text((padding, y), line, font=font, fill='black')
+        y += line_height
+
+    img.save(output_path, format='PNG')
+
+
+def convert_file_to_images(original_filepath, temp_output_dir):
+    """
+    Converts an uploaded file (PDF, DOCX) into one or more images.
+    If the file is already an image, it returns its own path.
+    Returns a list of dictionaries, each with 'path' and 'page' keys.
+    """
+    filename_lower = original_filepath.lower()
+    output_paths = []
+
+    if filename_lower.endswith('.pdf'):
+        if not PYMUPDF_AVAILABLE:
+            raise RuntimeError("PDF processing is disabled (PyMuPDF not installed).")
+        doc = fitz.open(original_filepath)
+        for i, page in enumerate(doc):
+            pix = page.get_pixmap(dpi=200)  # Render page to image
+            output_filepath = os.path.join(temp_output_dir, f"{os.path.basename(original_filepath)}_page_{i+1}.png")
+            pix.save(output_filepath)
+            output_paths.append({"path": output_filepath, "page": i + 1})
+        doc.close()
+
+    elif filename_lower.endswith('.docx'):
+        if not DOCX_AVAILABLE:
+            raise RuntimeError("DOCX processing is disabled (python-docx or Pillow not installed).")
+        doc = docx.Document(original_filepath)
+        full_text = "\n".join([para.text for para in doc.paragraphs])
+        if not full_text.strip():
+            full_text = "--- Document is empty or contains only images/tables ---"
+        output_filepath = os.path.join(temp_output_dir, f"{os.path.basename(original_filepath)}.png")
+        render_text_to_image(full_text, output_filepath)
+        output_paths.append({"path": output_filepath, "page": 1})
+
+    elif filename_lower.endswith(('.png', '.jpg', '.jpeg', '.webp', '.bmp', '.tiff')):
+        # File is already an image, just return its path
+        output_paths.append({"path": original_filepath, "page": 1})
+
+    else:
+        raise TypeError(f"Unsupported file type: {os.path.basename(original_filepath)}")
+
+    return output_paths
+
+
 def extract_json_from_text(text):
     if not text:
         return {"error": "Empty text provided for JSON extraction."}

@@ -343,7 +453,7 @@ def format_dataframe_data(current_files_data):
     df_rows = []
     for f_data in current_files_data:
         entities = f_data.get("entities") or {}
-        face_info = f_data.get("face_analysis_result", {})
+        face_info = f_data.get("face_analysis_result", {}) or {}
         face_detected_status = "Y" if face_info.get("count", 0) > 0 else "N"
         if "error" in face_info : face_detected_status = "Error"
         elif "message" in face_info and "No face detected" in face_info["message"]: face_detected_status = "N"

@@ -388,7 +498,7 @@ def format_persons_markdown(current_persons_data, current_files_data):
     md_parts.append("\n---\n")
     return "\n".join(md_parts)
 
-def process_uploaded_files(files_list, progress=gr.Progress(track_tqdm=True)):
+def process_uploaded_files_old(files_list, progress=gr.Progress(track_tqdm=True)):
     global processed_files_data, person_profiles
     processed_files_data = []
     person_profiles = {}

@@ -485,6 +595,160 @@ def process_uploaded_files(files_list, progress=gr.Progress(track_tqdm=True)):
     final_persons_md = format_persons_markdown(person_profiles, processed_files_data)
     yield (final_df_data, final_persons_md, "{}", f"All {len(processed_files_data)} documents processed.")
 
+def process_uploaded_files(files_list, progress=gr.Progress(track_tqdm=True)):
+    global processed_files_data, person_profiles
+    processed_files_data = []
+    person_profiles = {}
+    temp_dir = tempfile.mkdtemp() # Create a temporary directory for converted images
+
+    empty_df_row = [["N/A"] * 11] # Match number of headers
+    if not OPENROUTER_API_KEY:
+        yield (empty_df_row, "API Key Missing.", "{}", "Error: API Key not set.")
+        shutil.rmtree(temp_dir)
+        return
+    if not files_list:
+        yield ([], "No files uploaded.", "{}", "Upload files to begin.")
+        shutil.rmtree(temp_dir)
+        return
+
+    # --- Stage 1: Pre-process files into a job queue of images ---
+    job_queue = []
+    for original_file_obj in progress.tqdm(files_list, desc="Pre-processing Files"):
+        try:
+            image_page_list = convert_file_to_images(original_file_obj.name, temp_dir)
+            total_pages = len(image_page_list)
+            for item in image_page_list:
+                job_queue.append({
+                    "original_filename": os.path.basename(original_file_obj.name),
+                    "page_number": item["page"],
+                    "total_pages": total_pages,
+                    "image_path": item["path"]
+                })
+        except Exception as e:
+            job_queue.append({"original_filename": os.path.basename(original_file_obj.name), "error": str(e)})
+
+    for job in job_queue:
+        if "error" in job:
+            processed_files_data.append({
+                "doc_id": str(uuid.uuid4()),
+                "original_filename": job["original_filename"],
+                "page_number": 1,
+                "status": f"Error: {job['error']}"
+            })
+        else:
+            processed_files_data.append({
+                "doc_id": str(uuid.uuid4()),
+                "original_filename": job["original_filename"],
+                "page_number": job["page_number"],
+                "total_pages": job["total_pages"],
+                "filepath": job["image_path"],
+                "status": "Queued",
+                "ocr_json": None,
+                "entities": None,
+                "face_analysis_result": None,
+                "facial_embeddings": None,
+                "assigned_person_key": None,
+                "linking_method": ""
+            })
+
+    initial_df_data = format_dataframe_data(processed_files_data)
+    initial_persons_md = format_persons_markdown(person_profiles, processed_files_data)
+    yield (initial_df_data, initial_persons_md, "{}", f"Pre-processing complete. Analyzing {len(processed_files_data)} pages.")
+
+    # --- Stage 2: Analyze each page ---
+    current_ocr_json_display = "{}"
+    for i, file_data_item in enumerate(progress.tqdm(processed_files_data, desc="Analyzing Pages")):
+        if file_data_item["status"].startswith("Error"):
+            continue
+
+        current_filename = f"{file_data_item['original_filename']} (p.{file_data_item['page_number']})"
+        linking_method_log_for_doc = []
+
+        # 1. OCR
+        file_data_item["status"] = "OCR..."
+        persons_md = format_persons_markdown(person_profiles, processed_files_data)
+        df_data = format_dataframe_data(processed_files_data)
+        yield (df_data, persons_md, current_ocr_json_display, f"OCR: {current_filename}")
+
+        ocr_result = call_openrouter_ocr(file_data_item["filepath"])
+        file_data_item["ocr_json"] = ocr_result
+        current_ocr_json_display = json.dumps(ocr_result, indent=2)
+
+        if "error" in ocr_result:
+            file_data_item["status"] = f"OCR Err: {str(ocr_result['error'])[:30]}.."
+            linking_method_log_for_doc.append("OCR Failed.")
+            file_data_item["linking_method"] = " ".join(linking_method_log_for_doc)
+            persons_md = format_persons_markdown(person_profiles, processed_files_data)
+            df_data = format_dataframe_data(processed_files_data)
+            yield (df_data, persons_md, current_ocr_json_display, f"OCR Err: {current_filename}")
+            continue
+
+        # 2. Entity Extraction
+        file_data_item["status"] = "OCR OK. Entities..."
+        persons_md = format_persons_markdown(person_profiles, processed_files_data)
+        df_data = format_dataframe_data(processed_files_data)
+        yield (df_data, persons_md, current_ocr_json_display, f"Entities: {current_filename}")
+        entities = extract_entities_from_ocr(ocr_result)
+        file_data_item["entities"] = entities
+
+        # 3. Facial Feature Extraction
+        file_data_item["status"] = "Entities OK. Face..."
+        persons_md = format_persons_markdown(person_profiles, processed_files_data)
+        df_data = format_dataframe_data(processed_files_data)
+        yield (df_data, persons_md, current_ocr_json_display, f"Face Detect: {current_filename}")
+        doc_type_lower = (entities.get("doc_type") or "").lower()
+
+        if DEEPFACE_AVAILABLE and (
+            "photo" in doc_type_lower or
+            "passport" in doc_type_lower or
+            "id" in doc_type_lower or
+            "selfie" in doc_type_lower or
+            not doc_type_lower
+        ):
+            face_result = get_facial_embeddings_with_deepface(file_data_item["filepath"])
+            file_data_item["face_analysis_result"] = face_result
+            if "embeddings" in face_result and face_result["embeddings"]:
+                file_data_item["facial_embeddings"] = face_result["embeddings"]
+                linking_method_log_for_doc.append(f"{face_result.get('count', 0)} face(s).")
+            elif "error" in face_result:
+                linking_method_log_for_doc.append("Face Ext. Error.")
+            else:
+                linking_method_log_for_doc.append("No face det.")
+        else:
+            linking_method_log_for_doc.append("Face Ext. Skipped.")
+
+        file_data_item["status"] = "Face Done. Classify..."
+        persons_md = format_persons_markdown(person_profiles, processed_files_data)
+        df_data = format_dataframe_data(processed_files_data)
+        yield (df_data, persons_md, current_ocr_json_display, f"Classifying: {current_filename}")
+
+        # 4. Person Classification
+        person_key = get_person_id_and_update_profiles(
+            file_data_item["doc_id"],
+            entities,
+            file_data_item.get("facial_embeddings"),
+            person_profiles,
+            linking_method_log_for_doc
+        )
+        file_data_item["assigned_person_key"] = person_key
+        file_data_item["status"] = "Classified"
+        file_data_item["linking_method"] = " ".join(linking_method_log_for_doc)
+
+        persons_md = format_persons_markdown(person_profiles, processed_files_data)
+        df_data = format_dataframe_data(processed_files_data)
+        yield (df_data, persons_md, current_ocr_json_display, f"Done: {current_filename} -> {person_key}")
+
+    # Final Result
+    final_df_data = format_dataframe_data(processed_files_data)
+    final_persons_md = format_persons_markdown(person_profiles, processed_files_data)
+    yield (final_df_data, final_persons_md, "{}", f"All {len(processed_files_data)} pages analyzed.")
+
+    # Cleanup
+    try:
+        shutil.rmtree(temp_dir)
+        print(f"Cleaned up temporary directory: {temp_dir}")
+    except Exception as e:
+        print(f"Error cleaning up temporary directory {temp_dir}: {e}")
 
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📄 Intelligent Document Processor & Classifier v2 (with Face ID)")

@@ -498,7 +762,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Row():
         with gr.Column(scale=1):
             files_input = gr.Files(label="Upload Document Images (Bulk)", file_count="multiple", type="filepath")
-            process_button = gr.Button("
+            process_button = gr.Button("Process Uploaded Documents", variant="primary")
         with gr.Column(scale=2):
             overall_status_textbox = gr.Textbox(label="Current Task & Overall Progress", interactive=False, lines=2)
 

@@ -508,7 +772,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         document_status_df = gr.Dataframe(
             headers=dataframe_headers, datatype=["str"] * len(dataframe_headers),
             label="Individual Document Status & Extracted Entities",
-            row_count=(1, "dynamic"), col_count=(len(dataframe_headers), "fixed"), wrap=True
+            row_count=(1, "dynamic"), col_count=(len(dataframe_headers), "fixed"), wrap=True
         )
 
     with gr.Accordion("Selected Document Full OCR JSON", open=False):
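The PDF branch of the new convert_file_to_images helper renders each page to a 200-dpi PNG inside a temporary directory that process_uploaded_files later removes with shutil.rmtree. A minimal standalone sketch of that same PyMuPDF flow is shown below; it assumes PyMuPDF is installed and uses a hypothetical sample.pdf rather than a Gradio upload object.

import os
import shutil
import tempfile

import fitz  # PyMuPDF, guarded by try/except in app.py

temp_dir = tempfile.mkdtemp()
try:
    doc = fitz.open("sample.pdf")  # hypothetical input file
    for i, page in enumerate(doc):
        pix = page.get_pixmap(dpi=200)  # same rendering call used by convert_file_to_images
        out_path = os.path.join(temp_dir, f"sample.pdf_page_{i + 1}.png")
        pix.save(out_path)
        print(f"page {i + 1} -> {out_path}")
    doc.close()
finally:
    shutil.rmtree(temp_dir)  # mirrors the cleanup at the end of process_uploaded_files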
requirements.txt CHANGED

@@ -2,6 +2,10 @@ gradio>=4.0.0
 requests>=2.25.0
 Pillow>=9.0.0
 deepface>=0.0.79
-tensorflow
+tensorflow>=2.10.0 # Or tensorflow-cpu if GPU is not available/needed
 opencv-python-headless>=4.5.0
-retina-face
+# retina-face Pypi package for the detector if deepface doesn't pull it correctly
+retina-face>=0.0.12
+tf-keras
+PyMuPDF
+python-docx
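Because PDF, DOCX, and DeepFace support are each wrapped in try/except import guards in app.py, a quick way to confirm the newly pinned packages resolved after installing requirements.txt is to import them by module name. The package-to-module mapping below (fitz for PyMuPDF, docx for python-docx, tf_keras for tf-keras) follows those packages' usual import names and is an assumption, not something stated in this commit.

import importlib

# PyPI package -> module it is imported as (assumed usual names)
checks = {
    "PyMuPDF": "fitz",
    "python-docx": "docx",
    "tf-keras": "tf_keras",
    "deepface": "deepface",
}

for package, module_name in checks.items():
    try:
        importlib.import_module(module_name)
        print(f"{package}: OK")
    except ImportError as exc:
        print(f"{package}: missing ({exc})")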