Sort subtopics by number
Browse files- __pycache__/mineru_single.cpython-310.pyc +0 -0
- topic_extr.py +118 -0
- topic_extraction.log +242 -0
__pycache__/mineru_single.cpython-310.pyc
CHANGED
Binary files a/__pycache__/mineru_single.cpython-310.pyc and b/__pycache__/mineru_single.cpython-310.pyc differ
|
|
topic_extr.py
CHANGED
@@ -782,6 +782,119 @@ class MineruNoTextProcessor:
|
|
782 |
except Exception as e:
|
783 |
logger.error(f"Error during GPU cleanup: {e}")
|
784 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
785 |
def process(self, pdf_path: str) -> Dict[str, Any]:
|
786 |
logger.info(f"Processing PDF: {pdf_path}")
|
787 |
try:
|
@@ -869,7 +982,12 @@ class MineruNoTextProcessor:
|
|
869 |
final_markdown = writer.post_process(md_prefix, md_content)
|
870 |
|
871 |
subtopic_list = list(writer.extracted_subtopics.values())
|
|
|
872 |
|
|
|
|
|
|
|
|
|
873 |
out_path = os.path.join(self.output_folder, "final_subtopics.json")
|
874 |
with open(out_path, "w", encoding="utf-8") as f:
|
875 |
json.dump(subtopic_list, f, indent=2)
|
|
|
782 |
except Exception as e:
|
783 |
logger.error(f"Error during GPU cleanup: {e}")
|
784 |
|
785 |
+
def unify_topic_name(raw_title: str, children_subtopics: list) -> str:
    """
    Produce a cleaned-up topic name.

    The title is stripped and a trailing " continued" (any case) is removed,
    e.g. "2 Algebra and functions continued" -> "2 Algebra and functions".
    If the result is empty or a known truncated title (currently only
    "gonometry"), the leading integer of the FIRST subtopic title
    (e.g. "5.1 ..." -> "5") is looked up in a small table of known parent
    titles, e.g. 'gonometry' with children '5.1', '5.2' -> '5 Trigonometry'.

    Args:
        raw_title: Title as extracted; ``None`` is treated as an empty title.
        children_subtopics: List of subtopic dicts; only the "title" of the
            first entry is inspected. Entries without a "title" are tolerated.

    Returns:
        The cleaned title. When no replacement can be deduced, the cleaned
        (possibly empty or still truncated) title is returned unchanged.
    """
    # Parent titles keyed by the leading section number of their subtopics.
    # NOTE(review): this table looks specific to one specification document;
    # confirm the numbers cannot collide between different papers.
    known_titles = {
        "2": "2 Algebra and functions",
        "3": "3 Coordinate geometry in the (x, y) plane",
        "4": "4 Statistical distributions",
        "5": "5 Trigonometry",
    }
    # Titles known to be truncated by the extraction step (compared lowercased).
    broken_titles = {"gonometry"}

    title = (raw_title or "").strip()
    # E.g. "2 Algebra and functions continued" -> "2 Algebra and functions"
    title = re.sub(r"\s+continued\s*$", "", title, flags=re.IGNORECASE)

    # A usable title needs no guessing.
    if title and title.lower() not in broken_titles:
        return title

    if not children_subtopics:
        return title

    # Only the first subtopic is consulted; read its title defensively so a
    # child without a "title" key cannot raise.
    first_sub = (children_subtopics[0].get("title") or "").strip()
    match = re.match(r"^(\d+)\.", first_sub)
    if match:
        # Unknown numbers keep the current title rather than inventing one.
        title = known_titles.get(match.group(1), title)

    return title
|
825 |
+
|
826 |
+
|
827 |
+
def merge_topics(subtopic_list: list) -> list:
    """
    Merge, de-duplicate and sort extracted topics.

    1. Cleans up each topic's title via ``unify_topic_name`` (removes
       " continued", fixes known partial titles).
    2. Merges contents and children of topics that share a cleaned-up title.
    3. Within each parent, merges children that share the same stripped title.
    4. Sorts parents by the leading integer of their title, and each parent's
       children by the integers found in the child's title
       ("2.11" -> (2, 11)). Entries without digits sort last; ties keep
       first-seen order because the sort is stable.

    The input is not modified: parent lists and first-level child dicts are
    copied before merging. Deeper descendants are shared with the input, but
    this function never changes them.

    Args:
        subtopic_list: List of topic dicts with optional "title", "contents"
            and "children" keys. Missing or ``None`` values count as empty.

    Returns:
        A new list of ``{"title", "contents", "children"}`` dicts.
    """
    def parse_parent_num(title):
        # Leading integer of the parent title, or 9999 to push it last.
        match = re.match(r"^(\d+)", title)
        return int(match.group(1)) if match else 9999

    def parse_subtopic_num(child):
        # "2.11" => (2, 11), "10.5" => (10, 5); no digits => pushed to the end.
        digits = re.findall(r"\d+", child.get("title") or "")
        return tuple(int(d) for d in digits) if digits else (9999,)

    def copy_child(child):
        # Shallow copy with fresh lists so later extends never reach the
        # caller's data; keys absent from the child stay absent.
        copied = dict(child)
        for key in ("contents", "children"):
            if isinstance(copied.get(key), list):
                copied[key] = list(copied[key])
        return copied

    # Keyed by *cleaned* parent title; dict order keeps first-seen order.
    merged = {}
    for topic_obj in subtopic_list:
        children = topic_obj.get("children") or []
        contents = topic_obj.get("contents") or []
        new_title = unify_topic_name(topic_obj.get("title", ""), children)

        bucket = merged.setdefault(
            new_title,
            {"title": new_title, "contents": [], "children": []},
        )
        bucket["contents"].extend(contents)
        bucket["children"].extend(children)

    for par_info in merged.values():
        # Collapse children that share a title into the first occurrence.
        child_map = {}
        for ch in par_info["children"]:
            ctitle = (ch.get("title") or "").strip()
            kept = child_map.get(ctitle)
            if kept is None:
                child_map[ctitle] = copy_child(ch)
                continue
            for key in ("contents", "children"):
                extra = ch.get(key) or []
                if not extra:
                    continue
                # The first-seen child may lack this key entirely.
                if not isinstance(kept.get(key), list):
                    kept[key] = []
                kept[key].extend(extra)
        par_info["children"] = sorted(child_map.values(), key=parse_subtopic_num)

    # E.g. "2 Algebra" < "5 Trigonometry"; titles with no leading number go last.
    return sorted(merged.values(), key=lambda info: parse_parent_num(info["title"]))
|
897 |
+
|
898 |
def process(self, pdf_path: str) -> Dict[str, Any]:
|
899 |
logger.info(f"Processing PDF: {pdf_path}")
|
900 |
try:
|
|
|
982 |
final_markdown = writer.post_process(md_prefix, md_content)
|
983 |
|
984 |
subtopic_list = list(writer.extracted_subtopics.values())
|
985 |
+
subtopic_list = merge_topics(subtopic_list)
|
986 |
|
987 |
+
# out_path = os.path.join(self.output_folder, "final_subtopics.json")
|
988 |
+
# with open(out_path, "w", encoding="utf-8") as f:
|
989 |
+
# json.dump(subtopic_list, f, indent=2)
|
990 |
+
# logger.info(f"Final subtopics JSON saved locally at {out_path}")
|
991 |
out_path = os.path.join(self.output_folder, "final_subtopics.json")
|
992 |
with open(out_path, "w", encoding="utf-8") as f:
|
993 |
json.dump(subtopic_list, f, indent=2)
|
topic_extraction.log
CHANGED
@@ -5316,3 +5316,245 @@ and series'. Using page 7.
|
|
5316 |
2025-03-03 17:32:51,859 [INFO] __main__ - Processing table image => img_28.jpg, columns=two
|
5317 |
2025-03-03 17:32:55,099 [INFO] __main__ - GPU memory cleaned up.
|
5318 |
2025-03-03 17:32:55,099 [ERROR] __main__ - Processing failed: 'LocalImageWriter' object has no attribute 'extracted_subtopics'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5316 |
2025-03-03 17:32:51,859 [INFO] __main__ - Processing table image => img_28.jpg, columns=two
|
5317 |
2025-03-03 17:32:55,099 [INFO] __main__ - GPU memory cleaned up.
|
5318 |
2025-03-03 17:32:55,099 [ERROR] __main__ - Processing failed: 'LocalImageWriter' object has no attribute 'extracted_subtopics'
|
5319 |
+
2025-03-03 18:01:47,365 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf
|
5320 |
+
2025-03-03 18:01:48,166 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]}
|
5321 |
+
2025-03-03 18:01:48,167 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf'
|
5322 |
+
2025-03-03 18:01:48,529 [INFO] __main__ - Computed global offset: 4
|
5323 |
+
2025-03-03 18:01:48,530 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43]
|
5324 |
+
2025-03-03 18:02:45,151 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg
|
5325 |
+
2025-03-03 18:02:47,389 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg
|
5326 |
+
2025-03-03 18:02:47,996 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg
|
5327 |
+
2025-03-03 18:02:48,658 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg
|
5328 |
+
2025-03-03 18:02:49,352 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg
|
5329 |
+
2025-03-03 18:02:49,960 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg
|
5330 |
+
2025-03-03 18:02:50,659 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg
|
5331 |
+
2025-03-03 18:02:51,254 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg
|
5332 |
+
2025-03-03 18:02:51,742 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg
|
5333 |
+
2025-03-03 18:02:52,344 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg
|
5334 |
+
2025-03-03 18:02:52,901 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg
|
5335 |
+
2025-03-03 18:02:53,548 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg
|
5336 |
+
2025-03-03 18:02:54,179 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg
|
5337 |
+
2025-03-03 18:02:54,858 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg
|
5338 |
+
2025-03-03 18:02:55,462 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg
|
5339 |
+
2025-03-03 18:02:56,140 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg
|
5340 |
+
2025-03-03 18:02:56,834 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg
|
5341 |
+
2025-03-03 18:02:57,186 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg
|
5342 |
+
2025-03-03 18:02:57,895 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg
|
5343 |
+
2025-03-03 18:02:58,699 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg
|
5344 |
+
2025-03-03 18:02:59,469 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg
|
5345 |
+
2025-03-03 18:03:00,063 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg
|
5346 |
+
2025-03-03 18:03:00,715 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg
|
5347 |
+
2025-03-03 18:03:01,305 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg
|
5348 |
+
2025-03-03 18:03:01,790 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg
|
5349 |
+
2025-03-03 18:03:02,427 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg
|
5350 |
+
2025-03-03 18:03:03,086 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg
|
5351 |
+
2025-03-03 18:03:03,840 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg
|
5352 |
+
2025-03-03 18:03:04,394 [INFO] __main__ - Classifying images to detect tables.
|
5353 |
+
2025-03-03 18:03:09,642 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three
|
5354 |
+
2025-03-03 18:03:13,344 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_1.jpg_r0_c0.png
|
5355 |
+
2025-03-03 18:03:14,713 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_1.jpg_r0_c1.png
|
5356 |
+
2025-03-03 18:03:16,386 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_1.jpg_r1_c0.png
|
5357 |
+
2025-03-03 18:03:18,238 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_1.jpg_r1_c1.png
|
5358 |
+
2025-03-03 18:03:19,729 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three
|
5359 |
+
2025-03-03 18:03:23,829 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_2.jpg_r0_c0.png
|
5360 |
+
2025-03-03 18:03:25,255 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_2.jpg_r0_c1.png
|
5361 |
+
2025-03-03 18:03:26,663 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_2.jpg_r1_c0.png
|
5362 |
+
2025-03-03 18:03:28,211 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_2.jpg_r1_c1.png
|
5363 |
+
2025-03-03 18:03:29,861 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_2.jpg_r2_c0.png
|
5364 |
+
2025-03-03 18:03:31,766 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_2.jpg_r3_c0.png
|
5365 |
+
2025-03-03 18:03:33,633 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_2.jpg_r4_c0.png
|
5366 |
+
2025-03-03 18:03:35,112 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three
|
5367 |
+
2025-03-03 18:03:38,486 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_3.jpg_r0_c0.png
|
5368 |
+
2025-03-03 18:03:39,547 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_3.jpg_r0_c1.png
|
5369 |
+
2025-03-03 18:03:40,727 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_3.jpg_r1_c0.png
|
5370 |
+
2025-03-03 18:03:42,551 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_3.jpg_r1_c1.png
|
5371 |
+
2025-03-03 18:03:43,977 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three
|
5372 |
+
2025-03-03 18:03:47,007 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_4.jpg_r0_c0.png
|
5373 |
+
2025-03-03 18:03:47,944 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_4.jpg_r0_c1.png
|
5374 |
+
2025-03-03 18:03:49,417 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_4.jpg_r1_c0.png
|
5375 |
+
2025-03-03 18:03:51,202 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_4.jpg_r1_c1.png
|
5376 |
+
2025-03-03 18:03:52,822 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three
|
5377 |
+
2025-03-03 18:03:57,664 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_5.jpg_r0_c0.png
|
5378 |
+
2025-03-03 18:03:59,070 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_5.jpg_r0_c1.png
|
5379 |
+
2025-03-03 18:04:00,494 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_5.jpg_r1_c0.png
|
5380 |
+
2025-03-03 18:04:02,240 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_5.jpg_r1_c1.png
|
5381 |
+
2025-03-03 18:04:04,099 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_5.jpg_r2_c0.png
|
5382 |
+
2025-03-03 18:04:05,512 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three
|
5383 |
+
2025-03-03 18:04:09,932 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_6.jpg_r0_c0.png
|
5384 |
+
2025-03-03 18:04:11,364 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_6.jpg_r0_c1.png
|
5385 |
+
2025-03-03 18:04:12,780 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_6.jpg_r1_c0.png
|
5386 |
+
2025-03-03 18:04:14,521 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_6.jpg_r1_c1.png
|
5387 |
+
2025-03-03 18:04:16,038 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_6.jpg_r2_c0.png
|
5388 |
+
2025-03-03 18:04:17,799 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_6.jpg_r2_c1.png
|
5389 |
+
2025-03-03 18:04:19,184 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three
|
5390 |
+
2025-03-03 18:04:23,663 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_7.jpg_r0_c0.png
|
5391 |
+
2025-03-03 18:04:24,739 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_7.jpg_r0_c1.png
|
5392 |
+
2025-03-03 18:04:26,232 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_7.jpg_r1_c0.png
|
5393 |
+
2025-03-03 18:04:28,388 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_7.jpg_r1_c1.png
|
5394 |
+
2025-03-03 18:04:30,206 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_7.jpg_r2_c0.png
|
5395 |
+
2025-03-03 18:04:31,473 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three
|
5396 |
+
2025-03-03 18:04:34,576 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_8.jpg_r0_c0.png
|
5397 |
+
2025-03-03 18:04:35,800 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_8.jpg_r0_c1.png
|
5398 |
+
2025-03-03 18:04:37,238 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_8.jpg_r0_c2.png
|
5399 |
+
2025-03-03 18:04:38,721 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_8.jpg_r1_c0.png
|
5400 |
+
2025-03-03 18:04:40,069 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_8.jpg_r1_c1.png
|
5401 |
+
2025-03-03 18:04:41,915 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_8.jpg_r1_c2.png
|
5402 |
+
2025-03-03 18:04:43,317 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_8.jpg_r2_c0.png
|
5403 |
+
2025-03-03 18:04:44,758 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_8.jpg_r2_c1.png
|
5404 |
+
2025-03-03 18:04:46,354 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_8.jpg_r3_c0.png
|
5405 |
+
2025-03-03 18:04:47,962 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_8.jpg_r3_c1.png
|
5406 |
+
2025-03-03 18:04:49,441 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_8.jpg_r4_c0.png
|
5407 |
+
2025-03-03 18:04:51,291 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_8.jpg_r4_c1.png
|
5408 |
+
2025-03-03 18:04:52,840 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=two
|
5409 |
+
2025-03-03 18:04:56,905 [WARNING] __main__ - Cell image not found: /tmp/tmp5hkh4jpv.jpg_rows/row_0/col_0.png
|
5410 |
+
2025-03-03 18:04:57,168 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_9.jpg_r0_c1.png
|
5411 |
+
2025-03-03 18:04:58,554 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_9.jpg_r1_c0.png
|
5412 |
+
2025-03-03 18:05:00,216 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_9.jpg_r1_c1.png
|
5413 |
+
2025-03-03 18:05:02,113 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_9.jpg_r2_c0.png
|
5414 |
+
2025-03-03 18:05:03,482 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three
|
5415 |
+
2025-03-03 18:05:07,505 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_10.jpg_r0_c0.png
|
5416 |
+
2025-03-03 18:05:08,634 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_10.jpg_r0_c1.png
|
5417 |
+
2025-03-03 18:05:09,820 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_10.jpg_r1_c0.png
|
5418 |
+
2025-03-03 18:05:11,434 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_10.jpg_r1_c1.png
|
5419 |
+
2025-03-03 18:05:13,181 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_10.jpg_r2_c0.png
|
5420 |
+
2025-03-03 18:05:15,198 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_10.jpg_r3_c0.png
|
5421 |
+
2025-03-03 18:05:16,840 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_10.jpg_r4_c0.png
|
5422 |
+
2025-03-03 18:05:18,564 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_10.jpg_r5_c0.png
|
5423 |
+
2025-03-03 18:05:19,773 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two
|
5424 |
+
2025-03-03 18:05:23,555 [WARNING] __main__ - Cell image not found: /tmp/tmpmqfa7baf.jpg_rows/row_0/col_0.png
|
5425 |
+
2025-03-03 18:05:23,816 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_11.jpg_r0_c1.png
|
5426 |
+
2025-03-03 18:05:25,422 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_11.jpg_r1_c0.png
|
5427 |
+
2025-03-03 18:05:27,100 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_11.jpg_r2_c0.png
|
5428 |
+
2025-03-03 18:05:28,749 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_11.jpg_r3_c0.png
|
5429 |
+
2025-03-03 18:05:30,528 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_11.jpg_r4_c0.png
|
5430 |
+
2025-03-03 18:05:32,582 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_11.jpg_r5_c0.png
|
5431 |
+
2025-03-03 18:05:34,027 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three
|
5432 |
+
2025-03-03 18:05:38,149 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_12.jpg_r0_c0.png
|
5433 |
+
2025-03-03 18:05:39,165 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_12.jpg_r0_c1.png
|
5434 |
+
2025-03-03 18:05:40,592 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_12.jpg_r1_c0.png
|
5435 |
+
2025-03-03 18:05:42,353 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_12.jpg_r1_c1.png
|
5436 |
+
2025-03-03 18:05:44,001 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_12.jpg_r2_c0.png
|
5437 |
+
2025-03-03 18:05:45,790 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_12.jpg_r2_c1.png
|
5438 |
+
2025-03-03 18:05:47,303 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three
|
5439 |
+
2025-03-03 18:05:50,408 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_13.jpg_r0_c0.png
|
5440 |
+
2025-03-03 18:05:51,470 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_13.jpg_r0_c1.png
|
5441 |
+
2025-03-03 18:05:53,168 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_13.jpg_r1_c0.png
|
5442 |
+
2025-03-03 18:05:54,502 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_13.jpg_r1_c1.png
|
5443 |
+
2025-03-03 18:05:56,341 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_13.jpg_r2_c0.png
|
5444 |
+
2025-03-03 18:05:58,223 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_13.jpg_r3_c0.png
|
5445 |
+
2025-03-03 18:05:59,584 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three
|
5446 |
+
2025-03-03 18:06:02,910 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_14.jpg_r0_c0.png
|
5447 |
+
2025-03-03 18:06:03,811 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_14.jpg_r0_c1.png
|
5448 |
+
2025-03-03 18:06:04,999 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_14.jpg_r1_c0.png
|
5449 |
+
2025-03-03 18:06:06,682 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_14.jpg_r1_c1.png
|
5450 |
+
2025-03-03 18:06:08,714 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_14.jpg_r2_c0.png
|
5451 |
+
2025-03-03 18:06:10,540 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_14.jpg_r3_c0.png
|
5452 |
+
2025-03-03 18:06:12,055 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_14.jpg_r4_c0.png
|
5453 |
+
2025-03-03 18:06:13,546 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_14.jpg_r4_c1.png
|
5454 |
+
2025-03-03 18:06:14,846 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three
|
5455 |
+
2025-03-03 18:06:18,269 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_15.jpg_r0_c0.png
|
5456 |
+
2025-03-03 18:06:19,578 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_15.jpg_r0_c1.png
|
5457 |
+
2025-03-03 18:06:20,905 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_15.jpg_r0_c2.png
|
5458 |
+
2025-03-03 18:06:22,274 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_15.jpg_r1_c0.png
|
5459 |
+
2025-03-03 18:06:23,653 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_15.jpg_r1_c1.png
|
5460 |
+
2025-03-03 18:06:25,318 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_15.jpg_r1_c2.png
|
5461 |
+
2025-03-03 18:06:26,709 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_15.jpg_r2_c0.png
|
5462 |
+
2025-03-03 18:06:28,080 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_15.jpg_r2_c1.png
|
5463 |
+
2025-03-03 18:06:29,474 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_15.jpg_r3_c0.png
|
5464 |
+
2025-03-03 18:06:31,739 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_15.jpg_r3_c1.png
|
5465 |
+
2025-03-03 18:06:33,031 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three
|
5466 |
+
2025-03-03 18:06:36,448 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_16.jpg_r0_c0.png
|
5467 |
+
2025-03-03 18:06:37,446 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_16.jpg_r0_c1.png
|
5468 |
+
2025-03-03 18:06:38,581 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_16.jpg_r1_c0.png
|
5469 |
+
2025-03-03 18:06:40,126 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_16.jpg_r1_c1.png
|
5470 |
+
2025-03-03 18:06:41,796 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_16.jpg_r2_c0.png
|
5471 |
+
2025-03-03 18:06:43,330 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_16.jpg_r3_c0.png
|
5472 |
+
2025-03-03 18:06:45,025 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_16.jpg_r3_c1.png
|
5473 |
+
2025-03-03 18:06:46,832 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_16.jpg_r4_c0.png
|
5474 |
+
2025-03-03 18:06:48,096 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three
|
5475 |
+
2025-03-03 18:06:51,269 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_17.jpg_r0_c0.png
|
5476 |
+
2025-03-03 18:06:52,315 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_17.jpg_r0_c1.png
|
5477 |
+
2025-03-03 18:06:53,734 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_17.jpg_r1_c0.png
|
5478 |
+
2025-03-03 18:06:55,297 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_17.jpg_r1_c1.png
|
5479 |
+
2025-03-03 18:06:57,056 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_17.jpg_r2_c0.png
|
5480 |
+
2025-03-03 18:06:58,396 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_17.jpg_r3_c0.png
|
5481 |
+
2025-03-03 18:06:59,922 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_17.jpg_r3_c1.png
|
5482 |
+
2025-03-03 18:07:01,718 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_17.jpg_r4_c0.png
|
5483 |
+
2025-03-03 18:07:03,388 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_17.jpg_r5_c0.png
|
5484 |
+
2025-03-03 18:07:04,651 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three
|
5485 |
+
2025-03-03 18:07:05,752 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_18.jpg_r0_c0.png
|
5486 |
+
2025-03-03 18:07:06,841 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_18.jpg_r0_c1.png
|
5487 |
+
2025-03-03 18:07:08,263 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_18.jpg_r1_c0.png
|
5488 |
+
2025-03-03 18:07:09,611 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_18.jpg_r1_c1.png
|
5489 |
+
2025-03-03 18:07:11,064 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three
|
5490 |
+
2025-03-03 18:07:13,613 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_19.jpg_r0_c0.png
|
5491 |
+
2025-03-03 18:07:14,973 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_19.jpg_r0_c1.png
|
5492 |
+
2025-03-03 18:07:16,339 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_19.jpg_r1_c0.png
|
5493 |
+
2025-03-03 18:07:18,493 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_19.jpg_r1_c1.png
|
5494 |
+
2025-03-03 18:07:20,139 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_19.jpg_r2_c0.png
|
5495 |
+
2025-03-03 18:07:21,807 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_19.jpg_r2_c1.png
|
5496 |
+
2025-03-03 18:07:23,091 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=three
|
5497 |
+
2025-03-03 18:07:26,528 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_20.jpg_r0_c0.png
|
5498 |
+
2025-03-03 18:07:27,947 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_20.jpg_r0_c1.png
|
5499 |
+
2025-03-03 18:07:29,439 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_20.jpg_r1_c0.png
|
5500 |
+
2025-03-03 18:07:31,257 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_20.jpg_r1_c1.png
|
5501 |
+
2025-03-03 18:07:32,705 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three
|
5502 |
+
2025-03-03 18:07:35,869 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_21.jpg_r0_c0.png
|
5503 |
+
2025-03-03 18:07:37,403 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_21.jpg_r0_c1.png
|
5504 |
+
2025-03-03 18:07:38,804 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_21.jpg_r1_c0.png
|
5505 |
+
2025-03-03 18:07:40,651 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_21.jpg_r1_c1.png
|
5506 |
+
2025-03-03 18:07:42,292 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_21.jpg_r2_c0.png
|
5507 |
+
2025-03-03 18:07:43,964 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_21.jpg_r2_c1.png
|
5508 |
+
2025-03-03 18:07:45,304 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three
|
5509 |
+
2025-03-03 18:07:48,679 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_22.jpg_r0_c0.png
|
5510 |
+
2025-03-03 18:07:49,776 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_22.jpg_r0_c1.png
|
5511 |
+
2025-03-03 18:07:51,108 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_22.jpg_r1_c0.png
|
5512 |
+
2025-03-03 18:07:52,641 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_22.jpg_r1_c1.png
|
5513 |
+
2025-03-03 18:07:54,136 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_22.jpg_r2_c0.png
|
5514 |
+
2025-03-03 18:07:55,772 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_22.jpg_r2_c1.png
|
5515 |
+
2025-03-03 18:07:57,135 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three
|
5516 |
+
2025-03-03 18:08:00,271 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_23.jpg_r0_c0.png
|
5517 |
+
2025-03-03 18:08:01,671 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_23.jpg_r0_c1.png
|
5518 |
+
2025-03-03 18:08:03,176 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_23.jpg_r1_c0.png
|
5519 |
+
2025-03-03 18:08:04,801 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_23.jpg_r1_c1.png
|
5520 |
+
2025-03-03 18:08:06,459 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_23.jpg_r2_c0.png
|
5521 |
+
2025-03-03 18:08:08,351 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_23.jpg_r2_c1.png
|
5522 |
+
2025-03-03 18:08:10,016 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three
|
5523 |
+
2025-03-03 18:08:13,320 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_24.jpg_r0_c0.png
|
5524 |
+
2025-03-03 18:08:14,451 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_24.jpg_r0_c1.png
|
5525 |
+
2025-03-03 18:08:15,533 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_24.jpg_r1_c0.png
|
5526 |
+
2025-03-03 18:08:17,196 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_24.jpg_r1_c1.png
|
5527 |
+
2025-03-03 18:08:19,105 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_24.jpg_r2_c0.png
|
5528 |
+
2025-03-03 18:08:20,442 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=three
|
5529 |
+
2025-03-03 18:08:23,613 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_25.jpg_r0_c0.png
|
5530 |
+
2025-03-03 18:08:24,927 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_25.jpg_r0_c1.png
|
5531 |
+
2025-03-03 18:08:26,271 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_25.jpg_r0_c2.png
|
5532 |
+
2025-03-03 18:08:27,756 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_25.jpg_r1_c0.png
|
5533 |
+
2025-03-03 18:08:29,059 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_25.jpg_r1_c1.png
|
5534 |
+
2025-03-03 18:08:30,619 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_25.jpg_r1_c2.png
|
5535 |
+
2025-03-03 18:08:32,028 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_25.jpg_r2_c0.png
|
5536 |
+
2025-03-03 18:08:33,702 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_25.jpg_r2_c1.png
|
5537 |
+
2025-03-03 18:08:35,546 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_25.jpg_r3_c0.png
|
5538 |
+
2025-03-03 18:08:37,241 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_25.jpg_r3_c1.png
|
5539 |
+
2025-03-03 18:08:38,602 [INFO] __main__ - Processing table image: /topic-extraction/img_26.jpg, columns=three
|
5540 |
+
2025-03-03 18:08:41,789 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_26.jpg_r0_c0.png
|
5541 |
+
2025-03-03 18:08:42,904 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_26.jpg_r0_c1.png
|
5542 |
+
2025-03-03 18:08:44,299 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_26.jpg_r1_c0.png
|
5543 |
+
2025-03-03 18:08:45,765 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_26.jpg_r1_c1.png
|
5544 |
+
2025-03-03 18:08:47,625 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_26.jpg_r2_c0.png
|
5545 |
+
2025-03-03 18:08:49,450 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_26.jpg_r3_c0.png
|
5546 |
+
2025-03-03 18:08:50,706 [INFO] __main__ - Processing table image: /topic-extraction/img_27.jpg, columns=three
|
5547 |
+
2025-03-03 18:08:53,864 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_27.jpg_r0_c0.png
|
5548 |
+
2025-03-03 18:08:55,294 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_27.jpg_r0_c1.png
|
5549 |
+
2025-03-03 18:08:56,673 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_27.jpg_r1_c0.png
|
5550 |
+
2025-03-03 18:08:58,397 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_27.jpg_r1_c1.png
|
5551 |
+
2025-03-03 18:09:00,147 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_27.jpg_r2_c0.png
|
5552 |
+
2025-03-03 18:09:01,840 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_27.jpg_r3_c0.png
|
5553 |
+
2025-03-03 18:09:03,256 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_27.jpg_r4_c0.png
|
5554 |
+
2025-03-03 18:09:04,820 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_27.jpg_r4_c1.png
|
5555 |
+
2025-03-03 18:09:06,037 [INFO] __main__ - Processing table image: /topic-extraction/img_28.jpg, columns=two
|
5556 |
+
2025-03-03 18:09:09,419 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_28.jpg_r0_c0.png
|
5557 |
+
2025-03-03 18:09:11,243 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_28.jpg_r1_c0.png
|
5558 |
+
2025-03-03 18:09:13,257 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_28.jpg_r2_c0.png
|
5559 |
+
2025-03-03 18:09:15,022 [INFO] __main__ - GPU memory cleaned up.
|
5560 |
+
2025-03-03 18:09:15,023 [ERROR] __main__ - Processing failed: name 'merge_topics' is not defined
|