diff --git a/__pycache__/inference_svm_model.cpython-310.pyc b/__pycache__/inference_svm_model.cpython-310.pyc index c2e5686622214dbe9efb15513dcf4b60e76723b2..14e2dd754c6546390aa7860d2fa5873e83fdce47 100644 Binary files a/__pycache__/inference_svm_model.cpython-310.pyc and b/__pycache__/inference_svm_model.cpython-310.pyc differ diff --git a/__pycache__/mineru_single.cpython-310.pyc b/__pycache__/mineru_single.cpython-310.pyc index 9a3555afde2db516171afdd2fe1a37fac21b291f..78c49f6bbe26e4ff02a0c84dba9a7cb5aaeaf2ce 100644 Binary files a/__pycache__/mineru_single.cpython-310.pyc and b/__pycache__/mineru_single.cpython-310.pyc differ diff --git a/__pycache__/table_row_extraction.cpython-310.pyc b/__pycache__/table_row_extraction.cpython-310.pyc index 3904e2d34c23b3a686eab18c4f92159744a26607..405f53ca7557a0f2248828aefc588fec218033b9 100644 Binary files a/__pycache__/table_row_extraction.cpython-310.pyc and b/__pycache__/table_row_extraction.cpython-310.pyc differ diff --git a/__pycache__/worker.cpython-310.pyc b/__pycache__/worker.cpython-310.pyc index 623e7710ac50bda8f049693fb9fc5f266ce74a01..f03b0f89d0828fa758758d24473ba7e1c791e793 100644 Binary files a/__pycache__/worker.cpython-310.pyc and b/__pycache__/worker.cpython-310.pyc differ diff --git a/input_output/output/final_output.md b/input_output/output/final_output.md new file mode 100644 index 0000000000000000000000000000000000000000..e76aae6db80441944079f37c5eaa49e0a99acb55 --- /dev/null +++ b/input_output/output/final_output.md @@ -0,0 +1,170 @@ +![Row 0 Col 0](images/img_1.png_rows/row_0/col_0.png) +![Row 1 Col 0](images/img_1.png_rows/row_1/col_0.png) +![Row 2 Col 0](images/img_1.png_rows/row_2/col_0.png) +![Row 3 Col 0](images/img_1.png_rows/row_3/col_0.png) +![Row 4 Col 0](images/img_1.png_rows/row_4/col_0.png) +![Row 0 Col 0](images/img_3.png_rows/row_0/col_0.png) +![Row 1 Col 0](images/img_3.png_rows/row_1/col_0.png) +![Row 2 Col 0](images/img_3.png_rows/row_2/col_0.png) +![Row 3 Col 0](images/img_3.png_rows/row_3/col_0.png) +![Row 4 Col 0](images/img_3.png_rows/row_4/col_0.png) +![Row 5 Col 0](images/img_3.png_rows/row_5/col_0.png) +![Row 6 Col 0](images/img_3.png_rows/row_6/col_0.png) +![Row 0 Col 0](images/img_2.png_rows/row_0/col_0.png) +![Row 1 Col 0](images/img_2.png_rows/row_1/col_0.png) +![Row 2 Col 0](images/img_2.png_rows/row_2/col_0.png) +![Row 3 Col 0](images/img_2.png_rows/row_3/col_0.png) +![Row 4 Col 0](images/img_2.png_rows/row_4/col_0.png) +![Row 0 Col 0](images/img_4.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_4.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_4.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_4.png_rows/row_1/col_1.png) +![Row 0 Col 0](images/img_5.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_5.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_5.png_rows/row_1/col_0.png) +![Row 2 Col 0](images/img_5.png_rows/row_2/col_0.png) +![Row 3 Col 0](images/img_5.png_rows/row_3/col_0.png) +![Row 0 Col 0](images/img_6.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_6.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_6.png_rows/row_1/col_0.png) +![Row 0 Col 0](images/img_7.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_7.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_7.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_7.png_rows/row_1/col_1.png) +![Row 0 Col 0](images/img_8.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_8.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_8.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_8.png_rows/row_1/col_1.png) +![Row 2 Col 0](images/img_8.png_rows/row_2/col_0.png) +![Row 0 Col 0](images/img_9.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_9.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_9.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_9.png_rows/row_1/col_1.png) +![Row 0 Col 0](images/img_10.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_10.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_10.png_rows/row_1/col_0.png) +![Row 2 Col 0](images/img_10.png_rows/row_2/col_0.png) +![Row 2 Col 1](images/img_10.png_rows/row_2/col_1.png) +![Row 0 Col 0](images/img_11.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_11.png_rows/row_0/col_1.png) +![Row 0 Col 2](images/img_11.png_rows/row_0/col_2.png) +![Row 1 Col 0](images/img_11.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_11.png_rows/row_1/col_1.png) +![Row 1 Col 2](images/img_11.png_rows/row_1/col_2.png) +![Row 2 Col 0](images/img_11.png_rows/row_2/col_0.png) +![Row 2 Col 1](images/img_11.png_rows/row_2/col_1.png) +![Row 3 Col 0](images/img_11.png_rows/row_3/col_0.png) +![Row 3 Col 1](images/img_11.png_rows/row_3/col_1.png) +![Row 4 Col 0](images/img_11.png_rows/row_4/col_0.png) +![Row 4 Col 1](images/img_11.png_rows/row_4/col_1.png) +![Row 5 Col 0](images/img_11.png_rows/row_5/col_0.png) +![Row 5 Col 1](images/img_11.png_rows/row_5/col_1.png) +![Row 0 Col 0](images/img_12.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_12.png_rows/row_0/col_1.png) +![Row 0 Col 2](images/img_12.png_rows/row_0/col_2.png) +![Row 1 Col 0](images/img_12.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_12.png_rows/row_1/col_1.png) +![Row 2 Col 0](images/img_12.png_rows/row_2/col_0.png) +![Row 2 Col 1](images/img_12.png_rows/row_2/col_1.png) +![Row 3 Col 0](images/img_12.png_rows/row_3/col_0.png) +![Row 3 Col 1](images/img_12.png_rows/row_3/col_1.png) +![Row 0 Col 0](images/img_13.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_13.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_13.png_rows/row_1/col_0.png) +![Row 2 Col 0](images/img_13.png_rows/row_2/col_0.png) +![Row 3 Col 0](images/img_13.png_rows/row_3/col_0.png) +![Row 0 Col 0](images/img_14.png_rows/row_0/col_0.png) +![Row 1 Col 0](images/img_14.png_rows/row_1/col_0.png) +![Row 2 Col 0](images/img_14.png_rows/row_2/col_0.png) +![Row 3 Col 0](images/img_14.png_rows/row_3/col_0.png) +![Row 4 Col 0](images/img_14.png_rows/row_4/col_0.png) +![Row 5 Col 0](images/img_14.png_rows/row_5/col_0.png) +![Row 0 Col 0](images/img_15.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_15.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_15.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_15.png_rows/row_1/col_1.png) +![Row 2 Col 0](images/img_15.png_rows/row_2/col_0.png) +![Row 2 Col 1](images/img_15.png_rows/row_2/col_1.png) +![Row 0 Col 0](images/img_16.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_16.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_16.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_16.png_rows/row_1/col_1.png) +![Row 2 Col 0](images/img_16.png_rows/row_2/col_0.png) +![Row 3 Col 0](images/img_16.png_rows/row_3/col_0.png) +![Row 0 Col 0](images/img_17.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_17.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_17.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_17.png_rows/row_1/col_1.png) +![Row 2 Col 0](images/img_17.png_rows/row_2/col_0.png) +![Row 3 Col 0](images/img_17.png_rows/row_3/col_0.png) +![Row 4 Col 0](images/img_17.png_rows/row_4/col_0.png) +![Row 4 Col 1](images/img_17.png_rows/row_4/col_1.png) +![Row 5 Col 0](images/img_17.png_rows/row_5/col_0.png) +![Row 0 Col 0](images/img_18.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_18.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_18.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_18.png_rows/row_1/col_1.png) +![Row 2 Col 0](images/img_18.png_rows/row_2/col_0.png) +![Row 3 Col 0](images/img_18.png_rows/row_3/col_0.png) +![Row 4 Col 0](images/img_18.png_rows/row_4/col_0.png) +![Row 0 Col 0](images/img_19.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_19.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_19.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_19.png_rows/row_1/col_1.png) +![Row 2 Col 0](images/img_19.png_rows/row_2/col_0.png) +![Row 3 Col 0](images/img_19.png_rows/row_3/col_0.png) +![Row 3 Col 1](images/img_19.png_rows/row_3/col_1.png) +![Row 4 Col 0](images/img_19.png_rows/row_4/col_0.png) +![Row 5 Col 0](images/img_19.png_rows/row_5/col_0.png) +![Row 0 Col 0](images/img_20.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_20.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_20.png_rows/row_1/col_0.png) +![Row 2 Col 0](images/img_20.png_rows/row_2/col_0.png) +![Row 2 Col 1](images/img_20.png_rows/row_2/col_1.png) +![Row 3 Col 0](images/img_20.png_rows/row_3/col_0.png) +![Row 4 Col 0](images/img_20.png_rows/row_4/col_0.png) +![Row 5 Col 0](images/img_20.png_rows/row_5/col_0.png) +![Row 0 Col 0](images/img_21.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_21.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_21.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_21.png_rows/row_1/col_1.png) +![Row 0 Col 0](images/img_22.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_22.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_22.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_22.png_rows/row_1/col_1.png) +![Row 2 Col 0](images/img_22.png_rows/row_2/col_0.png) +![Row 2 Col 1](images/img_22.png_rows/row_2/col_1.png) +![Row 0 Col 0](images/img_23.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_23.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_23.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_23.png_rows/row_1/col_1.png) +![Row 0 Col 0](images/img_24.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_24.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_24.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_24.png_rows/row_1/col_1.png) +![Row 0 Col 0](images/img_25.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_25.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_25.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_25.png_rows/row_1/col_1.png) +![Row 2 Col 0](images/img_25.png_rows/row_2/col_0.png) +![Row 2 Col 1](images/img_25.png_rows/row_2/col_1.png) +![Row 3 Col 0](images/img_25.png_rows/row_3/col_0.png) +![Row 0 Col 0](images/img_26.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_26.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_26.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_26.png_rows/row_1/col_1.png) +![Row 0 Col 0](images/img_27.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_27.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_27.png_rows/row_1/col_0.png) +![Row 1 Col 1](images/img_27.png_rows/row_1/col_1.png) +![Row 2 Col 0](images/img_27.png_rows/row_2/col_0.png) +![Row 0 Col 0](images/img_28.png_rows/row_0/col_0.png) +![Row 1 Col 0](images/img_28.png_rows/row_1/col_0.png) +![Row 2 Col 0](images/img_28.png_rows/row_2/col_0.png) +![Row 3 Col 0](images/img_28.png_rows/row_3/col_0.png) +![Row 4 Col 0](images/img_28.png_rows/row_4/col_0.png) +![Row 5 Col 0](images/img_28.png_rows/row_5/col_0.png) +![Row 0 Col 0](images/img_29.png_rows/row_0/col_0.png) +![Row 0 Col 1](images/img_29.png_rows/row_0/col_1.png) +![Row 1 Col 0](images/img_29.png_rows/row_1/col_0.png) +![Row 2 Col 0](images/img_29.png_rows/row_2/col_0.png) \ No newline at end of file diff --git a/input_output/output/images/img_1.png b/input_output/output/images/img_1.png new file mode 100644 index 0000000000000000000000000000000000000000..d1136c6ec94dcede46ceebc1a50675767ca50e50 Binary files /dev/null and b/input_output/output/images/img_1.png differ diff --git a/input_output/output/images/img_1.png_rows/row_0/col_0.png b/input_output/output/images/img_1.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..55fdf0cc473851a61ad3d7fb4b1b2dafde67966b Binary files /dev/null and b/input_output/output/images/img_1.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_1.png_rows/row_1/col_0.png b/input_output/output/images/img_1.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..c7bce6f517a14afff66dfa8df16d8c4f87eed9b0 Binary files /dev/null and b/input_output/output/images/img_1.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_1.png_rows/row_2/col_0.png b/input_output/output/images/img_1.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..d347e9997a821a3cbe25d55da5298f5cb36bae32 Binary files /dev/null and b/input_output/output/images/img_1.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_1.png_rows/row_3/col_0.png b/input_output/output/images/img_1.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..a7edd24b9ad65b8d133f9b8633031d13517de654 Binary files /dev/null and b/input_output/output/images/img_1.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_1.png_rows/row_4/col_0.png b/input_output/output/images/img_1.png_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..dda779a366f90248a32792d135a85e65cc932397 Binary files /dev/null and b/input_output/output/images/img_1.png_rows/row_4/col_0.png differ diff --git a/input_output/output/images/img_10.png b/input_output/output/images/img_10.png new file mode 100644 index 0000000000000000000000000000000000000000..62265fabd71cebd0cbbc3a95a72223af0dae9d3a Binary files /dev/null and b/input_output/output/images/img_10.png differ diff --git a/input_output/output/images/img_10.png_rows/row_0/col_0.png b/input_output/output/images/img_10.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..07e92d3d4896ce937f1e5782e9d06ab289bdbfaf Binary files /dev/null and b/input_output/output/images/img_10.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_10.png_rows/row_0/col_1.png b/input_output/output/images/img_10.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..5c0ed14732d76945b6d87027872c44d1294a55e5 Binary files /dev/null and b/input_output/output/images/img_10.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_10.png_rows/row_1/col_0.png b/input_output/output/images/img_10.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..16746135d02d76403d7ec85c31278cf4bcebb97b Binary files /dev/null and b/input_output/output/images/img_10.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_10.png_rows/row_2/col_0.png b/input_output/output/images/img_10.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..6e2ba1294e2f408ef63515082862e41e5da5df49 Binary files /dev/null and b/input_output/output/images/img_10.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_10.png_rows/row_2/col_1.png b/input_output/output/images/img_10.png_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..9f0d9f538fac95baf72307da8cfb33717608b3b1 Binary files /dev/null and b/input_output/output/images/img_10.png_rows/row_2/col_1.png differ diff --git a/input_output/output/images/img_11.png b/input_output/output/images/img_11.png new file mode 100644 index 0000000000000000000000000000000000000000..c7f8f61c781d1af176af1a1d208c308730a77e63 Binary files /dev/null and b/input_output/output/images/img_11.png differ diff --git a/input_output/output/images/img_11.png_rows/row_0/col_0.png b/input_output/output/images/img_11.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..5c1f835fc680133dfd618ebe2fec0ef420c698cf Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_11.png_rows/row_0/col_1.png b/input_output/output/images/img_11.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..0136828af8bd0f803fc1a91781b5888d2e3b9a24 Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_11.png_rows/row_0/col_2.png b/input_output/output/images/img_11.png_rows/row_0/col_2.png new file mode 100644 index 0000000000000000000000000000000000000000..611fc15c42a338e6a9bb73953e97bf43d0553fde Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_0/col_2.png differ diff --git a/input_output/output/images/img_11.png_rows/row_1/col_0.png b/input_output/output/images/img_11.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..e7257506e4e6b7ab40352b12cd3477ae8452aec4 Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_11.png_rows/row_1/col_1.png b/input_output/output/images/img_11.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..f23ea460fb2789f513ef6f257e10dc8dc5d3a923 Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_11.png_rows/row_1/col_2.png b/input_output/output/images/img_11.png_rows/row_1/col_2.png new file mode 100644 index 0000000000000000000000000000000000000000..9d761e2b90b2d9e1632a370c1ef7d0593da4b9a7 Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_1/col_2.png differ diff --git a/input_output/output/images/img_11.png_rows/row_2/col_0.png b/input_output/output/images/img_11.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..8190f8f5b9d13764960ca6f16a66c479fcec7953 Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_11.png_rows/row_2/col_1.png b/input_output/output/images/img_11.png_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..fe84fed74cb805b3dee88ae7032400c28e8f5b6e Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_2/col_1.png differ diff --git a/input_output/output/images/img_11.png_rows/row_3/col_0.png b/input_output/output/images/img_11.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..b9daf99a0617d8c7a3b671c6d8a5902ea88c0172 Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_11.png_rows/row_3/col_1.png b/input_output/output/images/img_11.png_rows/row_3/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..e96a186e6b1787040bbe924aaab20d4b9a0e8d6f Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_3/col_1.png differ diff --git a/input_output/output/images/img_11.png_rows/row_4/col_0.png b/input_output/output/images/img_11.png_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..125c91f2551c117f499b9ccacf3d654c23a2eebe Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_4/col_0.png differ diff --git a/input_output/output/images/img_11.png_rows/row_4/col_1.png b/input_output/output/images/img_11.png_rows/row_4/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..d987462a61782d0e4f3bb4f43827ef2d8dd40e3e Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_4/col_1.png differ diff --git a/input_output/output/images/img_11.png_rows/row_5/col_0.png b/input_output/output/images/img_11.png_rows/row_5/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..caea0b519da35482dede79485696e586ac481103 Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_5/col_0.png differ diff --git a/input_output/output/images/img_11.png_rows/row_5/col_1.png b/input_output/output/images/img_11.png_rows/row_5/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..83bf3f65e91b95ff1632222978c17640d1339f52 Binary files /dev/null and b/input_output/output/images/img_11.png_rows/row_5/col_1.png differ diff --git a/input_output/output/images/img_12.png b/input_output/output/images/img_12.png new file mode 100644 index 0000000000000000000000000000000000000000..0e0c5a6a6deaa2548ad97f53e5747d11d8bc21cc Binary files /dev/null and b/input_output/output/images/img_12.png differ diff --git a/input_output/output/images/img_12.png_rows/row_0/col_0.png b/input_output/output/images/img_12.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..75cf6af5bfc97f1e9cf7e78f77dc232929a2d822 Binary files /dev/null and b/input_output/output/images/img_12.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_12.png_rows/row_0/col_1.png b/input_output/output/images/img_12.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..b6f32c9badda695415521c860d6d30465319772c Binary files /dev/null and b/input_output/output/images/img_12.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_12.png_rows/row_0/col_2.png b/input_output/output/images/img_12.png_rows/row_0/col_2.png new file mode 100644 index 0000000000000000000000000000000000000000..47e497e30cb420488e259425f825e8a86a4be4a6 Binary files /dev/null and b/input_output/output/images/img_12.png_rows/row_0/col_2.png differ diff --git a/input_output/output/images/img_12.png_rows/row_1/col_0.png b/input_output/output/images/img_12.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..18badb90924b08fb59c67b0bf6f89001d9de1c5e Binary files /dev/null and b/input_output/output/images/img_12.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_12.png_rows/row_1/col_1.png b/input_output/output/images/img_12.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..e7849c2219a018146ebe5fe38adc893260defea5 Binary files /dev/null and b/input_output/output/images/img_12.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_12.png_rows/row_2/col_0.png b/input_output/output/images/img_12.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..3d1ad2fcc29169b5c70e6298750c0b1b83670c43 Binary files /dev/null and b/input_output/output/images/img_12.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_12.png_rows/row_2/col_1.png b/input_output/output/images/img_12.png_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..bf0eae79a8b54916fab6e943ff26fb1e6e5db1e4 Binary files /dev/null and b/input_output/output/images/img_12.png_rows/row_2/col_1.png differ diff --git a/input_output/output/images/img_12.png_rows/row_3/col_0.png b/input_output/output/images/img_12.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..0732583e28d02f28f3bb7cee11d3dae71fc31b15 Binary files /dev/null and b/input_output/output/images/img_12.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_12.png_rows/row_3/col_1.png b/input_output/output/images/img_12.png_rows/row_3/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..0366c7a6fe78f482f5adf418a2aea192c09a57f8 Binary files /dev/null and b/input_output/output/images/img_12.png_rows/row_3/col_1.png differ diff --git a/input_output/output/images/img_13.png b/input_output/output/images/img_13.png new file mode 100644 index 0000000000000000000000000000000000000000..477396d69ba38c7ed708c1385864cafd922d1827 Binary files /dev/null and b/input_output/output/images/img_13.png differ diff --git a/input_output/output/images/img_13.png_rows/row_0/col_0.png b/input_output/output/images/img_13.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..26537b046972529528899d31bc4400f298bd22b0 Binary files /dev/null and b/input_output/output/images/img_13.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_13.png_rows/row_0/col_1.png b/input_output/output/images/img_13.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..dc7a83f30102732b630e03d6a2e61e6ce321b994 Binary files /dev/null and b/input_output/output/images/img_13.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_13.png_rows/row_1/col_0.png b/input_output/output/images/img_13.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..cfdd9efe4bf99a6a83411e9b94b98eedf626ac54 Binary files /dev/null and b/input_output/output/images/img_13.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_13.png_rows/row_2/col_0.png b/input_output/output/images/img_13.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..3b45bf1a45620aca8f27e8c7daf89ae919a38af4 Binary files /dev/null and b/input_output/output/images/img_13.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_13.png_rows/row_3/col_0.png b/input_output/output/images/img_13.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..7b28fc9c8ae796c35442a9352552386006ed42f1 Binary files /dev/null and b/input_output/output/images/img_13.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_14.png b/input_output/output/images/img_14.png new file mode 100644 index 0000000000000000000000000000000000000000..8129a372a50354bbb0c31cfd35e60b0d5c5442f2 Binary files /dev/null and b/input_output/output/images/img_14.png differ diff --git a/input_output/output/images/img_14.png_rows/row_0/col_0.png b/input_output/output/images/img_14.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..7f852785ff31389c4c143d9b38c33c92d584f17b Binary files /dev/null and b/input_output/output/images/img_14.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_14.png_rows/row_1/col_0.png b/input_output/output/images/img_14.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..5b9b0ea20a6fa3b878f3450634eab82d445f190d Binary files /dev/null and b/input_output/output/images/img_14.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_14.png_rows/row_2/col_0.png b/input_output/output/images/img_14.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..d7e08e40ddc5463301f55f3bf81f819276fb3e0a Binary files /dev/null and b/input_output/output/images/img_14.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_14.png_rows/row_3/col_0.png b/input_output/output/images/img_14.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..ac389b4f121b3ea01fcf29ad14cddf6d615f180b Binary files /dev/null and b/input_output/output/images/img_14.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_14.png_rows/row_4/col_0.png b/input_output/output/images/img_14.png_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..0af51308d6e73eb3ad78dad9afbc6c6dc5f9a4c8 Binary files /dev/null and b/input_output/output/images/img_14.png_rows/row_4/col_0.png differ diff --git a/input_output/output/images/img_14.png_rows/row_5/col_0.png b/input_output/output/images/img_14.png_rows/row_5/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..470b619e96352c26a400c0f5e41320904c77efba Binary files /dev/null and b/input_output/output/images/img_14.png_rows/row_5/col_0.png differ diff --git a/input_output/output/images/img_15.png b/input_output/output/images/img_15.png new file mode 100644 index 0000000000000000000000000000000000000000..cdff005c1726624012e65ef41a73583310c168b3 Binary files /dev/null and b/input_output/output/images/img_15.png differ diff --git a/input_output/output/images/img_15.png_rows/row_0/col_0.png b/input_output/output/images/img_15.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..1775bde2170a565216b62b821cbe612ff8a5a9ab Binary files /dev/null and b/input_output/output/images/img_15.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_15.png_rows/row_0/col_1.png b/input_output/output/images/img_15.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..682f7963b4e84196b7f449ece97f228b22e893cc Binary files /dev/null and b/input_output/output/images/img_15.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_15.png_rows/row_1/col_0.png b/input_output/output/images/img_15.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..4ca08524c8c80015e252be243d08d8c2dddf43e8 Binary files /dev/null and b/input_output/output/images/img_15.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_15.png_rows/row_1/col_1.png b/input_output/output/images/img_15.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..ab3c69b0c0524d0a04c6f9cce3391400b41a0ca2 Binary files /dev/null and b/input_output/output/images/img_15.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_15.png_rows/row_2/col_0.png b/input_output/output/images/img_15.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..25edc6ae617976a3b147b957f073f54fe3523f7e Binary files /dev/null and b/input_output/output/images/img_15.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_15.png_rows/row_2/col_1.png b/input_output/output/images/img_15.png_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..9cf1cd5c2fd92fb11e7024c4138e1a67d1d6cb5a Binary files /dev/null and b/input_output/output/images/img_15.png_rows/row_2/col_1.png differ diff --git a/input_output/output/images/img_16.png b/input_output/output/images/img_16.png new file mode 100644 index 0000000000000000000000000000000000000000..be5654992ea9eeb595a0b518041592ed94d8c1d8 Binary files /dev/null and b/input_output/output/images/img_16.png differ diff --git a/input_output/output/images/img_16.png_rows/row_0/col_0.png b/input_output/output/images/img_16.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..54e835fc7fda8b76658eb9732756c4f0a2e31fe1 Binary files /dev/null and b/input_output/output/images/img_16.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_16.png_rows/row_0/col_1.png b/input_output/output/images/img_16.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..a800de22d2e8583dd9f72a5de826d64cc6934548 Binary files /dev/null and b/input_output/output/images/img_16.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_16.png_rows/row_1/col_0.png b/input_output/output/images/img_16.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..6995e9eda17f7344e635d8fa6a665553967e67bf Binary files /dev/null and b/input_output/output/images/img_16.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_16.png_rows/row_1/col_1.png b/input_output/output/images/img_16.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..cb83b1971ce1cd6af17c8ebf6077966235aaf6d4 Binary files /dev/null and b/input_output/output/images/img_16.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_16.png_rows/row_2/col_0.png b/input_output/output/images/img_16.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..10356ac1f58485a3c71facfd31aeab0abc7d3d02 Binary files /dev/null and b/input_output/output/images/img_16.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_16.png_rows/row_3/col_0.png b/input_output/output/images/img_16.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..0159a2d1a73906347ce9906b8c218736134e8f54 Binary files /dev/null and b/input_output/output/images/img_16.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_17.png b/input_output/output/images/img_17.png new file mode 100644 index 0000000000000000000000000000000000000000..3006672c401f342a74b9094002f0cb90deae2a65 Binary files /dev/null and b/input_output/output/images/img_17.png differ diff --git a/input_output/output/images/img_17.png_rows/row_0/col_0.png b/input_output/output/images/img_17.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..50c6e51ed7dd096db7325aff304f4f6ffd86107d Binary files /dev/null and b/input_output/output/images/img_17.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_17.png_rows/row_0/col_1.png b/input_output/output/images/img_17.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..1477e2ba7cb40c82f06451d6493eb2b81bbe7b95 Binary files /dev/null and b/input_output/output/images/img_17.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_17.png_rows/row_1/col_0.png b/input_output/output/images/img_17.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..08f9ab77eab81088823b924f88dc297cfb5b2d82 Binary files /dev/null and b/input_output/output/images/img_17.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_17.png_rows/row_1/col_1.png b/input_output/output/images/img_17.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..0cbc6f9597361c588b12e3ec5b4756b6ecafbdf6 Binary files /dev/null and b/input_output/output/images/img_17.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_17.png_rows/row_2/col_0.png b/input_output/output/images/img_17.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..3770dde72e1879a179f1f55a0b9ae8f78198281a Binary files /dev/null and b/input_output/output/images/img_17.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_17.png_rows/row_3/col_0.png b/input_output/output/images/img_17.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..2fc24d45b98c5135cfda0594a883c7abac23f046 Binary files /dev/null and b/input_output/output/images/img_17.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_17.png_rows/row_4/col_0.png b/input_output/output/images/img_17.png_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..f1cf0db022ba7108229ce76e337bfd434c5f44f3 Binary files /dev/null and b/input_output/output/images/img_17.png_rows/row_4/col_0.png differ diff --git a/input_output/output/images/img_17.png_rows/row_4/col_1.png b/input_output/output/images/img_17.png_rows/row_4/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..442ded075c10bfc147f513cc8aa1c96457144f1e Binary files /dev/null and b/input_output/output/images/img_17.png_rows/row_4/col_1.png differ diff --git a/input_output/output/images/img_17.png_rows/row_5/col_0.png b/input_output/output/images/img_17.png_rows/row_5/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..bbfcc51c21e3daa2e2a994ab8b966f59488062c7 Binary files /dev/null and b/input_output/output/images/img_17.png_rows/row_5/col_0.png differ diff --git a/input_output/output/images/img_18.png b/input_output/output/images/img_18.png new file mode 100644 index 0000000000000000000000000000000000000000..2baa70803cef6e98985736e198a56d44a40626ab Binary files /dev/null and b/input_output/output/images/img_18.png differ diff --git a/input_output/output/images/img_18.png_rows/row_0/col_0.png b/input_output/output/images/img_18.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..54e835fc7fda8b76658eb9732756c4f0a2e31fe1 Binary files /dev/null and b/input_output/output/images/img_18.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_18.png_rows/row_0/col_1.png b/input_output/output/images/img_18.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..c2f19c57d8268d7a3937fe6edf0ebc0b84f694ff Binary files /dev/null and b/input_output/output/images/img_18.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_18.png_rows/row_1/col_0.png b/input_output/output/images/img_18.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..1a54e55461bd2545391c43f8b3735aa2c1be8832 Binary files /dev/null and b/input_output/output/images/img_18.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_18.png_rows/row_1/col_1.png b/input_output/output/images/img_18.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..df85e02c599bc4c53b15a07172375f46c24032f5 Binary files /dev/null and b/input_output/output/images/img_18.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_18.png_rows/row_2/col_0.png b/input_output/output/images/img_18.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..a80f09fd932d3373e8461e620c23a790ef2c8112 Binary files /dev/null and b/input_output/output/images/img_18.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_18.png_rows/row_3/col_0.png b/input_output/output/images/img_18.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..27e00f6d1e649901b041aedc3bfee9a1c125ff72 Binary files /dev/null and b/input_output/output/images/img_18.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_18.png_rows/row_4/col_0.png b/input_output/output/images/img_18.png_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..48b1b79cd6b31320ee8b8f542632fbe75f8ac691 Binary files /dev/null and b/input_output/output/images/img_18.png_rows/row_4/col_0.png differ diff --git a/input_output/output/images/img_19.png b/input_output/output/images/img_19.png new file mode 100644 index 0000000000000000000000000000000000000000..f21e9711639a5ae0acd518eb9ff3b0ad95e18ebf Binary files /dev/null and b/input_output/output/images/img_19.png differ diff --git a/input_output/output/images/img_19.png_rows/row_0/col_0.png b/input_output/output/images/img_19.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..b9daca8820847089338fad518fa021684ef3c02f Binary files /dev/null and b/input_output/output/images/img_19.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_19.png_rows/row_0/col_1.png b/input_output/output/images/img_19.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..2cccb9e0fe5934acb673aba90920f2fbe2710d9a Binary files /dev/null and b/input_output/output/images/img_19.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_19.png_rows/row_1/col_0.png b/input_output/output/images/img_19.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..90557039a5dfea53a0d22ec5f97ae23c4a1fa84f Binary files /dev/null and b/input_output/output/images/img_19.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_19.png_rows/row_1/col_1.png b/input_output/output/images/img_19.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..b6d323cae60d093d7782adcc31c18d356344e365 Binary files /dev/null and b/input_output/output/images/img_19.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_19.png_rows/row_2/col_0.png b/input_output/output/images/img_19.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..c4b9f4443e52ada4fe2833532df203aef61edca7 Binary files /dev/null and b/input_output/output/images/img_19.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_19.png_rows/row_3/col_0.png b/input_output/output/images/img_19.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..c7fbdd450463483bda559e2097f108894c415018 Binary files /dev/null and b/input_output/output/images/img_19.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_19.png_rows/row_3/col_1.png b/input_output/output/images/img_19.png_rows/row_3/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..9ca527e500a7dbce6f6adcd383a408de8bd2aace Binary files /dev/null and b/input_output/output/images/img_19.png_rows/row_3/col_1.png differ diff --git a/input_output/output/images/img_19.png_rows/row_4/col_0.png b/input_output/output/images/img_19.png_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..90d84556cd5c8c97246fea7ad735e7ae2c606940 Binary files /dev/null and b/input_output/output/images/img_19.png_rows/row_4/col_0.png differ diff --git a/input_output/output/images/img_19.png_rows/row_5/col_0.png b/input_output/output/images/img_19.png_rows/row_5/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..0a4acc3778c6bfe0320807ad11110b0fc4388cd2 Binary files /dev/null and b/input_output/output/images/img_19.png_rows/row_5/col_0.png differ diff --git a/input_output/output/images/img_2.png b/input_output/output/images/img_2.png new file mode 100644 index 0000000000000000000000000000000000000000..ba3ea2a7627df08a54824b945fe822cfe168400f Binary files /dev/null and b/input_output/output/images/img_2.png differ diff --git a/input_output/output/images/img_2.png_rows/row_0/col_0.png b/input_output/output/images/img_2.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..246566cdc91ac362cb222fd87ed467512bded6f7 Binary files /dev/null and b/input_output/output/images/img_2.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_2.png_rows/row_1/col_0.png b/input_output/output/images/img_2.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..0f7164dd1bb9edd66c0d3f1bccbd9001c933a499 Binary files /dev/null and b/input_output/output/images/img_2.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_2.png_rows/row_2/col_0.png b/input_output/output/images/img_2.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..b24b4f0a976890f34bf3088b33dc44e900bc51ba Binary files /dev/null and b/input_output/output/images/img_2.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_2.png_rows/row_3/col_0.png b/input_output/output/images/img_2.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..d15bef47e48c9863e0f54b838230b72c31e332be Binary files /dev/null and b/input_output/output/images/img_2.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_2.png_rows/row_4/col_0.png b/input_output/output/images/img_2.png_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..17942bcec143e04deaa23541ad75016e61986238 Binary files /dev/null and b/input_output/output/images/img_2.png_rows/row_4/col_0.png differ diff --git a/input_output/output/images/img_20.png b/input_output/output/images/img_20.png new file mode 100644 index 0000000000000000000000000000000000000000..a90ac42ae9986fb54db8c0dac074c9a2c5af3573 Binary files /dev/null and b/input_output/output/images/img_20.png differ diff --git a/input_output/output/images/img_20.png_rows/row_0/col_0.png b/input_output/output/images/img_20.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..59185c8c6eaa964d669fe104752a3746c6c95cde Binary files /dev/null and b/input_output/output/images/img_20.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_20.png_rows/row_0/col_1.png b/input_output/output/images/img_20.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..7de0f485e84753f996afefecdf2ac518811ec0cd Binary files /dev/null and b/input_output/output/images/img_20.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_20.png_rows/row_1/col_0.png b/input_output/output/images/img_20.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..2d5890555351629532c2812d263cbb6ed661813b Binary files /dev/null and b/input_output/output/images/img_20.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_20.png_rows/row_2/col_0.png b/input_output/output/images/img_20.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..84ba6a923104f1cc50677b2808886f6738927db1 Binary files /dev/null and b/input_output/output/images/img_20.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_20.png_rows/row_2/col_1.png b/input_output/output/images/img_20.png_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..0e8a1180045b6e0415fdbb5b92d92af4826e0c3e Binary files /dev/null and b/input_output/output/images/img_20.png_rows/row_2/col_1.png differ diff --git a/input_output/output/images/img_20.png_rows/row_3/col_0.png b/input_output/output/images/img_20.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..dcc96ae0ddb397a69ab9d91ee264598a3b58f358 Binary files /dev/null and b/input_output/output/images/img_20.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_20.png_rows/row_4/col_0.png b/input_output/output/images/img_20.png_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..db30f63ca320da95f7ac276b79696d544cb09284 Binary files /dev/null and b/input_output/output/images/img_20.png_rows/row_4/col_0.png differ diff --git a/input_output/output/images/img_20.png_rows/row_5/col_0.png b/input_output/output/images/img_20.png_rows/row_5/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..1c546a467901a44a8f74b604fab5ced98b312723 Binary files /dev/null and b/input_output/output/images/img_20.png_rows/row_5/col_0.png differ diff --git a/input_output/output/images/img_21.png b/input_output/output/images/img_21.png new file mode 100644 index 0000000000000000000000000000000000000000..28db5fa7ebb46819d5129d7afd1eac8f398c862f Binary files /dev/null and b/input_output/output/images/img_21.png differ diff --git a/input_output/output/images/img_21.png_rows/row_0/col_0.png b/input_output/output/images/img_21.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..54e835fc7fda8b76658eb9732756c4f0a2e31fe1 Binary files /dev/null and b/input_output/output/images/img_21.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_21.png_rows/row_0/col_1.png b/input_output/output/images/img_21.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..45ae4a2e3228dfc8fdc2114137951e2065a42084 Binary files /dev/null and b/input_output/output/images/img_21.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_21.png_rows/row_1/col_0.png b/input_output/output/images/img_21.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..dd31a8c27a82da3de67c83c61149a689513c58e1 Binary files /dev/null and b/input_output/output/images/img_21.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_21.png_rows/row_1/col_1.png b/input_output/output/images/img_21.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..3fab4c63fe85d2a5583f9031ade0c198d4a8c7d9 Binary files /dev/null and b/input_output/output/images/img_21.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_22.png b/input_output/output/images/img_22.png new file mode 100644 index 0000000000000000000000000000000000000000..9c631015fa075ded8a2d91eebb3fb6afd1c69836 Binary files /dev/null and b/input_output/output/images/img_22.png differ diff --git a/input_output/output/images/img_22.png_rows/row_0/col_0.png b/input_output/output/images/img_22.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..542e040e2ab5230775c46b07faaf56a875c2c71d Binary files /dev/null and b/input_output/output/images/img_22.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_22.png_rows/row_0/col_1.png b/input_output/output/images/img_22.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..bae682b04e93a5bf3289a55e64227d252b3f3da2 Binary files /dev/null and b/input_output/output/images/img_22.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_22.png_rows/row_1/col_0.png b/input_output/output/images/img_22.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..31fe1e1b68010cd2c40d910196866d8632d0c781 Binary files /dev/null and b/input_output/output/images/img_22.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_22.png_rows/row_1/col_1.png b/input_output/output/images/img_22.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..564af1f7ec0b600300da698270ca7cc5a3891598 Binary files /dev/null and b/input_output/output/images/img_22.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_22.png_rows/row_2/col_0.png b/input_output/output/images/img_22.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..4a388a3a0663a27fffe8086e97dad12b23f2152e Binary files /dev/null and b/input_output/output/images/img_22.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_22.png_rows/row_2/col_1.png b/input_output/output/images/img_22.png_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..1f77be6f3aa1f5c738808cd3031ab759c6d986ff Binary files /dev/null and b/input_output/output/images/img_22.png_rows/row_2/col_1.png differ diff --git a/input_output/output/images/img_23.png b/input_output/output/images/img_23.png new file mode 100644 index 0000000000000000000000000000000000000000..6af117b59207c1ed6f5f47828e097dcf42aed539 Binary files /dev/null and b/input_output/output/images/img_23.png differ diff --git a/input_output/output/images/img_23.png_rows/row_0/col_0.png b/input_output/output/images/img_23.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..80b1020e24d835450c76f556cfac8032f5688fa5 Binary files /dev/null and b/input_output/output/images/img_23.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_23.png_rows/row_0/col_1.png b/input_output/output/images/img_23.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..11d064d5cf5186d32d2e072418b8549a7c935129 Binary files /dev/null and b/input_output/output/images/img_23.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_23.png_rows/row_1/col_0.png b/input_output/output/images/img_23.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..503e017e826056707f174e42a105c2bc3492ad12 Binary files /dev/null and b/input_output/output/images/img_23.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_23.png_rows/row_1/col_1.png b/input_output/output/images/img_23.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..9d51a48a5e944a79bce3f6f4114cc963b9f44904 Binary files /dev/null and b/input_output/output/images/img_23.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_24.png b/input_output/output/images/img_24.png new file mode 100644 index 0000000000000000000000000000000000000000..ac4c9c02344ce09dc7d290320c642f0e341238d3 Binary files /dev/null and b/input_output/output/images/img_24.png differ diff --git a/input_output/output/images/img_24.png_rows/row_0/col_0.png b/input_output/output/images/img_24.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..9a2a7a39fdfffbead06604019a82e4c00f89d3fc Binary files /dev/null and b/input_output/output/images/img_24.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_24.png_rows/row_0/col_1.png b/input_output/output/images/img_24.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..d29815729d9c055d508ec5fae87ea203fc58fba2 Binary files /dev/null and b/input_output/output/images/img_24.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_24.png_rows/row_1/col_0.png b/input_output/output/images/img_24.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..46c67ec9ec13925c0dc8587830a0e81223e01f3a Binary files /dev/null and b/input_output/output/images/img_24.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_24.png_rows/row_1/col_1.png b/input_output/output/images/img_24.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..9ca59f8e9962e17473aa785d8fd8aee834d53dcb Binary files /dev/null and b/input_output/output/images/img_24.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_25.png b/input_output/output/images/img_25.png new file mode 100644 index 0000000000000000000000000000000000000000..3250e2fc874639f030b04d017a99fce4a792cc41 Binary files /dev/null and b/input_output/output/images/img_25.png differ diff --git a/input_output/output/images/img_25.png_rows/row_0/col_0.png b/input_output/output/images/img_25.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..25fc81dd2c0f04fa3c4aaa5026096df57fd42d9e Binary files /dev/null and b/input_output/output/images/img_25.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_25.png_rows/row_0/col_1.png b/input_output/output/images/img_25.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..e10fda6ae7f5c12a7f10e035e8c323ba5f8ba24d Binary files /dev/null and b/input_output/output/images/img_25.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_25.png_rows/row_1/col_0.png b/input_output/output/images/img_25.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..fe767c8364896e8467ca47cc9bed22a5b85816ab Binary files /dev/null and b/input_output/output/images/img_25.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_25.png_rows/row_1/col_1.png b/input_output/output/images/img_25.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..6f0e6841a49a32f309bf98ef1002aca3858c5278 Binary files /dev/null and b/input_output/output/images/img_25.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_25.png_rows/row_2/col_0.png b/input_output/output/images/img_25.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..77c0a6073ae3b39dce0f18eed86c744f2ce32afc Binary files /dev/null and b/input_output/output/images/img_25.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_25.png_rows/row_2/col_1.png b/input_output/output/images/img_25.png_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..33b9590288cf74040ddcf34944b7f182af70fbda Binary files /dev/null and b/input_output/output/images/img_25.png_rows/row_2/col_1.png differ diff --git a/input_output/output/images/img_25.png_rows/row_3/col_0.png b/input_output/output/images/img_25.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..4bab81fc617a554e49a7a8e6d87ecca9d527b819 Binary files /dev/null and b/input_output/output/images/img_25.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_26.png b/input_output/output/images/img_26.png new file mode 100644 index 0000000000000000000000000000000000000000..bec7bb6d80922f098aafaff1b72d4f798d1a44a2 Binary files /dev/null and b/input_output/output/images/img_26.png differ diff --git a/input_output/output/images/img_26.png_rows/row_0/col_0.png b/input_output/output/images/img_26.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..925b06a520ceea02b32f1dcb0dbcdadf83495b6a Binary files /dev/null and b/input_output/output/images/img_26.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_26.png_rows/row_0/col_1.png b/input_output/output/images/img_26.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..6b92bd4090fc43f249d13bf2f4b4e0a370706e78 Binary files /dev/null and b/input_output/output/images/img_26.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_26.png_rows/row_1/col_0.png b/input_output/output/images/img_26.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..f21ea39e738ab6fd67457079d74a011ada749e75 Binary files /dev/null and b/input_output/output/images/img_26.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_26.png_rows/row_1/col_1.png b/input_output/output/images/img_26.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..2f0cc7394cbc111c67bd6f225f5ee70475ab75e5 Binary files /dev/null and b/input_output/output/images/img_26.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_27.png b/input_output/output/images/img_27.png new file mode 100644 index 0000000000000000000000000000000000000000..494f5450af1d6b9b56de5860ef5ef883113efa83 Binary files /dev/null and b/input_output/output/images/img_27.png differ diff --git a/input_output/output/images/img_27.png_rows/row_0/col_0.png b/input_output/output/images/img_27.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..5f189e59f1a57dce17a6dc7ef25de166a0666d87 Binary files /dev/null and b/input_output/output/images/img_27.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_27.png_rows/row_0/col_1.png b/input_output/output/images/img_27.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..22144130c32593a18ef7452ac5062f66fc413804 Binary files /dev/null and b/input_output/output/images/img_27.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_27.png_rows/row_1/col_0.png b/input_output/output/images/img_27.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..35e0c6dfcc1ec79b084b444292b7d2d5b7df556a Binary files /dev/null and b/input_output/output/images/img_27.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_27.png_rows/row_1/col_1.png b/input_output/output/images/img_27.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..fa72c88f927fb132fae1015d0e416ca9abae7b7e Binary files /dev/null and b/input_output/output/images/img_27.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_27.png_rows/row_2/col_0.png b/input_output/output/images/img_27.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..ce8c285da83830c4f22991c7260b6fd2ffd9a66f Binary files /dev/null and b/input_output/output/images/img_27.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_28.png b/input_output/output/images/img_28.png new file mode 100644 index 0000000000000000000000000000000000000000..f0a6f78cc3ca75cdc639d805c7380effb4b91fd6 Binary files /dev/null and b/input_output/output/images/img_28.png differ diff --git a/input_output/output/images/img_28.png_rows/row_0/col_0.png b/input_output/output/images/img_28.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..64c8a51e88652511f483c13578c25f2c4d7e6f73 Binary files /dev/null and b/input_output/output/images/img_28.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_28.png_rows/row_1/col_0.png b/input_output/output/images/img_28.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..68c86122e275e53ce0c3984255f015af455acc7d Binary files /dev/null and b/input_output/output/images/img_28.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_28.png_rows/row_2/col_0.png b/input_output/output/images/img_28.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..7b44a69aba898c2ed707ea46681c859462647901 Binary files /dev/null and b/input_output/output/images/img_28.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_28.png_rows/row_3/col_0.png b/input_output/output/images/img_28.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..80372c6b3273ed5f6390e0b649813040ac9e65b3 Binary files /dev/null and b/input_output/output/images/img_28.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_28.png_rows/row_4/col_0.png b/input_output/output/images/img_28.png_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..bbf7aec7eaacca196309a84080c967f836a348d4 Binary files /dev/null and b/input_output/output/images/img_28.png_rows/row_4/col_0.png differ diff --git a/input_output/output/images/img_28.png_rows/row_5/col_0.png b/input_output/output/images/img_28.png_rows/row_5/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..bc4adfb4e07d51167ee134352ec4c2edfc559ce7 Binary files /dev/null and b/input_output/output/images/img_28.png_rows/row_5/col_0.png differ diff --git a/input_output/output/images/img_29.png b/input_output/output/images/img_29.png new file mode 100644 index 0000000000000000000000000000000000000000..4dcfe87d0103452fe0fcad0df9316045e845c58f Binary files /dev/null and b/input_output/output/images/img_29.png differ diff --git a/input_output/output/images/img_29.png_rows/row_0/col_0.png b/input_output/output/images/img_29.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..61264796bf92d3cc3aacd3cf4160fa2870ba4f84 Binary files /dev/null and b/input_output/output/images/img_29.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_29.png_rows/row_0/col_1.png b/input_output/output/images/img_29.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..1450a461086e0f97c24d07f88374732e5b453e7a Binary files /dev/null and b/input_output/output/images/img_29.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_29.png_rows/row_1/col_0.png b/input_output/output/images/img_29.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..d7d31e4235e356d27c16c115bc97937b7fc62cc5 Binary files /dev/null and b/input_output/output/images/img_29.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_29.png_rows/row_2/col_0.png b/input_output/output/images/img_29.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..e4fb8c58639e05fee1d8f05002bdff2f255dd083 Binary files /dev/null and b/input_output/output/images/img_29.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_3.png b/input_output/output/images/img_3.png new file mode 100644 index 0000000000000000000000000000000000000000..b04bff99059820c3397a20fdbbadf5558dda64ab Binary files /dev/null and b/input_output/output/images/img_3.png differ diff --git a/input_output/output/images/img_3.png_rows/row_0/col_0.png b/input_output/output/images/img_3.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..7868d8724b864ea2b40a148f93ab09e74da234ce Binary files /dev/null and b/input_output/output/images/img_3.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_3.png_rows/row_1/col_0.png b/input_output/output/images/img_3.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..b04e786a6830cc961f37ae7d93583d734ba7231b Binary files /dev/null and b/input_output/output/images/img_3.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_3.png_rows/row_2/col_0.png b/input_output/output/images/img_3.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..c9b8682b6441edab09dea84bcbe5ce023cc76284 Binary files /dev/null and b/input_output/output/images/img_3.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_3.png_rows/row_3/col_0.png b/input_output/output/images/img_3.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..a16defb9177b38e90378b632c3a9d0c90029e949 Binary files /dev/null and b/input_output/output/images/img_3.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_3.png_rows/row_4/col_0.png b/input_output/output/images/img_3.png_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..066623070920addc2847ae14b0bf6322fa264782 Binary files /dev/null and b/input_output/output/images/img_3.png_rows/row_4/col_0.png differ diff --git a/input_output/output/images/img_3.png_rows/row_5/col_0.png b/input_output/output/images/img_3.png_rows/row_5/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..ff43aaf3eace3ca02564904bc3cdfd528f09aaf3 Binary files /dev/null and b/input_output/output/images/img_3.png_rows/row_5/col_0.png differ diff --git a/input_output/output/images/img_3.png_rows/row_6/col_0.png b/input_output/output/images/img_3.png_rows/row_6/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..f08abe59438c56feb088cdf7a73bec8942e81864 Binary files /dev/null and b/input_output/output/images/img_3.png_rows/row_6/col_0.png differ diff --git a/input_output/output/images/img_4.png b/input_output/output/images/img_4.png new file mode 100644 index 0000000000000000000000000000000000000000..a64e0f1ac48c6edc180e2f128347a94bda760de6 Binary files /dev/null and b/input_output/output/images/img_4.png differ diff --git a/input_output/output/images/img_4.png_rows/row_0/col_0.png b/input_output/output/images/img_4.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..89e913c42dfb6bbc021f668a8e25b2a91d8a7895 Binary files /dev/null and b/input_output/output/images/img_4.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_4.png_rows/row_0/col_1.png b/input_output/output/images/img_4.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..0388229256c3325d728ed9b8ad9c683940dc43f0 Binary files /dev/null and b/input_output/output/images/img_4.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_4.png_rows/row_1/col_0.png b/input_output/output/images/img_4.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..eece6d1fe11464358b7c7c9ffb20827ba38d3dad Binary files /dev/null and b/input_output/output/images/img_4.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_4.png_rows/row_1/col_1.png b/input_output/output/images/img_4.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..38a09260f541eeaa4fa5fe9cb7cb64ca0625f1b8 Binary files /dev/null and b/input_output/output/images/img_4.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_5.png b/input_output/output/images/img_5.png new file mode 100644 index 0000000000000000000000000000000000000000..150dde1dae1774fc40b94f4db06fae4367b4eaed Binary files /dev/null and b/input_output/output/images/img_5.png differ diff --git a/input_output/output/images/img_5.png_rows/row_0/col_0.png b/input_output/output/images/img_5.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..fbd51d91192cd83eabacf3a9edcee6ae180b3d00 Binary files /dev/null and b/input_output/output/images/img_5.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_5.png_rows/row_0/col_1.png b/input_output/output/images/img_5.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..255c4195f33dedf5335f836ab93e46579d04bd39 Binary files /dev/null and b/input_output/output/images/img_5.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_5.png_rows/row_1/col_0.png b/input_output/output/images/img_5.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..46d49431a250a290437d6851e4f4e612991f3c4e Binary files /dev/null and b/input_output/output/images/img_5.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_5.png_rows/row_2/col_0.png b/input_output/output/images/img_5.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..9da4a65d8ff31f95550ba07eebd178b9b14fcf2b Binary files /dev/null and b/input_output/output/images/img_5.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_5.png_rows/row_3/col_0.png b/input_output/output/images/img_5.png_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..bb9bb009b135374b46b62e827a6b6f545724a3df Binary files /dev/null and b/input_output/output/images/img_5.png_rows/row_3/col_0.png differ diff --git a/input_output/output/images/img_6.png b/input_output/output/images/img_6.png new file mode 100644 index 0000000000000000000000000000000000000000..e9ffa759df80398192db92a27b41b6971cfd8274 Binary files /dev/null and b/input_output/output/images/img_6.png differ diff --git a/input_output/output/images/img_6.png_rows/row_0/col_0.png b/input_output/output/images/img_6.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..d34f77527c632ccb5ea0ed5660f479d764e49f97 Binary files /dev/null and b/input_output/output/images/img_6.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_6.png_rows/row_0/col_1.png b/input_output/output/images/img_6.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..57aba9dc40fd6f5476b035e2530695fa07c8c4ae Binary files /dev/null and b/input_output/output/images/img_6.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_6.png_rows/row_1/col_0.png b/input_output/output/images/img_6.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..58814d84ffefea09eb19cb36bb2f1dd71096497d Binary files /dev/null and b/input_output/output/images/img_6.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_7.png b/input_output/output/images/img_7.png new file mode 100644 index 0000000000000000000000000000000000000000..9970d23e9ac329f010eacce45684e773367971cb Binary files /dev/null and b/input_output/output/images/img_7.png differ diff --git a/input_output/output/images/img_7.png_rows/row_0/col_0.png b/input_output/output/images/img_7.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..1965758d70d2cc2cef8f7521a3b1860b01bf4668 Binary files /dev/null and b/input_output/output/images/img_7.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_7.png_rows/row_0/col_1.png b/input_output/output/images/img_7.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..34952b369e5b2d1cf28f31b08911092ca278d27d Binary files /dev/null and b/input_output/output/images/img_7.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_7.png_rows/row_1/col_0.png b/input_output/output/images/img_7.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..8ecda41d195525e1526f1a620fd1162cf1914ad2 Binary files /dev/null and b/input_output/output/images/img_7.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_7.png_rows/row_1/col_1.png b/input_output/output/images/img_7.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..56648484c180a4a12b511ed88c065c8184a9b8bd Binary files /dev/null and b/input_output/output/images/img_7.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_8.png b/input_output/output/images/img_8.png new file mode 100644 index 0000000000000000000000000000000000000000..6ddea5c750a2073af579e8ccfcb41ec97137462d Binary files /dev/null and b/input_output/output/images/img_8.png differ diff --git a/input_output/output/images/img_8.png_rows/row_0/col_0.png b/input_output/output/images/img_8.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..54e835fc7fda8b76658eb9732756c4f0a2e31fe1 Binary files /dev/null and b/input_output/output/images/img_8.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_8.png_rows/row_0/col_1.png b/input_output/output/images/img_8.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..a800de22d2e8583dd9f72a5de826d64cc6934548 Binary files /dev/null and b/input_output/output/images/img_8.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_8.png_rows/row_1/col_0.png b/input_output/output/images/img_8.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..257aac0e47a2b1b25ffe3f2827f158d48e39aa74 Binary files /dev/null and b/input_output/output/images/img_8.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_8.png_rows/row_1/col_1.png b/input_output/output/images/img_8.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..18ef074956b7d70759cca25788f51195c90e898d Binary files /dev/null and b/input_output/output/images/img_8.png_rows/row_1/col_1.png differ diff --git a/input_output/output/images/img_8.png_rows/row_2/col_0.png b/input_output/output/images/img_8.png_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..cade544edb83ae507d809e8285fc9b022825f119 Binary files /dev/null and b/input_output/output/images/img_8.png_rows/row_2/col_0.png differ diff --git a/input_output/output/images/img_9.png b/input_output/output/images/img_9.png new file mode 100644 index 0000000000000000000000000000000000000000..754d612eec7443e4c79b438f2da9f65ac55d3d9b Binary files /dev/null and b/input_output/output/images/img_9.png differ diff --git a/input_output/output/images/img_9.png_rows/row_0/col_0.png b/input_output/output/images/img_9.png_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..d2bcb6341c7a0161b77dcac99d606eeec14b5659 Binary files /dev/null and b/input_output/output/images/img_9.png_rows/row_0/col_0.png differ diff --git a/input_output/output/images/img_9.png_rows/row_0/col_1.png b/input_output/output/images/img_9.png_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..04eec05e20370257adb0d9707a19c357fc141eea Binary files /dev/null and b/input_output/output/images/img_9.png_rows/row_0/col_1.png differ diff --git a/input_output/output/images/img_9.png_rows/row_1/col_0.png b/input_output/output/images/img_9.png_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..4a03b25084a20ea7ad2c2843cec5aed01c3d5fa3 Binary files /dev/null and b/input_output/output/images/img_9.png_rows/row_1/col_0.png differ diff --git a/input_output/output/images/img_9.png_rows/row_1/col_1.png b/input_output/output/images/img_9.png_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..3aee171a64abcf92d743db31bb93b24e1d152da9 Binary files /dev/null and b/input_output/output/images/img_9.png_rows/row_1/col_1.png differ diff --git a/topic_extr.py b/topic_extr.py new file mode 100644 index 0000000000000000000000000000000000000000..a7bcbe50cab3a2de243a5878594f96afcb73f509 --- /dev/null +++ b/topic_extr.py @@ -0,0 +1,663 @@ +#!/usr/bin/env python3 +import os +import re +import gc +import json +import logging +import fitz +import base64 +import cv2 +import numpy as np +from io import BytesIO +from typing import List, Dict, Any + +import torch + +# Try to import google.genai +try: + from google import genai + from google.genai import types +except ImportError: + genai = None + types = None + +# magic-pdf imports +from magic_pdf.data.dataset import PymuDocDataset +from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze + +# table extraction logic +from table_row_extraction import TableExtractor + +############################################################################### +# Logging Setup +############################################################################### +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +############################################################################### +# PDF Subset Creation +############################################################################### +def create_subset_pdf(original_pdf_bytes: bytes, page_indices: List[int]) -> bytes: + """ + Creates a new PDF (in memory) containing only the pages in page_indices (0-based). + """ + if not page_indices: + raise ValueError("No page indices provided for subset creation.") + + doc = fitz.open(stream=original_pdf_bytes, filetype="pdf") + new_doc = fitz.open() + sorted_pages = sorted(set(page_indices)) + for p in sorted_pages: + if 0 <= p < doc.page_count: + new_doc.insert_pdf(doc, from_page=p, to_page=p) + else: + logger.error(f"Page index {p} is out of range (0..{doc.page_count - 1}).") + raise ValueError(f"Page index {p} is out of range.") + subset_bytes = new_doc.tobytes() + new_doc.close() + doc.close() + return subset_bytes + +############################################################################### +# Utility: Shrink Images Before Sending to Gemini +############################################################################### +def shrink_image_to_jpeg(image_data: bytes, max_dim: int = 800, jpeg_quality: int = 80) -> bytes: + """ + Decode image_data, resize so largest dimension <= max_dim, then re-encode as JPEG. + This reduces request size to Gemini significantly. + """ + try: + # Decode + arr = np.frombuffer(image_data, np.uint8) + img = cv2.imdecode(arr, cv2.IMREAD_COLOR) + if img is None: + # Not a valid image, return as is + return image_data + + h, w, _ = img.shape + scale = 1.0 + if max(h, w) > max_dim: + scale = max_dim / float(max(h, w)) + if scale < 1.0: + new_w = int(w * scale) + new_h = int(h * scale) + img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA) + + # Re-encode + encode_params = [int(cv2.IMWRITE_JPEG_QUALITY), jpeg_quality] + success, enc = cv2.imencode(".jpg", img, encode_params) + if success: + return enc.tobytes() + else: + logger.warning("Could not encode resized image, returning original.") + return image_data + except Exception as e: + logger.warning(f"shrink_image_to_jpeg error: {e}. Returning original data.") + return image_data + +############################################################################### +# Gemini LLM - Subtopic Extraction +############################################################################### +class GeminiTopicExtractor: + """ + Reads the first few pages of a PDF to get the table of contents text, + then uses Gemini to parse out topics -> [start_page, end_page]. + """ + def __init__(self, api_key: str = None, num_pages: int = 14): + self.api_key = api_key or os.getenv("GEMINI_API_KEY", "") + if not self.api_key: + logger.warning("No Gemini API key provided for subtopic extraction.") + self.num_pages = num_pages + + def extract_subtopics(self, pdf_path: str) -> Dict[str, Any]: + text_content = self._read_first_pages(pdf_path, self.num_pages) + if not text_content.strip(): + logger.error("No text extracted from the first pages of the PDF.") + return {} + + if genai is None or types is None: + logger.warning("google.genai is not installed. Returning empty subtopics.") + return {} + + prompt = f""" +You will be provided with the first pages of an exam board document. +Your goal is to extract the main subject-related topics from the "Contents" section and structure them in a valid JSON format.Instructions: + Instructions: + 1. Identify the 'Contents' section listing all topics, subtopics, and their corresponding pages. + 2. Extract only the **highest-level, subject-related subtopics** (ignore administrative sections). + 3. For each subtopic, return [start_page, end_page] (1-based). + 4. Output valid JSON in the following format: + {{ + "Topic A": [start_page, end_page], + "Topic B": [start_page, end_page] + }} + +Important Notes: +- Ignore non-subject-related sections (e.g., "Introduction", "Exam Guidelines", "Appendices", "Assessment, Qualification at a glance"). +- The extracted subtopics should represent major academic areas, not organizational or structural elements. +- Make sure that all of the pages for a subtopic are included, end page should be the -1 start page of the topic + that comes next after the extracted one in contents section. + +Examples: +1. Given this table of contents: + + 1 Introduction – 2 + Why choose Edexcel A Level Mathematics? - 2 + Supporting you in planning and implementing this qualification - 3 + Qualification at a glance - 5 + 2 Subject content and assessment information – 7 + Paper 1 and Paper 2: Pure Mathematics - 11 + Paper 3: Statistics and Mechanics - 30 + Assessment Objectives - 40 + 3 Administration and general information – 42 + Entries - 42 + Access arrangements, reasonable adjustments, special consideration and malpractice - 42 + Student recruitment and progression - 45 + Appendix 1: Formulae – 49 + Appendix 2: Notation – 53 + Appendix 3: Use of calculators – 59 + Appendix 4: Assessment Objectives – 60 + Appendix 5: The context for the development of this qualification – 62 + Appendix 6: Transferable skills – 64 + Appendix 7: Level 3 Extended Project qualification – 65 + Appendix 8: Codes – 67 + + The correct output should be: + + {{ + "Paper 1 and Paper 2: Pure Mathematics": [11, 29], + "Paper 3: Statistics and Mechanics": [30, 42] + }} + +2. Given this table of contents: + + Qualification at a glance – 1 + Assessment Objectives and weightings - 4 + Knowledge, skills and understanding – 5 + Theme 1: Introduction to markets and market failure - 5 + Theme 2: The UK economy – performance and policies - 11 + Theme 3: Business behaviour and the labour market - 21 + Theme 4: A global perspective - 29 + Assessment – 39 + Assessment summary - 39 + Assessment objectives - 41 + Assessment overview - 42 + Breakdown of assessment objectives - 42 + Synoptic assessment - 43 + Discount code and performance tables - 43 + Access arrangements, reasonable adjustments and special consideration - 44 + Malpractice - 45 + Equality Act 2010 and Pearson equality policy - 45 + Synoptic assessment - 46 + Awarding and reporting - 47 + Other information – 49 + Student recruitment -49 + Prior learning and other requirements -49 + Progression - 49 + Appendix 1: Transferable skills – 53 + Appendix 2: Level 3 Extended Project qualification – 55 + Appendix 3: Quantitative skills – 59 + Appendix 4: Codes – 61 + Appendix 5: Index – 63 + + The correct output should be: + + {{ + "Theme 1: Introduction to markets and market failure": [5, 10] + "Theme 2: The UK economy – performance and policies": - [11, 20] + "Theme 3: Business behaviour and the labour market": [21, 28] + "Theme 4: A global perspective": [29, 38] + }} + + Now, extract topics from this text: {text_content} +""" + + try: + logger.debug("Calling Gemini to extract subtopics...") + client = genai.Client(api_key=self.api_key) + response = client.models.generate_content( + model="gemini-2.0-flash", + contents=[prompt], + config=types.GenerateContentConfig(temperature=0.0) + ) + # Log partial or full LLM response for debugging + if response and response.text: + logger.info(f"[Gemini subtopic extraction] Raw LLM response:\n{response.text}") + + raw_text = response.text.strip() if (response and response.text) else "{}" + cleaned = raw_text.replace("```json", "").replace("```", "") + data = json.loads(cleaned) + return data + except Exception as e: + logger.error(f"Gemini subtopic extraction error: {e}") + return {} + + def _read_first_pages(self, pdf_path: str, num_pages: int) -> str: + text_parts = [] + try: + doc = fitz.open(pdf_path) + pages_to_read = min(doc.page_count, num_pages) + for p in range(pages_to_read): + page_text = doc.load_page(p).get_text() + text_parts.append(page_text) + doc.close() + except Exception as e: + logger.error(f"Could not open/read PDF: {e}") + return "\n".join(text_parts) + +############################################################################### +# Gemini-based Image Classification +############################################################################### +def call_gemini_for_table_classification(image_data: bytes, api_key: str) -> str: + """ + Classify an image as TWO_COLUMN, THREE_COLUMN, or NO_TABLE using Gemini (Flash). + We shrink the image first to speed up requests. + """ + if not api_key: + logger.warning("No Gemini API key found, returning NO_TABLE.") + return "NO_TABLE" + if not genai or not types: + logger.warning("google.genai not installed, returning NO_TABLE.") + return "NO_TABLE" + + # Shrink image + shrunk_data = shrink_image_to_jpeg(image_data, max_dim=800, jpeg_quality=80) + + prompt = """You are given an image. Determine if it shows a table that has exactly 2 or 3 columns. +The three-column 'table' image include such key features: + - Three columns header columns + - Headers like 'Topics', 'Content', 'Guidelines' + - Numbered sections (e.g., 8.4, 9.1) + - Educational curriculum-style structure +The two-column 'table' image include such key features: + - Two columns header columns + - Headers like 'Subject content' and 'Additional information' + - Numbered sections (e.g., 2.1, 3.4) + - Educational curriculum-style structure + - Bullet description in 'Additional information' +If the image is a relevant table with 2 columns, respond with 'TWO_COLUMN'. +If the image is a relevant table with 3 columns, respond with 'THREE_COLUMN'. +If the image does not show a table at all, respond with 'NO_TABLE'. +Return only one of these exact labels as your entire response: +TWO_COLUMN +THREE_COLUMN +NO_TABLE +""" + try: + # Example of optional manual timeout approach (commented out): + # import signal + # def handler(signum, frame): + # raise TimeoutError("Table classification timed out!") + # signal.signal(signal.SIGALRM, handler) + # signal.alarm(30) # 30s timeout + + logger.debug("Sending image to Gemini for table classification...") + client = genai.Client(api_key=api_key) + response = client.models.generate_content( + model="gemini-2.0-flash", + contents=[ + { + "parts": [ + {"text": prompt}, + { + "inline_data": { + "mime_type": "image/jpeg", + "data": base64.b64encode(shrunk_data).decode('utf-8') + } + } + ] + } + ], + config=types.GenerateContentConfig(temperature=0.0) + ) + # signal.alarm(0) # cancel timeout + + if response and response.text: + logger.info(f"[Gemini table classification] LLM raw response:\n{response.text}") + + classification = (response.text.strip().upper() + if (response and response.text) else "NO_TABLE") + if "THREE" in classification: + return "THREE_COLUMN" + elif "TWO" in classification: + return "TWO_COLUMN" + else: + return "NO_TABLE" + except Exception as e: + logger.error(f"Gemini table classification error: {e}") + return "NO_TABLE" + +def call_gemini_for_image_description(image_data: bytes, api_key: str) -> str: + """ + Use Gemini (Flash) to extract a short description from an image. + We also shrink the image first to reduce request time. + """ + if not api_key: + logger.warning("No Gemini API key found, returning fallback description.") + return "Image description unavailable" + if not genai or not types: + logger.warning("google.genai not installed, returning fallback description.") + return "Image description unavailable" + + shrunk_data = shrink_image_to_jpeg(image_data, max_dim=800, jpeg_quality=80) + + prompt_text = """The provided image is a part of a question paper or markscheme. +Extract all the necessary information from the image to be able to identify the question. +To identify the question, we only need the following: question number and question part. +Don't include redundant information. +For example, if image contains text like: "Q1 Part A Answer: Life on earth was created by diety..." +you should return just "Q1 Part A Mark Scheme" +If there is no text on this image, return the description of the image. 20 words max. +If there are not enough data, consider information from the surrounding context. +Additionally, if the image contains a truncated part, you must describe it and mark as a +part of some another image that goes before or after current image. +If the image is of a multiple-choice question’s options, then modify your answer by appending +'MCQ: A [option] B [option] C [option] D [option]' (replacing [option] with the actual options). +Otherwise, follow the above instructions strictly. +""" + try: + logger.debug("Sending image to Gemini for description...") + client = genai.Client(api_key=api_key) + response = client.models.generate_content( + model="gemini-2.0-flash", + contents=[ + { + "parts": [ + {"text": prompt_text}, + { + "inline_data": { + "mime_type": "image/jpeg", + "data": base64.b64encode(shrunk_data).decode('utf-8') + } + } + ] + } + ], + config=types.GenerateContentConfig(temperature=0.0) + ) + if response and response.text: + logger.info(f"[Gemini image description] LLM raw response:\n{response.text}") + + return response.text.strip() if (response and response.text) else "Image description unavailable" + except Exception as e: + logger.error(f"Gemini image description error: {e}") + return "Image description unavailable" + +############################################################################### +# Local Image Writer (Sequential Gemini Calls) +############################################################################### +class LocalImageWriter: + """ + Saves extracted images, classifies them with Gemini for table/no-table, + describes them if no-table, then modifies the Markdown to replace + the original references with final alt text. Also processes table images + into row/column cell images. + """ + def __init__(self, output_folder: str, gemini_api_key: str): + self.output_folder = output_folder + os.makedirs(self.output_folder, exist_ok=True) + + self.images_dir = os.path.join(self.output_folder, "images") + os.makedirs(self.images_dir, exist_ok=True) + + self.descriptions = {} + self._img_count = 0 + self.gemini_api_key = gemini_api_key + + def write(self, path: str, data: bytes) -> None: + """ + Called by magic-pdf to save each extracted image. + We store metadata so we can classify the images later. + """ + self._img_count += 1 + local_filename = f"img_{self._img_count}.png" + local_path = os.path.join(self.images_dir, local_filename) + + with open(local_path, "wb") as f: + f.write(data) + + rel_path_for_md = os.path.relpath(local_path, self.output_folder) + self.descriptions[path] = { + "data": data, + "relative_path": rel_path_for_md, + "table_classification": "NO_TABLE", + "final_alt": "" + } + + def post_process(self, key: str, md_content: str) -> str: + """ + 1) Classify images as table/no-table (sequential). + 2) Describe non-table images (sequential). + 3) Replace placeholders in the Markdown with final alt text. + 4) Process table images => row/col cell images => update Markdown. + 5) Keep only image-reference lines in the final Markdown. + """ + # 1) Table classification + logger.info("Classifying images to detect tables (sequential)...") + for p, info in self.descriptions.items(): + classification = call_gemini_for_table_classification(info["data"], self.gemini_api_key) + self.descriptions[p]['table_classification'] = classification + + # 2) Image description for non-table + logger.info("Generating image descriptions for non-table images (sequential)...") + for p, info in self.descriptions.items(): + if info['table_classification'] == "NO_TABLE": + desc = call_gemini_for_image_description(info["data"], self.gemini_api_key) + info['final_alt'] = desc + + # For images classified as 2/3-column tables => set alt + for p, info in self.descriptions.items(): + cls = info['table_classification'] + if cls == "TWO_COLUMN": + info['final_alt'] = "HAS TO BE PROCESSED - two column table" + elif cls == "THREE_COLUMN": + info['final_alt'] = "HAS TO BE PROCESSED - three column table" + elif not info['final_alt']: + info['final_alt'] = "Image description unavailable" + + # 3) Replace placeholders in the Markdown + for p, info in self.descriptions.items(): + old_md = f"![]({key}{p})" + new_md = f"![{info['final_alt']}]({info['relative_path']})" + md_content = md_content.replace(old_md, new_md) + + # 4) Process table images => row/col + md_content = self._process_table_images_in_markdown(md_content) + + # 5) Keep only image-reference lines + final_lines = [] + for line in md_content.split("\n"): + if re.match(r"^\!\[.*\]\(.*\)", line.strip()): + final_lines.append(line.strip()) + + return "\n".join(final_lines) + + def _process_table_images_in_markdown(self, md_content: str) -> str: + """ + For images flagged as 2/3-column tables, run TableExtractor, + split into row/column cell images, and replace the single + table image reference with multiple cell references. + """ + pattern = r"!\[HAS TO BE PROCESSED - (two|three) column table\]\(([^)]+)\)" + matches = re.findall(pattern, md_content, flags=re.IGNORECASE) + if not matches: + return md_content + + for (col_type, image_path) in matches: + logger.info(f"Processing table image => {image_path}, columns={col_type}") + abs_image_path = os.path.join(self.output_folder, image_path) + try: + if col_type.lower() == 'two': + extractor = TableExtractor( + skip_header=True, + merge_two_col_rows=True, + enable_subtopic_merge=True, + subtopic_threshold=0.2 + ) + else: + extractor = TableExtractor( + skip_header=True, + merge_two_col_rows=False, + enable_subtopic_merge=False, + subtopic_threshold=0.2 + ) + + row_boxes = extractor.process_image(abs_image_path) + out_folder = abs_image_path + "_rows" + os.makedirs(out_folder, exist_ok=True) + extractor.save_extracted_cells(abs_image_path, row_boxes, out_folder) + + snippet_lines = ["**Extracted table cells:**"] + for i, row in enumerate(row_boxes): + row_dir = os.path.join(out_folder, f"row_{i}") + for j, _ in enumerate(row): + cell_filename = f"col_{j}.png" + cell_abs_path = os.path.join(row_dir, cell_filename) + cell_rel_path = os.path.relpath(cell_abs_path, self.output_folder) + snippet_lines.append(f"![Row {i} Col {j}]({cell_rel_path})") + + new_snippet = "\n".join(snippet_lines) + old_line = f"![HAS TO BE PROCESSED - {col_type} column table]({image_path})" + md_content = md_content.replace(old_line, new_snippet) + except Exception as e: + logger.error(f"Error processing table image {image_path}: {e}") + + return md_content + +############################################################################### +# Mineru (magic-pdf) Pipeline with Page-Range Preprocessing +############################################################################### +class MineruNoTextProcessor: + """ + 1) Extracts page ranges from the PDF's table of contents (via Gemini). + 2) Creates a subset PDF in memory for those pages. + 3) Runs magic-pdf analysis on the subset PDF. + 4) Generates a Markdown file with images, including table images + split into row/column cells. + """ + def __init__(self, output_folder: str, gemini_api_key: str = None): + self.output_folder = output_folder + os.makedirs(self.output_folder, exist_ok=True) + + self.layout_model = "doclayout_yolo" + self.formula_enable = True + # keep table_enable=False so that entire table is an image + self.table_enable = False + self.language = "en" + + self.subtopic_extractor = GeminiTopicExtractor(api_key=gemini_api_key, num_pages=4) + self.gemini_api_key = gemini_api_key or os.getenv("GEMINI_API_KEY", "") + + def cleanup_gpu(self): + try: + gc.collect() + torch.cuda.empty_cache() + logger.info("GPU memory cleaned up.") + except Exception as e: + logger.error(f"Error during GPU cleanup: {e}") + + def process(self, pdf_path: str) -> str: + logger.info(f"Processing PDF: {pdf_path}") + try: + # 1) Extract subtopics from the PDF's contents + topics_data = self.subtopic_extractor.extract_subtopics(pdf_path) + if not topics_data: + raise ValueError("No valid topics extracted from the PDF's table of contents.") + + # 2) Flatten page indices from all topics (1-based) + page_indices = self._collect_page_indices(topics_data) + if not page_indices: + raise ValueError("Extracted page indices are empty.") + + # 3) Read the original PDF into memory + with open(pdf_path, "rb") as f: + original_pdf_bytes = f.read() + + # 4) Validate pages and create subset (convert 1-based to 0-based) + doc = fitz.open(stream=original_pdf_bytes, filetype="pdf") + total_pages = doc.page_count + doc.close() + + zero_based = [] + for p in page_indices: + z = p - 1 + if 0 <= z < total_pages: + zero_based.append(z) + else: + logger.error(f"Page {p} (converted to {z}) is out of 1..{total_pages}") + raise ValueError(f"Page {p} is out of valid range.") + zero_based = sorted(set(zero_based)) + if not zero_based: + raise ValueError("No valid pages after conversion to 0-based indices.") + + logger.info(f"Processing pages (0-based): {zero_based}") + subset_pdf_bytes = create_subset_pdf(original_pdf_bytes, zero_based) + + # 5) Run magic-pdf analysis on the subset PDF + dataset = PymuDocDataset(subset_pdf_bytes) + inference = doc_analyze( + dataset, + ocr=True, + lang=self.language, + layout_model=self.layout_model, + formula_enable=self.formula_enable, + table_enable=self.table_enable + ) + logger.info("doc_analyze complete. Extracting images...") + + # 6) Convert to Markdown (images only) via pipe_ocr_mode + image_writer = LocalImageWriter(self.output_folder, gemini_api_key=self.gemini_api_key) + pipe_result = inference.pipe_ocr_mode(image_writer, lang=self.language) + md_content = pipe_result.get_markdown("local-unique-prefix/") + + # 7) Post-process => classify table images => final MD + final_markdown = image_writer.post_process("local-unique-prefix/", md_content) + + # 8) Save final Markdown + md_path = os.path.join(self.output_folder, "final_output.md") + with open(md_path, "w", encoding="utf-8") as f: + f.write(final_markdown) + + logger.info(f"Markdown saved to: {md_path}") + return final_markdown + + finally: + self.cleanup_gpu() + + def _collect_page_indices(self, topics_data: Dict[str, Any]) -> List[int]: + """ + Flatten the subtopic ranges into a list of pages (1-based). + Example: {"Topic A": [11,29], "Topic B": [30,42]} => [11..29, 30..42] + """ + pages = [] + for topic, rng in topics_data.items(): + if isinstance(rng, list) and len(rng) == 2: + start_p, end_p = rng + if start_p > end_p: + logger.error(f"Invalid page range for topic '{topic}': {rng}") + raise ValueError(f"Invalid page range for topic '{topic}': {rng}") + pages.extend(range(start_p, end_p + 1)) + else: + logger.warning(f"Skipping topic '{topic}' with invalid range: {rng}") + return pages + +############################################################################### +# Main Execution +############################################################################### +if __name__ == "__main__": + # Example usage: + input_pdf = "/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf" + output_dir = "/home/user/app/input_output/output" + + # Provide your Gemini API key (or rely on GEMINI_API_KEY env var). + gemini_key = os.getenv("GEMINI_API_KEY", "AIzaSyDtoakpXa2pjJwcQB6TJ5QaXHNSA5JxcrU") + # gemini_key = "YOUR_GEMINI_API_KEY" + + try: + processor = MineruNoTextProcessor(output_folder=output_dir, gemini_api_key=gemini_key) + final_md = processor.process(input_pdf) + print("Final Markdown Output:") + print(final_md) + except Exception as e: + logger.error(f"Processing failed: {e}") \ No newline at end of file