vumichien committed
Commit 324d5c7 · verified · 1 Parent(s): 12b2df2

Update app.py

Files changed (1):
  1. app.py +142 -1
app.py CHANGED
@@ -11,6 +11,11 @@ from PIL import Image
 import glob
 import pandas as pd
 import time
+from pdf2image import convert_from_path
+import pymupdf
+import camelot
+import numpy as np
+import fitz
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
@@ -34,6 +39,14 @@ def filter_detections(detections, target_class_name="mark"):
     detections.data['class_name'] = filtered_class_name
     return detections
 
+
+def add_label_detection(detections):
+    updated_class = [f"{class_name} {i + 1}" for i, class_name in enumerate(detections.data['class_name'])]
+    updated_id = [class_id + i for i, class_id in enumerate(detections.class_id)]
+    detections.data['class_name'] = np.array(updated_class)
+    detections.class_id = np.array(updated_id)
+    return detections
+
 
 def ends_with_number(s):
     return s[-1].isdigit()
@@ -185,6 +198,99 @@ def inference(
     return annotated_image, output_path_list
 
 
+def read_table(sheet):
+    excel_path = "output_tables.xlsx"
+    if os.path.exists(excel_path):
+        sheetnames = pd.ExcelFile(excel_path).sheet_names
+        if sheet in sheetnames:
+            df = pd.read_excel(excel_path, sheet_name=sheet)
+        else:
+            df = pd.DataFrame()
+    else:
+        df = pd.DataFrame()
+    return df
+
+
+def validate_df(df):
+    columns = []
+    count = 1
+    for col in df.columns:
+        if type(col) == int:
+            columns.append(f"Col {count}")
+            count += 1
+        else:
+            columns.append(col)
+    df.columns = columns
+    return df
+
+
+def analyze_table(file, conf_threshold, iou_threshold, progress=gr.Progress()):
+    progress(0, desc="Parsing table...")
+    img = convert_from_path(file)[0]
+    doc = pymupdf.open(file)
+    zoom_x = 1.0  # horizontal zoom
+    zoom_y = 1.0  # vertical zoom
+    mat = pymupdf.Matrix(zoom_x, zoom_y)
+
+    for i, page in enumerate(doc):
+        pix = page.get_pixmap(matrix=mat)
+        pix.save("temp.png")
+    image = cv2.imread("temp.png")
+    file_height, file_width, _ = image.shape
+    results = onnx_model_table(image, conf=conf_threshold, iou=iou_threshold, imgsz=640)[0]
+    detections = sv.Detections.from_ultralytics(results)
+    detections = add_label_detection(detections)
+    parsed_detections = parse_detection(detections)
+    # print(parsed_detections)
+    output_dir = "output_table"
+    # Check if the output directory exists, clear all the files inside
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    else:
+        for f in os.listdir(output_dir):
+            os.remove(os.path.join(output_dir, f))
+
+    box_annotator = sv.BoxAnnotator()
+    label_annotator = sv.LabelAnnotator(text_position=sv.Position.TOP_LEFT, text_thickness=1, text_padding=2)
+    annotated_image = image.copy()
+    annotated_image = box_annotator.annotate(
+        scene=annotated_image,
+        detections=detections
+    )
+    annotated_image = label_annotator.annotate(scene=annotated_image, detections=detections)
+
+    pdf = fitz.open(file)
+    pdf_page = pdf[0]
+    table_area = [(ind,
+                   fitz.Rect(det['left'], det['top'], det['left'] + det['width'], det['top'] + det['height']))
+                  for ind, det in enumerate(parsed_detections)
+                  ]
+    table_list = []
+    for ind, area in progress.tqdm(table_area):
+
+        pdf_tabs = pdf_page.find_tables(clip=area)
+        if len(pdf_tabs.tables) > 0:
+            pdf_df = pdf_tabs[0].to_pandas()
+            print("Fitz Table Found!")
+        else:
+            cur = parsed_detections[ind]
+            table_areas = [f"{cur['left']},{file_height - cur['top']},{cur['left'] + cur['width']},{file_height - (cur['top'] + cur['height'])}"]
+            tables = camelot.read_pdf(file, pages='0', flavor='stream', row_tol=10, table_areas=table_areas)
+            pdf_df = tables[0].df
+            print("Camelot Table Found!")
+        pdf_df = validate_df(pdf_df)
+        table_list.append(pdf_df)
+    excel_path = "output_tables.xlsx"
+    sheet_list = []
+    with pd.ExcelWriter(excel_path, engine='xlsxwriter') as writer:
+        for i in range(len(table_list)):
+            sheet_name = f"Table_{i + 1}"
+            table_list[i].to_excel(writer, sheet_name=sheet_name, index=False)
+            sheet_list.append(sheet_name)
+
+    return img, annotated_image, excel_path, ", ".join(sheet_list)
+
+
 TITLE = "<h1 style='font-size: 2.5em; text-align: center;'>Identify objects in construction design</h1>"
 DESCRIPTION = """<p style='font-size: 1.5em; line-height: 1.6em; text-align: left;'>Welcome to the object
 identification application. This tool allows you to upload an image, and it will identify and annotate objects within
@@ -211,6 +317,7 @@ EXAMPLES = [
 with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
     gr.HTML(TITLE)
     gr.HTML(DESCRIPTION)
+
     with gr.Tab(label="Identify objects"):
         with gr.Row(equal_height=False):
             input_img = gr.Image(type="filepath", label="Upload Image")
@@ -239,5 +346,39 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
             outputs=[output_img, gallery],
            cache_examples=False,
        )
-
+
+    with gr.Tab(label="Detect and read table"):
+        with gr.Row():
+            with gr.Column():
+                upload_pdf = gr.Image(label="Upload PDF file")
+                upload_button = gr.UploadButton(label="Upload PDF file", file_types=[".pdf"])
+            with gr.Column():
+                output_img = gr.Image(label="Output Image", interactive=False)
+
+        with gr.Row():
+            with gr.Column():
+                conf_thres_table = gr.Slider(minimum=0.0, maximum=1.0, value=0.45, step=0.05,
+                                             label="Confidence Threshold")
+            with gr.Column():
+                iou_table = gr.Slider(minimum=0.0, maximum=1.0, value=0.25, step=0.05, label="IOU Threshold")
+
+        with gr.Row():
+            with gr.Column():
+                text_output = gr.Textbox(label="Table List")
+            with gr.Column():
+                file_output = gr.File()
+
+        with gr.Row():
+            sheet_name = gr.Dropdown(choices=SHEET_LIST, allow_custom_value=True, label="Sheet Name")
+
+        with gr.Row():
+            output_df = gr.Dataframe(label="Results")
+        upload_button.upload(analyze_table, [upload_button, conf_thres_table, iou_table],
+                             [upload_pdf, output_img, file_output, text_output])
+        conf_thres_table.change(analyze_table, [upload_button, conf_thres_table, iou_table],
+                                [upload_pdf, output_img, file_output, text_output])
+        iou_table.change(analyze_table, [upload_button, conf_thres_table, iou_table],
+                         [upload_pdf, output_img, file_output, text_output])
+        sheet_name.change(read_table, sheet_name, output_df)
+
 demo.launch(debug=True)
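
For reference, the core of the new analyze_table flow is a two-stage table read: PyMuPDF's find_tables() is tried first inside each detected region, and Camelot's stream parser is used as a fallback when PyMuPDF finds nothing. The sketch below is a minimal standalone version of that strategy, not code from this commit: the function name extract_table and the example coordinates are illustrative only, and Camelot's pages argument is written as "1" here because Camelot numbers pages starting from 1.

import camelot
import fitz  # PyMuPDF


def extract_table(pdf_path, left, top, width, height):
    # Region coordinates are in PDF points with the origin at the top-left
    # corner, following the fitz.Rect convention used in analyze_table.
    page = fitz.open(pdf_path)[0]
    clip = fitz.Rect(left, top, left + width, top + height)

    finder = page.find_tables(clip=clip)
    if finder.tables:
        # PyMuPDF recognised a table inside the region.
        return finder.tables[0].to_pandas()

    # Fallback: Camelot's stream flavor. Camelot expects the origin at the
    # bottom-left corner, so the y coordinates are flipped against the page height.
    page_height = page.rect.height
    area = f"{left},{page_height - top},{left + width},{page_height - (top + height)}"
    tables = camelot.read_pdf(pdf_path, pages="1", flavor="stream",
                              row_tol=10, table_areas=[area])
    return tables[0].df


# Hypothetical usage with made-up coordinates:
# df = extract_table("design.pdf", left=50, top=100, width=400, height=200)

Trying find_tables() first keeps tables with drawn ruling lines fast and exact, while the stream fallback covers tables laid out only with whitespace.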