Joshnicholas committed on
Commit
7b99985
·
verified ·
1 Parent(s): 32e01ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -13
app.py CHANGED
@@ -256,17 +256,17 @@ def apply_ocr(cell_coordinates, cropped_table):
256
  row_data = row_data + ["" for _ in range(max_num_columns - len(row_data))]
257
  data[str(idx)] = row_data
258
 
259
- # write to csv
260
- with open('output.csv','w') as result_file:
261
- wr = csv.writer(result_file, dialect='excel')
262
 
263
- for row, row_text in data.items():
264
- wr.writerow(row_text)
265
 
266
- # return as Pandas dataframe
267
- df = pd.read_csv('output.csv')
268
 
269
- return df, data
270
 
271
 
272
  def process_pdf(image):
@@ -284,13 +284,9 @@ def process_pdf(image):
284
  title = "Demo: table detection & recognition with Table Transformer (TATR)."
285
  description = """Demo for table extraction with the Table Transformer. First, table detection is performed on the input image using https://huggingface.co/microsoft/table-transformer-detection,
286
  after which the detected table is extracted and https://huggingface.co/microsoft/table-transformer-structure-recognition-v1.1-all is leveraged to recognize the individual rows, columns and cells. OCR is then performed per cell, row by row."""
287
- examples = [['image.png'], ['mistral_paper.png']]
288
 
289
  app = gr.Interface(fn=process_pdf,
290
  inputs=gr.Image(type="pil"),
291
- outputs=[gr.Image(type="pil", label="Detected table"), gr.Dataframe(label="Table as CSV"), gr.JSON(label="Data as JSON")],
292
- title=title,
293
- description=description,
294
- examples=examples)
295
  app.queue()
296
  app.launch(debug=True)
 
256
  row_data = row_data + ["" for _ in range(max_num_columns - len(row_data))]
257
  data[str(idx)] = row_data
258
 
259
+ # # write to csv
260
+ # with open('output.csv','w') as result_file:
261
+ # wr = csv.writer(result_file, dialect='excel')
262
 
263
+ # for row, row_text in data.items():
264
+ # wr.writerow(row_text)
265
 
266
+ # # return as Pandas dataframe
267
+ # df = pd.read_csv('output.csv')
268
 
269
+ return data
270
 
271
 
272
  def process_pdf(image):
 
284
  title = "Demo: table detection & recognition with Table Transformer (TATR)."
285
  description = """Demo for table extraction with the Table Transformer. First, table detection is performed on the input image using https://huggingface.co/microsoft/table-transformer-detection,
286
  after which the detected table is extracted and https://huggingface.co/microsoft/table-transformer-structure-recognition-v1.1-all is leveraged to recognize the individual rows, columns and cells. OCR is then performed per cell, row by row."""
 
287
 
288
  app = gr.Interface(fn=process_pdf,
289
  inputs=gr.Image(type="pil"),
290
+ outputs=[gr.Image(type="pil", label="Detected table"), gr.JSON(label="JSON")])
 
 
 
291
  app.queue()
292
  app.launch(debug=True)