Spaces:

Joshnicholas
/

table-extract

Sleeping

App Files Files Community

Joshnicholas committed on Feb 18, 2024

Commit

7b99985

verified ·

1 Parent(s): 32e01ec

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -13

app.py CHANGED Viewed

@@ -256,17 +256,17 @@ def apply_ocr(cell_coordinates, cropped_table):
           row_data = row_data + ["" for _ in range(max_num_columns - len(row_data))]
         data[str(idx)] = row_data
-    # write to csv
-    with open('output.csv','w') as result_file:
-        wr = csv.writer(result_file, dialect='excel')
-        for row, row_text in data.items():
-            wr.writerow(row_text)
-    # return as Pandas dataframe
-    df = pd.read_csv('output.csv')
-    return df, data
 def process_pdf(image):
@@ -284,13 +284,9 @@ def process_pdf(image):
 title = "Demo: table detection & recognition with Table Transformer (TATR)."
 description = """Demo for table extraction with the Table Transformer. First, table detection is performed on the input image using https://huggingface.co/microsoft/table-transformer-detection,
 after which the detected table is extracted and https://huggingface.co/microsoft/table-transformer-structure-recognition-v1.1-all is leveraged to recognize the individual rows, columns and cells. OCR is then performed per cell, row by row."""
-examples = [['image.png'], ['mistral_paper.png']]
 app = gr.Interface(fn=process_pdf,
                      inputs=gr.Image(type="pil"),
-                     outputs=[gr.Image(type="pil", label="Detected table"), gr.Dataframe(label="Table as CSV"), gr.JSON(label="Data as JSON")],
-                     title=title,
-                     description=description,
-                     examples=examples)
 app.queue()
 app.launch(debug=True)

           row_data = row_data + ["" for _ in range(max_num_columns - len(row_data))]
         data[str(idx)] = row_data
+    # # write to csv
+    # with open('output.csv','w') as result_file:
+    #     wr = csv.writer(result_file, dialect='excel')
+    #     for row, row_text in data.items():
+    #         wr.writerow(row_text)
+    # # return as Pandas dataframe
+    # df = pd.read_csv('output.csv')
+    return data
 def process_pdf(image):
 title = "Demo: table detection & recognition with Table Transformer (TATR)."
 description = """Demo for table extraction with the Table Transformer. First, table detection is performed on the input image using https://huggingface.co/microsoft/table-transformer-detection,
 after which the detected table is extracted and https://huggingface.co/microsoft/table-transformer-structure-recognition-v1.1-all is leveraged to recognize the individual rows, columns and cells. OCR is then performed per cell, row by row."""
 app = gr.Interface(fn=process_pdf,
                      inputs=gr.Image(type="pil"),
+                     outputs=[gr.Image(type="pil", label="Detected table"), gr.JSON(label="JSON")])
 app.queue()
 app.launch(debug=True)