Spaces:

Vihang28
/

extract_images_text

Runtime error

Vihang28 committed on Apr 24, 2024

Commit

fe3c6cf

verified ·

1 Parent(s): b0a8ef0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -24,17 +24,38 @@ def pdf_to_img(pdf_path):
             counter += 1
     return (img_list)
 title = "Extract Image and Text"
 with gr.Blocks(theme=gr.themes.Glass(primary_hue=gr.themes.colors.slate)) as demo:
     gr.Markdown(f'<h1 style="text-align: center;">{title}</h1>')
-    with gr.Row():
         with gr.Row():
             with gr.Column():
                 file_input = gr.File(type="filepath", label="Upload .pdf file")
                 upload_button = gr.Button(value="Show Images")
                 img_gallery = gr.Gallery(label="Generated images", show_label=True, elem_id="gallery", object_fit="contain", height="auto",allow_preview=True)
     upload_button.click(pdf_to_img, inputs=file_input, outputs=[img_gallery])
 demo.launch()

             counter += 1
     return (img_list)
+def extract_text_from_pdf(pdf_file):
+    # Open the PDF file
+    doc = fitz.open(pdf_file)
+    # Initialize an empty string to store the extracted text
+    extracted_text = ""
+    # Iterate through each page of the PDF
+    for page_num in range(len(doc)):
+        # Load the page
+        page = doc.load_page(page_num)
+        # Extract text from the page and append it to the extracted_text string
+        extracted_text += page.get_text()
+    # Close the PDF document
+    doc.close()
+    return extracted_text
 title = "Extract Image and Text"
 with gr.Blocks(theme=gr.themes.Glass(primary_hue=gr.themes.colors.slate)) as demo:
     gr.Markdown(f'<h1 style="text-align: center;">{title}</h1>')
         with gr.Row():
             with gr.Column():
                 file_input = gr.File(type="filepath", label="Upload .pdf file")
                 upload_button = gr.Button(value="Show Images")
                 img_gallery = gr.Gallery(label="Generated images", show_label=True, elem_id="gallery", object_fit="contain", height="auto",allow_preview=True)
+    with gr.Row():
+        output_text = text = gr.Textbox(label="Output", lines=4)
     upload_button.click(pdf_to_img, inputs=file_input, outputs=[img_gallery])
+    upload_button.click(extract_text_from_pdf, inputs=file_input, outputs=[output_text])
 demo.launch()