# Spaces: Runtime error
import base64
import io
import os
import tempfile

import cv2
import fitz
import gradio as gr
from PIL import Image
def pdf_to_img(pdf_path):
    """Extract every embedded image from a PDF and save each one to disk.

    Each page's image list is walked in page order. Every image is decoded
    with Pillow and written to the current working directory as
    ``image<N>.<ext>``, where ``N`` counts up from 1 across the whole
    document and ``ext`` is the extension PyMuPDF reports for that image.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        A list of the file names written, in extraction order.
    """
    # NOTE(review): output names are fixed and relative to the working
    # directory, so a later call overwrites the files of an earlier one.
    img_list = []
    pdf_document = fitz.open(pdf_path)
    try:
        counter = 1
        for page in pdf_document:
            for image in page.get_images():
                # The first field of each image entry is the xref that
                # extract_image() expects.
                base_img = pdf_document.extract_image(image[0])
                extention = base_img["ext"]
                file_name = f"image{counter}.{extention}"
                # Saving by path lets Pillow open and close the output file
                # itself; the context manager releases the decoded image.
                with Image.open(io.BytesIO(base_img["image"])) as img:
                    img.save(file_name)
                img_list.append(file_name)
                counter += 1
    finally:
        # Close the document even if an image fails to decode or save.
        pdf_document.close()
    return img_list
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Path of the PDF file to open.

    Returns:
        A single string holding each page's extracted text back to back,
        with no separator added between pages.
    """
    doc = fitz.open(pdf_file)
    try:
        # Join once instead of growing a string page by page.
        return "".join(page.get_text() for page in doc)
    finally:
        # Close the document even if text extraction raises part-way.
        doc.close()
# Gradio UI: upload a PDF, then show its embedded images and its text.
title = "Extract Image and Text"

# NOTE(review): the original indentation was lost, so the nesting of the
# Row/Column containers below is a reconstruction -- confirm it matches the
# intended layout.
with gr.Blocks(theme=gr.themes.Glass(primary_hue=gr.themes.colors.slate)) as demo:
    gr.Markdown(f'<h1 style="text-align: center;">{title}</h1>')
    with gr.Row():
        with gr.Row():
            with gr.Column():
                # type="filepath" hands the upload to the handlers as a path.
                file_input = gr.File(type="filepath", label="Upload .pdf file")
                upload_button = gr.Button(value="Show Images")
                img_gallery = gr.Gallery(
                    label="Generated images",
                    show_label=True,
                    elem_id="gallery",
                    object_fit="contain",
                    height="auto",
                    allow_preview=True,
                )
        with gr.Row():
            with gr.Column():
                # `text` is a second module-level name for the same Textbox;
                # it is kept so any existing reference to it keeps working.
                output_text = text = gr.Textbox(label="Output", lines=4, autoscroll=False)
    # One button click triggers both handlers: the first fills the gallery
    # with the extracted images, the second fills the textbox with the text.
    upload_button.click(pdf_to_img, inputs=file_input, outputs=[img_gallery])
    upload_button.click(extract_text_from_pdf, inputs=file_input, outputs=[output_text])

demo.launch()