Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -24,17 +24,38 @@ def pdf_to_img(pdf_path):
|
|
24 |
counter += 1
|
25 |
return (img_list)
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
# Heading text rendered at the top of the page.
title = "Extract Image and Text"

# NOTE(review): this is the pre-change side of the diff and the scrape lost all
# indentation. The two stacked Row contexts are nested because that is the only
# syntactically valid reading; whether the gallery sits beside or inside the
# column is an assumption -- confirm against the real app.py.
app_theme = gr.themes.Glass(primary_hue=gr.themes.colors.slate)
with gr.Blocks(theme=app_theme) as demo:
    gr.Markdown(f'<h1 style="text-align: center;">{title}</h1>')

    with gr.Row():
        with gr.Row():
            # Upload controls.
            with gr.Column():
                file_input = gr.File(type="filepath", label="Upload .pdf file")
                upload_button = gr.Button(value="Show Images")
            # Output of pdf_to_img is shown here.
            img_gallery = gr.Gallery(
                label="Generated images",
                show_label=True,
                elem_id="gallery",
                object_fit="contain",
                height="auto",
                allow_preview=True,
            )

    # Clicking the button passes the uploaded file path to pdf_to_img and
    # routes its return value into the gallery.
    upload_button.click(pdf_to_img, inputs=file_input, outputs=[img_gallery])

demo.launch()
|
|
|
24 |
counter += 1
|
25 |
return (img_list)
|
26 |
|
27 |
+
def extract_text_from_pdf(pdf_file) -> str:
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Location of the PDF; handed unchanged to ``fitz.open``.

    Returns:
        The ``get_text()`` output of each page, in page order, joined with no
        separator between pages.
    """
    # Using the document as a context manager guarantees it is closed even if
    # a page raises during extraction; previously close() was only reached on
    # the success path.
    with fitz.open(pdf_file) as doc:
        # Iterate the pages directly and join once instead of growing a
        # string with += inside an index loop.
        return "".join(page.get_text() for page in doc)
|
45 |
+
|
46 |
# Heading text rendered at the top of the page.
title = "Extract Image and Text"

# NOTE(review): the scraped diff lost all indentation, so the nesting below is
# reconstructed. Whether the gallery sits beside or inside the column is an
# assumption -- confirm against the real app.py.
app_theme = gr.themes.Glass(primary_hue=gr.themes.colors.slate)
with gr.Blocks(theme=app_theme) as demo:
    gr.Markdown(f'<h1 style="text-align: center;">{title}</h1>')

    # First row: upload controls plus the image gallery.
    with gr.Row():
        with gr.Column():
            file_input = gr.File(type="filepath", label="Upload .pdf file")
            upload_button = gr.Button(value="Show Images")
        img_gallery = gr.Gallery(
            label="Generated images",
            show_label=True,
            elem_id="gallery",
            object_fit="contain",
            height="auto",
            allow_preview=True,
        )

    # Second row: textbox that receives the extracted text.
    with gr.Row():
        output_text = gr.Textbox(label="Output", lines=4)
        # The original bound the same Textbox to a second name in one chained
        # assignment; both names are kept.
        text = output_text

    # Two listeners are registered on the same button: one sends the uploaded
    # path to pdf_to_img for the gallery, the other sends it to
    # extract_text_from_pdf for the textbox.
    upload_button.click(pdf_to_img, inputs=file_input, outputs=[img_gallery])
    upload_button.click(extract_text_from_pdf, inputs=file_input, outputs=[output_text])

demo.launch()
|