Vihang28 committed on
Commit
fe3c6cf
·
verified ·
1 Parent(s): b0a8ef0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -1
app.py CHANGED
@@ -24,17 +24,38 @@ def pdf_to_img(pdf_path):
24
  counter += 1
25
  return (img_list)
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  title = "Extract Image and Text"
28
 
29
 
30
  with gr.Blocks(theme=gr.themes.Glass(primary_hue=gr.themes.colors.slate)) as demo:
31
  gr.Markdown(f'<h1 style="text-align: center;">{title}</h1>')
32
- with gr.Row():
33
  with gr.Row():
34
  with gr.Column():
35
  file_input = gr.File(type="filepath", label="Upload .pdf file")
36
  upload_button = gr.Button(value="Show Images")
37
  img_gallery = gr.Gallery(label="Generated images", show_label=True, elem_id="gallery", object_fit="contain", height="auto",allow_preview=True)
38
 
 
 
39
  upload_button.click(pdf_to_img, inputs=file_input, outputs=[img_gallery])
 
40
  demo.launch()
 
24
  counter += 1
25
  return (img_list)
26
 
def extract_text_from_pdf(pdf_file) -> str:
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Path of the PDF to read (the UI wires the uploaded file
            input to this parameter). A falsy value such as ``None`` or
            ``""`` yields an empty string without opening anything.

    Returns:
        The text of all pages joined together, with no separator added
        between pages.
    """
    # Nothing to read (e.g. the button was clicked before a file was
    # chosen): return early instead of asking PyMuPDF to open "nothing".
    if not pdf_file:
        return ""

    # The context manager closes the document even when text extraction
    # raises part-way through; the previous explicit close() call only ran
    # on the success path.
    with fitz.open(pdf_file) as doc:
        # Iterating the document yields its pages in order; join once
        # rather than growing a string with repeated "+=".
        return "".join(page.get_text() for page in doc)
45
+
# Heading text rendered at the top of the page.
title = "Extract Image and Text"


# Page layout: an upload column beside an image gallery, with a text box in
# a second row underneath.
# NOTE(review): the nesting below was reconstructed from a flattened diff
# view in which indentation was lost -- confirm it against the real app.py.
with gr.Blocks(theme=gr.themes.Glass(primary_hue=gr.themes.colors.slate)) as demo:
    gr.Markdown(f'<h1 style="text-align: center;">{title}</h1>')

    with gr.Row():
        with gr.Column():
            file_input = gr.File(type="filepath", label="Upload .pdf file")
            upload_button = gr.Button(value="Show Images")
        img_gallery = gr.Gallery(
            label="Generated images",
            show_label=True,
            elem_id="gallery",
            object_fit="contain",
            height="auto",
            allow_preview=True,
        )

    with gr.Row():
        # Previously bound to a second, unused name (`text`) as well.
        output_text = gr.Textbox(label="Output", lines=4)

    # The same button click drives both handlers with the same file input:
    # one fills the gallery, the other fills the text box.
    upload_button.click(pdf_to_img, inputs=file_input, outputs=[img_gallery])
    upload_button.click(extract_text_from_pdf, inputs=file_input, outputs=[output_text])

demo.launch()