okewunmi committed on
Commit
625982b
·
verified ·
1 Parent(s): c1ce0f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -19
app.py CHANGED
@@ -1,25 +1,66 @@
1
- import streamlit as st
2
  import fitz # PyMuPDF
3
 
4
  def extract_text_from_pdf(pdf_file):
5
- doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
6
- text = ""
7
- for page in doc:
8
- text += page.get_text("text") + "\n"
9
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- def main():
12
- st.title("PDF Text Extraction App")
13
- st.write("Upload a PDF file to extract its text.")
14
-
15
- uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
16
-
17
- if uploaded_file is not None:
18
- text = extract_text_from_pdf(uploaded_file)
19
- st.subheader("Extracted Text:")
20
- st.text_area("", text, height=300)
21
-
22
- st.download_button("Download Extracted Text", text, file_name="extracted_text.txt")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
 
24
  if __name__ == "__main__":
25
- main()
 
1
+ import gradio as gr
2
  import fitz # PyMuPDF
3
 
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of an uploaded PDF.

    Args:
        pdf_file: The value handed over by the upload widget. Either a
            filesystem path given as a ``str`` or an object that exposes
            the path through a ``.name`` attribute. ``None`` means that
            nothing has been uploaded.

    Returns:
        The text of all pages, each page followed by a newline. When there
        is no file, no extractable text, or the PDF cannot be processed, a
        human-readable status message is returned instead; this function
        never raises for a failed extraction.
    """
    if pdf_file is None:
        return "No file uploaded"

    try:
        # A plain string is already the path; anything else is expected to
        # carry the path in ``.name``.
        pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

        # The context manager closes the document even if a page fails
        # part-way through extraction.
        with fitz.open(pdf_path) as doc:
            text = "".join(page.get_text("text") + "\n" for page in doc)
    except Exception as e:
        # UI boundary: surface the failure as text in the output box rather
        # than letting the exception propagate to the caller.
        return f"Error processing PDF: {str(e)}"

    if not text.strip():
        return "No text found in the PDF file"

    return text
27
 
# Build the Gradio UI: upload controls in the left column, extracted text in
# the right column.
with gr.Blocks(title="PDF Text Extraction App") as demo:
    gr.Markdown("# 📄 PDF Text Extraction App")
    gr.Markdown("Upload a PDF file to extract its text content.")

    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF File", file_types=[".pdf"], type="filepath")
            extract_btn = gr.Button("Extract Text", variant="primary")

        with gr.Column():
            text_output = gr.Textbox(
                label="Extracted Text",
                lines=20,
                max_lines=30,
                placeholder="Extracted text will appear here...",
            )

    # Pressing the button and changing the uploaded file both run the same
    # extraction, so the two listeners share one set of arguments.
    _extraction_wiring = dict(fn=extract_text_from_pdf, inputs=pdf_input, outputs=text_output)
    extract_btn.click(**_extraction_wiring)
    pdf_input.change(**_extraction_wiring)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()