Spaces:

okewunmi
/

pdf-text-extraction

Sleeping

App Files Files Community

okewunmi committed on Jul 16

Commit

625982b

verified ·

1 Parent(s): c1ce0f7

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -19

app.py CHANGED Viewed

@@ -1,25 +1,66 @@
-import streamlit as st
 import fitz  # PyMuPDF
 def extract_text_from_pdf(pdf_file):
-    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
-    text = ""
-    for page in doc:
-        text += page.get_text("text") + "\n"
-    return text
-def main():
-    st.title("PDF Text Extraction App")
-    st.write("Upload a PDF file to extract its text.")
-    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
-    if uploaded_file is not None:
-        text = extract_text_from_pdf(uploaded_file)
-        st.subheader("Extracted Text:")
-        st.text_area("", text, height=300)
-        st.download_button("Download Extracted Text", text, file_name="extracted_text.txt")
 if __name__ == "__main__":
-    main()

+import gradio as gr
 import fitz  # PyMuPDF
 def extract_text_from_pdf(pdf_file):
     """Extract the plain text of every page of an uploaded PDF.

     Args:
         pdf_file: The upload handed over by the UI. May be ``None`` (nothing
             uploaded), a filesystem path (``str`` or ``os.PathLike``, which
             is what a file input configured with ``type="filepath"``
             delivers), or a file-like object exposing the path as ``.name``.

     Returns:
         The text of all pages, each page followed by a newline. A
         human-readable message is returned instead when no file was given,
         when the PDF contains no text, or when processing fails.
     """
     import os  # local import: keeps this block self-contained

     if pdf_file is None:
         return "No file uploaded"
     try:
         # A path string has no ``.name`` attribute, so only fall back to
         # ``.name`` for file-like wrappers.
         if isinstance(pdf_file, (str, os.PathLike)):
             pdf_path = os.fspath(pdf_file)
         else:
             pdf_path = pdf_file.name
         # The context manager closes the document even if a page fails.
         with fitz.open(pdf_path) as doc:
             text = "".join(page.get_text("text") + "\n" for page in doc)
         if not text.strip():
             return "No text found in the PDF file"
         return text
     except Exception as e:
         # UI boundary: report the failure in the output box instead of
         # crashing the app.
         return f"Error processing PDF: {str(e)}"
+# Build the Gradio UI: upload control and button in the left column, extracted text in the right column
+with gr.Blocks(title="PDF Text Extraction App") as demo:
+    gr.Markdown("# 📄 PDF Text Extraction App")
+    gr.Markdown("Upload a PDF file to extract its text content.")
+    with gr.Row():
+        with gr.Column():
+            pdf_input = gr.File(
+                label="Upload PDF File",
+                file_types=[".pdf"],
+                type="filepath"
+            )
+            extract_btn = gr.Button("Extract Text", variant="primary")
+        with gr.Column():
+            text_output = gr.Textbox(
+                label="Extracted Text",
+                lines=20,
+                max_lines=30,
+                placeholder="Extracted text will appear here..."
+            )
+    # Clicking the button runs extract_text_from_pdf on the selected file and shows the result in the textbox
+    extract_btn.click(
+        fn=extract_text_from_pdf,
+        inputs=pdf_input,
+        outputs=text_output
+    )
+    # The same handler is also bound to the file input's change event, so extraction runs without pressing the button
+    pdf_input.change(
+        fn=extract_text_from_pdf,
+        inputs=pdf_input,
+        outputs=text_output
+    )
+# Start the Gradio server only when this file is executed as a script
 if __name__ == "__main__":
+    demo.launch()