Spaces:

richardprobe
/

pdf_upload

Build error

Richard Hsu committed on Jul 26, 2024

Commit

f59bbd9

1 Parent(s): 01f0fe3

push

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,16 +1,26 @@
 import gradio as gr
-from langchain.document_loaders import PyPDFLoader
-def pdf_to_text(pdf_file):
-    loader = PyPDFLoader(pdf_file.name)
-    documents = loader.load()
-    text = "\n".join([doc.page_content for doc in documents])
-    print(text)  # Log the loaded text
     return text
-def pdf_to_text_interface(pdf_file):
-    text = pdf_to_text(pdf_file)
-    return text
-iface = gr.Interface(fn=pdf_to_text_interface, inputs="file", outputs="text", title="PDF to Text Converter <3")
-iface.launch()

 import gradio as gr
+import fitz  # PyMuPDF
+def extract_text_from_pdf(pdf_file):
+    # Open the PDF file
+    pdf_document = fitz.open(pdf_file.name)
+    text = ""
+    # Extract text from each page
+    for page_num in range(len(pdf_document)):
+        page = pdf_document.load_page(page_num)
+        text += page.get_text()
     return text
+# Create a Gradio interface
+interface = gr.Interface(
+    fn=extract_text_from_pdf,
+    inputs=gr.inputs.File(label="Upload PDF"),
+    outputs=gr.outputs.Textbox(label="Extracted Text"),
+    title="PDF Text Extractor",
+    description="Upload a PDF file to extract and display its text content."
+)
+# Launch the interface
+interface.launch(share=True)