Spaces:
Sleeping
Sleeping
File size: 1,139 Bytes
fc5e5f2 3117482 897be90 eb44df9 3117482 8ccf021 3117482 d6fee21 3117482 d6fee21 641a65d 3117482 8ccf021 3117482 d6fee21 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import gradio as gr
from PyPDF2 import PdfReader
import os
from docx import Document as DocxDocument
def process_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: Either an object whose ``name`` attribute holds the path of the
            PDF on disk, or the path itself (``str`` / ``os.PathLike``).

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages.
    """
    # A path-like value is used directly; anything else is expected to carry
    # the path in its ``name`` attribute.
    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
    else:
        pdf_path = file.name

    pdf_reader = PdfReader(pdf_path)
    # Defensive ``or ""``: keeps the join from failing should a page yield no
    # text at all.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def process_file(file):
    """Return the plain text of an uploaded PDF or DOCX file.

    Args:
        file: The uploaded file: an object whose ``name`` attribute holds the
            path on disk, a path given as ``str`` / ``os.PathLike``, or
            ``None`` when there is nothing to process.

    Returns:
        The extracted text. An empty string is returned when ``file`` is
        ``None`` or when the extension is neither ``.pdf`` nor ``.docx``
        (compared case-insensitively).
    """
    if file is None:
        # Presumably what the click handler receives when no file has been
        # selected; return empty text instead of failing on ``None.name``.
        return ""

    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    # splitext inspects only the final path component, so a name without an
    # extension (e.g. a file literally called "pdf") is not mistaken for one.
    extension = os.path.splitext(path)[1].lower()

    if extension == ".pdf":
        # Forward the original argument unchanged; process_pdf resolves the
        # path on its own.
        return process_pdf(file)

    if extension == ".docx":
        docx_document = DocxDocument(path)
        # Every paragraph is terminated by a newline, including the last one.
        return "".join(
            f"{paragraph.text}\n" for paragraph in docx_document.paragraphs
        )

    # Unsupported file type: nothing to extract.
    return ""
# Build the upload UI: a header, a textbox for the extracted text, a file
# picker and a button, each of the three widgets placed in its own row.
with gr.Blocks() as demo:
    gr.Markdown("### File upload", elem_classes="tab-header")
    with gr.Row():
        text_output = gr.Textbox(label="text")
    with gr.Row():
        file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
    with gr.Row():
        submit_button = gr.Button("upload")
    # Clicking the button runs process_file on the selected file and writes
    # the returned text into the textbox.
    submit_button.click(process_file, inputs=file_input, outputs=text_output)

# Start the server only when this file is executed as a script, so the module
# can be imported without launching the app.
if __name__ == "__main__":
    demo.launch()