"""Small Gradio app that extracts plain text from an uploaded PDF or DOCX file."""

import os

import gradio as gr
from docx import Document as DocxDocument
from PyPDF2 import PdfReader


def _resolve_path(file):
    """Return the filesystem path for an uploaded file.

    The file component below is created with ``type="filepath"``, in which
    case the callback receives the path itself rather than a file object.
    Objects exposing a ``.name`` attribute (temp-file style wrappers) are
    still accepted so existing callers keep working.
    """
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def process_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: Path to the PDF (``str`` / ``os.PathLike``) or an object whose
            ``.name`` attribute holds that path.

    Returns:
        The concatenated text of all pages, in page order.
    """
    pdf_reader = PdfReader(_resolve_path(file))
    # A page without a text layer (e.g. a scanned image) may yield no text;
    # treat that as an empty string instead of failing on concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def process_file(file):
    """Extract text from an uploaded ``.pdf`` or ``.docx`` file.

    Args:
        file: Path to the upload or an object whose ``.name`` holds the path.
            ``None`` means nothing was uploaded.

    Returns:
        The extracted text. An empty string is returned when no file was
        provided or the extension is neither ``pdf`` nor ``docx``.
    """
    if file is None:
        # The button can be clicked before anything has been uploaded.
        return ""

    path = _resolve_path(file)
    file_extension = os.path.splitext(path)[1].lstrip(".").lower()

    if file_extension == "pdf":
        return process_pdf(path)

    if file_extension == "docx":
        docx_document = DocxDocument(path)
        # Each paragraph is terminated by a newline, including the last one.
        return "".join(
            paragraph.text + "\n" for paragraph in docx_document.paragraphs
        )

    # Unsupported file type: show nothing rather than returning None.
    return ""


with gr.Blocks() as demo:
    gr.Markdown("### File upload", elem_classes="tab-header")
    with gr.Row():
        text_output = gr.Textbox(label="text")
    with gr.Row():
        file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
    with gr.Row():
        submit_button = gr.Button("upload")
    # Event listeners have to be registered inside the Blocks context.
    submit_button.click(process_file, inputs=file_input, outputs=text_output)


if __name__ == "__main__":
    # Only start the server when run as a script, not when imported.
    demo.launch()