""" Script for processing an input CV document """ import io import fitz from docx import Document def parse_pdf(pdf_file) -> str: """Read PDF from Streamlit's file uploader""" pdf_document = fitz.open("pdf", pdf_file) n_pages = len(pdf_document) all_text = [] for page_number in range(n_pages): page = pdf_document.load_page(page_number) all_text.append(page.get_text("text")) pdf_document.close() return "\n\n".join(all_text) def parse_docx(docx_file) -> str: """Read in docx file""" docx_file = io.BytesIO(docx_file) doc = Document(docx_file) all_text = [para.text for para in doc.paragraphs] return "\n".join(all_text)