import PyPDF2
from docx import Document


def extract_text_from_pdf(file_path) -> str:
    """Extract the text from a PDF file.

    The text of every page is concatenated in page order with no separator
    inserted between pages.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        str: Text extracted from the PDF.
    """
    # The file must stay open while pages are read: the reader pulls page
    # content from the underlying stream on demand.
    with open(file_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # `or ""` guards against a page yielding None instead of a string
        # (reported for pages without a text layer in some PyPDF2 releases;
        # confirm against the pinned version). Joining once avoids the
        # quadratic cost of repeated string concatenation.
        return "".join(page.extract_text() or "" for page in reader.pages)


def extract_text_from_docx(file_path) -> str:
    """Extract the text from a DOCX file.

    Only the paragraphs exposed by ``Document.paragraphs`` are read; each
    paragraph's text is followed by a newline, including the last one.

    Args:
        file_path: Path of the DOCX file to read.

    Returns:
        str: Text extracted from the Word document.
    """
    doc = Document(file_path)
    # Build the result in one pass instead of growing a string with `+=`.
    return "".join(f"{para.text}\n" for para in doc.paragraphs)