File size: 774 Bytes
3c5ed5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import PyPDF2
from docx import Document

def extract_text_from_pdf(file_path):
    """
    Extract the text from a PDF file.

    Args:
        file_path: Path to the PDF file.

    Returns:
        str: Text of all pages concatenated in page order, with no
            separator inserted between pages.
    """
    with open(file_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)
        # Build the result with a single join instead of growing a string
        # with += for every page. The `or ""` guard makes a page with no
        # extractable text contribute nothing, instead of raising TypeError
        # should extract_text() return None for it.
        # The join runs inside the `with` so the file is still open while
        # the pages are being read.
        return "".join(page.extract_text() or "" for page in reader.pages)

def extract_text_from_docx(file_path):
    """
    Extract the text from a DOCX file.

    Args:
        file_path: Path to the DOCX file.

    Returns:
        str: Text of every entry in the document's ``paragraphs``, each
            followed by a newline character.
    """
    doc = Document(file_path)
    # Single join instead of repeated += concatenation; every paragraph
    # (including the last one) keeps its trailing "\n".
    return "".join(para.text + "\n" for para in doc.paragraphs)