File size: 850 Bytes
352ebdd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# utils/helpers.py

import logging
import os

from app.document_handling import extract_text_from_pdf, extract_text_from_docx

def extract_text_from_files(files):
    """
    Extract and concatenate the text of PDF, DOCX and plain-text files.

    Files are processed in the order given and their text is joined with no
    separator. This is a best-effort operation: a file that cannot be read
    is reported through ``logging.error`` and skipped, and the remaining
    files are still processed.

    Args:
        files (list): Uploaded files. Each entry is either an object
            exposing a ``name`` attribute that holds the file's path, or a
            path given directly as ``str`` / ``os.PathLike``. ``None`` or
            an empty list yields an empty string.

    Returns:
        str: The concatenated text extracted from the readable files.
    """
    parts = []
    for file in files or ():
        path = None
        try:
            # Check for path-likes first: a pathlib.Path also has a `.name`
            # attribute, but it is only the basename, not a usable path.
            if isinstance(file, (str, os.PathLike)):
                path = os.fspath(file)
            else:
                path = file.name

            # Match extensions case-insensitively so "REPORT.PDF" is not
            # mistakenly opened as UTF-8 text.
            lowered = path.lower()
            if lowered.endswith('.pdf'):
                content = extract_text_from_pdf(path)
            elif lowered.endswith('.docx'):
                content = extract_text_from_docx(path)
            else:
                # Anything else is treated as UTF-8 encoded plain text.
                with open(path, 'r', encoding='utf-8') as f:
                    content = f.read()

            # Validate inside the try so a misbehaving extractor is logged
            # for this file instead of breaking the final join.
            if not isinstance(content, str):
                raise TypeError(
                    f"expected str from extractor, got {type(content).__name__}"
                )
            parts.append(content)
        except Exception as e:
            # Broad catch is deliberate: one bad file must not abort the
            # batch. Fall back to the raw entry when no path was resolved so
            # the handler itself can never raise.
            logging.error(
                "Errore durante la lettura del file %s: %s",
                path if path is not None else file,
                e,
            )
    return "".join(parts)