Spaces:
Sleeping
Sleeping
# utils/helpers.py | |
import logging | |
from app.document_handling import extract_text_from_pdf, extract_text_from_docx | |
def extract_text_from_files(files):
    """Extract and concatenate the text of PDF, DOCX and plain-text files.

    Each entry is dispatched on its file extension (case-insensitively):
    ``.pdf`` goes to ``extract_text_from_pdf``, ``.docx`` goes to
    ``extract_text_from_docx`` and anything else is read as UTF-8 text.
    Processing is best-effort: a file that cannot be read is logged and
    skipped, and the remaining files are still processed.

    Args:
        files: Iterable of uploaded files. Each entry is either a ``str``
            path or an object exposing the path as its ``name`` attribute.
            ``None`` or an empty iterable yields an empty string.

    Returns:
        str: The extracted texts joined in input order, with no separator.
    """
    if not files:
        return ""

    parts = []
    for file in files:
        # Fallback label for the log line, used when the path itself cannot
        # be resolved (so the error handler never touches ``file.name``).
        label = file
        try:
            path = file if isinstance(file, str) else file.name
            label = path
            # Compare on a lowered copy so ".PDF"/".Docx" are recognised,
            # but hand the untouched path to the extractors.
            lowered = path.lower()
            if lowered.endswith('.pdf'):
                extracted = extract_text_from_pdf(path)
            elif lowered.endswith('.docx'):
                extracted = extract_text_from_docx(path)
            else:
                with open(path, 'r', encoding='utf-8') as handle:
                    extracted = handle.read()
            # A non-string result must fail here, inside the try, so it is
            # logged and skipped instead of breaking the final join.
            if not isinstance(extracted, str):
                raise TypeError(
                    f"expected str, got {type(extracted).__name__}"
                )
            parts.append(extracted)
        except Exception as e:
            # Deliberately broad: one unreadable file must not abort the batch.
            logging.error(
                "Errore durante la lettura del file %s: %s", label, e
            )
    return "".join(parts)