Edurag_beta / utils /helpers.py
Nugh75's picture
ristrutturazione file
352ebdd
raw
history blame
850 Bytes
# utils/helpers.py
import logging
from app.document_handling import extract_text_from_pdf, extract_text_from_docx
def extract_text_from_files(files):
    """
    Extract and concatenate the text of PDF, DOCX and plain-text files.

    Each item's ``name`` attribute is used as the filesystem path of the
    file. The extension is matched case-insensitively: ``.pdf`` goes to
    ``extract_text_from_pdf``, ``.docx`` goes to ``extract_text_from_docx``,
    and anything else is read as UTF-8 text.

    Extraction is best-effort: a file that cannot be read is logged and
    skipped, and the remaining files are still processed.

    Args:
        files (list): Uploaded file objects exposing a ``name`` attribute.
            ``None`` or an empty list yields an empty string.

    Returns:
        str: The extracted texts joined in input order, with no separator
        between files.
    """
    if not files:
        return ""

    parts = []
    for file in files:
        path = file.name
        # Compare on a lower-cased copy so "REPORT.PDF" is not mistaken for
        # a plain-text file; the original path is still what gets opened.
        lowered = path.lower()
        try:
            if lowered.endswith('.pdf'):
                parts.append(extract_text_from_pdf(path))
            elif lowered.endswith('.docx'):
                parts.append(extract_text_from_docx(path))
            else:
                with open(path, 'r', encoding='utf-8') as f:
                    parts.append(f.read())
        except Exception as e:
            # Deliberately broad: one unreadable file must not abort the
            # whole batch. Nothing is appended for the failed file.
            logging.error("Errore durante la lettura del file %s: %s", path, e)
    return "".join(parts)