Spaces:
Sleeping
Sleeping
File size: 850 Bytes
352ebdd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# utils/helpers.py
import logging
import os

from app.document_handling import extract_text_from_pdf, extract_text_from_docx
def extract_text_from_files(files):
    """
    Extract and concatenate the text of PDF, DOCX and plain-text files.

    Each entry is dispatched on its file extension, matched
    case-insensitively: ``.pdf`` goes to ``extract_text_from_pdf``, ``.docx``
    goes to ``extract_text_from_docx``, and anything else is read as UTF-8
    text. A file that cannot be read is logged and skipped, so one bad file
    does not discard the text of the others.

    Args:
        files (list): Files to read. Each item is either an object exposing
            the file's path as ``.name`` or a plain path (``str`` or
            ``os.PathLike``).

    Returns:
        str: The text of all readable files, concatenated in input order
        with no separator. Empty string when nothing could be read.
    """
    text = ""
    for file in files:
        # Plain paths must be used directly: ``str`` has no ``.name`` and on
        # a ``pathlib.Path`` ``.name`` is only the basename, not the path.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = file.name
        try:
            # Compare extensions case-insensitively so "REPORT.PDF" is not
            # mistaken for a plain-text file.
            lowered = path.lower()
            if lowered.endswith('.pdf'):
                chunk = extract_text_from_pdf(path)
            elif lowered.endswith('.docx'):
                chunk = extract_text_from_docx(path)
            else:
                with open(path, 'r', encoding='utf-8') as f:
                    chunk = f.read()
            # Concatenate inside the try so a non-string extractor result is
            # logged and skipped like any other per-file failure.
            text += chunk
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going.
            logging.error(f"Errore durante la lettura del file {path}: {e}")
    return text
|