# data_ingestion/ingest_data.py
from docx import Document


def read_document(file_path):
    """Return the non-empty paragraph texts of a Word document.

    Args:
        file_path: Location of the document; handed unchanged to
            ``docx.Document``.

    Returns:
        A list of strings, one per paragraph in ``document.paragraphs``
        order, each stripped of leading and trailing whitespace.
        Paragraphs that are empty after stripping are left out.
    """
    document = Document(file_path)
    # NOTE(review): only ``document.paragraphs`` is read, so text stored
    # elsewhere in the file (e.g. table cells) is presumably not returned --
    # confirm against the python-docx documentation.
    stripped = (para.text.strip() for para in document.paragraphs)
    # Filtering after stripping drops whitespace-only paragraphs as well.
    return [line for line in stripped if line]