Spaces:
Sleeping
Sleeping
# data_ingestion/ingest_data.py
from docx import Document
def read_document(file_path):
    """Read a Word document and return its non-empty paragraph texts.

    Args:
        file_path: Location of the Word document; passed unchanged to
            ``Document``.

    Returns:
        A list of strings in ``document.paragraphs`` order, each stripped
        of leading and trailing whitespace. Paragraphs that are empty
        after stripping are omitted.

    Note:
        Only ``document.paragraphs`` is read; anything the document
        exposes through other attributes is not collected here.
    """
    document = Document(file_path)
    # Strip each paragraph once, then drop the ones that end up empty.
    stripped = (para.text.strip() for para in document.paragraphs)
    return [text for text in stripped if text]