Smart-Tasker / data_ingestion /ingest_data.py
Shahid
Added first commit
af30a30
raw
history blame contribute delete
390 Bytes
# data_ingestion/ingest_data.py
from docx import Document
def read_document(file_path):
    """Return the non-empty paragraph texts of a Word document.

    Args:
        file_path: Location of the .docx file; passed straight through
            to ``docx.Document``.

    Returns:
        A list of strings, one per paragraph in document order, each
        with leading and trailing whitespace removed. Paragraphs that
        are empty after stripping are left out.
    """
    doc = Document(file_path)
    # Strip lazily, then keep only the paragraphs that still have content.
    stripped = (paragraph.text.strip() for paragraph in doc.paragraphs)
    return [text for text in stripped if text]