Spaces:
Sleeping
Sleeping
File size: 390 Bytes
af30a30 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
# data_ingestion/ingest_data.py
from docx import Document
def read_document(file_path):
    """Return the non-empty paragraph texts of a Word document.

    The document at ``file_path`` is opened with ``docx.Document``. Each
    paragraph's text has leading and trailing whitespace stripped, and
    paragraphs that are empty after stripping are left out.

    Args:
        file_path: Location of the document, passed straight to
            ``Document``.

    Returns:
        A list of stripped paragraph strings, in document order.
    """
    paragraphs = Document(file_path).paragraphs
    # Strip lazily so each paragraph's text is read and trimmed exactly once.
    stripped_texts = (paragraph.text.strip() for paragraph in paragraphs)
    return [text for text in stripped_texts if text]
|