e_book_reading / examples /paragraph /paragraph_to_sentence.py
HoneyTian's picture
update
f25c867
raw
history blame
791 Bytes
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import spacy
from project_settings import project_path
def get_args():
    """Parse the command-line options for this example script.

    Returns:
        The ``argparse.Namespace`` produced by parsing ``sys.argv``. It has one
        attribute, ``txt_file`` (str): the path of the text file to split,
        defaulting to ``data/e_book/confucianism/the_analects.txt`` joined
        onto ``project_path``.
    """
    # project_path is presumably a pathlib.Path (it supports ``/`` and
    # ``as_posix``); the default is handed to argparse as a POSIX-style string.
    default_txt_file = project_path / "data/e_book/confucianism/the_analects.txt"

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--txt_file", default=default_txt_file.as_posix(), type=str)
    return arg_parser.parse_args()
def main():
    """Split a text file into sentences with spaCy and print them.

    Reads the file given by ``--txt_file`` as UTF-8, runs the whole text
    through the ``zh_core_web_sm`` pipeline, and prints every sentence that is
    non-empty after stripping surrounding whitespace, each followed by a
    150-character dashed separator line.
    """
    txt_file = get_args().txt_file
    with open(txt_file, "r", encoding="utf-8") as f:
        content = f.read()

    pipeline = spacy.load("zh_core_web_sm")
    for span in pipeline(content).sents:
        stripped = span.text.strip()
        # Whitespace-only sentences are skipped entirely (no separator either).
        if stripped:
            print(stripped)
            print("-" * 150)
# Run the sentence splitter only when executed as a script, not on import.
if __name__ == "__main__":
    main()