#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Split a UTF-8 text file into sentences with spaCy and print them.

Each non-empty sentence is written to stdout followed by a separator
line of 150 dashes.
"""
import argparse

import spacy

from project_settings import project_path


def get_args() -> argparse.Namespace:
    """Parse the command-line arguments.

    Returns:
        The parsed namespace. ``txt_file`` holds the path of the text file
        to segment; it defaults to ``data/e_book/confucianism/the_analects.txt``
        under ``project_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--txt_file",
        default=(project_path / "data/e_book/confucianism/the_analects.txt").as_posix(),
        type=str,
    )
    return parser.parse_args()


def main() -> None:
    """Read the input file, segment it into sentences, and print each one."""
    args = get_args()

    with open(args.txt_file, "r", encoding="utf-8") as f:
        data = f.read()

    # The whole file is processed as a single spaCy document.
    nlp = spacy.load("zh_core_web_sm")
    doc = nlp(data)

    for sentence in doc.sents:
        text = sentence.text.strip()
        # Skip sentences that contain nothing but whitespace.
        if not text:
            continue
        print(text)
        print("-" * 150)


if __name__ == '__main__':
    main()