Spaces:
Running
Running
#!/usr/bin/python3 | |
# -*- coding: utf-8 -*- | |
import argparse | |
import spacy | |
from project_settings import project_path | |
def get_args():
    """Parse the command-line options for this script.

    Options:
        --txt_file: path of the text file to process, as a string. Defaults
            to ``data/e_book/confucianism/the_analects.txt`` resolved against
            ``project_path`` and rendered with forward slashes.

    Returns:
        argparse.Namespace: the parsed arguments (attribute ``txt_file``).
    """
    default_txt_file = project_path / "data/e_book/confucianism/the_analects.txt"

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--txt_file",
        default=default_txt_file.as_posix(),
        type=str,
    )
    return arg_parser.parse_args()
def main():
    """Print the input text one sentence at a time.

    Reads the whole file named by ``--txt_file`` as UTF-8, runs it through the
    spaCy pipeline ``zh_core_web_sm``, and for every sentence in ``doc.sents``
    whose stripped text is non-empty prints the text followed by a line of
    150 dashes.
    """
    cli_args = get_args()

    # The file is read before the pipeline is loaded, so a bad path fails
    # first, before any model loading is attempted.
    with open(cli_args.txt_file, encoding="utf-8") as txt_file:
        raw_text = txt_file.read()

    pipeline = spacy.load("zh_core_web_sm")
    parsed = pipeline(raw_text)

    separator = "-" * 150
    stripped_sentences = (span.text.strip() for span in parsed.sents)
    for sentence_text in stripped_sentences:
        # Skip sentences that are only whitespace.
        if sentence_text:
            print(sentence_text)
            print(separator)
# Run the script only when executed directly, not when imported as a module.
if __name__ == '__main__':
    main()