Spaces:
Running
Running
File size: 791 Bytes
f25c867 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import spacy
from project_settings import project_path
def get_args():
    """Parse the command-line options for this script.

    Returns:
        argparse.Namespace: namespace with a single attribute, ``txt_file``,
        the path of the text file to process. When ``--txt_file`` is not
        given it defaults to ``data/e_book/confucianism/the_analects.txt``
        under ``project_path``, rendered as a POSIX-style string.
    """
    default_txt_file = project_path / "data/e_book/confucianism/the_analects.txt"

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--txt_file",
        type=str,
        default=default_txt_file.as_posix(),
    )
    return cli.parse_args()
def main():
    """Print every non-empty sentence of the input file, one per block.

    Reads the file named by the ``--txt_file`` option as UTF-8, runs it
    through the spaCy pipeline named ``zh_core_web_sm``, and for each
    sentence in the resulting document prints the whitespace-stripped text
    followed by a separator line of 150 dashes. Sentences that are empty
    after stripping are skipped.
    """
    options = get_args()

    with open(options.txt_file, "r", encoding="utf-8") as handle:
        corpus = handle.read()

    # The model is loaded only after the file has been read successfully.
    pipeline = spacy.load("zh_core_web_sm")
    parsed = pipeline(corpus)

    separator = "-" * 150
    for span in parsed.sents:
        stripped = span.text.strip()
        if stripped:
            print(stripped)
            print(separator)
# Run the sentence splitter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|