syntactic_tree / utils.py
nanom's picture
Minor fixes
65fca12
raw
history blame
3.07 kB
import subprocess
import sys
from typing import Tuple

import spacy
from anytree import Node, RenderTree
from spacy import displacy
class Pipeline:
    """Parse a sentence with spaCy and render its dependency tree.

    Two renderings are produced for each sentence: an HTML snippet wrapping
    the displaCy ``dep`` visualisation, and a plain-text tree drawn with
    ``anytree``. The most recent result is cached so that asking for the
    same sentence twice in a row does not re-run the parser.
    """

    # HTML wrapper used for user-facing warnings; ``{}`` receives the message.
    _ERROR_TEMPLATE = (
        "\n"
        "<center>\n"
        "<div class=\"alert alert-warning\" role=\"alert\">\n"
        "<h6><b>{}</b></h6>\n"
        "</div>\n"
        "</center>\n"
    )

    def __init__(self) -> None:
        """Load the ``en_core_web_md`` model and start with an empty cache."""
        self.nlp = None
        # One-entry cache: last sentence processed and its two renderings.
        self.__ch_html_tree = None
        self.__ch_str_tree = None
        self.__ch_sentence = None
        self.__init_nlp(model="en_core_web_md")

    def __init_nlp(self, model: str) -> None:
        """Load the spaCy pipeline *model*, downloading it first if missing.

        Args:
            model: Name of the spaCy model package to load.

        Raises:
            OSError: If the model still cannot be loaded after the download
                attempt (raised by the second ``spacy.load`` call).
        """
        self.nlp = None
        try:
            self.nlp = spacy.load(model)
        except OSError:
            # spaCy signals a missing model with OSError; anything else
            # (e.g. KeyboardInterrupt) must not trigger a download.
            print(f"* Downloading {model} model...")
            # Argument list + sys.executable: no shell interpolation of
            # ``model`` and the package lands in the running interpreter's
            # environment rather than whichever ``python`` is first on PATH.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                stdout=subprocess.DEVNULL,
                check=False,  # a failed download surfaces via spacy.load below
            )
            self.nlp = spacy.load(model)

    def __postag(self, tk: "spacy.tokens.Token") -> str:
        """Return the label suffix for *tk*: its tag, plus a plural marker.

        Args:
            tk: Token whose fine-grained ``tag_`` is read.

        Returns:
            ``" (TAG)"``, or ``" (TAG) (Plural)"`` when the tag is ``NNS``
            or ``NNPS``.
        """
        if tk.tag_ in ("NNS", "NNPS"):
            return f" ({tk.tag_}) (Plural)"
        return f" ({tk.tag_})"

    def __genSyntacticTree(self, expr: str) -> Tuple[str, str]:
        """Parse *expr* and build both renderings of its dependency tree.

        The plain-text tree covers only the first sentence of the parsed
        document; the displaCy rendering is given the whole document.

        Args:
            expr: Text to parse.

        Returns:
            A ``(html_tree, str_tree)`` tuple.
        """
        doc = self.nlp(expr)
        root = next(doc.sents).root
        root_node = Node(f"{root.text}: (Root){self.__postag(root)}", parent=None)

        def tree(
            tk: "spacy.tokens.Token",
            last_node: Node,
            depth: int
        ) -> None:
            # Attach every child of ``tk`` under ``last_node`` and recurse;
            # a token without children simply ends the recursion.
            for child in tk.children:
                child_node = Node(
                    f"{child.text}: {depth + 1}{self.__postag(child)}",
                    parent=last_node
                )
                tree(child, child_node, depth + 1)

        tree(root, root_node, 0)

        # One line per node: anytree's drawing prefix followed by the label.
        syntactic_str_tree = "".join(
            f"{pre}{item.name}\n" for pre, _, item in RenderTree(root_node)
        )

        syntactic_tree = displacy.render(doc, style='dep', options={'distance': 100})
        syntactic_html_tree = (
            "\n"
            "<center>\n"
            "<div style='max-width: 800px; overflow-x:auto;'>\n"
            f"{syntactic_tree}\n"
            "</div>\n"
            "</center>\n"
        )
        return syntactic_html_tree, syntactic_str_tree

    def compute(self, sentence: str) -> Tuple[str, str, str]:
        """Return the dependency-tree renderings for *sentence*.

        Args:
            sentence: Text to analyse. ``None``, empty and whitespace-only
                values are rejected with an error message.

        Returns:
            A ``(error, html_tree, str_tree)`` tuple. On success ``error``
            is ``""``; on rejection ``error`` holds an HTML warning and both
            trees are ``""``.
        """
        if sentence is None or not sentence.strip():
            error = self._ERROR_TEMPLATE.format("The sentence can not be empty!")
            return error, "", ""

        if sentence != self.__ch_sentence:
            # Build first, then commit: if parsing raises, the cache must
            # keep describing the previous sentence instead of pairing the
            # new key with stale trees.
            html_tree, str_tree = self.__genSyntacticTree(sentence)
            self.__ch_html_tree = html_tree
            self.__ch_str_tree = str_tree
            self.__ch_sentence = sentence

        return "", self.__ch_html_tree, self.__ch_str_tree