Spaces:
Running
Running
File size: 3,069 Bytes
74a2352 a409eda 74a2352 a409eda 74a2352 a409eda 65fca12 74a2352 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import subprocess
import sys
from typing import Tuple

import spacy
from anytree import Node, RenderTree
from spacy import displacy
class Pipeline:
def __init__(
self
) -> None:
self.nlp = None
self.__ch_html_tree = None
self.__ch_str_tree = None
self.__ch_sentence = None
self.__init_nlp(model="en_core_web_md")
def __init_nlp(
self,
model: str
) -> None:
self.nlp = None
try:
self.nlp = spacy.load(model)
except:
print(f"* Downloading {model} model...")
_ = subprocess.Popen(
f"python -m spacy download {model}",
stdout=subprocess.PIPE,
shell=True
).communicate()
self.nlp = spacy.load(model)
def __postag(
self,
tk: str
) -> str:
tag = ""
plural_tags = ["NNS", "NNPS"]
if tk.tag_ in plural_tags:
tag = " ({}) (Plural)".format(tk.tag_)
else:
tag = " ({})".format(tk.tag_)
return tag
def __genSyntacticTree(
self,
expr: str
) -> Tuple[str,str]:
doc = self.nlp(expr)
root = next(doc.sents).root
node = Node("" + root.text + ": (Root)" + self.__postag(root), parent=None)
def tree(
tk: str,
last_node: Node,
depth: int
) -> None:
if tk.n_lefts + tk.n_rights > 0:
for child in tk.children:
tree(
child,
Node(
"" + child.text + ": " + str(depth + 1) + self.__postag(child),
parent=last_node
),
depth+1
)
tree(root, node, 0)
syntactic_str_tree = ""
for pre, fill, node in RenderTree(node):
syntactic_str_tree += """{}{}\n""".format(pre, node.name)
syntactic_tree = displacy.render(doc, style='dep', options={'distance': 100})
syntactic_html_tree = f"""
<center>
<div style='max-width: 800px; overflow-x:auto;'>
{syntactic_tree}
</div>
</center>
"""
return syntactic_html_tree, syntactic_str_tree
def compute(
self,
sentence: str
) -> Tuple[str,str,str]:
error = ""
error_template = """
<center>
<div class="alert alert-warning" role="alert">
<h6><b>{}</b></h6>
</div>
</center>
"""
if sentence.strip() == "":
error = error_template.format("The sentence can not be empty!")
return error, "", ""
if sentence != self.__ch_sentence:
self.__ch_sentence = sentence
self.__ch_html_tree, self.__ch_str_tree = self.__genSyntacticTree(sentence)
return error, self.__ch_html_tree, self.__ch_str_tree |