"""Describes the foundational level elements (layer 1) of the UCCA annotation.

Layer 1 is the foundational layer of UCCA, whose Nodes and Edges represent
scene objects and relations. The basic building blocks of this layer are
the FNode, which is a participant in a scene relation (including the
relation itself), and the various Edges between these Nodes, which represent
the type of relation between the Nodes.

"""
import itertools | |
import operator | |
from ucca import core, layer0 | |
# Identifier of this layer: passed as the layer ID by Layer1.__init__, used as
# the prefix of every Node ID built by Layer1.next_id(), and compared against
# a parent's layer ID when looking for the FNode parent of a node.
LAYER_ID = '1'
class NodeTags:
    """Layer 1 Node tags.

    A plain namespace of string constants, one per kind of Node created by
    this module. It is not meant to be instantiated.
    """
    Foundational = 'FN'  # tag given to FoundationalNode objects
    Linkage = 'LKG'  # tag given to Linkage objects
    Punctuation = 'PNCT'  # tag given to PunctNode objects
    __init__ = None  # NodeTags() raises TypeError: constants only
class EdgeTags:
    """Layer 1 Edge tags.

    A plain namespace of string constants naming the categories an Edge of
    this layer can carry. It is not meant to be instantiated.
    """
    Unanalyzable = 'UNA'
    Uncertain = 'UNC'
    ParallelScene = 'H'
    Participant = 'A'
    Process = 'P'  # a node with a Process child counts as a scene
    State = 'S'  # a node with a State child counts as a scene
    Adverbial = 'D'
    Ground = 'G'
    Center = 'C'
    Elaborator = 'E'
    Function = 'F'
    Connector = 'N'
    Relator = 'R'
    Time = 'T'
    Quantifier = 'Q'
    Linker = 'L'
    Punctuation = 'U'  # Edge from a parent FNode to a PunctNode
    LinkRelation = 'LR'  # Edge from a Linkage to its relation node
    LinkArgument = 'LA'  # Edge from a Linkage to each of its arguments
    Terminal = 'Terminal'  # Edge from a layer 1 node to a layer 0 Terminal
    __init__ = None  # EdgeTags() raises TypeError: constants only
# Attribute entries: attribute names used by this layer. 'remote' and
# 'uncertain' are read from Edge attributes and 'implicit' from Node
# attributes in this module; 'suggest' is not referenced in the code visible
# here (presumably consumed elsewhere -- TODO confirm).
ATTRIB_KEYS = ('remote', 'implicit', 'uncertain', 'suggest')
class MissingRelationError(core.UCCAError):
    """Signals that a Node lacks an outgoing Edge it is required to have."""
def _single_child_by_tag(node, tag, must=True): | |
"""Returns the Node which is connected with an Edge with the given tag. | |
Assumes that there is only one Node connected with an Edge with this tag. | |
Args: | |
node: the Node which is the parent of the Edge (and returned Node). | |
tag: the tag of the Edge to look for. | |
must: if set to True (default), if no Node is found, raise an | |
exception. Otherwise, returns None if not found. | |
Returns: | |
The connected Node, or None if not found | |
Raises: | |
MissingRelationError if Node not found and must is set to True | |
""" | |
for edge in node: | |
if tag in edge.tags: | |
return edge.child | |
if must: | |
raise MissingRelationError(node.ID, tag) | |
return None | |
def _multiple_children_by_tag(node, tag): | |
"""Returns the Nodes which are connected with an Edge with the given tag. | |
Args: | |
node: the Node which is the parent of the Edge (and returned Nodes). | |
tag: the tag of the Edges to look for. | |
Returns: | |
A list of connected Nodes, can be empty | |
""" | |
return [edge.child for edge in node if tag in edge.tags] | |
class Linkage(core.Node):
    """A Linkage between parallel scenes.

    A Linkage object represents a connection between two parallel scenes.
    The semantic type of the link is not determined in this object, but the
    :class:`FoundationalNode` of linkage is referred as the link relation,
    and the linked scenes are referred to as the arguments.

    Most cases will have two arguments, but some constructions have 1 or 3+
    arguments, depending on the semantic connection.

    Attributes:
        relation: FoundationalNode of the relation words.
        arguments: list of FoundationalNodes of the relation participants.

    """

    # Both accessors are properties: __str__ below and the owning layer read
    # them as plain attributes (``self.relation.ID``, ``linkage.arguments``).

    @property
    def relation(self):
        """The child reached through the LinkRelation Edge.

        Raises:
            MissingRelationError: if there is no LinkRelation Edge.
        """
        return _single_child_by_tag(self, EdgeTags.LinkRelation)

    @property
    def arguments(self):
        """List of children reached through LinkArgument Edges (may be empty)."""
        return _multiple_children_by_tag(self, EdgeTags.LinkArgument)

    def __str__(self):
        """Formats the Linkage as ``<relation ID>--><arg ID>,<arg ID>,...``."""
        return "{}-->{}".format(str(self.relation.ID),
                                ','.join(x.ID for x in self.arguments))
class FoundationalNode(core.Node):
    """The basic building block of UCCA annotation, represents semantic units.

    Each FoundationalNode (FNode for short) represents a semantic unit in the
    text, with relations to other semantic units. In essence, the FNodes form
    a tree of annotation, when remote units are ignored. This means that each
    FNode has exactly one FNode parent, and for completeness, there is also
    a "Passage Head" FNode which is the FNode parent of all parallel scenes and
    linkers in the top-level of the annotation.

    Remote units are FNodes which are shared between two or more different
    FNodes, and hence have two FNode parents (participate in two relations).
    In such cases there is only one FNode parent, as the other Edges to parents
    are marked with the 'remote' attribute (set to True).

    Implicit Nodes are ones which aren't mentioned in the text, and hence
    don't have any Terminal units in their span. In such cases, they will
    have an 'implicit' attribute set to True, and will take the position -1
    (both start and end positions).

    Attributes:
        participants:
        adverbials:
        times:
        quantifiers:
        grounds:
        centers:
        elaborators:
        linkers:
        parallel_scenes:
        functions:
        punctuation:
        terminals:
            a list of all children of self whose Edge carries the tag of
            the corresponding type (possibly empty).
        process:
        state:
        connector:
        relator:
            the child of self whose Edge carries the tag of the
            corresponding type, or None in case it isn't found.
        start_position:
        end_position:
            start/end position of the first/last terminal in the span of
            the FNode, without counting in remote FNodes. If the FNode is
            implicit or has no Terminals for some reason, returns -1 (both).
        fparent: the FNode parent (FNode with incoming Edge, not remote) of
            this FNode, or None if there is none (e.g. the Passage head).
        ftag: the tag of the Edge connecting the fparent (as described above)
            with this FNode, or None if there is no fparent.
        ftags: the tags of that same Edge, or None if there is no fparent.
        discontiguous: whether the Terminals of this FNode have gaps between
            their positions.

    """

    # The accessors below are properties: the rest of this class (is_scene,
    # __str__, get_top_scene) reads them as plain attributes.

    # --- children selected by Edge tag: any number of matches ---

    @property
    def participants(self):
        return _multiple_children_by_tag(self, EdgeTags.Participant)

    @property
    def adverbials(self):
        return _multiple_children_by_tag(self, EdgeTags.Adverbial)

    @property
    def times(self):
        return _multiple_children_by_tag(self, EdgeTags.Time)

    @property
    def quantifiers(self):
        return _multiple_children_by_tag(self, EdgeTags.Quantifier)

    @property
    def grounds(self):
        return _multiple_children_by_tag(self, EdgeTags.Ground)

    @property
    def centers(self):
        return _multiple_children_by_tag(self, EdgeTags.Center)

    @property
    def elaborators(self):
        return _multiple_children_by_tag(self, EdgeTags.Elaborator)

    @property
    def linkers(self):
        return _multiple_children_by_tag(self, EdgeTags.Linker)

    @property
    def parallel_scenes(self):
        return _multiple_children_by_tag(self, EdgeTags.ParallelScene)

    @property
    def functions(self):
        return _multiple_children_by_tag(self, EdgeTags.Function)

    @property
    def punctuation(self):
        return _multiple_children_by_tag(self, EdgeTags.Punctuation)

    @property
    def terminals(self):
        return _multiple_children_by_tag(self, EdgeTags.Terminal)

    # --- children selected by Edge tag: first match, or None ---

    @property
    def process(self):
        return _single_child_by_tag(self, EdgeTags.Process, False)

    @property
    def state(self):
        return _single_child_by_tag(self, EdgeTags.State, False)

    @property
    def connector(self):
        return _single_child_by_tag(self, EdgeTags.Connector, False)

    @property
    def relator(self):
        return _single_child_by_tag(self, EdgeTags.Relator, False)

    # --- the (non-remote) foundational parent ---

    def _fedge(self):
        """Returns the Edge of the fparent, or None.

        That is the first incoming Edge that is not marked 'remote' and whose
        parent is a Foundational-tagged Node of this layer.
        """
        for edge in self.incoming:
            if (edge.parent.layer.ID == LAYER_ID and
                    edge.parent.tag == NodeTags.Foundational and
                    not edge.attrib.get('remote')):
                return edge
        return None

    @property
    def fparent(self):
        edge = self._fedge()
        return edge.parent if edge else None

    @property
    def ftag(self):
        edge = self._fedge()
        return edge.tag if edge else None

    @property
    def ftags(self):
        edge = self._fedge()
        return edge.tags if edge else None

    # --- terminals and positions ---

    def get_terminals(self, punct=True, remotes=False, visited=None):
        """Returns a list of all terminals under the span of this FoundationalNode.

        :param punct: whether to include punctuation Terminals, defaults to True
        :param remotes: whether to include Terminals from remote FoundationalNodes, defaults to false
        :param visited: used to detect cycles; callers should leave it as None
        :return: a list of :class:`layer0`.Terminal objects; sorted by position
                 when called with visited=None (the top-level call)
        """
        if visited is None:
            # Top-level call: start the traversal with an empty set of seen
            # Edges and sort the collected Terminals once at the end.
            return sorted(self.get_terminals(punct=punct, remotes=remotes, visited=set()),
                          key=operator.attrgetter("position"))
        # Follow only Edges not traversed yet, skipping remote ones unless asked.
        outgoing = {e for e in set(self) - visited if remotes or not e.attrib.get("remote")}
        return [t for e in outgoing for t in e.child.get_terminals(
            punct=punct, remotes=remotes, visited=visited | outgoing)]

    @property
    def start_position(self):
        try:
            return self.get_terminals()[0].position
        except IndexError:  # implicit unit or having no Terminals
            return -1

    @property
    def end_position(self):
        try:
            return self.get_terminals()[-1].position
        except IndexError:  # implicit unit or having no Terminals
            return -1

    @property
    def discontiguous(self):
        terms = self.get_terminals()
        return any(prev.position + 1 != following.position
                   for prev, following in zip(terms, terms[1:]))

    def get_sequences(self):
        """Returns the contiguous position ranges covered by this FNode.

        :return: a list of (first, last) position tuples, one per maximal run
                 of consecutive Terminal positions; empty if the FNode is
                 implicit or has no Terminals at all
        """
        if self.attrib.get('implicit'):
            return []
        pos = sorted(x.position for x in self.get_terminals())
        if not pos:
            # Not marked implicit but still no Terminals: nothing to report
            # (indexing pos[0]/pos[-1] below would raise IndexError).
            return []
        neighbours = list(zip(pos, pos[1:]))
        # all terminals which end a sequence, including the last one
        seq_closers = [a for a, b in neighbours if a + 1 < b] + [pos[-1]]
        # all terminals which start a sequence, including the first one
        seq_openers = [pos[0]] + [b for a, b in neighbours if a < b - 1]
        return list(zip(seq_openers, seq_closers))

    def to_text(self):
        """Returns the text in the span of self, separated by spaces."""
        return ' '.join(t.text for t in self.get_terminals())

    def is_scene(self):
        """Returns True iff this FNode has a State or a Process child."""
        return self.state is not None or self.process is not None

    def __str__(self):
        """Renders the children in position order as a bracketed string.

        Terminals are printed as-is; any other child is printed as
        ``[TAGS child]`` with ``*`` appended to the tags of a remote Edge and
        ``?`` to those of an uncertain one. Children without a position are
        printed as ``[TAGS IMPLICIT]`` and gaps are marked with ``...``.
        """
        def start(e):
            return e.child.position if e.child.layer.ID == layer0.LAYER_ID else e.child.start_position

        sorted_edges = sorted(self, key=start)
        own_end = self.end_position  # loop-invariant, so computed only once
        output = []
        for edge, next_edge in zip(sorted_edges, sorted_edges[1:] + [None]):
            node = edge.child
            remote = edge.attrib.get('remote')
            end = node.position if node.layer.ID == layer0.LAYER_ID else node.end_position
            if edge.tag == EdgeTags.Terminal:
                output.append(str(node))
                if end != own_end:
                    output.append(" ")
            else:
                edge_tags = "|".join(edge.tags)
                if remote:
                    edge_tags += '*'
                if edge.attrib.get('uncertain'):
                    edge_tags += '?'
                if start(edge) == -1:
                    output.append("[{} IMPLICIT] ".format(edge_tags))
                else:
                    output.append("[{} {}] ".format(edge_tags, str(node)))
            # NOTE(review): nesting level of this check was reconstructed from
            # a source without indentation; it is applied to every child.
            if start(edge) != -1 and not remote and next_edge is not None and end + 1 < start(next_edge):
                output.append("... ")  # adding '...' if discontiguous
        return "".join(output)

    def get_top_scene(self):
        """Returns the top-level scene this FNode is within, or None"""
        if self in self.layer.top_scenes:
            return self
        elif self.fparent is None:
            return None
        else:
            return self.fparent.get_top_scene()
class PunctNode(FoundationalNode):
    """Encapsulates punctuation :class:`layer0`.Terminal objects.

    Attributes:
        terminals: return the :class:`layer0`.Terminal objects encapsulated
            by this Node in a list (at least one, usually not more than 1).
        start_position:
        end_position:
            start/end position of the first/last terminal in the span of
            the PunctNode.

    """

    def add(self, edge_tag, node, *, edge_attrib=None):
        """Adds a layer 0 Terminal as a child of this PunctNode.

        :param edge_tag: tag of the new Edge
        :param node: the child to add; must belong to layer 0
        :param edge_attrib: Keyword only, dictionary of attributes to be passed
                            to the Edge initializer
        :return: whatever the parent class's ``add`` returns
        :raise ValueError: if node does not belong to layer 0
        """
        if node.layer.ID != layer0.LAYER_ID:
            raise ValueError("Non-terminal child (%s) for %s node (%s)" % (node.ID, NodeTags.Punctuation, self.ID))
        if not layer0.is_punct(node):
            # A non-punctuation Terminal is retagged as punctuation rather
            # than rejected.
            node.tag = layer0.NodeTags.Punct
        # Forward the caller's attributes (they used to be dropped by passing
        # edge_attrib=None unconditionally).
        return super().add(edge_tag, node, edge_attrib=edge_attrib)

    @property
    def terminals(self):
        return self.children

    def get_terminals(self, punct=True, *args, **kwargs):
        """Returns a list of all terminals under the span of this PunctNode.

        :param punct: whether to include punctuation Terminals, defaults to True
        :return: the children of this Node if punct is true, otherwise an
                 empty tuple; extra arguments are accepted and ignored
        """
        return self.children if punct else ()

    def __str__(self):
        return self.to_text()
class Layer1(core.Layer):
    """The foundational layer (layer 1) of a Passage.

    On creation it builds the head FNode of the layer. It also keeps track of
    the top-level scenes and linkages, refreshing both whenever an Edge of
    the layer is added, removed or retagged.

    Attributes:
        top_scenes: a copy of the list of top-level scenes, i.e. scenes not
            embedded in any other scene.
        top_linkages: a copy of the list of Linkages all of whose arguments
            are top-level scenes.

    """

    def __init__(self, root, attrib=None, *, orderkey=core.id_orderkey):
        super().__init__(ID=LAYER_ID, root=root, attrib=attrib,
                         orderkey=orderkey)
        self._scenes = []
        self._linkages = []
        self._head_fnode = FoundationalNode(root=root,
                                            tag=NodeTags.Foundational,
                                            ID=self.next_id())
        self._all = [self._head_fnode]
        self._heads = [self._head_fnode]

    # Properties, since FoundationalNode.get_top_scene reads
    # ``self.layer.top_scenes`` as a plain attribute. Copies are returned so
    # that callers cannot alter the layer's own bookkeeping lists.

    @property
    def top_scenes(self):
        return self._scenes[:]

    @property
    def top_linkages(self):
        return self._linkages[:]

    def next_id(self):
        """Returns the next available ID string for this layer."""
        # Start counting after the nodes already in the layer and skip any ID
        # the Passage already knows (by_id raises KeyError for unknown IDs).
        for n in itertools.count(start=len(self._all) + 1):
            id_str = "{}{}{}".format(LAYER_ID, core.Node.ID_SEPARATOR, n)
            try:
                self._root.by_id(id_str)
            except KeyError:
                return id_str

    def add_fnode_multiple(self, parent, edge_categories, *, implicit=False, edge_attrib=None):
        """Adds a new :class:`FNode` whose parent and Edge tag are given.

        :param parent: the FNode which will be the parent of the new FNode.
                If the parent is None, adds under the layer head FNode.
        :param edge_categories: list of categories on the Edge between the parent and the new FNode.
                If empty, the new FNode is created without being attached.
        :param implicit: whether to set the new FNode as implicit (default False)
        :param edge_attrib: Keyword only, dictionary of attributes to be passed
                            to the Edge initializer.

        :return: the newly created FNode

        :raise core.FrozenPassageError if the Passage is frozen
        """
        if parent is None:
            parent = self._head_fnode
        node_attrib = {'implicit': True} if implicit else {}
        fnode = FoundationalNode(root=self.root, tag=NodeTags.Foundational,
                                 ID=self.next_id(), attrib=node_attrib)
        if edge_categories:
            parent.add_multiple(edge_categories, fnode, edge_attrib=edge_attrib)
        return fnode

    def add_fnode(self, parent, tag, *, implicit=False):
        """Adds a new FNode under parent through an Edge with a single tag.

        Shorthand for :meth:`add_fnode_multiple` with one category.
        """
        return self.add_fnode_multiple(parent, [(tag,)], implicit=implicit)

    def add_remote_multiple(self, parent, edge_categories, child, edge_attrib=None):
        """Adds a new :class:`core`.Edge with remote attribute between the nodes.

        :param parent: the parent of the remote Edge
        :param edge_categories: list of categories of the Edge
        :param child: the child of the remote Edge
        :param edge_attrib: dictionary of attributes to be passed to the Edge
                            initializer; it is copied, never modified.

        :return: whatever ``parent.add_multiple`` returns

        :raise core.FrozenPassageError if the Passage is frozen
        """
        # Work on a copy so that the caller's dictionary is left untouched.
        remote_attrib = dict(edge_attrib) if edge_attrib else {}
        remote_attrib["remote"] = True
        return parent.add_multiple(edge_categories, child, edge_attrib=remote_attrib)

    def add_remote(self, parent, tag, child):
        """Adds a remote Edge with a single tag between parent and child.

        Shorthand for :meth:`add_remote_multiple` with one category.
        """
        return self.add_remote_multiple(parent, [(tag,)], child)

    def add_punct(self, parent, terminal, layer=None, slot=None, edge_attrib=None):
        """Adds a PunctNode as the child of parent and the Terminal under it.

        :param parent: the parent of the newly created PunctNode. If None, adds
                under the layer head FNode.
        :param terminal: the punctuation Terminal we want to put under parent.
        :param layer: passed as the third element of the category tuple of
                both new Edges (presumably the category's layer -- TODO confirm)
        :param slot: passed as the second element of the category tuple of
                both new Edges (presumably the category's slot -- TODO confirm)
        :param edge_attrib: dictionary of attributes to be passed to the
                initializer of the Edge between parent and the PunctNode.

        :return: the newly created PunctNode.

        :raise core.FrozenPassageError if the Passage is frozen.
        """
        if parent is None:
            parent = self._head_fnode
        punct_node = PunctNode(root=self.root, tag=NodeTags.Punctuation,
                               ID=self.next_id())
        parent.add_multiple([(EdgeTags.Punctuation, slot, layer)], punct_node, edge_attrib=edge_attrib)
        punct_node.add_multiple([(EdgeTags.Terminal, slot, layer)], terminal)
        return punct_node

    def add_linkage(self, relation, *args):
        """Adds a Linkage between the link relation and the linked arguments.

        Linkage objects are all heads and have no parents.

        :param relation: the link relation FNode.
        :param args: any number (at least 1) of linkage arguments FNodes.

        :return: the newly created Linkage

        :raise core.FrozenPassageError if the Passage is frozen.
        """
        linkage = Linkage(root=self.root, tag=NodeTags.Linkage,
                          ID=self.next_id())
        linkage.add(EdgeTags.LinkRelation, relation)
        for arg in args:
            linkage.add(EdgeTags.LinkArgument, arg)
        return linkage

    def _check_top_scene(self, node):
        """Checks whether a node is a scene, and a top-level one.

        A top level scene is one which is not embedded in any other scene.

        :param node: the FNode to check.

        :return: True iff node is a top-level scene.
        """
        if not node.is_scene():
            return False
        # Climb the chain of FNode parents up to (not including) the layer
        # head; any scene met on the way means node is embedded in it.
        while node.fparent not in (None, self._head_fnode):
            node = node.fparent
            if node.is_scene():
                return False
        return True

    def _update_top_scene(self, node):
        """Adds/removes the node if it's a top-level scene."""
        if node.tag != NodeTags.Foundational:
            return
        if node in self._scenes and not self._check_top_scene(node):
            self._scenes.remove(node)
        elif node not in self._scenes and self._check_top_scene(node):
            self._scenes.append(node)
            # Other scenes may now become not top-level, check it. The slice
            # is a copy without the node just appended, so removing entries
            # from self._scenes while looping is safe.
            for ts in self._scenes[:-1]:
                if not self._check_top_scene(ts):
                    self._scenes.remove(ts)
            self._scenes.sort(key=self.orderkey)

    def _update_top_linkage(self, linkage):
        """Adds/removes the linkage if it's a top level linkage."""
        if all(fnode in self._scenes for fnode in linkage.arguments):
            if linkage not in self._linkages:
                self._linkages.append(linkage)
                self._linkages.sort(key=self.orderkey)
        elif linkage in self._linkages:
            self._linkages.remove(linkage)

    def _update_edge(self, edge):
        """Re-evaluates top scenes and top linkages around a changed Edge."""
        self._update_top_scene(edge.parent)
        self._update_top_scene(edge.child)
        # Every Linkage pointing at either endpoint may have changed status.
        for lkg in [x for x in edge.parent.parents
                    if x.tag == NodeTags.Linkage]:
            self._update_top_linkage(lkg)
        for lkg in [x for x in edge.child.parents
                    if x.tag == NodeTags.Linkage]:
            self._update_top_linkage(lkg)

    def _add_edge(self, edge):
        super()._add_edge(edge)
        self._update_edge(edge)

    def _remove_edge(self, edge):
        super()._remove_edge(edge)
        self._update_edge(edge)

    def _change_edge_tag(self, edge, old_tag):
        super()._change_edge_tag(edge, old_tag)
        self._update_edge(edge)