Spaces:
Running
Running
File size: 5,038 Bytes
91eaff6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
These classes represent graph elements.
Consider this "flavor" of graph representation to be a superset of
`openCypher` _labeled property graphs_ (LPG) with additional support
for probabilistic graphs.
Imposing a discipline of IRIs for node names and edge relations
helps guarantee that a view of the graph can be exported to RDF
for data quality checks, transitive closure, semantic inference,
and so on.
see copyright/license https://huggingface.co/spaces/DerwenAI/textgraphs/blob/main/README.md
"""
from dataclasses import dataclass, field
import typing
import spacy # pylint: disable=E0401
from .util import EnumBase
######################################################################
## class definitions
@dataclass
class KGSearchHit:  # pylint: disable=R0902
    """
    Record for a single result returned by a _knowledge graph_ search.

    This is a plain mutable dataclass: all five fields are required and
    no ordering comparisons are generated.
    """
    # identifier of the hit; presumably an IRI -- confirm with the producer
    iri: str
    label: str
    # presumably a short description of the hit -- confirm with the producer
    descrip: str
    aliases: typing.List[str]
    # presumably a probability / confidence score; the range is not shown here
    prob: float
@dataclass
class LinkedEntity:  # pylint: disable=R0902
    """
    Record describing one linked entity.

    This is a plain mutable dataclass with no generated ordering. Every
    field except `count` must be supplied; `span` and `kg_ent` may be
    `None`.
    """
    span: typing.Optional[spacy.tokens.span.Span]
    iri: str
    length: int
    rel: str
    prob: float
    token_id: int
    # the knowledge graph search hit associated with this link, when there is one
    kg_ent: typing.Optional[KGSearchHit]
    # starts at one; presumably incremented by callers on repeat links -- confirm
    count: int = 1
@dataclass
class NounChunk:  # pylint: disable=R0902
    """
    Record for one noun chunk, i.e., a candidate as an extracted phrase.

    This is a plain mutable dataclass with no generated ordering. Only
    `start` has a default value.
    """
    span: spacy.tokens.span.Span
    text: str
    length: int
    lemma_key: str
    unseen: bool
    sent_id: int
    start: int = 0
class NodeEnum (EnumBase):
    """
    The categories that a graph node can belong to.
    """
    DEP = 0  # `spaCy` parse dependency
    LEM = 1  # lemmatized token
    ENT = 2  # named entity
    CHU = 3  # noun chunk
    IRI = 4  # IRI for linked entity

    @property
    def decoder(self) -> typing.List[str]:
        """
        Short string codes for the node categories.

        The position of each code in the list equals the integer value
        of the corresponding member (e.g., index 2 is the code for `ENT`).

        returns:
            a newly built list of the codes
        """
        codes: typing.List[str] = ["dep", "lem", "ent", "chu", "iri"]
        return codes
@dataclass(order=False, frozen=False)
class Node:  # pylint: disable=R0902
    """
    A data class representing one node, i.e., an extracted phrase.

    Only `node_id`, `key`, `text`, `pos`, and `kind` are required; the
    remaining fields have defaults. The `loc` and `entity` lists are
    created fresh for each instance.
    """
    node_id: int
    key: str
    text: str
    pos: str
    kind: NodeEnum
    span: typing.Optional[typing.Union[spacy.tokens.span.Span, spacy.tokens.token.Token]] = None
    loc: typing.List[typing.List[int]] = field(default_factory=list)
    label: typing.Optional[str] = None
    length: int = 1
    sub_obj: bool = False
    count: int = 0
    neighbors: int = 0
    weight: float = 0.0
    entity: typing.List[LinkedEntity] = field(default_factory=list)
    annotated: bool = False

    def get_linked_label(self) -> typing.Optional[str]:
        """
        When this node has a linked entity, return that IRI.
        Otherwise return its `label` value.

        returns:
            the `iri` of the first linked entity, else `label` (which may be `None`)
        """
        if self.entity:
            return self.entity[0].iri

        return self.label

    def get_name(self) -> str:
        """
        Return a brief name for the graphical depiction of this Node.

        returns:
            `label` for IRI nodes, `key` for lemma nodes, otherwise `text`
        """
        if self.kind == NodeEnum.IRI:
            # NOTE: `label` is declared optional, so this can be `None`
            # if an IRI node was created without one
            return self.label  # type: ignore

        if self.kind == NodeEnum.LEM:
            return self.key

        return self.text

    def get_stacked_count(self) -> int:
        """
        Return a modified count, to redact verbs and linked entities from
        the stack-rank partitions.

        returns:
            zero for verbs and IRI nodes, otherwise `count`
        """
        if self.pos == "VERB" or self.kind == NodeEnum.IRI:
            return 0

        return self.count

    def get_pos(self) -> typing.Tuple[int, int]:
        """
        Generate a position span for `OpenNRE`.

        The start is the character offset of `span`; the end is the start
        plus the length of `text`, minus one.

        returns:
            a `(start, end)` pair needed for `OpenNRE` relation extraction

        raises:
            AttributeError: if `span` is `None`
        """
        anchor = self.span

        # a `Token` exposes its character offset as `idx`, whereas a `Span`
        # has no `idx` attribute and exposes `start_char` instead
        if hasattr(anchor, "idx"):
            start: int = anchor.idx  # type: ignore
        else:
            start = anchor.start_char  # type: ignore

        return (start, start + len(self.text) - 1)
class RelEnum (EnumBase):
    """
    The categories that an edge relation can belong to.
    """
    DEP = 0  # `spaCy` parse dependency
    CHU = 1  # `spaCy` noun chunk
    INF = 2  # `REBEL` or `OpenNRE` inferred relation
    SYN = 3  # `sense2vec` inferred synonym
    IRI = 4  # `DBPedia` or `Wikidata` linked entity

    @property
    def decoder(self) -> typing.List[str]:
        """
        Short string codes for the relation categories.

        The position of each code in the list equals the integer value
        of the corresponding member (e.g., index 2 is the code for `INF`).

        returns:
            a newly built list of the codes
        """
        codes: typing.List[str] = ["dep", "chu", "inf", "syn", "iri"]
        return codes
@dataclass
class Edge:
    """
    Record for an edge connecting two nodes.

    This is a plain mutable dataclass with no generated ordering. Only
    `count` has a default value.
    """
    # integer endpoints; presumably `Node.node_id` values -- confirm with callers
    src_node: int
    dst_node: int
    kind: RelEnum
    rel: str
    prob: float
    count: int = 1
|