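# NOTE(review): the next three lines look like web-page header residue from
# the site this file was copied from, not part of the module -- confirm.
# Spaces:
# Sleeping
# Sleeping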
from functools import partial | |
from pathlib import Path | |
from typing import Iterable, Callable | |
import spacy | |
from spacy.training import Example | |
from spacy.tokens import DocBin, Doc | |
# make the factory work | |
# from scripts.rel_pipe import make_relation_extractor | |
# make the config work | |
# from scripts.rel_model import create_relation_model, create_classification_layer, create_instances, create_tensors | |
# from scripts.custom_comps.SpanCat_extention import build_mean_max_reducer1, build_mean_max_reducer2, build_mean_max_reducer3, build_mean_max_reducer4 | |
from typing import List, Tuple, cast | |
from thinc.api import Model, with_getitem, chain, list2ragged, Logistic | |
from thinc.api import Maxout, Linear, concatenate, glorot_uniform_init, PyTorchLSTM | |
from thinc.api import reduce_mean, reduce_max, reduce_first, reduce_last | |
from thinc.types import Ragged, Floats2d | |
from spacy.util import registry | |
from spacy.tokens import Doc | |
from spacy.ml.extract_spans import extract_spans | |
# @registry.layers("spacy.LinearLogistic.v1")
# def build_linear_logistic(nO=None, nI=None) -> Model[Floats2d, Floats2d]:
#     """An output layer for multi-label classification. It uses a linear layer
#     followed by a logistic activation.
#     """
#     return chain(Linear(nO=nO, nI=nI, init_W=glorot_uniform_init), Logistic())
def build_mean_max_reducer1(hidden_size: int,
                            dropout: float = 0.0) -> Model[Ragged, Floats2d]:
    """Pool each ragged sequence four ways and mix the result with one
    hidden layer.

    The last, first, mean and max reductions are concatenated and fed
    through a single Maxout layer created with normalize=True.

    hidden_size (int): Output width (nO) of the Maxout layer.
    dropout (float): Dropout value forwarded to the Maxout layer.
    RETURNS (Model[Ragged, Floats2d]): The reducer model.
    """
    # The casts only narrow the static type of the first/last reducers;
    # they have no runtime effect.
    pooled_views = concatenate(
        cast(Model[Ragged, Floats2d], reduce_last()),
        cast(Model[Ragged, Floats2d], reduce_first()),
        reduce_mean(),
        reduce_max(),
    )
    projection = Maxout(nO=hidden_size, normalize=True, dropout=dropout)
    return chain(pooled_views, projection)
def build_mean_max_reducer2(hidden_size: int,
                            dropout: float = 0.0) -> Model[Ragged, Floats2d]:
    """Pool each ragged sequence four ways and mix the result with two
    hidden layers.

    The last, first, mean and max reductions are concatenated and fed
    through two Maxout layers of the same width, each created with
    normalize=True.

    hidden_size (int): Output width (nO) of both Maxout layers.
    dropout (float): Dropout value forwarded to both Maxout layers.
    RETURNS (Model[Ragged, Floats2d]): The reducer model.
    """
    # The casts only narrow the static type of the first/last reducers;
    # they have no runtime effect.
    pooled_views = concatenate(
        cast(Model[Ragged, Floats2d], reduce_last()),
        cast(Model[Ragged, Floats2d], reduce_first()),
        reduce_mean(),
        reduce_max(),
    )
    # Two identically configured hidden layers, built in application order.
    hidden_layers = [
        Maxout(nO=hidden_size, normalize=True, dropout=dropout)
        for _ in range(2)
    ]
    return chain(pooled_views, *hidden_layers)
# @registry.layers("mean_max_reducer.v2")
# def build_mean_max_reducer2(hidden_size: int,
#                             depth: int) -> Model[Ragged, Floats2d]:
#     """Reduce sequences by concatenating their mean and max pooled vectors,
#     and then combine the concatenated vectors with a hidden layer.
#     """
#     return chain(
#         concatenate(
#             cast(Model[Ragged, Floats2d], reduce_last()),
#             cast(Model[Ragged, Floats2d], reduce_first()),
#             reduce_mean(),
#             reduce_max(),
#         ), Maxout(nO=hidden_size, normalize=True, dropout=0.0),
#         PyTorchLSTM(nO=64, nI=hidden_size, bi=True, depth=depth, dropout=0.2))
def build_mean_max_reducer3(hidden_size: int,
                            maxout_pieces: int = 3,
                            dropout: float = 0.0) -> Model[Ragged, Floats2d]:
    """Pool each ragged sequence four ways and mix the result with three
    hidden layers.

    The last, first, mean and max reductions are concatenated and fed
    through three Maxout layers (normalize=True). The first layer has width
    hidden_size; the second and third both have width int(hidden_size / 2).

    hidden_size (int): Output width (nO) of the first Maxout layer.
    maxout_pieces (int): Number of pieces (nP) used by every Maxout layer.
    dropout (float): Dropout value forwarded to every Maxout layer.
    RETURNS (Model[Ragged, Floats2d]): The reducer model.
    """
    # Both narrower layers share the same width.
    half_width = int(hidden_size / 2)
    layer_widths = (hidden_size, half_width, half_width)

    # The casts only narrow the static type of the first/last reducers;
    # they have no runtime effect.
    pooled_views = concatenate(
        cast(Model[Ragged, Floats2d], reduce_last()),
        cast(Model[Ragged, Floats2d], reduce_first()),
        reduce_mean(),
        reduce_max(),
    )
    hidden_layers = [
        Maxout(nO=width, nP=maxout_pieces, normalize=True, dropout=dropout)
        for width in layer_widths
    ]
    return chain(pooled_views, *hidden_layers)
def build_mean_max_reducer4(hidden_size: int,
                            depth: int) -> Model[Ragged, Floats2d]:
    """Pool each ragged sequence four ways and mix the result with three
    hidden layers using fixed Maxout settings.

    The last, first, mean and max reductions are concatenated and fed
    through three Maxout layers with nP=3, normalize=True and dropout=0.0.
    The first layer has width hidden_size; the second and third both have
    width int(hidden_size / 2).

    hidden_size (int): Output width (nO) of the first Maxout layer.
    depth (int): Accepted but not used anywhere in this function.
    RETURNS (Model[Ragged, Floats2d]): The reducer model.
    """
    # Both narrower layers share the same width.
    half_width = int(hidden_size / 2)
    layer_widths = (hidden_size, half_width, half_width)

    # The casts only narrow the static type of the first/last reducers;
    # they have no runtime effect.
    pooled_views = concatenate(
        cast(Model[Ragged, Floats2d], reduce_last()),
        cast(Model[Ragged, Floats2d], reduce_first()),
        reduce_mean(),
        reduce_max(),
    )
    hidden_layers = [
        Maxout(nO=width, nP=3, normalize=True, dropout=0.0)
        for width in layer_widths
    ]
    return chain(pooled_views, *hidden_layers)
def build_spancat_model(
    tok2vec: Model[List[Doc], List[Floats2d]],
    reducer: Model[Ragged, Floats2d],
    scorer: Model[Floats2d, Floats2d],
) -> Model[Tuple[List[Doc], Ragged], Floats2d]:
    """Assemble a span categorizer from a token-to-vector model, a reducer
    that maps the sequence of vectors for each span down to a single vector,
    and a scorer that maps those vectors to probabilities.

    tok2vec (Model[List[Doc], List[Floats2d]]): The tok2vec model.
    reducer (Model[Ragged, Floats2d]): The reducer model.
    scorer (Model[Floats2d, Floats2d]): The scorer model.
    RETURNS (Model[Tuple[List[Doc], Ragged], Floats2d]): The chained model,
        with "tok2vec", "reducer" and "scorer" registered as refs.
    """
    # Stage 1: run tok2vec and convert its list output into a Ragged.
    to_ragged = cast(Model[List[Floats2d], Ragged], list2ragged())
    embed = chain(tok2vec, to_ragged)

    # Apply stage 1 only to item 0 of the input tuple; item 1 passes through.
    embed_docs = cast(
        Model[Tuple[List[Doc], Ragged], Tuple[Ragged, Ragged]],
        with_getitem(0, embed),
    )

    # Stage 2-4: extract spans, reduce each to one vector, score it.
    model = chain(embed_docs, extract_spans(), reducer, scorer)

    # Register the sub-models under fixed names on the combined model.
    for ref_name, component in (
        ("tok2vec", tok2vec),
        ("reducer", reducer),
        ("scorer", scorer),
    ):
        model.set_ref(ref_name, component)
    return model
# @registry.architectures("spacy.SpanCategorizer.v1")
# def build_spancat_model(
#     tok2vec: Model[List[Doc], List[Floats2d]],
#     reducer: Model[Ragged, Floats2d],
#     scorer: Model[Floats2d, Floats2d],
# ) -> Model[Tuple[List[Doc], Ragged], Floats2d]:
#     """Build a span categorizer model, given a token-to-vector model, a
#     reducer model to map the sequence of vectors for each span down to a single
#     vector, and a scorer model to map the vectors to probabilities.
#     tok2vec (Model[List[Doc], List[Floats2d]]): The tok2vec model.
#     reducer (Model[Ragged, Floats2d]): The reducer model.
#     scorer (Model[Floats2d, Floats2d]): The scorer model.
#     """
#     model = chain(
#         cast(
#             Model[Tuple[List[Doc], Ragged], Tuple[Ragged, Ragged]],
#             with_getitem(
#                 0,
#                 chain(tok2vec,
#                       cast(Model[List[Floats2d], Ragged], list2ragged()))),
#         ),
#         extract_spans(),
#         reducer,
#         scorer,
#     )
#     model.set_ref("tok2vec", tok2vec)
#     model.set_ref("reducer", reducer)
#     model.set_ref("scorer", scorer)
#     return model