|
import spacy
|
|
from spacy import displacy
|
|
from streamlit.components.v1 import html
|
|
import base64
|
|
|
|
from collections import Counter
|
|
import re
|
|
from ..utils.widget_utils import generate_unique_key
|
|
|
|
import logging
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
# Display colour (hex RGB string) for each part-of-speech tag label.
# The keys are the same tag labels used by POS_TRANSLATIONS.
POS_COLORS = {
    "ADJ": "#FFA07A",
    "ADP": "#98FB98",
    "ADV": "#87CEFA",
    "AUX": "#DDA0DD",
    "CCONJ": "#F0E68C",
    "DET": "#FFB6C1",
    "INTJ": "#FF6347",
    "NOUN": "#90EE90",
    "NUM": "#FAFAD2",
    "PART": "#D3D3D3",
    "PRON": "#FFA500",
    "PROPN": "#20B2AA",
    "SCONJ": "#DEB887",
    "SYM": "#7B68EE",
    "VERB": "#FF69B4",
    "X": "#A9A9A9",
}
|
|
|
|
# Human-readable name of each part-of-speech tag label, per UI language.
# Outer key: language code ("es", "en", "fr"); inner key: tag label.
POS_TRANSLATIONS = {
    "es": {
        "ADJ": "Adjetivo",
        "ADP": "Preposición",
        "ADV": "Adverbio",
        "AUX": "Auxiliar",
        "CCONJ": "Conjunción Coordinante",
        "DET": "Determinante",
        "INTJ": "Interjección",
        "NOUN": "Sustantivo",
        "NUM": "Número",
        "PART": "Partícula",
        "PRON": "Pronombre",
        "PROPN": "Nombre Propio",
        "SCONJ": "Conjunción Subordinante",
        "SYM": "Símbolo",
        "VERB": "Verbo",
        "X": "Otro",
    },
    "en": {
        "ADJ": "Adjective",
        "ADP": "Preposition",
        "ADV": "Adverb",
        "AUX": "Auxiliary",
        "CCONJ": "Coordinating Conjunction",
        "DET": "Determiner",
        "INTJ": "Interjection",
        "NOUN": "Noun",
        "NUM": "Number",
        "PART": "Particle",
        "PRON": "Pronoun",
        "PROPN": "Proper Noun",
        "SCONJ": "Subordinating Conjunction",
        "SYM": "Symbol",
        "VERB": "Verb",
        "X": "Other",
    },
    "fr": {
        "ADJ": "Adjectif",
        "ADP": "Préposition",
        "ADV": "Adverbe",
        "AUX": "Auxiliaire",
        "CCONJ": "Conjonction de Coordination",
        "DET": "Déterminant",
        "INTJ": "Interjection",
        "NOUN": "Nom",
        "NUM": "Nombre",
        "PART": "Particule",
        "PRON": "Pronom",
        "PROPN": "Nom Propre",
        "SCONJ": "Conjonction de Subordination",
        "SYM": "Symbole",
        "VERB": "Verbe",
        "X": "Autre",
    },
}
|
|
|
|
def _restyle_dep_svg(svg, svg_width, svg_height):
    """Return ``svg`` resized to the given canvas with adjusted label offsets."""
    # Overwrite every purely numeric width="..."/height="..." attribute with
    # the computed canvas size.
    svg = re.sub(r'width="(\d+)"', f'width="{svg_width}"', svg)
    svg = re.sub(r'height="(\d+)"', f'height="{svg_height}"', svg)
    # Give each <svg opening tag a viewBox matching the canvas size.
    svg = svg.replace('<svg', f'<svg viewBox="0 0 {svg_width} {svg_height}"')
    # Keep the x offset of translated <g> groups but pin their y offset to 10.
    svg = re.sub(
        r'<g transform="translate\((\d+),(\d+)\)"',
        r'<g transform="translate(\1,10)"',
        svg,
    )
    # Flip these relative text offsets to negative values.
    # NOTE(review): presumably this lifts the labels above their baseline;
    # whether these exact dy values occur depends on the displaCy version.
    svg = svg.replace('dy="1em"', 'dy="-1em"')
    svg = svg.replace('dy="0.25em"', 'dy="-3em"')
    # Inject a font size into the .displacy-tag CSS rule when one is present.
    svg = svg.replace('.displacy-tag {', '.displacy-tag { font-size: 14px;')
    return svg


def generate_arc_diagram(doc):
    """Render one dependency-arc SVG per sentence of ``doc``.

    Every sentence in ``doc.sents`` is rendered with displaCy's ``"dep"``
    style and the markup is then post-processed to a fixed canvas size.

    Args:
        doc: Parsed document exposing ``.sents``; presumably a spaCy ``Doc``
            with sentence boundaries available (confirm against the caller).

    Returns:
        A list with one SVG markup string per sentence, in iteration order.
        The list is empty when ``doc.sents`` yields nothing.
    """
    # Loop-invariant settings are built once. The original literal listed
    # "compact" twice with the same value; it is kept once here.
    render_options = {
        "add_lemma": False,
        "arrow_spacing": 12,
        "arrow_width": 2,
        "arrow_stroke": 2,
        "collapse_punct": True,
        "collapse_phrases": False,
        "compact": False,
        "color": "#ffffff",
        "bg": "#0d6efd",
        "distance": 100,
        "fine_grained": False,
        "offset_x": 55,
        "word_spacing": 25,
    }
    svg_height = 350

    arc_diagrams = []
    for sent in doc.sents:
        # 120 px per token, but never narrower than 600 px.
        svg_width = max(600, len(sent) * 120)

        # jupyter=False makes displaCy return the markup as a string even
        # when running inside a notebook, where auto-detection would display
        # the diagram and return None instead.
        # The result is named ``svg`` so it does not shadow the module-level
        # ``html`` component imported from streamlit.
        svg = displacy.render(
            sent,
            style="dep",
            jupyter=False,
            options=render_options,
        )
        arc_diagrams.append(_restyle_dep_svg(svg, svg_width, svg_height))
    return arc_diagrams
|
|
|
|
|
|
|
|
def perform_advanced_morphosyntactic_analysis(text, nlp):
    """Parse ``text`` with ``nlp`` and collect its dependency arc diagrams.

    Args:
        text: Input handed unchanged to ``nlp``.
        nlp: Callable that turns ``text`` into a parsed document; presumably
            a loaded spaCy pipeline (confirm against the caller).

    Returns:
        A dict with the single key ``'arc_diagrams'`` whose value is the
        result of ``generate_arc_diagram`` for the parsed document.
    """
    return {'arc_diagrams': generate_arc_diagram(nlp(text))}
|
|
|
|
# Names exported by ``from <this module> import *``.
__all__ = ["perform_advanced_morphosyntactic_analysis"]