Delete modules/morpho_analysis.py
Browse files- modules/morpho_analysis.py +0 -117
modules/morpho_analysis.py
DELETED
@@ -1,117 +0,0 @@
|
|
1 |
-
# /modules/morpho_analysis.py
|
2 |
-
import spacy
|
3 |
-
from collections import Counter
|
4 |
-
from spacy import displacy
|
5 |
-
import re
|
6 |
-
|
7 |
-
# Background color (hex) used to highlight a token, keyed by its
# part-of-speech tag. Callers fall back to their own default for tags
# that are not listed here.
POS_COLORS = dict(
    ADJ='#FFA07A',    # Light Salmon
    ADP='#98FB98',    # Pale Green
    ADV='#87CEFA',    # Light Sky Blue
    AUX='#DDA0DD',    # Plum
    CCONJ='#F0E68C',  # Khaki
    DET='#FFB6C1',    # Light Pink
    INTJ='#FF6347',   # Tomato
    NOUN='#90EE90',   # Light Green
    NUM='#FAFAD2',    # Light Goldenrod Yellow
    PART='#D3D3D3',   # Light Gray
    PRON='#FFA500',   # Orange
    PROPN='#20B2AA',  # Light Sea Green
    SCONJ='#DEB887',  # Burlywood
    SYM='#7B68EE',    # Medium Slate Blue
    VERB='#FF69B4',   # Hot Pink
    X='#A9A9A9',      # Dark Gray
)
|
26 |
-
|
27 |
-
# Human-readable names for each part-of-speech tag, keyed first by
# language code ('es', 'en', 'fr') and then by tag. Every language
# covers the same 16 tags as POS_COLORS.
# The accented characters below were previously stored as mis-decoded
# bytes (e.g. 'Adposici贸n'); they are restored to the intended text.
POS_TRANSLATIONS = {
    'es': {
        'ADJ': 'Adjetivo',
        'ADP': 'Adposición',
        'ADV': 'Adverbio',
        'AUX': 'Auxiliar',
        'CCONJ': 'Conjunción Coordinante',
        'DET': 'Determinante',
        'INTJ': 'Interjección',
        'NOUN': 'Sustantivo',
        'NUM': 'Número',
        'PART': 'Partícula',
        'PRON': 'Pronombre',
        'PROPN': 'Nombre Propio',
        'SCONJ': 'Conjunción Subordinante',
        'SYM': 'Símbolo',
        'VERB': 'Verbo',
        'X': 'Otro',
    },
    'en': {
        'ADJ': 'Adjective',
        'ADP': 'Adposition',
        'ADV': 'Adverb',
        'AUX': 'Auxiliary',
        'CCONJ': 'Coordinating Conjunction',
        'DET': 'Determiner',
        'INTJ': 'Interjection',
        'NOUN': 'Noun',
        'NUM': 'Number',
        'PART': 'Particle',
        'PRON': 'Pronoun',
        'PROPN': 'Proper Noun',
        'SCONJ': 'Subordinating Conjunction',
        'SYM': 'Symbol',
        'VERB': 'Verb',
        'X': 'Other',
    },
    'fr': {
        'ADJ': 'Adjectif',
        'ADP': 'Adposition',
        'ADV': 'Adverbe',
        'AUX': 'Auxiliaire',
        'CCONJ': 'Conjonction de Coordination',
        'DET': 'Déterminant',
        'INTJ': 'Interjection',
        'NOUN': 'Nom',
        'NUM': 'Nombre',
        'PART': 'Particule',
        'PRON': 'Pronom',
        'PROPN': 'Nom Propre',
        'SCONJ': 'Conjonction de Subordination',
        'SYM': 'Symbole',
        'VERB': 'Verbe',
        'X': 'Autre',
    },
}
|
83 |
-
|
84 |
-
#############################################################################################
|
85 |
-
def get_repeated_words_colors(doc):
    """Return a highlight color for every word that occurs more than once.

    Args:
        doc: Iterable of tokens exposing ``text`` and ``pos_``. It is
            traversed twice, so it must be re-iterable.

    Returns:
        dict mapping the lower-cased word to a color from ``POS_COLORS``
        (``'#FFFFFF'`` when the tag has no entry). Tokens tagged ``PUNCT``
        are not counted. When a word appears with several tags, the tag of
        its last occurrence decides the color.
    """
    # First pass: frequency of each lower-cased, non-punctuation token.
    frequencies = Counter()
    for token in doc:
        if token.pos_ != 'PUNCT':
            frequencies[token.text.lower()] += 1

    repeated = {word for word, total in frequencies.items() if total > 1}

    # Second pass: assign a color to every occurrence of a repeated word;
    # later occurrences overwrite earlier ones.
    colors_by_word = {}
    for token in doc:
        key = token.text.lower()
        if key in repeated:
            colors_by_word[key] = POS_COLORS.get(token.pos_, '#FFFFFF')

    return colors_by_word
|
95 |
-
|
96 |
-
######################################################################################################
|
97 |
-
def highlight_repeated_words(doc, word_colors):
    """Render the tokens of ``doc`` as HTML, highlighting selected words.

    Args:
        doc: Iterable of tokens exposing ``text``.
        word_colors: dict mapping a lower-cased word to the CSS color used
            as its background.

    Returns:
        str: the tokens joined with single spaces. Tokens whose lower-cased
        text is a key of ``word_colors`` are wrapped in a ``<span>`` with
        that background color; all others are emitted as plain text.

    Token text is HTML-escaped before being inserted, so characters such as
    ``<``, ``>``, ``&`` and quotes in the analysed text are shown literally
    instead of being interpreted as markup.
    """
    import html  # local import keeps this block independent of the file header

    pieces = []
    for token in doc:
        safe_text = html.escape(token.text)
        key = token.text.lower()
        if key in word_colors:
            color = word_colors[key]
            pieces.append(
                f'<span style="background-color: {color};">{safe_text}</span>'
            )
        else:
            pieces.append(safe_text)
    return ' '.join(pieces)
|
106 |
-
|
107 |
-
#################################################################################################
|
108 |
-
def generate_arc_diagram(doc, lang_code):
    """Build one dependency-arc diagram (HTML/SVG markup) per sentence.

    Args:
        doc: Object whose ``sents`` attribute yields the sentences to draw.
        lang_code: Accepted for interface compatibility; not used here.

    Returns:
        list[str]: one markup string per sentence, in sentence order. Each
        is the output of ``displacy.render(..., style="dep")`` after the
        height and vertical-offset rewrites below.
    """
    svg_open_tag = re.compile(r'<svg[^>]*>')
    translated_group = re.compile(r'<g [^>]*transform="translate\((\d+),(\d+)\)"')

    def _shrink_svg(match):
        # Only touches a height of exactly 450 inside the opening <svg> tag.
        return match.group(0).replace('height="450"', 'height="300"')

    def _pin_group(match):
        # Keeps the x offset, forces y to 50; attributes that preceded
        # ``transform`` in the matched <g ...> prefix are dropped.
        return f'<g transform="translate({match.group(1)},50)"'

    diagrams = []
    for sentence in doc.sents:
        markup = displacy.render(sentence, style="dep", options={"distance": 100})
        markup = markup.replace('height="375"', 'height="200"')
        markup = svg_open_tag.sub(_shrink_svg, markup)
        markup = translated_group.sub(_pin_group, markup)
        diagrams.append(markup)
    return diagrams
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|