AIdeaText committed on
Commit
573c9b8
verified
1 Parent(s): 67ddf17

Delete modules/morpho_analysis.py

Browse files
Files changed (1) hide show
  1. modules/morpho_analysis.py +0 -117
modules/morpho_analysis.py DELETED
@@ -1,117 +0,0 @@
1
- # /modules/morpho_analysis.py
2
- import spacy
3
- from collections import Counter
4
- from spacy import displacy
5
- import re
6
-
7
- # Define colors for grammatical categories
8
- POS_COLORS = {
9
- 'ADJ': '#FFA07A', # Light Salmon
10
- 'ADP': '#98FB98', # Pale Green
11
- 'ADV': '#87CEFA', # Light Sky Blue
12
- 'AUX': '#DDA0DD', # Plum
13
- 'CCONJ': '#F0E68C', # Khaki
14
- 'DET': '#FFB6C1', # Light Pink
15
- 'INTJ': '#FF6347', # Tomato
16
- 'NOUN': '#90EE90', # Light Green
17
- 'NUM': '#FAFAD2', # Light Goldenrod Yellow
18
- 'PART': '#D3D3D3', # Light Gray
19
- 'PRON': '#FFA500', # Orange
20
- 'PROPN': '#20B2AA', # Light Sea Green
21
- 'SCONJ': '#DEB887', # Burlywood
22
- 'SYM': '#7B68EE', # Medium Slate Blue
23
- 'VERB': '#FF69B4', # Hot Pink
24
- 'X': '#A9A9A9', # Dark Gray
25
- }
26
-
27
- POS_TRANSLATIONS = {
28
- 'es': {
29
- 'ADJ': 'Adjetivo',
30
- 'ADP': 'Adposición',
31
- 'ADV': 'Adverbio',
32
- 'AUX': 'Auxiliar',
33
- 'CCONJ': 'Conjunción Coordinante',
34
- 'DET': 'Determinante',
35
- 'INTJ': 'Interjección',
36
- 'NOUN': 'Sustantivo',
37
- 'NUM': 'Número',
38
- 'PART': 'Partícula',
39
- 'PRON': 'Pronombre',
40
- 'PROPN': 'Nombre Propio',
41
- 'SCONJ': 'Conjunción Subordinante',
42
- 'SYM': 'Símbolo',
43
- 'VERB': 'Verbo',
44
- 'X': 'Otro',
45
- },
46
- 'en': {
47
- 'ADJ': 'Adjective',
48
- 'ADP': 'Adposition',
49
- 'ADV': 'Adverb',
50
- 'AUX': 'Auxiliary',
51
- 'CCONJ': 'Coordinating Conjunction',
52
- 'DET': 'Determiner',
53
- 'INTJ': 'Interjection',
54
- 'NOUN': 'Noun',
55
- 'NUM': 'Number',
56
- 'PART': 'Particle',
57
- 'PRON': 'Pronoun',
58
- 'PROPN': 'Proper Noun',
59
- 'SCONJ': 'Subordinating Conjunction',
60
- 'SYM': 'Symbol',
61
- 'VERB': 'Verb',
62
- 'X': 'Other',
63
- },
64
- 'fr': {
65
- 'ADJ': 'Adjectif',
66
- 'ADP': 'Adposition',
67
- 'ADV': 'Adverbe',
68
- 'AUX': 'Auxiliaire',
69
- 'CCONJ': 'Conjonction de Coordination',
70
- 'DET': 'Déterminant',
71
- 'INTJ': 'Interjection',
72
- 'NOUN': 'Nom',
73
- 'NUM': 'Nombre',
74
- 'PART': 'Particule',
75
- 'PRON': 'Pronom',
76
- 'PROPN': 'Nom Propre',
77
- 'SCONJ': 'Conjonction de Subordination',
78
- 'SYM': 'Symbole',
79
- 'VERB': 'Verbe',
80
- 'X': 'Autre',
81
- }
82
- }
83
-
84
- #############################################################################################
85
- def get_repeated_words_colors(doc):
86
- word_counts = Counter(token.text.lower() for token in doc if token.pos_ != 'PUNCT')
87
- repeated_words = {word: count for word, count in word_counts.items() if count > 1}
88
-
89
- word_colors = {}
90
- for token in doc:
91
- if token.text.lower() in repeated_words:
92
- word_colors[token.text.lower()] = POS_COLORS.get(token.pos_, '#FFFFFF')
93
-
94
- return word_colors
95
-
96
- ######################################################################################################
97
- def highlight_repeated_words(doc, word_colors):
98
- highlighted_text = []
99
- for token in doc:
100
- if token.text.lower() in word_colors:
101
- color = word_colors[token.text.lower()]
102
- highlighted_text.append(f'<span style="background-color: {color};">{token.text}</span>')
103
- else:
104
- highlighted_text.append(token.text)
105
- return ' '.join(highlighted_text)
106
-
107
- #################################################################################################
108
- def generate_arc_diagram(doc, lang_code):
109
- sentences = list(doc.sents)
110
- arc_diagrams = []
111
- for sent in sentences:
112
- html = displacy.render(sent, style="dep", options={"distance": 100})
113
- html = html.replace('height="375"', 'height="200"')
114
- html = re.sub(r'<svg[^>]*>', lambda m: m.group(0).replace('height="450"', 'height="300"'), html)
115
- html = re.sub(r'<g [^>]*transform="translate\((\d+),(\d+)\)"', lambda m: f'<g transform="translate({m.group(1)},50)"', html)
116
- arc_diagrams.append(html)
117
- return arc_diagrams