AIdeaText committed on
Commit 67ddf17 (verified)
1 Parent(s): 327c7c0

Delete modules/semantic_analysis.py

Files changed (1)
  1. modules/semantic_analysis.py +0 -230
modules/semantic_analysis.py DELETED
@@ -1,230 +0,0 @@
- #semantic_analysis.py
- import streamlit as st
- import spacy
- import networkx as nx
- import matplotlib.pyplot as plt
- from collections import Counter
- from collections import defaultdict
-
- # Remove the global nlp model loading
-
- # Define colors for grammatical categories
- POS_COLORS = {
-     'ADJ': '#FFA07A',    # Light Salmon
-     'ADP': '#98FB98',    # Pale Green
-     'ADV': '#87CEFA',    # Light Sky Blue
-     'AUX': '#DDA0DD',    # Plum
-     'CCONJ': '#F0E68C',  # Khaki
-     'DET': '#FFB6C1',    # Light Pink
-     'INTJ': '#FF6347',   # Tomato
-     'NOUN': '#90EE90',   # Light Green
-     'NUM': '#FAFAD2',    # Light Goldenrod Yellow
-     'PART': '#D3D3D3',   # Light Gray
-     'PRON': '#FFA500',   # Orange
-     'PROPN': '#20B2AA',  # Light Sea Green
-     'SCONJ': '#DEB887',  # Burlywood
-     'SYM': '#7B68EE',    # Medium Slate Blue
-     'VERB': '#FF69B4',   # Hot Pink
-     'X': '#A9A9A9',      # Dark Gray
- }
-
- POS_TRANSLATIONS = {
-     'es': {
-         'ADJ': 'Adjetivo',
-         'ADP': 'Adposición',
-         'ADV': 'Adverbio',
-         'AUX': 'Auxiliar',
-         'CCONJ': 'Conjunción Coordinante',
-         'DET': 'Determinante',
-         'INTJ': 'Interjección',
-         'NOUN': 'Sustantivo',
-         'NUM': 'Número',
-         'PART': 'Partícula',
-         'PRON': 'Pronombre',
-         'PROPN': 'Nombre Propio',
-         'SCONJ': 'Conjunción Subordinante',
-         'SYM': 'Símbolo',
-         'VERB': 'Verbo',
-         'X': 'Otro',
-     },
-     'en': {
-         'ADJ': 'Adjective',
-         'ADP': 'Adposition',
-         'ADV': 'Adverb',
-         'AUX': 'Auxiliary',
-         'CCONJ': 'Coordinating Conjunction',
-         'DET': 'Determiner',
-         'INTJ': 'Interjection',
-         'NOUN': 'Noun',
-         'NUM': 'Number',
-         'PART': 'Particle',
-         'PRON': 'Pronoun',
-         'PROPN': 'Proper Noun',
-         'SCONJ': 'Subordinating Conjunction',
-         'SYM': 'Symbol',
-         'VERB': 'Verb',
-         'X': 'Other',
-     },
-     'fr': {
-         'ADJ': 'Adjectif',
-         'ADP': 'Adposition',
-         'ADV': 'Adverbe',
-         'AUX': 'Auxiliaire',
-         'CCONJ': 'Conjonction de Coordination',
-         'DET': 'Déterminant',
-         'INTJ': 'Interjection',
-         'NOUN': 'Nom',
-         'NUM': 'Nombre',
-         'PART': 'Particule',
-         'PRON': 'Pronom',
-         'PROPN': 'Nom Propre',
-         'SCONJ': 'Conjonction de Subordination',
-         'SYM': 'Symbole',
-         'VERB': 'Verbe',
-         'X': 'Autre',
-     }
- }
- ########################################################################################################################################
-
- # Define the labels and colors for each language
- ENTITY_LABELS = {
-     'es': {
-         "Personas": "lightblue",
-         "Conceptos": "lightgreen",
-         "Lugares": "lightcoral",
-         "Fechas": "lightyellow"
-     },
-     'en': {
-         "People": "lightblue",
-         "Concepts": "lightgreen",
-         "Places": "lightcoral",
-         "Dates": "lightyellow"
-     },
-     'fr': {
-         "Personnes": "lightblue",
-         "Concepts": "lightgreen",
-         "Lieux": "lightcoral",
-         "Dates": "lightyellow"
-     }
- }
-
- #########################################################################################################
- def count_pos(doc):
-     return Counter(token.pos_ for token in doc if token.pos_ != 'PUNCT')
-
- #####################################################################################################################
-
- def create_semantic_graph(doc, lang):
-     G = nx.Graph()
-     word_freq = defaultdict(int)
-     lemma_to_word = {}
-     lemma_to_pos = {}
-
-     # Count frequencies of lemmas and map lemmas to their most common word form and POS
-     for token in doc:
-         if token.pos_ in ['NOUN', 'VERB']:
-             lemma = token.lemma_.lower()
-             word_freq[lemma] += 1
-             if lemma not in lemma_to_word or token.text.lower() == lemma:
-                 lemma_to_word[lemma] = token.text
-                 lemma_to_pos[lemma] = token.pos_
-
-     # Get top 20 most frequent lemmas
-     top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]
-
-     # Add nodes
-     for lemma in top_lemmas:
-         word = lemma_to_word[lemma]
-         G.add_node(word, pos=lemma_to_pos[lemma])
-
-     # Add edges
-     for token in doc:
-         if token.lemma_.lower() in top_lemmas:
-             if token.head.lemma_.lower() in top_lemmas:
-                 source = lemma_to_word[token.lemma_.lower()]
-                 target = lemma_to_word[token.head.lemma_.lower()]
-                 if source != target:  # Avoid self-loops
-                     G.add_edge(source, target, label=token.dep_)
-
-     return G, word_freq
-
- ############################################################################################################################################
-
- def visualize_semantic_relations(doc, lang):
-     G = nx.Graph()
-     word_freq = defaultdict(int)
-     lemma_to_word = {}
-     lemma_to_pos = {}
-
-     # Count frequencies of lemmas and map lemmas to their most common word form and POS
-     for token in doc:
-         if token.pos_ in ['NOUN', 'VERB']:
-             lemma = token.lemma_.lower()
-             word_freq[lemma] += 1
-             if lemma not in lemma_to_word or token.text.lower() == lemma:
-                 lemma_to_word[lemma] = token.text
-                 lemma_to_pos[lemma] = token.pos_
-
-     # Get top 20 most frequent lemmas
-     top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]
-
-     # Add nodes
-     for lemma in top_lemmas:
-         word = lemma_to_word[lemma]
-         G.add_node(word, pos=lemma_to_pos[lemma])
-
-     # Add edges
-     for token in doc:
-         if token.lemma_.lower() in top_lemmas:
-             if token.head.lemma_.lower() in top_lemmas:
-                 source = lemma_to_word[token.lemma_.lower()]
-                 target = lemma_to_word[token.head.lemma_.lower()]
-                 if source != target:  # Avoid self-loops
-                     G.add_edge(source, target, label=token.dep_)
-
-     fig, ax = plt.subplots(figsize=(36, 27))
-     pos = nx.spring_layout(G, k=0.7, iterations=50)
-
-     node_colors = [POS_COLORS.get(G.nodes[node]['pos'], '#CCCCCC') for node in G.nodes()]
-
-     nx.draw(G, pos, node_color=node_colors, with_labels=True,
-             node_size=10000,
-             font_size=16,
-             font_weight='bold',
-             arrows=True,
-             arrowsize=30,
-             width=3,
-             edge_color='gray',
-             ax=ax)
-
-     edge_labels = nx.get_edge_attributes(G, 'label')
-     nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=14, ax=ax)
-
-     title = {
-         'es': "Relaciones Semánticas Relevantes",
-         'en': "Relevant Semantic Relations",
-         'fr': "Relations Sémantiques Pertinentes"
-     }
-     ax.set_title(title[lang], fontsize=24, fontweight='bold')
-     ax.axis('off')
-
-     legend_elements = [plt.Rectangle((0, 0), 1, 1, fc=POS_COLORS.get(pos, '#CCCCCC'), edgecolor='none',
-                                      label=f"{POS_TRANSLATIONS[lang].get(pos, pos)}")
-                        for pos in ['NOUN', 'VERB']]
-     ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=16)
-
-     return fig
-
- ############################################################################################################################################
- def perform_semantic_analysis(text, nlp, lang):
-     doc = nlp(text)
-
-     # Print found entities for debugging
-     print(f"Entidades encontradas ({lang}):")
-     for ent in doc.ents:
-         print(f"{ent.text} - {ent.label_}")
-
-     relations_graph = visualize_semantic_relations(doc, lang)
-     return relations_graph  # Now returns only a single figure
-
- __all__ = ['visualize_semantic_relations', 'create_semantic_graph', 'POS_COLORS', 'POS_TRANSLATIONS']
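
For context, a minimal usage sketch of how this module could be driven before its deletion, based only on the signatures above. The spaCy model name (es_core_news_sm) and the sample text are illustrative assumptions, not taken from this repository; the caller supplies the loaded nlp object, since the module's global model loading had already been removed.

import spacy
from modules.semantic_analysis import perform_semantic_analysis

# Illustrative model choice; any spaCy pipeline for the target language works
nlp_es = spacy.load("es_core_news_sm")
fig = perform_semantic_analysis("El gato come pescado junto al río.", nlp_es, "es")
fig.savefig("semantic_relations.png")  # or st.pyplot(fig) inside the Streamlit app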