Update modules/semantic_analysis.py
Browse files- modules/semantic_analysis.py +52 -19
modules/semantic_analysis.py
CHANGED
@@ -85,6 +85,29 @@ POS_TRANSLATIONS = {
|
|
85 |
}
|
86 |
########################################################################################################################################
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
def count_pos(doc):
    """Tally the part-of-speech tags found in ``doc``, leaving punctuation out.

    Args:
        doc: Iterable of tokens, each exposing a ``pos_`` attribute.

    Returns:
        Counter mapping every ``pos_`` value other than ``'PUNCT'`` to the
        number of tokens carrying it.
    """
    tally = Counter()
    for token in doc:
        tag = token.pos_
        if tag == 'PUNCT':
            continue
        tally[tag] += 1
    return tally
|
90 |
|
@@ -95,31 +118,27 @@ from collections import Counter
|
|
95 |
|
96 |
# Mantén las definiciones de POS_COLORS y POS_TRANSLATIONS que ya tienes
|
97 |
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
"Lugares": [],
|
103 |
-
"Fechas": []
|
104 |
-
}
|
105 |
-
|
106 |
for ent in doc.ents:
|
107 |
if ent.label_ == "PERSON":
|
108 |
-
entities[
|
109 |
elif ent.label_ in ["LOC", "GPE"]:
|
110 |
-
entities[
|
111 |
elif ent.label_ == "DATE":
|
112 |
-
entities[
|
113 |
else:
|
114 |
-
entities[
|
115 |
-
|
116 |
return entities
|
117 |
|
|
|
118 |
def visualize_context_graph(doc, lang):
|
119 |
G = nx.Graph()
|
120 |
-
entities = extract_entities(doc)
|
121 |
-
|
122 |
-
color_map = {"Personas": "lightblue", "Conceptos": "lightgreen", "Lugares": "lightcoral", "Fechas": "lightyellow"}
|
123 |
|
124 |
# Add nodes
|
125 |
for category, items in entities.items():
|
@@ -139,7 +158,8 @@ def visualize_context_graph(doc, lang):
|
|
139 |
|
140 |
node_colors = [color_map[G.nodes[node]['category']] for node in G.nodes()]
|
141 |
|
142 |
-
nx.draw(G, pos, node_color=node_colors, with_labels=True, node_size=
|
|
|
143 |
|
144 |
# Add a legend
|
145 |
legend_elements = [plt.Rectangle((0,0),1,1,fc=color, edgecolor='none', label=category)
|
@@ -151,6 +171,7 @@ def visualize_context_graph(doc, lang):
|
|
151 |
|
152 |
return plt
|
153 |
|
|
|
154 |
def visualize_semantic_relations(doc, lang):
|
155 |
G = nx.Graph()
|
156 |
word_freq = Counter(token.text.lower() for token in doc if token.pos_ not in ['PUNCT', 'SPACE'])
|
@@ -175,8 +196,12 @@ def visualize_semantic_relations(doc, lang):
|
|
175 |
edge_labels = nx.get_edge_attributes(G, 'label')
|
176 |
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)
|
177 |
|
178 |
-
|
179 |
-
|
|
|
|
|
|
|
|
|
180 |
plt.axis('off')
|
181 |
|
182 |
legend_elements = [plt.Rectangle((0,0),1,1, facecolor=POS_COLORS.get(pos, '#CCCCCC'), edgecolor='none',
|
@@ -186,8 +211,16 @@ def visualize_semantic_relations(doc, lang):
|
|
186 |
|
187 |
return plt
|
188 |
|
|
|
|
|
189 |
def perform_semantic_analysis(text, nlp, lang):
    """Parse ``text`` with ``nlp`` and build both graph visualizations for it.

    Args:
        text: Input handed to ``nlp``.
        nlp: Callable that turns ``text`` into the document object consumed
            by the two visualization functions.
        lang: Language code forwarded unchanged to both visualizers.

    Returns:
        Tuple ``(context_graph, relations_graph)`` holding the results of
        ``visualize_context_graph`` and ``visualize_semantic_relations``,
        in that order.
    """
    parsed = nlp(text)
    # NOTE(review): both visualizers visibly end with ``return plt``; unless
    # each opens its own figure, the two results share pyplot state - confirm.
    return (
        visualize_context_graph(parsed, lang),
        visualize_semantic_relations(parsed, lang),
    )
|
|
|
85 |
}
|
86 |
########################################################################################################################################
|
87 |
|
88 |
+
# Node colors shared by every language, in the fixed category order
# people, concepts, places, dates.
_ENTITY_COLORS = ("lightblue", "lightgreen", "lightcoral", "lightyellow")

# Localized category names per language, listed in the same order as
# _ENTITY_COLORS.
_ENTITY_NAMES = {
    'es': ("Personas", "Conceptos", "Lugares", "Fechas"),
    'en': ("People", "Concepts", "Places", "Dates"),
    'fr': ("Personnes", "Concepts", "Lieux", "Dates"),
}

# Language code -> {localized category name: node color}.
# Key order is significant: extract_entities picks categories by position.
ENTITY_LABELS = {
    code: dict(zip(names, _ENTITY_COLORS))
    for code, names in _ENTITY_NAMES.items()
}
|
109 |
+
|
110 |
+
#########################################################################################################
|
111 |
def count_pos(doc):
    """Tally the part-of-speech tags found in ``doc``, leaving punctuation out.

    Args:
        doc: Iterable of tokens, each exposing a ``pos_`` attribute.

    Returns:
        Counter mapping every ``pos_`` value other than ``'PUNCT'`` to the
        number of tokens carrying it.
    """
    tally = Counter()
    for token in doc:
        tag = token.pos_
        if tag == 'PUNCT':
            continue
        tally[tag] += 1
    return tally
|
113 |
|
|
|
118 |
|
119 |
# Mantén las definiciones de POS_COLORS y POS_TRANSLATIONS que ya tienes
|
120 |
|
121 |
+
#############################################################################################################################
|
122 |
+
def extract_entities(doc, lang):
    """Group the named entities of ``doc`` under the category labels for ``lang``.

    Args:
        doc: Parsed document exposing ``ents``; each entity is read through
            its ``label_`` and ``text`` attributes.
        lang: Key into ``ENTITY_LABELS`` selecting the localized label set.

    Returns:
        dict mapping every label of ``ENTITY_LABELS[lang]`` to the list of
        entity texts assigned to it, in the order the entities appear in
        ``doc.ents``. Entities with an unrecognized tag go to the second
        (concepts) label.

    Raises:
        KeyError: If ``lang`` has no entry in ``ENTITY_LABELS``.
    """
    # Resolve the label names once instead of rebuilding the key list for
    # every entity. ENTITY_LABELS[lang] is ordered people, concepts, places,
    # dates, so the first four keys are taken positionally.
    labels = list(ENTITY_LABELS[lang])
    entities = {label: [] for label in labels}
    people, concepts, places, dates = labels[:4]

    # Accept both tag schemes: "PERSON" as well as "PER", which is what
    # spaCy's Spanish and French pipelines emit for people. Matching only
    # "PERSON" sent every person to the concepts bucket for 'es' and 'fr'.
    # NOTE(review): assumes ``doc`` comes from a spaCy pipeline - confirm.
    category_by_tag = {
        "PERSON": people,
        "PER": people,
        "LOC": places,
        "GPE": places,
        "DATE": dates,
    }

    for ent in doc.ents:
        entities[category_by_tag.get(ent.label_, concepts)].append(ent.text)

    return entities
|
136 |
|
137 |
+
#####################################################################################################################
|
138 |
def visualize_context_graph(doc, lang):
|
139 |
G = nx.Graph()
|
140 |
+
entities = extract_entities(doc, lang)
|
141 |
+
color_map = ENTITY_LABELS[lang]
|
|
|
142 |
|
143 |
# Add nodes
|
144 |
for category, items in entities.items():
|
|
|
158 |
|
159 |
node_colors = [color_map[G.nodes[node]['category']] for node in G.nodes()]
|
160 |
|
161 |
+
nx.draw(G, pos, node_color=node_colors, with_labels=True, node_size=5000,
|
162 |
+
font_size=12, font_weight='bold')
|
163 |
|
164 |
# Add a legend
|
165 |
legend_elements = [plt.Rectangle((0,0),1,1,fc=color, edgecolor='none', label=category)
|
|
|
171 |
|
172 |
return plt
|
173 |
|
174 |
+
############################################################################################################################################
|
175 |
def visualize_semantic_relations(doc, lang):
|
176 |
G = nx.Graph()
|
177 |
word_freq = Counter(token.text.lower() for token in doc if token.pos_ not in ['PUNCT', 'SPACE'])
|
|
|
196 |
edge_labels = nx.get_edge_attributes(G, 'label')
|
197 |
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)
|
198 |
|
199 |
+
title = {
|
200 |
+
'es': "Relaciones Sem谩nticas Relevantes",
|
201 |
+
'en': "Relevant Semantic Relations",
|
202 |
+
'fr': "Relations S茅mantiques Pertinentes"
|
203 |
+
}
|
204 |
+
plt.title(title[lang], fontsize=20, fontweight='bold')
|
205 |
plt.axis('off')
|
206 |
|
207 |
legend_elements = [plt.Rectangle((0,0),1,1, facecolor=POS_COLORS.get(pos, '#CCCCCC'), edgecolor='none',
|
|
|
211 |
|
212 |
return plt
|
213 |
|
214 |
+
|
215 |
+
############################################################################################################################################
|
216 |
def perform_semantic_analysis(text, nlp, lang):
    """Parse ``text`` with ``nlp``, log its entities and build both graphs.

    Args:
        text: Input handed to ``nlp``.
        nlp: Callable that turns ``text`` into the document object consumed
            by the two visualization functions; the result must expose
            ``ents`` whose items have ``text`` and ``label_``.
        lang: Language code shown in the debug header and forwarded
            unchanged to both visualizers.

    Returns:
        Tuple ``(context_graph, relations_graph)`` holding the results of
        ``visualize_context_graph`` and ``visualize_semantic_relations``,
        in that order.
    """
    parsed = nlp(text)

    # Debug aid: dump every detected entity and its label to stdout.
    print(f"Entidades encontradas ({lang}):")
    for entity in parsed.ents:
        print(f"{entity.text} - {entity.label_}")

    # NOTE(review): both visualizers visibly end with ``return plt``; unless
    # each opens its own figure, the two results share pyplot state - confirm.
    return (
        visualize_context_graph(parsed, lang),
        visualize_semantic_relations(parsed, lang),
    )
|