Update modules/text_analysis/semantic_analysis.py
modules/text_analysis/semantic_analysis.py (changed)
@@ -6,8 +6,6 @@ import matplotlib.pyplot as plt
 from collections import Counter
 from collections import defaultdict
 
-# Remove the global nlp model loading
-
 # Define colors for grammatical categories
 POS_COLORS = {
     'ADJ': '#FFA07A',  # Light Salmon
@@ -215,9 +213,39 @@ def visualize_semantic_relations(doc, lang):
 
     return fig
 
+############################################################################################################################################
+def identify_and_contextualize_entities(doc, lang):
+    entities = []
+    for ent in doc.ents:
+        # Get the context (3 words before and after the entity)
+        start = max(0, ent.start - 3)
+        end = min(len(doc), ent.end + 3)
+        context = doc[start:end].text
+
+        entities.append({
+            'text': ent.text,
+            'label': ent.label_,
+            'start': ent.start,
+            'end': ent.end,
+            'context': context
+        })
+
+    # Identify key concepts (using the most frequent nouns and verbs)
+    word_freq = Counter([token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB'] and not token.is_stop])
+    key_concepts = word_freq.most_common(10)  # Top 10 key concepts
+
+    return entities, key_concepts
+
+
 ############################################################################################################################################
 def perform_semantic_analysis(text, nlp, lang):
     doc = nlp(text)
+
+    # Identify entities and key concepts
+    entities, key_concepts = identify_and_contextualize_entities(doc, lang)
+
+    # Visualize semantic relations
+    relations_graph = visualize_semantic_relations(doc, lang)
 
     # Print entities for debugging
     print(f"Entidades encontradas ({lang}):")
@@ -225,6 +253,10 @@ def perform_semantic_analysis(text, nlp, lang):
         print(f"{ent.text} - {ent.label_}")
 
     relations_graph = visualize_semantic_relations(doc, lang)
-    return
+    return {
+        'entities': entities,
+        'key_concepts': key_concepts,
+        'relations_graph': relations_graph
+    }
 
-__all__ = ['visualize_semantic_relations', 'create_semantic_graph', 'POS_COLORS', 'POS_TRANSLATIONS']
+__all__ = ['visualize_semantic_relations', 'create_semantic_graph', 'POS_COLORS', 'POS_TRANSLATIONS', 'identify_and_contextualize_entities']
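For reviewers, a minimal usage sketch of the new return shape. This is not part of the diff: the doc/token attributes used above (doc.ents, token.pos_, token.is_stop) imply a spaCy pipeline, but the model name, sample text, lang value, and import path below are illustrative assumptions; the only change the sketch relies on is that perform_semantic_analysis now returns a dict with 'entities', 'key_concepts', and 'relations_graph' instead of None.

import spacy  # assumed NLP backend, based on the spaCy-style attributes in the module

from modules.text_analysis.semantic_analysis import perform_semantic_analysis  # path assumes the repo layout shown above

nlp = spacy.load("en_core_web_sm")  # placeholder model; any loaded spaCy pipeline should work
text = "Example text for semantic analysis."

result = perform_semantic_analysis(text, nlp, lang="en")

for ent in result['entities']:
    print(ent['text'], ent['label'], ent['context'])  # each entity with its 3-word context window
print(result['key_concepts'])  # up to 10 (lemma, frequency) pairs from Counter.most_common
result['relations_graph'].savefig("semantic_relations.png")  # presumably a matplotlib figure, per the module's imports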