Update modules/text_analysis/semantic_analysis.py
Browse files
modules/text_analysis/semantic_analysis.py
CHANGED
@@ -68,33 +68,36 @@ def identify_key_concepts(doc, top_n=10):
|
|
68 |
word_freq = Counter([token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ'] and not token.is_stop])
|
69 |
return word_freq.most_common(top_n)
|
70 |
|
71 |
-
def create_concept_graph(
|
72 |
-
vectorizer = TfidfVectorizer()
|
73 |
-
tfidf_matrix = vectorizer.fit_transform([text])
|
74 |
-
concept_vectors = vectorizer.transform([c[0] for c in concepts])
|
75 |
-
similarity_matrix = cosine_similarity(concept_vectors, concept_vectors)
|
76 |
-
|
77 |
G = nx.Graph()
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
return G
|
85 |
|
86 |
def visualize_concept_graph(G, lang):
|
87 |
-
fig, ax = plt.subplots(figsize=(
|
88 |
pos = nx.spring_layout(G, k=0.5, iterations=50)
|
89 |
|
90 |
node_sizes = [G.nodes[node]['weight'] * 100 for node in G.nodes()]
|
91 |
nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color='lightblue', alpha=0.8, ax=ax)
|
92 |
nx.draw_networkx_labels(G, pos, font_size=10, font_weight="bold", ax=ax)
|
93 |
-
nx.draw_networkx_edges(G, pos, width=1, alpha=0.5, ax=ax)
|
94 |
|
95 |
-
|
96 |
-
nx.
|
97 |
-
|
98 |
title = {
|
99 |
'es': "Relaciones entre Conceptos Clave",
|
100 |
'en': "Key Concept Relations",
|
@@ -102,18 +105,18 @@ def visualize_concept_graph(G, lang):
|
|
102 |
}
|
103 |
ax.set_title(title[lang], fontsize=16)
|
104 |
ax.axis('off')
|
105 |
-
|
106 |
plt.tight_layout()
|
107 |
return fig
|
108 |
|
109 |
def perform_semantic_analysis(text, nlp, lang):
|
110 |
doc = nlp(text)
|
111 |
-
|
112 |
# Identificar conceptos clave
|
113 |
key_concepts = identify_key_concepts(doc)
|
114 |
-
|
115 |
# Crear y visualizar grafo de conceptos
|
116 |
-
concept_graph = create_concept_graph(
|
117 |
relations_graph = visualize_concept_graph(concept_graph, lang)
|
118 |
|
119 |
return {
|
|
|
68 |
def identify_key_concepts(doc, top_n=10):
    """Return the top_n most frequent content-word lemmas in doc.

    Counts the lower-cased lemmas of non-stopword NOUN/VERB/ADJ tokens
    and returns (lemma, count) pairs in descending frequency order, as
    produced by Counter.most_common.
    """
    content_pos = ('NOUN', 'VERB', 'ADJ')
    freq = Counter(
        token.lemma_.lower()
        for token in doc
        if token.pos_ in content_pos and not token.is_stop
    )
    return freq.most_common(top_n)
|
70 |
|
71 |
+
def create_concept_graph(doc, key_concepts):
    """Build an undirected co-occurrence graph of the key concepts.

    Parameters
    ----------
    doc : processed document — assumed to be a spaCy Doc exposing `.sents`,
        whose tokens expose `.lemma_` (TODO confirm against caller).
    key_concepts : list of (concept, frequency) pairs as returned by
        identify_key_concepts().

    Returns
    -------
    networkx.Graph
        One node per concept (node attribute 'weight' = its frequency) and
        an edge between two distinct concepts for each sentence where both
        appear; the edge attribute 'weight' counts those co-occurrences.
    """
    G = nx.Graph()

    # Add nodes: one per key concept, weighted by document frequency.
    for concept, freq in key_concepts:
        G.add_node(concept, weight=freq)

    # Hoisted out of the sentence loop: the original rebuilt
    # dict(key_concepts) for every sentence just to test membership.
    concept_lookup = dict(key_concepts)

    # Add edges based on co-occurrence within a sentence.
    for sent in doc.sents:
        sent_concepts = [token.lemma_.lower() for token in sent
                         if token.lemma_.lower() in concept_lookup]
        for i, concept1 in enumerate(sent_concepts):
            for concept2 in sent_concepts[i + 1:]:
                if concept1 == concept2:
                    # A lemma repeated in one sentence is not a relation;
                    # the original created self-loop edges here, which
                    # distorted the width-by-weight rendering downstream.
                    continue
                if G.has_edge(concept1, concept2):
                    G[concept1][concept2]['weight'] += 1
                else:
                    G.add_edge(concept1, concept2, weight=1)

    return G
|
89 |
|
90 |
def visualize_concept_graph(G, lang):
|
91 |
+
fig, ax = plt.subplots(figsize=(12, 8))
|
92 |
pos = nx.spring_layout(G, k=0.5, iterations=50)
|
93 |
|
94 |
node_sizes = [G.nodes[node]['weight'] * 100 for node in G.nodes()]
|
95 |
nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color='lightblue', alpha=0.8, ax=ax)
|
96 |
nx.draw_networkx_labels(G, pos, font_size=10, font_weight="bold", ax=ax)
|
|
|
97 |
|
98 |
+
edge_weights = [G[u][v]['weight'] for u, v in G.edges()]
|
99 |
+
nx.draw_networkx_edges(G, pos, width=edge_weights, alpha=0.5, ax=ax)
|
100 |
+
|
101 |
title = {
|
102 |
'es': "Relaciones entre Conceptos Clave",
|
103 |
'en': "Key Concept Relations",
|
|
|
105 |
}
|
106 |
ax.set_title(title[lang], fontsize=16)
|
107 |
ax.axis('off')
|
108 |
+
|
109 |
plt.tight_layout()
|
110 |
return fig
|
111 |
|
112 |
def perform_semantic_analysis(text, nlp, lang):
|
113 |
doc = nlp(text)
|
114 |
+
|
115 |
# Identificar conceptos clave
|
116 |
key_concepts = identify_key_concepts(doc)
|
117 |
+
|
118 |
# Crear y visualizar grafo de conceptos
|
119 |
+
concept_graph = create_concept_graph(doc, key_concepts)
|
120 |
relations_graph = visualize_concept_graph(concept_graph, lang)
|
121 |
|
122 |
return {
|