AIdeaText committed on
Commit
b5a7450
·
verified ·
1 Parent(s): d713f7b

Update modules/text_analysis/semantic_analysis.py

Browse files
modules/text_analysis/semantic_analysis.py CHANGED
@@ -68,33 +68,36 @@ def identify_key_concepts(doc, top_n=10):
68
  word_freq = Counter([token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ'] and not token.is_stop])
69
  return word_freq.most_common(top_n)
70
 
71
- def create_concept_graph(text, concepts):
72
- vectorizer = TfidfVectorizer()
73
- tfidf_matrix = vectorizer.fit_transform([text])
74
- concept_vectors = vectorizer.transform([c[0] for c in concepts])
75
- similarity_matrix = cosine_similarity(concept_vectors, concept_vectors)
76
-
77
  G = nx.Graph()
78
- for i, (concept, weight) in enumerate(concepts):
79
- G.add_node(concept, weight=weight)
80
- for j in range(i+1, len(concepts)):
81
- if similarity_matrix[i][j] > 0.1:
82
- G.add_edge(concept, concepts[j][0], weight=similarity_matrix[i][j])
83
-
 
 
 
 
 
 
 
 
 
84
  return G
85
 
86
  def visualize_concept_graph(G, lang):
87
- fig, ax = plt.subplots(figsize=(15, 10))
88
  pos = nx.spring_layout(G, k=0.5, iterations=50)
89
 
90
  node_sizes = [G.nodes[node]['weight'] * 100 for node in G.nodes()]
91
  nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color='lightblue', alpha=0.8, ax=ax)
92
  nx.draw_networkx_labels(G, pos, font_size=10, font_weight="bold", ax=ax)
93
- nx.draw_networkx_edges(G, pos, width=1, alpha=0.5, ax=ax)
94
 
95
- edge_labels = nx.get_edge_attributes(G, 'weight')
96
- nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8, ax=ax)
97
-
98
  title = {
99
  'es': "Relaciones entre Conceptos Clave",
100
  'en': "Key Concept Relations",
@@ -102,18 +105,18 @@ def visualize_concept_graph(G, lang):
102
  }
103
  ax.set_title(title[lang], fontsize=16)
104
  ax.axis('off')
105
-
106
  plt.tight_layout()
107
  return fig
108
 
109
  def perform_semantic_analysis(text, nlp, lang):
110
  doc = nlp(text)
111
-
112
  # Identificar conceptos clave
113
  key_concepts = identify_key_concepts(doc)
114
-
115
  # Crear y visualizar grafo de conceptos
116
- concept_graph = create_concept_graph(text, key_concepts)
117
  relations_graph = visualize_concept_graph(concept_graph, lang)
118
 
119
  return {
 
68
  word_freq = Counter([token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ'] and not token.is_stop])
69
  return word_freq.most_common(top_n)
70
 
71
def create_concept_graph(doc, key_concepts):
    """Build an undirected sentence co-occurrence graph of the key concepts.

    Each concept becomes a node whose ``weight`` attribute is its frequency.
    Two distinct concepts are linked when their lowercased lemmas appear in
    the same sentence of ``doc``; the edge ``weight`` counts how many such
    pairings were found across all sentences.

    Args:
        doc: Parsed document exposing ``sents``; every sentence yields tokens
            that carry a ``lemma_`` attribute.
        key_concepts: ``(concept, frequency)`` pairs, such as the result of
            ``identify_key_concepts``.

    Returns:
        The populated ``nx.Graph``.
    """
    G = nx.Graph()

    # Add the nodes, and collect the concept names once so the membership
    # test below is an O(1) set lookup instead of rebuilding a dict for
    # every token.
    concept_names = set()
    for concept, freq in key_concepts:
        G.add_node(concept, weight=freq)
        concept_names.add(concept)

    # Add edges based on co-occurrence within a sentence.
    for sent in doc.sents:
        lemmas = (token.lemma_.lower() for token in sent)
        sent_concepts = [lemma for lemma in lemmas if lemma in concept_names]
        for i, concept1 in enumerate(sent_concepts):
            for concept2 in sent_concepts[i + 1:]:
                if concept1 == concept2:
                    # A concept repeated in one sentence is not a relation
                    # between two concepts; skip it to avoid self-loops.
                    continue
                if G.has_edge(concept1, concept2):
                    G[concept1][concept2]['weight'] += 1
                else:
                    G.add_edge(concept1, concept2, weight=1)

    return G
89
 
90
  def visualize_concept_graph(G, lang):
91
+ fig, ax = plt.subplots(figsize=(12, 8))
92
  pos = nx.spring_layout(G, k=0.5, iterations=50)
93
 
94
  node_sizes = [G.nodes[node]['weight'] * 100 for node in G.nodes()]
95
  nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color='lightblue', alpha=0.8, ax=ax)
96
  nx.draw_networkx_labels(G, pos, font_size=10, font_weight="bold", ax=ax)
 
97
 
98
+ edge_weights = [G[u][v]['weight'] for u, v in G.edges()]
99
+ nx.draw_networkx_edges(G, pos, width=edge_weights, alpha=0.5, ax=ax)
100
+
101
  title = {
102
  'es': "Relaciones entre Conceptos Clave",
103
  'en': "Key Concept Relations",
 
105
  }
106
  ax.set_title(title[lang], fontsize=16)
107
  ax.axis('off')
108
+
109
  plt.tight_layout()
110
  return fig
111
 
112
  def perform_semantic_analysis(text, nlp, lang):
113
  doc = nlp(text)
114
+
115
  # Identificar conceptos clave
116
  key_concepts = identify_key_concepts(doc)
117
+
118
  # Crear y visualizar grafo de conceptos
119
+ concept_graph = create_concept_graph(doc, key_concepts)
120
  relations_graph = visualize_concept_graph(concept_graph, lang)
121
 
122
  return {