AIdeaText committed on
Commit
d6c6c2c
·
verified ·
1 Parent(s): 9787b7a

Update modules/text_analysis/semantic_analysis.py

Browse files
modules/text_analysis/semantic_analysis.py CHANGED
@@ -1,19 +1,27 @@
1
  # modules/text_analysis/semantic_analysis.py
2
- # [Mantener todas las importaciones y constantes existentes...]
3
 
 
 
 
 
 
 
 
4
  import streamlit as st
5
  import spacy
6
  import networkx as nx
7
  import matplotlib.pyplot as plt
8
- import io
9
- import base64
10
- from collections import Counter, defaultdict
11
  from sklearn.feature_extraction.text import TfidfVectorizer
12
  from sklearn.metrics.pairwise import cosine_similarity
13
 
14
- import logging
15
  logger = logging.getLogger(__name__)
 
 
 
 
16
 
 
17
  from .stopwords import (
18
  process_text,
19
  get_custom_stopwords,
@@ -77,15 +85,21 @@ ENTITY_LABELS = {
77
  }
78
  }
79
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  def perform_semantic_analysis(text, nlp, lang_code):
81
  """
82
  Realiza el análisis semántico completo del texto.
83
- Args:
84
- text: Texto a analizar
85
- nlp: Modelo de spaCy
86
- lang_code: Código del idioma
87
- Returns:
88
- dict: Resultados del análisis
89
  """
90
  try:
91
  logger.info(f"Starting semantic analysis for language: {lang_code}")
@@ -95,28 +109,58 @@ def perform_semantic_analysis(text, nlp, lang_code):
95
  tokens = process_text(text, lang_code, nlp)
96
 
97
  # Identificar conceptos clave
98
- key_concepts = identify_key_concepts(doc, stopwords=get_custom_stopwords(lang_code))
 
 
99
 
100
- # Crear y visualizar grafo de conceptos
 
 
 
 
 
 
 
 
101
  concept_graph = create_concept_graph(doc, key_concepts)
 
 
 
 
102
  concept_graph_fig = visualize_concept_graph(concept_graph, lang_code)
103
-
104
- # Convertir figura a bytes
105
- concept_graph_bytes = fig_to_bytes(concept_graph_fig)
106
-
107
- logger.info("Semantic analysis completed successfully")
 
 
 
 
 
 
 
 
 
 
 
 
108
  return {
109
  'success': True,
110
  'key_concepts': key_concepts,
111
- 'concept_graph': concept_graph_bytes,
112
  }
 
113
  except Exception as e:
114
  logger.error(f"Error in perform_semantic_analysis: {str(e)}")
 
115
  return {
116
  'success': False,
117
  'error': str(e)
118
  }
119
 
 
 
120
  def identify_key_concepts(doc, stopwords, min_freq=2, min_length=3):
121
  """
122
  Identifica conceptos clave en el texto.
@@ -144,20 +188,7 @@ def identify_key_concepts(doc, stopwords, min_freq=2, min_length=3):
144
  logger.error(f"Error en identify_key_concepts: {str(e)}")
145
  return []
146
 
147
- def fig_to_bytes(fig):
148
- buf = io.BytesIO()
149
- fig.savefig(buf, format='png')
150
- buf.seek(0)
151
- return buf.getvalue()
152
-
153
-
154
- def fig_to_html(fig):
155
- buf = io.BytesIO()
156
- fig.savefig(buf, format='png')
157
- buf.seek(0)
158
- img_str = base64.b64encode(buf.getvalue()).decode()
159
- return f'<img src="data:image/png;base64,{img_str}" />'
160
-
161
  def create_concept_graph(doc, key_concepts):
162
  """
163
  Crea un grafo de relaciones entre conceptos.
@@ -203,26 +234,27 @@ def create_concept_graph(doc, key_concepts):
203
  # Retornar un grafo vacío en caso de error
204
  return nx.Graph()
205
 
 
206
  def visualize_concept_graph(G, lang_code):
207
  """
208
  Visualiza el grafo de conceptos.
209
- Args:
210
- G: Grafo de networkx
211
- lang_code: Código del idioma
212
- Returns:
213
- matplotlib.figure.Figure: Figura con el grafo visualizado
214
  """
215
  try:
216
- plt.figure(figsize=(12, 8))
 
217
 
218
- # Calcular el layout del grafo
219
- pos = nx.spring_layout(G)
 
 
 
 
220
 
221
- # Obtener pesos de nodos y aristas
222
  node_weights = [G.nodes[node].get('weight', 1) * 500 for node in G.nodes()]
223
  edge_weights = [G[u][v].get('weight', 1) for u, v in G.edges()]
224
 
225
- # Dibujar el grafo
226
  nx.draw_networkx_nodes(G, pos,
227
  node_size=node_weights,
228
  node_color='lightblue',
@@ -240,13 +272,14 @@ def visualize_concept_graph(G, lang_code):
240
  plt.title("Red de conceptos relacionados")
241
  plt.axis('off')
242
 
243
- return plt.gcf()
244
 
245
  except Exception as e:
246
  logger.error(f"Error en visualize_concept_graph: {str(e)}")
247
- # Retornar una figura vacía en caso de error
248
- return plt.figure()
249
 
 
 
250
  def create_entity_graph(entities):
251
  G = nx.Graph()
252
  for entity_type, entity_list in entities.items():
@@ -257,6 +290,8 @@ def create_entity_graph(entities):
257
  G.add_edge(entity1, entity2)
258
  return G
259
 
 
 
260
  def visualize_entity_graph(G, lang_code):
261
  fig, ax = plt.subplots(figsize=(12, 8))
262
  pos = nx.spring_layout(G)
@@ -332,14 +367,7 @@ __all__ = [
332
  'identify_key_concepts',
333
  'create_concept_graph',
334
  'visualize_concept_graph',
335
- 'create_entity_graph',
336
- 'visualize_entity_graph',
337
- 'generate_summary',
338
- 'extract_entities',
339
- 'analyze_sentiment',
340
- 'create_topic_graph',
341
- 'visualize_topic_graph',
342
- 'extract_topics',
343
  'ENTITY_LABELS',
344
  'POS_COLORS',
345
  'POS_TRANSLATIONS'
 
1
  # modules/text_analysis/semantic_analysis.py
 
2
 
3
+ # 1. Importaciones estándar del sistema
4
+ import logging
5
+ import io
6
+ import base64
7
+ from collections import Counter, defaultdict
8
+
9
+ # 2. Importaciones de terceros
10
  import streamlit as st
11
  import spacy
12
  import networkx as nx
13
  import matplotlib.pyplot as plt
 
 
 
14
  from sklearn.feature_extraction.text import TfidfVectorizer
15
  from sklearn.metrics.pairwise import cosine_similarity
16
 
17
+ # 3. Configuración del logger
18
  logger = logging.getLogger(__name__)
19
+ logging.basicConfig(
20
+ level=logging.INFO,
21
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
22
+ )
23
 
24
+ # 4. Importaciones locales
25
  from .stopwords import (
26
  process_text,
27
  get_custom_stopwords,
 
85
  }
86
  }
87
 
88
+ def fig_to_bytes(fig):
89
+ """Convierte una figura de matplotlib a bytes."""
90
+ try:
91
+ buf = io.BytesIO()
92
+ fig.savefig(buf, format='png', dpi=300, bbox_inches='tight')
93
+ buf.seek(0)
94
+ return buf.getvalue()
95
+ except Exception as e:
96
+ logger.error(f"Error en fig_to_bytes: {str(e)}")
97
+ return None
98
+
99
+ ###########################################################
100
  def perform_semantic_analysis(text, nlp, lang_code):
101
  """
102
  Realiza el análisis semántico completo del texto.
 
 
 
 
 
 
103
  """
104
  try:
105
  logger.info(f"Starting semantic analysis for language: {lang_code}")
 
109
  tokens = process_text(text, lang_code, nlp)
110
 
111
  # Identificar conceptos clave
112
+ logger.info("Identificando conceptos clave...")
113
+ stopwords = get_custom_stopwords(lang_code)
114
+ key_concepts = identify_key_concepts(doc, stopwords=stopwords)
115
 
116
+ if not key_concepts:
117
+ logger.warning("No se identificaron conceptos clave")
118
+ return {
119
+ 'success': False,
120
+ 'error': 'No se pudieron identificar conceptos clave'
121
+ }
122
+
123
+ # Crear grafo de conceptos
124
+ logger.info("Creando grafo de conceptos...")
125
  concept_graph = create_concept_graph(doc, key_concepts)
126
+
127
+ # Visualizar grafo
128
+ logger.info("Visualizando grafo...")
129
+ plt.clf() # Limpiar figura actual
130
  concept_graph_fig = visualize_concept_graph(concept_graph, lang_code)
131
+
132
+ # Convertir a bytes
133
+ logger.info("Convirtiendo grafo a bytes...")
134
+ graph_bytes = fig_to_bytes(concept_graph_fig)
135
+
136
+ if not graph_bytes:
137
+ logger.error("Error al convertir grafo a bytes")
138
+ return {
139
+ 'success': False,
140
+ 'error': 'Error al generar visualización'
141
+ }
142
+
143
+ # Limpiar recursos
144
+ plt.close(concept_graph_fig)
145
+ plt.close('all')
146
+
147
+ logger.info("Análisis semántico completado exitosamente")
148
  return {
149
  'success': True,
150
  'key_concepts': key_concepts,
151
+ 'concept_graph': graph_bytes
152
  }
153
+
154
  except Exception as e:
155
  logger.error(f"Error in perform_semantic_analysis: {str(e)}")
156
+ plt.close('all') # Asegurarse de limpiar recursos
157
  return {
158
  'success': False,
159
  'error': str(e)
160
  }
161
 
162
+ ############################################################
163
+
164
  def identify_key_concepts(doc, stopwords, min_freq=2, min_length=3):
165
  """
166
  Identifica conceptos clave en el texto.
 
188
  logger.error(f"Error en identify_key_concepts: {str(e)}")
189
  return []
190
 
191
+ ########################################################################
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  def create_concept_graph(doc, key_concepts):
193
  """
194
  Crea un grafo de relaciones entre conceptos.
 
234
  # Retornar un grafo vacío en caso de error
235
  return nx.Graph()
236
 
237
+ ###############################################################################
238
  def visualize_concept_graph(G, lang_code):
239
  """
240
  Visualiza el grafo de conceptos.
 
 
 
 
 
241
  """
242
  try:
243
+ # Crear nueva figura
244
+ fig = plt.figure(figsize=(12, 8))
245
 
246
+ if not G.nodes():
247
+ logger.warning("Grafo vacío, retornando figura vacía")
248
+ return fig
249
+
250
+ # Calcular layout
251
+ pos = nx.spring_layout(G, k=1, iterations=50)
252
 
253
+ # Obtener pesos
254
  node_weights = [G.nodes[node].get('weight', 1) * 500 for node in G.nodes()]
255
  edge_weights = [G[u][v].get('weight', 1) for u, v in G.edges()]
256
 
257
+ # Dibujar grafo
258
  nx.draw_networkx_nodes(G, pos,
259
  node_size=node_weights,
260
  node_color='lightblue',
 
272
  plt.title("Red de conceptos relacionados")
273
  plt.axis('off')
274
 
275
+ return fig
276
 
277
  except Exception as e:
278
  logger.error(f"Error en visualize_concept_graph: {str(e)}")
279
+ return plt.figure() # Retornar figura vacía en caso de error
 
280
 
281
+
282
+ ########################################################################
283
  def create_entity_graph(entities):
284
  G = nx.Graph()
285
  for entity_type, entity_list in entities.items():
 
290
  G.add_edge(entity1, entity2)
291
  return G
292
 
293
+
294
+ #############################################################
295
  def visualize_entity_graph(G, lang_code):
296
  fig, ax = plt.subplots(figsize=(12, 8))
297
  pos = nx.spring_layout(G)
 
367
  'identify_key_concepts',
368
  'create_concept_graph',
369
  'visualize_concept_graph',
370
+ 'fig_to_bytes',
 
 
 
 
 
 
 
371
  'ENTITY_LABELS',
372
  'POS_COLORS',
373
  'POS_TRANSLATIONS'