AIdeaText committed on
Commit a9dba4e
1 Parent(s): 1e24719

Update modules/text_analysis/semantic_analysis.py

modules/text_analysis/semantic_analysis.py CHANGED
@@ -87,60 +87,41 @@ def perform_semantic_analysis(text, nlp, lang_code):
    Returns:
        dict: Analysis results
    """
-
-    logger.info(f"Starting semantic analysis for language: {lang_code}")
    try:
-        tokens = process_text(text, lang_code, nlp)
+        logger.info(f"Starting semantic analysis for language: {lang_code}")
+
+        # Process text and remove stopwords
        doc = nlp(text)
-        key_concepts = identify_key_concepts(doc)
+        tokens = process_text(text, lang_code, nlp)
+
+        # Identify key concepts
+        key_concepts = identify_key_concepts(doc, stopwords=get_custom_stopwords(lang_code))
+
+        # Create and visualize the concept graph
        concept_graph = create_concept_graph(doc, key_concepts)
        concept_graph_fig = visualize_concept_graph(concept_graph, lang_code)

-        # Convert figures to bytes
+        # Convert the figure to bytes
        concept_graph_bytes = fig_to_bytes(concept_graph_fig)

-
        logger.info("Semantic analysis completed successfully")
        return {
+            'success': True,
            'key_concepts': key_concepts,
            'concept_graph': concept_graph_bytes,
        }
    except Exception as e:
        logger.error(f"Error in perform_semantic_analysis: {str(e)}")
-        raise
-
-
-def fig_to_bytes(fig):
-    buf = io.BytesIO()
-    fig.savefig(buf, format='png')
-    buf.seek(0)
-    return buf.getvalue()
-
-
-def fig_to_html(fig):
-    buf = io.BytesIO()
-    fig.savefig(buf, format='png')
-    buf.seek(0)
-    img_str = base64.b64encode(buf.getvalue()).decode()
-    return f'<img src="data:image/png;base64,{img_str}" />'
-
-
+        return {
+            'success': False,
+            'error': str(e)
+        }


-def identify_key_concepts(doc, min_freq=2, min_length=3):
+def identify_key_concepts(doc, stopwords, min_freq=2, min_length=3):
    """
    Identifies key concepts in the text.
-    Args:
-        doc: Document processed by spaCy
-        min_freq: Minimum frequency for a concept to be considered
-        min_length: Minimum word length to be considered
-    Returns:
-        list: List of (concept, frequency) tuples
    """
    try:
-        # Get stopwords for the language
-        stopwords = get_stopwords(doc.lang_)
-
-        # Count word frequencies
        word_freq = Counter()

        for token in doc:
@@ -152,19 +133,30 @@ def identify_key_concepts(doc, min_freq=2, min_length=3):
        word_freq[token.lemma_.lower()] += 1

-        # Filter by minimum frequency
        concepts = [(word, freq) for word, freq in word_freq.items()
                    if freq >= min_freq]
-
-        # Sort by frequency
        concepts.sort(key=lambda x: x[1], reverse=True)

-        return concepts[:10]  # Return the 10 most frequent concepts
+        logger.info(f"Identified {len(concepts)} key concepts")
+        return concepts[:10]

    except Exception as e:
        logger.error(f"Error en identify_key_concepts: {str(e)}")
-        return []  # Return an empty list on error
+        return []

+def fig_to_bytes(fig):
+    buf = io.BytesIO()
+    fig.savefig(buf, format='png')
+    buf.seek(0)
+    return buf.getvalue()
+
+
+def fig_to_html(fig):
+    buf = io.BytesIO()
+    fig.savefig(buf, format='png')
+    buf.seek(0)
+    img_str = base64.b64encode(buf.getvalue()).decode()
+    return f'<img src="data:image/png;base64,{img_str}" />'

def create_concept_graph(doc, key_concepts):
    """