AIdeaText committed on
Commit
bd62b3e
verified
1 Parent(s): ecd71cf

Update modules/text_analysis/semantic_analysis.py

Browse files
modules/text_analysis/semantic_analysis.py CHANGED
@@ -6,8 +6,6 @@ import matplotlib.pyplot as plt
6
  from collections import Counter
7
  from collections import defaultdict
8
 
9
- # Remove the global nlp model loading
10
-
11
  # Define colors for grammatical categories
12
  POS_COLORS = {
13
  'ADJ': '#FFA07A', # Light Salmon
@@ -215,9 +213,39 @@ def visualize_semantic_relations(doc, lang):
215
 
216
  return fig
217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  ############################################################################################################################################
219
  def perform_semantic_analysis(text, nlp, lang):
220
  doc = nlp(text)
 
 
 
 
 
 
221
 
222
  # Imprimir entidades para depuración
223
  print(f"Entidades encontradas ({lang}):")
@@ -225,6 +253,10 @@ def perform_semantic_analysis(text, nlp, lang):
225
  print(f"{ent.text} - {ent.label_}")
226
 
227
  relations_graph = visualize_semantic_relations(doc, lang)
228
- return relations_graph # Ahora solo devuelve un único gráfico
 
 
 
 
229
 
230
- __all__ = ['visualize_semantic_relations', 'create_semantic_graph', 'POS_COLORS', 'POS_TRANSLATIONS']
 
6
  from collections import Counter
7
  from collections import defaultdict
8
 
 
 
9
  # Define colors for grammatical categories
10
  POS_COLORS = {
11
  'ADJ': '#FFA07A', # Light Salmon
 
213
 
214
  return fig
215
 
216
############################################################################################################################################
def identify_and_contextualize_entities(doc, lang):
    """Collect named entities (with a small surrounding context window) and
    the most frequent key concepts from a parsed document.

    Args:
        doc: Parsed spaCy-style Doc exposing ``.ents`` and token attributes
            (``lemma_``, ``pos_``, ``is_stop``).
        lang: Language code; kept for interface symmetry, not used here.

    Returns:
        tuple: ``(entities, key_concepts)``. ``entities`` is a list of dicts
        with keys 'text', 'label', 'start', 'end', 'context'; ``key_concepts``
        is a list of ``(lemma, count)`` pairs with at most ten entries.
    """
    doc_len = len(doc)
    entities = []
    for ent in doc.ents:
        # Context window: up to three tokens on each side of the entity.
        window_start = max(0, ent.start - 3)
        window_end = min(doc_len, ent.end + 3)
        entities.append({
            'text': ent.text,
            'label': ent.label_,
            'start': ent.start,
            'end': ent.end,
            'context': doc[window_start:window_end].text,
        })

    # Key concepts: most frequent non-stopword NOUN/VERB lemmas (lowercased).
    lemmas = (
        token.lemma_.lower()
        for token in doc
        if token.pos_ in ['NOUN', 'VERB'] and not token.is_stop
    )
    key_concepts = Counter(lemmas).most_common(10)  # Top 10 key concepts

    return entities, key_concepts
240
############################################################################################################################################
def perform_semantic_analysis(text, nlp, lang):
    """Run the semantic analysis pipeline over ``text``.

    Args:
        text: Raw input text to analyze.
        nlp: Loaded spaCy-style language model (callable: text -> Doc).
        lang: Language code, forwarded to the helpers.

    Returns:
        dict: Keys 'entities' (list of entity dicts), 'key_concepts'
        (list of (lemma, count) pairs) and 'relations_graph'
        (matplotlib figure from visualize_semantic_relations).
    """
    doc = nlp(text)

    # Identify entities and key concepts.
    entities, key_concepts = identify_and_contextualize_entities(doc, lang)

    # Debug output: list the entities that were found.
    print(f"Entidades encontradas ({lang}):")
    for ent in doc.ents:
        print(f"{ent.text} - {ent.label_}")

    # Build the relations figure exactly once (the previous version called
    # visualize_semantic_relations twice and discarded the first figure).
    relations_graph = visualize_semantic_relations(doc, lang)

    return {
        'entities': entities,
        'key_concepts': key_concepts,
        'relations_graph': relations_graph
    }
# Explicit public API of this module for `from ... import *` consumers.
__all__ = ['visualize_semantic_relations', 'create_semantic_graph', 'POS_COLORS', 'POS_TRANSLATIONS', 'identify_and_contextualize_entities']