AIdeaText committed (verified)
Commit 3811fd3
Parents: e74c829 c9dcd5b

Merge branch 'AIdeaText/test' into 'AIdeaText/test2'
app.py CHANGED
@@ -18,7 +18,6 @@ from modules.auth.auth import authenticate_user, register_user
 from modules.admin.admin_ui import admin_page
 
 from modules.ui.ui import (
-    main,
     login_register_page,
     login_form,
     display_morphosyntax_analysis_interface,
modules/__init__.py CHANGED
@@ -91,14 +91,18 @@ def morpho_analysis_functions():
 
 def semantic_analysis_text_functions():
     from modules.analysis_text.semantic_analysis import (
-        visualize_semantic_relations,
+        #visualize_semantic_relations,
         perform_semantic_analysis,
-        create_semantic_graph
+        create_semantic_graph,
+        create_concept_graph,  # required for the 'create_concept_graph' entry below
+        visualize_concept_graph,
     )
     return {
+        #'visualize_semantic_relations': visualize_semantic_relations,
         'perform_semantic_analysis': perform_semantic_analysis,
-        'create_semantic_graph': create_semantic_graph
+        'create_semantic_graph': create_semantic_graph,
+        'create_concept_graph': create_concept_graph,
+        'visualize_concept_graph': visualize_concept_graph,
     }
 
 def discourse_analysis_text_functions():
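These accessors defer imports until first use. A hypothetical usage sketch (not part of the commit; assumes the modules.analysis_text package path used by the accessor resolves in this repo):

    # Hypothetical driver code, not part of the commit.
    from modules import semantic_analysis_text_functions

    funcs = semantic_analysis_text_functions()  # imports happen on this call
    perform_semantic_analysis = funcs['perform_semantic_analysis']
    visualize_concept_graph = funcs['visualize_concept_graph']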
modules/database/database.py CHANGED
@@ -256,20 +256,26 @@ def store_semantic_result(username, text, analysis_result):
     if analysis_collection is None:
         logger.error("La conexión a MongoDB no está inicializada")
         return False
+
     try:
+        # Convert the graph to a base64-encoded image
         buf = io.BytesIO()
         analysis_result['relations_graph'].savefig(buf, format='png')
         buf.seek(0)
         img_str = base64.b64encode(buf.getvalue()).decode('utf-8')
+
+        # Convert the key concepts to a list of tuples
+        key_concepts = [(concept, float(frequency)) for concept, frequency in analysis_result['key_concepts']]
+
         analysis_document = {
             'username': username,
             'timestamp': datetime.utcnow(),
             'text': text,
-            'entities': analysis_result['entities'],
-            'key_concepts': analysis_result['key_concepts'],
-            'network_diagram': img_str,  # changed from 'relations_graph' to 'network_diagram'
+            'key_concepts': key_concepts,
+            'network_diagram': img_str,
             'analysis_type': 'semantic'
         }
+
         result = analysis_collection.insert_one(analysis_document)
         logger.info(f"Análisis semántico guardado con ID: {result.inserted_id} para el usuario: {username}")
         logger.info(f"Longitud de la imagen guardada: {len(img_str)}")
@@ -280,19 +286,19 @@ def store_semantic_result(username, text, analysis_result):
 
 ###############################################################################################################
 
-def store_discourse_analysis_result(username, text1, text2, graph1, graph2):
+def store_discourse_analysis_result(username, text1, text2, analysis_result):
     try:
         # Create a new combined figure
         fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
 
-        # Add the first image, with a title
-        ax1.imshow(graph1.get_figure().canvas.renderer.buffer_rgba())
-        ax1.set_title("Documento Patrón: Relaciones semánticas relevantes")
+        # Add the first image
+        ax1.imshow(analysis_result['graph1'].canvas.renderer.buffer_rgba())
+        ax1.set_title("Documento 1: Relaciones Conceptuales")
         ax1.axis('off')
 
-        # Add the second image, with a title
-        ax2.imshow(graph2.get_figure().canvas.renderer.buffer_rgba())
-        ax2.set_title("Documento Comparado con el documento patrón: Relaciones semánticas relevantes")
+        # Add the second image
+        ax2.imshow(analysis_result['graph2'].canvas.renderer.buffer_rgba())
+        ax2.set_title("Documento 2: Relaciones Conceptuales")
         ax2.axis('off')
 
         # Adjust the layout
@@ -306,8 +312,12 @@ def store_discourse_analysis_result(username, text1, text2, graph1, graph2):
 
         # Close the figures to free memory
         plt.close(fig)
-        plt.close(graph1.get_figure())
-        plt.close(graph2.get_figure())
+        plt.close(analysis_result['graph1'])
+        plt.close(analysis_result['graph2'])
+
+        # Convert the key concepts to lists of tuples
+        key_concepts1 = [(concept, float(frequency)) for concept, frequency in analysis_result['table1'].values.tolist()]
+        key_concepts2 = [(concept, float(frequency)) for concept, frequency in analysis_result['table2'].values.tolist()]
 
         analysis_document = {
             'username': username,
@@ -315,11 +325,12 @@ def store_discourse_analysis_result(username, text1, text2, graph1, graph2):
             'text1': text1,
             'text2': text2,
             'combined_graph': img_str,
+            'key_concepts1': key_concepts1,
+            'key_concepts2': key_concepts2,
             'analysis_type': 'discourse'
         }
 
         result = analysis_collection.insert_one(analysis_document)
-
         logger.info(f"Análisis discursivo guardado con ID: {result.inserted_id} para el usuario: {username}")
         return True
     except Exception as e:
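For reference, minimal sketches of the documents the two functions above now insert (field names taken from the diff; the values are illustrative placeholders):

    from datetime import datetime

    # Illustrative placeholders only; real values come from the analysis pipeline.
    semantic_doc = {
        'username': 'student1',
        'timestamp': datetime.utcnow(),
        'text': '...',
        'key_concepts': [('aplicación', 3.0), ('redacción', 2.0)],  # (concept, frequency) tuples
        'network_diagram': '<base64-encoded PNG>',
        'analysis_type': 'semantic',
    }

    discourse_doc = {
        'username': 'student1',
        'timestamp': datetime.utcnow(),
        'text1': '...',
        'text2': '...',
        'combined_graph': '<base64-encoded PNG>',
        'key_concepts1': [('educación', 2.0)],
        'key_concepts2': [('tecnología', 4.0)],
        'analysis_type': 'discourse',
    }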
modules/text_analysis/discourse_analysis.py CHANGED
@@ -2,53 +2,88 @@ import streamlit as st
 import spacy
 import networkx as nx
 import matplotlib.pyplot as plt
-from collections import defaultdict
-from .semantic_analysis import visualize_semantic_relations, create_semantic_graph, POS_COLORS, POS_TRANSLATIONS
+import pandas as pd
+from .semantic_analysis import (
+    create_concept_graph,
+    visualize_concept_graph,
+    identify_key_concepts,
+    POS_COLORS,
+    POS_TRANSLATIONS,
+    ENTITY_LABELS
+)
 
-##################################################################################################################
 def compare_semantic_analysis(text1, text2, nlp, lang):
     doc1 = nlp(text1)
     doc2 = nlp(text2)
-
-    G1, pos_counts1 = create_semantic_graph(doc1, lang)
-    G2, pos_counts2 = create_semantic_graph(doc2, lang)
-
-    # Create two separate figures with a smaller size
-    fig1, ax1 = plt.subplots(figsize=(18, 13))
-    fig2, ax2 = plt.subplots(figsize=(18, 13))
-
-    # Draw the first graph
-    pos1 = nx.spring_layout(G1, k=0.7, iterations=50)
-    nx.draw(G1, pos1, ax=ax1, node_color=[POS_COLORS.get(G1.nodes[node]['pos'], '#CCCCCC') for node in G1.nodes()],
-            with_labels=True, node_size=4000, font_size=10, font_weight='bold',
-            arrows=True, arrowsize=20, width=2, edge_color='gray')
-    nx.draw_networkx_edge_labels(G1, pos1, edge_labels=nx.get_edge_attributes(G1, 'label'), font_size=8, ax=ax1)
-
-    # Draw the second graph
-    pos2 = nx.spring_layout(G2, k=0.7, iterations=50)
-    nx.draw(G2, pos2, ax=ax2, node_color=[POS_COLORS.get(G2.nodes[node]['pos'], '#CCCCCC') for node in G2.nodes()],
-            with_labels=True, node_size=4000, font_size=10, font_weight='bold',
-            arrows=True, arrowsize=20, width=2, edge_color='gray')
-    nx.draw_networkx_edge_labels(G2, pos2, edge_labels=nx.get_edge_attributes(G2, 'label'), font_size=8, ax=ax2)
-
-    ax1.set_title("Documento 1: Relaciones Semánticas Relevantes", fontsize=14, fontweight='bold')
-    ax2.set_title("Documento 2: Relaciones Semánticas Relevantes", fontsize=14, fontweight='bold')
-
-    ax1.axis('off')
-    ax2.axis('off')
-
-    # Add legends
-    legend_elements = [plt.Rectangle((0,0),1,1,fc=POS_COLORS.get(pos, '#CCCCCC'), edgecolor='none',
-                       label=f"{POS_TRANSLATIONS[lang].get(pos, pos)}")
-                       for pos in ['NOUN', 'VERB']]
-    ax1.legend(handles=legend_elements, loc='upper left', bbox_to_anchor=(0, 1), fontsize=8)
-    ax2.legend(handles=legend_elements, loc='upper left', bbox_to_anchor=(0, 1), fontsize=8)
-
-    plt.tight_layout()
-
-    return fig1, fig2
-
-##################################################################################################################
+
+    # Identify key concepts for both documents
+    key_concepts1 = identify_key_concepts(doc1)
+    key_concepts2 = identify_key_concepts(doc2)
+
+    # Create concept graphs for both documents
+    G1 = create_concept_graph(doc1, key_concepts1)
+    G2 = create_concept_graph(doc2, key_concepts2)
+
+    # Visualize the concept graphs
+    fig1 = visualize_concept_graph(G1, lang)
+    fig2 = visualize_concept_graph(G2, lang)
+
+    # Remove the overlapping titles
+    fig1.suptitle("")
+    fig2.suptitle("")
+
+    return fig1, fig2, key_concepts1, key_concepts2
+
+def create_concept_table(key_concepts):
+    df = pd.DataFrame(key_concepts, columns=['Concepto', 'Frecuencia'])
+    df['Frecuencia'] = df['Frecuencia'].round(2)
+    return df
+
 def perform_discourse_analysis(text1, text2, nlp, lang):
-    graph1, graph2 = compare_semantic_analysis(text1, text2, nlp, lang)
-    return graph1, graph2
+    graph1, graph2, key_concepts1, key_concepts2 = compare_semantic_analysis(text1, text2, nlp, lang)
+
+    # Create key-concept tables
+    table1 = create_concept_table(key_concepts1)
+    table2 = create_concept_table(key_concepts2)
+
+    return {
+        'graph1': graph1,
+        'graph2': graph2,
+        'table1': table1,
+        'table2': table2
+    }
+
+def display_discourse_analysis_results(analysis_result, lang_code):
+    translations = {
+        'es': {
+            'doc1_title': "Documento 1: Relaciones Conceptuales",
+            'doc2_title': "Documento 2: Relaciones Conceptuales",
+            'key_concepts': "Conceptos Clave",
+        },
+        'en': {
+            'doc1_title': "Document 1: Conceptual Relations",
+            'doc2_title': "Document 2: Conceptual Relations",
+            'key_concepts': "Key Concepts",
+        },
+        'fr': {
+            'doc1_title': "Document 1 : Relations Conceptuelles",
+            'doc2_title': "Document 2 : Relations Conceptuelles",
+            'key_concepts': "Concepts Clés",
+        }
+    }
+
+    t = translations[lang_code]
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        with st.expander(t['doc1_title'], expanded=True):
+            st.pyplot(analysis_result['graph1'])
+            st.subheader(t['key_concepts'])
+            st.table(analysis_result['table1'])
+
+    with col2:
+        with st.expander(t['doc2_title'], expanded=True):
+            st.pyplot(analysis_result['graph2'])
+            st.subheader(t['key_concepts'])
+            st.table(analysis_result['table2'])
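A minimal driver for the reworked discourse API (a sketch; the spaCy model name and the sample texts are assumptions):

    import spacy
    from modules.text_analysis.discourse_analysis import perform_discourse_analysis

    nlp = spacy.load('es_core_news_sm')  # assumed model
    text1 = "La educación mejora la escritura."
    text2 = "La tecnología transforma la educación."

    result = perform_discourse_analysis(text1, text2, nlp, 'es')
    fig1, fig2 = result['graph1'], result['graph2']      # matplotlib figures
    table1, table2 = result['table1'], result['table2']  # DataFrames with Concepto/Frecuencia columns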
modules/text_analysis/semantic_analysis.py CHANGED
@@ -3,260 +3,125 @@ import streamlit as st
 import spacy
 import networkx as nx
 import matplotlib.pyplot as plt
-from collections import Counter
-from collections import defaultdict
+from collections import Counter, defaultdict
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
 
 # Define colors for grammatical categories
 POS_COLORS = {
-    'ADJ': '#FFA07A',  # Light Salmon
-    'ADP': '#98FB98',  # Pale Green
-    'ADV': '#87CEFA',  # Light Sky Blue
-    'AUX': '#DDA0DD',  # Plum
-    'CCONJ': '#F0E68C',  # Khaki
-    'DET': '#FFB6C1',  # Light Pink
-    'INTJ': '#FF6347',  # Tomato
-    'NOUN': '#90EE90',  # Light Green
-    'NUM': '#FAFAD2',  # Light Goldenrod Yellow
-    'PART': '#D3D3D3',  # Light Gray
-    'PRON': '#FFA500',  # Orange
-    'PROPN': '#20B2AA',  # Light Sea Green
-    'SCONJ': '#DEB887',  # Burlywood
-    'SYM': '#7B68EE',  # Medium Slate Blue
-    'VERB': '#FF69B4',  # Hot Pink
-    'X': '#A9A9A9',  # Dark Gray
+    'ADJ': '#FFA07A', 'ADP': '#98FB98', 'ADV': '#87CEFA', 'AUX': '#DDA0DD',
+    'CCONJ': '#F0E68C', 'DET': '#FFB6C1', 'INTJ': '#FF6347', 'NOUN': '#90EE90',
+    'NUM': '#FAFAD2', 'PART': '#D3D3D3', 'PRON': '#FFA500', 'PROPN': '#20B2AA',
+    'SCONJ': '#DEB887', 'SYM': '#7B68EE', 'VERB': '#FF69B4', 'X': '#A9A9A9',
 }
 
 POS_TRANSLATIONS = {
     'es': {
-        'ADJ': 'Adjetivo',
-        'ADP': 'Preposición',
-        'ADV': 'Adverbio',
-        'AUX': 'Auxiliar',
-        'CCONJ': 'Conjunción Coordinante',
-        'DET': 'Determinante',
-        'INTJ': 'Interjección',
-        'NOUN': 'Sustantivo',
-        'NUM': 'Número',
-        'PART': 'Partícula',
-        'PRON': 'Pronombre',
-        'PROPN': 'Nombre Propio',
-        'SCONJ': 'Conjunción Subordinante',
-        'SYM': 'Símbolo',
-        'VERB': 'Verbo',
-        'X': 'Otro',
+        'ADJ': 'Adjetivo', 'ADP': 'Preposición', 'ADV': 'Adverbio', 'AUX': 'Auxiliar',
+        'CCONJ': 'Conjunción Coordinante', 'DET': 'Determinante', 'INTJ': 'Interjección',
+        'NOUN': 'Sustantivo', 'NUM': 'Número', 'PART': 'Partícula', 'PRON': 'Pronombre',
+        'PROPN': 'Nombre Propio', 'SCONJ': 'Conjunción Subordinante', 'SYM': 'Símbolo',
+        'VERB': 'Verbo', 'X': 'Otro',
     },
     'en': {
-        'ADJ': 'Adjective',
-        'ADP': 'Preposition',
-        'ADV': 'Adverb',
-        'AUX': 'Auxiliary',
-        'CCONJ': 'Coordinating Conjunction',
-        'DET': 'Determiner',
-        'INTJ': 'Interjection',
-        'NOUN': 'Noun',
-        'NUM': 'Number',
-        'PART': 'Particle',
-        'PRON': 'Pronoun',
-        'PROPN': 'Proper Noun',
-        'SCONJ': 'Subordinating Conjunction',
-        'SYM': 'Symbol',
-        'VERB': 'Verb',
-        'X': 'Other',
+        'ADJ': 'Adjective', 'ADP': 'Preposition', 'ADV': 'Adverb', 'AUX': 'Auxiliary',
+        'CCONJ': 'Coordinating Conjunction', 'DET': 'Determiner', 'INTJ': 'Interjection',
+        'NOUN': 'Noun', 'NUM': 'Number', 'PART': 'Particle', 'PRON': 'Pronoun',
+        'PROPN': 'Proper Noun', 'SCONJ': 'Subordinating Conjunction', 'SYM': 'Symbol',
+        'VERB': 'Verb', 'X': 'Other',
    },
     'fr': {
-        'ADJ': 'Adjectif',
-        'ADP': 'Préposition',
-        'ADV': 'Adverbe',
-        'AUX': 'Auxiliaire',
-        'CCONJ': 'Conjonction de Coordination',
-        'DET': 'Déterminant',
-        'INTJ': 'Interjection',
-        'NOUN': 'Nom',
-        'NUM': 'Nombre',
-        'PART': 'Particule',
-        'PRON': 'Pronom',
-        'PROPN': 'Nom Propre',
-        'SCONJ': 'Conjonction de Subordination',
-        'SYM': 'Symbole',
-        'VERB': 'Verbe',
-        'X': 'Autre',
+        'ADJ': 'Adjectif', 'ADP': 'Préposition', 'ADV': 'Adverbe', 'AUX': 'Auxiliaire',
+        'CCONJ': 'Conjonction de Coordination', 'DET': 'Déterminant', 'INTJ': 'Interjection',
+        'NOUN': 'Nom', 'NUM': 'Nombre', 'PART': 'Particule', 'PRON': 'Pronom',
+        'PROPN': 'Nom Propre', 'SCONJ': 'Conjonction de Subordination', 'SYM': 'Symbole',
+        'VERB': 'Verbe', 'X': 'Autre',
     }
 }
-########################################################################################################################################
 
-# Define the labels and colors for each language
 ENTITY_LABELS = {
     'es': {
         "Personas": "lightblue",
-        "Conceptos": "lightgreen",
         "Lugares": "lightcoral",
-        "Fechas": "lightyellow"
+        "Inventos": "lightgreen",
+        "Fechas": "lightyellow",
+        "Conceptos": "lightpink"
     },
     'en': {
         "People": "lightblue",
-        "Concepts": "lightgreen",
         "Places": "lightcoral",
-        "Dates": "lightyellow"
+        "Inventions": "lightgreen",
+        "Dates": "lightyellow",
+        "Concepts": "lightpink"
     },
     'fr': {
         "Personnes": "lightblue",
-        "Concepts": "lightgreen",
         "Lieux": "lightcoral",
-        "Dates": "lightyellow"
+        "Inventions": "lightgreen",
+        "Dates": "lightyellow",
+        "Concepts": "lightpink"
     }
 }
 
-#########################################################################################################
-def count_pos(doc):
-    return Counter(token.pos_ for token in doc if token.pos_ != 'PUNCT')
+def identify_key_concepts(doc, top_n=10):
+    # Identify the most frequent nouns, verbs, and adjectives
+    word_freq = Counter([token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ'] and not token.is_stop])
+    return word_freq.most_common(top_n)
 
-#####################################################################################################################
-
-def create_semantic_graph(doc, lang):
+def create_concept_graph(doc, key_concepts):
     G = nx.Graph()
-    word_freq = defaultdict(int)
-    lemma_to_word = {}
-    lemma_to_pos = {}
-
-    # Count frequencies of lemmas and map lemmas to their most common word form and POS
-    for token in doc:
-        if token.pos_ in ['NOUN', 'VERB']:
-            lemma = token.lemma_.lower()
-            word_freq[lemma] += 1
-            if lemma not in lemma_to_word or token.text.lower() == lemma:
-                lemma_to_word[lemma] = token.text
-                lemma_to_pos[lemma] = token.pos_
-
-    # Get top 20 most frequent lemmas
-    top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]
-
-    # Add nodes
-    for lemma in top_lemmas:
-        word = lemma_to_word[lemma]
-        G.add_node(word, pos=lemma_to_pos[lemma])
-
-    # Add edges
-    for token in doc:
-        if token.lemma_.lower() in top_lemmas:
-            if token.head.lemma_.lower() in top_lemmas:
-                source = lemma_to_word[token.lemma_.lower()]
-                target = lemma_to_word[token.head.lemma_.lower()]
-                if source != target:  # Avoid self-loops
-                    G.add_edge(source, target, label=token.dep_)
-
-    return G, word_freq
-
-############################################################################################################################################
-
-def visualize_semantic_relations(doc, lang):
-    G = nx.Graph()
-    word_freq = defaultdict(int)
-    lemma_to_word = {}
-    lemma_to_pos = {}
-
-    # Count frequencies of lemmas and map lemmas to their most common word form and POS
-    for token in doc:
-        if token.pos_ in ['NOUN', 'VERB']:
-            lemma = token.lemma_.lower()
-            word_freq[lemma] += 1
-            if lemma not in lemma_to_word or token.text.lower() == lemma:
-                lemma_to_word[lemma] = token.text
-                lemma_to_pos[lemma] = token.pos_
-
-    # Get top 20 most frequent lemmas
-    top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]
-
-    # Add nodes
-    for lemma in top_lemmas:
-        word = lemma_to_word[lemma]
-        G.add_node(word, pos=lemma_to_pos[lemma])
-
-    # Add edges
-    for token in doc:
-        if token.lemma_.lower() in top_lemmas:
-            if token.head.lemma_.lower() in top_lemmas:
-                source = lemma_to_word[token.lemma_.lower()]
-                target = lemma_to_word[token.head.lemma_.lower()]
-                if source != target:  # Avoid self-loops
-                    G.add_edge(source, target, label=token.dep_)
-
-    fig, ax = plt.subplots(figsize=(36, 27))
-    pos = nx.spring_layout(G, k=0.7, iterations=50)
-
-    node_colors = [POS_COLORS.get(G.nodes[node]['pos'], '#CCCCCC') for node in G.nodes()]
-
-    nx.draw(G, pos, node_color=node_colors, with_labels=True,
-            node_size=10000,
-            font_size=16,
-            font_weight='bold',
-            arrows=True,
-            arrowsize=30,
-            width=3,
-            edge_color='gray',
-            ax=ax)
-
-    edge_labels = nx.get_edge_attributes(G, 'label')
-    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=14, ax=ax)
+
+    # Add nodes
+    for concept, freq in key_concepts:
+        G.add_node(concept, weight=freq)
+
+    # Add edges based on co-occurrence within sentences
+    for sent in doc.sents:
+        sent_concepts = [token.lemma_.lower() for token in sent if token.lemma_.lower() in dict(key_concepts)]
+        for i, concept1 in enumerate(sent_concepts):
+            for concept2 in sent_concepts[i+1:]:
+                if G.has_edge(concept1, concept2):
+                    G[concept1][concept2]['weight'] += 1
+                else:
+                    G.add_edge(concept1, concept2, weight=1)
+
+    return G
 
+def visualize_concept_graph(G, lang):
+    fig, ax = plt.subplots(figsize=(12, 8))
+    pos = nx.spring_layout(G, k=0.5, iterations=50)
+
+    node_sizes = [G.nodes[node]['weight'] * 100 for node in G.nodes()]
+    nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color='lightblue', alpha=0.8, ax=ax)
+    nx.draw_networkx_labels(G, pos, font_size=10, font_weight="bold", ax=ax)
+
+    edge_weights = [G[u][v]['weight'] for u, v in G.edges()]
+    nx.draw_networkx_edges(G, pos, width=edge_weights, alpha=0.5, ax=ax)
+
     title = {
-        'es': "Relaciones Semánticas Relevantes",
-        'en': "Relevant Semantic Relations",
-        'fr': "Relations Sémantiques Pertinentes"
+        'es': "Relaciones entre Conceptos Clave",
+        'en': "Key Concept Relations",
+        'fr': "Relations entre Concepts Clés"
     }
-    ax.set_title(title[lang], fontsize=24, fontweight='bold')
+    ax.set_title(title[lang], fontsize=16)
     ax.axis('off')
-
-    legend_elements = [plt.Rectangle((0,0),1,1,fc=POS_COLORS.get(pos, '#CCCCCC'), edgecolor='none',
-                       label=f"{POS_TRANSLATIONS[lang].get(pos, pos)}")
-                       for pos in ['NOUN', 'VERB']]
-    ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=16)
-
-    return fig
-
-############################################################################################################################################
-def identify_and_contextualize_entities(doc, lang):
-    entities = []
-    for ent in doc.ents:
-        # Get the context (3 words before and after the entity)
-        start = max(0, ent.start - 3)
-        end = min(len(doc), ent.end + 3)
-        context = doc[start:end].text
-
-        entities.append({
-            'text': ent.text,
-            'label': ent.label_,
-            'start': ent.start,
-            'end': ent.end,
-            'context': context
-        })
 
-    # Identify key concepts (using the most frequent nouns and verbs)
-    word_freq = Counter([token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB'] and not token.is_stop])
-    key_concepts = word_freq.most_common(10)  # Top 10 key concepts
-
-    return entities, key_concepts
-
+    plt.tight_layout()
+    return fig
 
-############################################################################################################################################
 def perform_semantic_analysis(text, nlp, lang):
     doc = nlp(text)
-
-    # Identify entities and key concepts
-    entities, key_concepts = identify_and_contextualize_entities(doc, lang)
-
-    # Visualize semantic relations
-    relations_graph = visualize_semantic_relations(doc, lang)
 
-    # Print entities for debugging
-    print(f"Entidades encontradas ({lang}):")
-    for ent in doc.ents:
-        print(f"{ent.text} - {ent.label_}")
+    # Identify key concepts
+    key_concepts = identify_key_concepts(doc)
+
+    # Create and visualize the concept graph
+    concept_graph = create_concept_graph(doc, key_concepts)
+    relations_graph = visualize_concept_graph(concept_graph, lang)
 
-    relations_graph = visualize_semantic_relations(doc, lang)
     return {
-        'entities': entities,
         'key_concepts': key_concepts,
         'relations_graph': relations_graph
     }
 
-__all__ = ['visualize_semantic_relations', 'create_semantic_graph', 'POS_COLORS', 'POS_TRANSLATIONS', 'identify_and_contextualize_entities']
+__all__ = ['perform_semantic_analysis', 'ENTITY_LABELS', 'POS_TRANSLATIONS']
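The new semantic pipeline in isolation (a sketch; the model name and sample text are assumptions, the function names are those defined above):

    import spacy
    from modules.text_analysis.semantic_analysis import (
        identify_key_concepts,
        create_concept_graph,
        visualize_concept_graph,
    )

    nlp = spacy.load('en_core_web_sm')  # assumed model
    doc = nlp("Writing improves thinking. Clear writing demands clear thinking.")

    key_concepts = identify_key_concepts(doc, top_n=10)  # [(lemma, count), ...]
    G = create_concept_graph(doc, key_concepts)          # nodes weighted by frequency, edges by sentence co-occurrence
    fig = visualize_concept_graph(G, 'en')
    fig.savefig('concept_graph.png')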
modules/text_analysis/semantic_analysis_v0.py ADDED
@@ -0,0 +1,264 @@
+#semantic_analysis.py
+import streamlit as st
+import spacy
+import networkx as nx
+import matplotlib.pyplot as plt
+from collections import Counter
+from collections import defaultdict
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+
+# Define colors for grammatical categories
+POS_COLORS = {
+    'ADJ': '#FFA07A',  # Light Salmon
+    'ADP': '#98FB98',  # Pale Green
+    'ADV': '#87CEFA',  # Light Sky Blue
+    'AUX': '#DDA0DD',  # Plum
+    'CCONJ': '#F0E68C',  # Khaki
+    'DET': '#FFB6C1',  # Light Pink
+    'INTJ': '#FF6347',  # Tomato
+    'NOUN': '#90EE90',  # Light Green
+    'NUM': '#FAFAD2',  # Light Goldenrod Yellow
+    'PART': '#D3D3D3',  # Light Gray
+    'PRON': '#FFA500',  # Orange
+    'PROPN': '#20B2AA',  # Light Sea Green
+    'SCONJ': '#DEB887',  # Burlywood
+    'SYM': '#7B68EE',  # Medium Slate Blue
+    'VERB': '#FF69B4',  # Hot Pink
+    'X': '#A9A9A9',  # Dark Gray
+}
+
+POS_TRANSLATIONS = {
+    'es': {
+        'ADJ': 'Adjetivo',
+        'ADP': 'Preposición',
+        'ADV': 'Adverbio',
+        'AUX': 'Auxiliar',
+        'CCONJ': 'Conjunción Coordinante',
+        'DET': 'Determinante',
+        'INTJ': 'Interjección',
+        'NOUN': 'Sustantivo',
+        'NUM': 'Número',
+        'PART': 'Partícula',
+        'PRON': 'Pronombre',
+        'PROPN': 'Nombre Propio',
+        'SCONJ': 'Conjunción Subordinante',
+        'SYM': 'Símbolo',
+        'VERB': 'Verbo',
+        'X': 'Otro',
+    },
+    'en': {
+        'ADJ': 'Adjective',
+        'ADP': 'Preposition',
+        'ADV': 'Adverb',
+        'AUX': 'Auxiliary',
+        'CCONJ': 'Coordinating Conjunction',
+        'DET': 'Determiner',
+        'INTJ': 'Interjection',
+        'NOUN': 'Noun',
+        'NUM': 'Number',
+        'PART': 'Particle',
+        'PRON': 'Pronoun',
+        'PROPN': 'Proper Noun',
+        'SCONJ': 'Subordinating Conjunction',
+        'SYM': 'Symbol',
+        'VERB': 'Verb',
+        'X': 'Other',
+    },
+    'fr': {
+        'ADJ': 'Adjectif',
+        'ADP': 'Préposition',
+        'ADV': 'Adverbe',
+        'AUX': 'Auxiliaire',
+        'CCONJ': 'Conjonction de Coordination',
+        'DET': 'Déterminant',
+        'INTJ': 'Interjection',
+        'NOUN': 'Nom',
+        'NUM': 'Nombre',
+        'PART': 'Particule',
+        'PRON': 'Pronom',
+        'PROPN': 'Nom Propre',
+        'SCONJ': 'Conjonction de Subordination',
+        'SYM': 'Symbole',
+        'VERB': 'Verbe',
+        'X': 'Autre',
+    }
+}
+########################################################################################################################################
+
+# Define the labels and colors for each language
+ENTITY_LABELS = {
+    'es': {
+        "Personas": "lightblue",
+        "Conceptos": "lightgreen",
+        "Lugares": "lightcoral",
+        "Fechas": "lightyellow"
+    },
+    'en': {
+        "People": "lightblue",
+        "Concepts": "lightgreen",
+        "Places": "lightcoral",
+        "Dates": "lightyellow"
+    },
+    'fr': {
+        "Personnes": "lightblue",
+        "Concepts": "lightgreen",
+        "Lieux": "lightcoral",
+        "Dates": "lightyellow"
+    }
+}
+
+#########################################################################################################
+def count_pos(doc):
+    return Counter(token.pos_ for token in doc if token.pos_ != 'PUNCT')
+
+#####################################################################################################################
+
+def create_semantic_graph(doc, lang):
+    G = nx.Graph()
+    word_freq = defaultdict(int)
+    lemma_to_word = {}
+    lemma_to_pos = {}
+
+    # Count frequencies of lemmas and map lemmas to their most common word form and POS
+    for token in doc:
+        if token.pos_ in ['NOUN', 'VERB']:
+            lemma = token.lemma_.lower()
+            word_freq[lemma] += 1
+            if lemma not in lemma_to_word or token.text.lower() == lemma:
+                lemma_to_word[lemma] = token.text
+                lemma_to_pos[lemma] = token.pos_
+
+    # Get top 20 most frequent lemmas
+    top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]
+
+    # Add nodes
+    for lemma in top_lemmas:
+        word = lemma_to_word[lemma]
+        G.add_node(word, pos=lemma_to_pos[lemma])
+
+    # Add edges
+    for token in doc:
+        if token.lemma_.lower() in top_lemmas:
+            if token.head.lemma_.lower() in top_lemmas:
+                source = lemma_to_word[token.lemma_.lower()]
+                target = lemma_to_word[token.head.lemma_.lower()]
+                if source != target:  # Avoid self-loops
+                    G.add_edge(source, target, label=token.dep_)
+
+    return G, word_freq
+
+############################################################################################################################################
+
+def visualize_semantic_relations(doc, lang):
+    G = nx.Graph()
+    word_freq = defaultdict(int)
+    lemma_to_word = {}
+    lemma_to_pos = {}
+
+    # Count frequencies of lemmas and map lemmas to their most common word form and POS
+    for token in doc:
+        if token.pos_ in ['NOUN', 'VERB']:
+            lemma = token.lemma_.lower()
+            word_freq[lemma] += 1
+            if lemma not in lemma_to_word or token.text.lower() == lemma:
+                lemma_to_word[lemma] = token.text
+                lemma_to_pos[lemma] = token.pos_
+
+    # Get top 20 most frequent lemmas
+    top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]
+
+    # Add nodes
+    for lemma in top_lemmas:
+        word = lemma_to_word[lemma]
+        G.add_node(word, pos=lemma_to_pos[lemma])
+
+    # Add edges
+    for token in doc:
+        if token.lemma_.lower() in top_lemmas:
+            if token.head.lemma_.lower() in top_lemmas:
+                source = lemma_to_word[token.lemma_.lower()]
+                target = lemma_to_word[token.head.lemma_.lower()]
+                if source != target:  # Avoid self-loops
+                    G.add_edge(source, target, label=token.dep_)
+
+    fig, ax = plt.subplots(figsize=(36, 27))
+    pos = nx.spring_layout(G, k=0.7, iterations=50)
+
+    node_colors = [POS_COLORS.get(G.nodes[node]['pos'], '#CCCCCC') for node in G.nodes()]
+
+    nx.draw(G, pos, node_color=node_colors, with_labels=True,
+            node_size=10000,
+            font_size=16,
+            font_weight='bold',
+            arrows=True,
+            arrowsize=30,
+            width=3,
+            edge_color='gray',
+            ax=ax)
+
+    edge_labels = nx.get_edge_attributes(G, 'label')
+    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=14, ax=ax)
+
+    title = {
+        'es': "Relaciones Semánticas Relevantes",
+        'en': "Relevant Semantic Relations",
+        'fr': "Relations Sémantiques Pertinentes"
+    }
+    ax.set_title(title[lang], fontsize=24, fontweight='bold')
+    ax.axis('off')
+
+    legend_elements = [plt.Rectangle((0,0),1,1,fc=POS_COLORS.get(pos, '#CCCCCC'), edgecolor='none',
+                       label=f"{POS_TRANSLATIONS[lang].get(pos, pos)}")
+                       for pos in ['NOUN', 'VERB']]
+    ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=16)
+
+    return fig
+
+############################################################################################################################################
+def identify_and_contextualize_entities(doc, lang):
+    entities = []
+    for ent in doc.ents:
+        # Get the context (3 words before and after the entity)
+        start = max(0, ent.start - 3)
+        end = min(len(doc), ent.end + 3)
+        context = doc[start:end].text
+
+        entities.append({
+            'text': ent.text,
+            'label': ent.label_,
+            'start': ent.start,
+            'end': ent.end,
+            'context': context
+        })
+
+    # Identify key concepts (using the most frequent nouns and verbs)
+    word_freq = Counter([token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB'] and not token.is_stop])
+    key_concepts = word_freq.most_common(10)  # Top 10 key concepts
+
+    return entities, key_concepts
+
+
+############################################################################################################################################
+def perform_semantic_analysis(text, nlp, lang):
+    doc = nlp(text)
+
+    # Identify entities and key concepts
+    entities, key_concepts = identify_and_contextualize_entities(doc, lang)
+
+    # Visualize semantic relations
+    relations_graph = visualize_semantic_relations(doc, lang)
+
+    # Print entities for debugging
+    print(f"Entidades encontradas ({lang}):")
+    for ent in doc.ents:
+        print(f"{ent.text} - {ent.label_}")
+
+    relations_graph = visualize_semantic_relations(doc, lang)
+    return {
+        'entities': entities,
+        'key_concepts': key_concepts,
+        'relations_graph': relations_graph
+    }
+
+__all__ = ['visualize_semantic_relations', 'create_semantic_graph', 'POS_COLORS', 'POS_TRANSLATIONS', 'identify_and_contextualize_entities']
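The archived v0 approach linked words along spaCy's dependency arcs (token to token.head) rather than by co-occurrence. The core idea in a few illustrative lines (model name assumed):

    import spacy

    nlp = spacy.load('en_core_web_sm')  # assumed model
    doc = nlp("The student writes clear essays.")

    # Each NOUN/VERB token contributes an edge toward its syntactic head.
    for token in doc:
        head = token.head
        if token.pos_ in ('NOUN', 'VERB') and head.pos_ in ('NOUN', 'VERB') and token is not head:
            print(f"{token.lemma_} --{token.dep_}--> {head.lemma_}")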
modules/text_analysis/semantic_analysis_v00.py ADDED
@@ -0,0 +1,153 @@
+#semantic_analysis.py
+import streamlit as st
+import spacy
+import networkx as nx
+import matplotlib.pyplot as plt
+from collections import Counter, defaultdict
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+
+# Define colors for grammatical categories
+POS_COLORS = {
+    'ADJ': '#FFA07A', 'ADP': '#98FB98', 'ADV': '#87CEFA', 'AUX': '#DDA0DD',
+    'CCONJ': '#F0E68C', 'DET': '#FFB6C1', 'INTJ': '#FF6347', 'NOUN': '#90EE90',
+    'NUM': '#FAFAD2', 'PART': '#D3D3D3', 'PRON': '#FFA500', 'PROPN': '#20B2AA',
+    'SCONJ': '#DEB887', 'SYM': '#7B68EE', 'VERB': '#FF69B4', 'X': '#A9A9A9',
+}
+
+POS_TRANSLATIONS = {
+    'es': {
+        'ADJ': 'Adjetivo', 'ADP': 'Preposición', 'ADV': 'Adverbio', 'AUX': 'Auxiliar',
+        'CCONJ': 'Conjunción Coordinante', 'DET': 'Determinante', 'INTJ': 'Interjección',
+        'NOUN': 'Sustantivo', 'NUM': 'Número', 'PART': 'Partícula', 'PRON': 'Pronombre',
+        'PROPN': 'Nombre Propio', 'SCONJ': 'Conjunción Subordinante', 'SYM': 'Símbolo',
+        'VERB': 'Verbo', 'X': 'Otro',
+    },
+    'en': {
+        'ADJ': 'Adjective', 'ADP': 'Preposition', 'ADV': 'Adverb', 'AUX': 'Auxiliary',
+        'CCONJ': 'Coordinating Conjunction', 'DET': 'Determiner', 'INTJ': 'Interjection',
+        'NOUN': 'Noun', 'NUM': 'Number', 'PART': 'Particle', 'PRON': 'Pronoun',
+        'PROPN': 'Proper Noun', 'SCONJ': 'Subordinating Conjunction', 'SYM': 'Symbol',
+        'VERB': 'Verb', 'X': 'Other',
+    },
+    'fr': {
+        'ADJ': 'Adjectif', 'ADP': 'Préposition', 'ADV': 'Adverbe', 'AUX': 'Auxiliaire',
+        'CCONJ': 'Conjonction de Coordination', 'DET': 'Déterminant', 'INTJ': 'Interjection',
+        'NOUN': 'Nom', 'NUM': 'Nombre', 'PART': 'Particule', 'PRON': 'Pronom',
+        'PROPN': 'Nom Propre', 'SCONJ': 'Conjonction de Subordination', 'SYM': 'Symbole',
+        'VERB': 'Verbe', 'X': 'Autre',
+    }
+}
+
+ENTITY_LABELS = {
+    'es': {
+        "Personas": "lightblue",
+        "Lugares": "lightcoral",
+        "Inventos": "lightgreen",
+        "Fechas": "lightyellow",
+        "Conceptos": "lightpink"
+    },
+    'en': {
+        "People": "lightblue",
+        "Places": "lightcoral",
+        "Inventions": "lightgreen",
+        "Dates": "lightyellow",
+        "Concepts": "lightpink"
+    },
+    'fr': {
+        "Personnes": "lightblue",
+        "Lieux": "lightcoral",
+        "Inventions": "lightgreen",
+        "Dates": "lightyellow",
+        "Concepts": "lightpink"
+    }
+}
+
+def identify_and_contextualize_entities(doc, lang):
+    entities = []
+    for ent in doc.ents:
+        # Get the context (3 words before and after the entity)
+        start = max(0, ent.start - 3)
+        end = min(len(doc), ent.end + 3)
+        context = doc[start:end].text
+
+        # Map spaCy labels to our categories
+        if ent.label_ in ['PERSON', 'ORG']:
+            category = "Personas" if lang == 'es' else "People" if lang == 'en' else "Personnes"
+        elif ent.label_ in ['LOC', 'GPE']:
+            category = "Lugares" if lang == 'es' else "Places" if lang == 'en' else "Lieux"
+        elif ent.label_ in ['PRODUCT']:
+            category = "Inventos" if lang == 'es' else "Inventions" if lang == 'en' else "Inventions"
+        elif ent.label_ in ['DATE', 'TIME']:
+            category = "Fechas" if lang == 'es' else "Dates" if lang == 'en' else "Dates"
+        else:
+            category = "Conceptos" if lang == 'es' else "Concepts" if lang == 'en' else "Concepts"
+
+        entities.append({
+            'text': ent.text,
+            'label': category,
+            'start': ent.start,
+            'end': ent.end,
+            'context': context
+        })
+
+    # Identify key concepts (using the most frequent nouns and verbs)
+    word_freq = Counter([token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB'] and not token.is_stop])
+    key_concepts = word_freq.most_common(10)  # Top 10 key concepts
+
+    return entities, key_concepts
+
+def create_concept_graph(text, concepts):
+    vectorizer = TfidfVectorizer()
+    tfidf_matrix = vectorizer.fit_transform([text])
+    concept_vectors = vectorizer.transform(concepts)
+    similarity_matrix = cosine_similarity(concept_vectors, concept_vectors)
+
+    G = nx.Graph()
+    for i, concept in enumerate(concepts):
+        G.add_node(concept)
+        for j in range(i+1, len(concepts)):
+            if similarity_matrix[i][j] > 0.1:
+                G.add_edge(concept, concepts[j], weight=similarity_matrix[i][j])
+
+    return G
+
+def visualize_concept_graph(G, lang):
+    fig, ax = plt.subplots(figsize=(12, 8))
+    pos = nx.spring_layout(G)
+
+    nx.draw_networkx_nodes(G, pos, node_size=3000, node_color='lightblue', ax=ax)
+    nx.draw_networkx_labels(G, pos, font_size=10, font_weight="bold", ax=ax)
+    nx.draw_networkx_edges(G, pos, width=1, ax=ax)
+
+    edge_labels = nx.get_edge_attributes(G, 'weight')
+    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8, ax=ax)
+
+    title = {
+        'es': "Relaciones Conceptuales",
+        'en': "Conceptual Relations",
+        'fr': "Relations Conceptuelles"
+    }
+    ax.set_title(title[lang], fontsize=16)
+    ax.axis('off')
+
+    return fig
+
+def perform_semantic_analysis(text, nlp, lang):
+    doc = nlp(text)
+
+    # Identify entities and key concepts
+    entities, key_concepts = identify_and_contextualize_entities(doc, lang)
+
+    # Create and visualize the concept graph
+    concepts = [concept for concept, _ in key_concepts]
+    concept_graph = create_concept_graph(text, concepts)
+    relations_graph = visualize_concept_graph(concept_graph, lang)
+
+    return {
+        'entities': entities,
+        'key_concepts': key_concepts,
+        'relations_graph': relations_graph
+    }
+
+__all__ = ['perform_semantic_analysis', 'ENTITY_LABELS', 'POS_TRANSLATIONS']
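The v00 variant instead weights edges by TF-IDF cosine similarity. A standalone sketch of that mechanism (illustrative; it vectorizes one pseudo-document per concept, since TF-IDF vectors of bare single words only overlap when they share a token):

    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    # Hypothetical contexts: one pseudo-document per concept.
    concept_contexts = [
        "education improves writing skills",
        "technology transforms education",
    ]
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform(concept_contexts)
    similarity = cosine_similarity(vectors)  # symmetric matrix with values in [0, 1]
    print(similarity.round(2))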
modules/ui/ui.py CHANGED
@@ -58,14 +58,16 @@ from ..text_analysis.morpho_analysis import (
 
 ######################################################
 from ..text_analysis.semantic_analysis import (
-    visualize_semantic_relations,
-    perform_semantic_analysis
+    #visualize_semantic_relations,
+    perform_semantic_analysis,
+    create_concept_graph,
+    visualize_concept_graph
 )
 
 ######################################################
 from ..text_analysis.discourse_analysis import (
-    compare_semantic_analysis,
-    perform_discourse_analysis
+    perform_discourse_analysis,
+    display_discourse_analysis_results
 )
 
 ######################################################
@@ -763,7 +765,7 @@ def display_semantic_analysis_interface(nlp_models, lang_code):
         'text_input_placeholder': "El objetivo de esta aplicación es que mejore sus habilidades de redacción...",
         'file_uploader': "O cargue un archivo de texto",
         'analyze_button': "Analizar texto",
-        'semantic_relations': "Relaciones Semánticas Relevantes",
+        'conceptual_relations': "Relaciones Conceptuales",
         'identified_entities': "Entidades Identificadas",
         'key_concepts': "Conceptos Clave",
         'success_message': "Análisis semántico guardado correctamente.",
@@ -776,7 +778,7 @@ def display_semantic_analysis_interface(nlp_models, lang_code):
         'text_input_placeholder': "The goal of this application is to improve your writing skills...",
         'file_uploader': "Or upload a text file",
         'analyze_button': "Analyze text",
-        'semantic_relations': "Relevant Semantic Relations",
+        'conceptual_relations': "Conceptual Relations",
         'identified_entities': "Identified Entities",
         'key_concepts': "Key Concepts",
         'success_message': "Semantic analysis saved successfully.",
@@ -789,7 +791,7 @@ def display_semantic_analysis_interface(nlp_models, lang_code):
         'text_input_placeholder': "L'objectif de cette application est d'améliorer vos compétences en rédaction...",
         'file_uploader': "Ou téléchargez un fichier texte",
         'analyze_button': "Analyser le texte",
-        'semantic_relations': "Relations Sémantiques Pertinentes",
+        'conceptual_relations': "Relations Conceptuelles",
         'identified_entities': "Entités Identifiées",
         'key_concepts': "Concepts Clés",
         'success_message': "Analyse sémantique enregistrée avec succès.",
@@ -824,18 +826,11 @@ def display_semantic_analysis_interface(nlp_models, lang_code):
 
         # Show key concepts
         with st.expander(t['key_concepts'], expanded=True):
-            key_concepts_text = " ".join([f"[[{concept}]]" for concept, _ in analysis_result['key_concepts']])
-            st.markdown(key_concepts_text)
-
-        # Show identified entities
-        with st.expander(t['identified_entities'], expanded=True):
-            entities_text = ""
-            for entity in analysis_result['entities']:
-                entities_text += f"[[{entity['text']} ({entity['label']}) - Contexto: {entity['context']}]] "
-            st.markdown(entities_text)
-
-        # Show the semantic relations graph
-        with st.expander(t['semantic_relations'], expanded=True):
+            concept_text = " | ".join([f"{concept} ({frequency:.2f})" for concept, frequency in analysis_result['key_concepts']])
+            st.write(concept_text)
+
+        # Show the conceptual relations graph
+        with st.expander(t['conceptual_relations'], expanded=True):
             st.pyplot(analysis_result['relations_graph'])
 
         # Save the analysis result
@@ -845,7 +840,6 @@ def display_semantic_analysis_interface(nlp_models, lang_code):
             st.error(t['error_message'])
     else:
        st.warning(t['warning_message'])
-
 ##################################################################################################
 def display_discourse_analysis_interface(nlp_models, lang_code):
     translations = {
@@ -898,19 +892,13 @@ def display_discourse_analysis_interface(nlp_models, lang_code):
         text_content2 = uploaded_file2.getvalue().decode('utf-8')
 
         # Run the analysis
-        graph1, graph2 = perform_discourse_analysis(text_content1, text_content2, nlp_models[lang_code], lang_code)
+        analysis_result = perform_discourse_analysis(text_content1, text_content2, nlp_models[lang_code], lang_code)
 
-        # Show the comparison graphs
-        st.subheader(t['comparison'])
-        col1, col2 = st.columns(2)
-        with col1:
-            st.pyplot(graph1)
-        with col2:
-            st.pyplot(graph2)
+        # Show the analysis results
+        display_discourse_analysis_results(analysis_result, lang_code)
 
         # Save the analysis result
-        #if store_discourse_analysis_result(st.session_state.username, text_content1 + "\n\n" + text_content2, graph1, graph2):
-        if store_discourse_analysis_result(st.session_state.username, text_content1, text_content2, graph1, graph2):
+        if store_discourse_analysis_result(st.session_state.username, text_content1, text_content2, analysis_result):
             st.success(t['success_message'])
         else:
            st.error(t['error_message'])
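As a worked example of the key-concepts line above: with analysis_result['key_concepts'] = [('aplicación', 3.0), ('redacción', 2.0)], the expander renders:

    aplicación (3.00) | redacción (2.00)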