AIdeaText committed
Commit 33b0153 (verified), parent: 802c793

Update modules/studentact/current_situation_analysis.py

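The functional change in this commit is confined to the error-fallback path of generate_recommendations: the hardcoded if/elif chain of per-language generic returns (es, en, fr, pt) is replaced by a fallback_translations dictionary keyed by language code, which adds Ukrainian (uk). The code that consumes that dictionary is cut off at the end of this diff, so the helper below is a hypothetical sketch of the dictionary lookup pattern, not the committed code; it also assumes the TEXT_TYPES mapping the function reads thresholds from is defined elsewhere in the module.

    # Hypothetical sketch only: the committed consumer of fallback_translations
    # is truncated in the diff below and may differ in shape.
    def build_fallback_response(lang_code, fallback_translations):
        # Fall back to Spanish when the requested language has no entry.
        t = fallback_translations.get(lang_code, fallback_translations['es'])
        response = dict(t['basic_recommendations'])
        response['priority'] = {'area': 'general', 'tips': []}
        response['dimension_names'] = t['dimension_names']
        response['ui_text'] = t['ui_text']
        return response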
modules/studentact/current_situation_analysis.py CHANGED
@@ -1,1008 +1,1000 @@
#v3/modules/studentact/current_situation_analysis.py

import streamlit as st
import matplotlib.pyplot as plt
import networkx as nx
import seaborn as sns
from collections import Counter
from itertools import combinations
import numpy as np
import matplotlib.patches as patches
import logging

from translations.recommendations import RECOMMENDATIONS

# 2. Basic logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('app.log')
    ]
)

# 3. Get the logger specific to this module
logger = logging.getLogger(__name__)

#########################################################################

def correlate_metrics(scores):
    """
    Adjusts the scores to keep logical correlations between metrics.

    Args:
        scores: dict with the initial vocabulary, structure, cohesion and clarity scores

    Returns:
        dict with the adjusted scores
    """
    try:
        # 1. Structure-cohesion correlation
        # Cohesion cannot be lower than structure * 0.7
        min_cohesion = scores['structure']['normalized_score'] * 0.7
        if scores['cohesion']['normalized_score'] < min_cohesion:
            scores['cohesion']['normalized_score'] = min_cohesion

        # 2. Vocabulary-cohesion correlation
        # Lexical cohesion depends on vocabulary
        vocab_influence = scores['vocabulary']['normalized_score'] * 0.6
        scores['cohesion']['normalized_score'] = max(
            scores['cohesion']['normalized_score'],
            vocab_influence
        )

        # 3. Cohesion-clarity correlation
        # Clarity cannot exceed cohesion * 1.2
        max_clarity = scores['cohesion']['normalized_score'] * 1.2
        if scores['clarity']['normalized_score'] > max_clarity:
            scores['clarity']['normalized_score'] = max_clarity

        # 4. Structure-clarity correlation
        # Clarity cannot exceed structure * 1.1
        struct_max_clarity = scores['structure']['normalized_score'] * 1.1
        scores['clarity']['normalized_score'] = min(
            scores['clarity']['normalized_score'],
            struct_max_clarity
        )

        # Normalize all scores to between 0 and 1
        for metric in scores:
            scores[metric]['normalized_score'] = max(0.0, min(1.0, scores[metric]['normalized_score']))

        return scores

    except Exception as e:
        logger.error(f"Error in correlate_metrics: {str(e)}")
        return scores

##########################################################################

def analyze_text_dimensions(doc):
    """
    Analyzes the main dimensions of the text while keeping logical correlations.
    """
    try:
        # Get the initial scores
        vocab_score, vocab_details = analyze_vocabulary_diversity(doc)
        struct_score = analyze_structure(doc)
        cohesion_score = analyze_cohesion(doc)
        clarity_score, clarity_details = analyze_clarity(doc)

        # Build the initial score dictionary
        scores = {
            'vocabulary': {
                'normalized_score': vocab_score,
                'details': vocab_details
            },
            'structure': {
                'normalized_score': struct_score,
                'details': None
            },
            'cohesion': {
                'normalized_score': cohesion_score,
                'details': None
            },
            'clarity': {
                'normalized_score': clarity_score,
                'details': clarity_details
            }
        }

        # Adjust the correlations between metrics
        adjusted_scores = correlate_metrics(scores)

        # Diagnostic logging
        logger.info(f"""
        Original vs adjusted scores:
        Vocabulary: {vocab_score:.2f} -> {adjusted_scores['vocabulary']['normalized_score']:.2f}
        Structure: {struct_score:.2f} -> {adjusted_scores['structure']['normalized_score']:.2f}
        Cohesion: {cohesion_score:.2f} -> {adjusted_scores['cohesion']['normalized_score']:.2f}
        Clarity: {clarity_score:.2f} -> {adjusted_scores['clarity']['normalized_score']:.2f}
        """)

        return adjusted_scores

    except Exception as e:
        logger.error(f"Error in analyze_text_dimensions: {str(e)}")
        return {
            'vocabulary': {'normalized_score': 0.0, 'details': {}},
            'structure': {'normalized_score': 0.0, 'details': {}},
            'cohesion': {'normalized_score': 0.0, 'details': {}},
            'clarity': {'normalized_score': 0.0, 'details': {}}
        }



#############################################################################################

def analyze_clarity(doc):
    """
    Analyzes the clarity of the text considering multiple factors.
    """
    try:
        sentences = list(doc.sents)
        if not sentences:
            return 0.0, {}

        # 1. Sentence length
        sentence_lengths = [len(sent) for sent in sentences]
        avg_length = sum(sentence_lengths) / len(sentences)

        # Normalize using the thresholds defined for clarity
        length_score = normalize_score(
            value=avg_length,
            metric_type='clarity',
            optimal_length=20,      # An ideal sentence has ~20 words
            min_threshold=0.60,     # Consistent with METRIC_THRESHOLDS
            target_threshold=0.75   # Consistent with METRIC_THRESHOLDS
        )

        # 2. Connector analysis
        connector_count = 0
        connector_weights = {
            'CCONJ': 1.0,   # Coordinating
            'SCONJ': 1.2,   # Subordinating
            'ADV': 0.8      # Connective adverbs
        }

        for token in doc:
            if token.pos_ in connector_weights and token.dep_ in ['cc', 'mark', 'advmod']:
                connector_count += connector_weights[token.pos_]

        # Normalize connectors per sentence
        connectors_per_sentence = connector_count / len(sentences) if sentences else 0
        connector_score = normalize_score(
            value=connectors_per_sentence,
            metric_type='clarity',
            optimal_connections=1.5,  # ~1.5 connectors per sentence is optimal
            min_threshold=0.60,
            target_threshold=0.75
        )

        # 3. Structural complexity
        clause_count = 0
        for sent in sentences:
            verbs = [token for token in sent if token.pos_ == 'VERB']
            clause_count += len(verbs)

        complexity_raw = clause_count / len(sentences) if sentences else 0
        complexity_score = normalize_score(
            value=complexity_raw,
            metric_type='clarity',
            optimal_depth=2.0,  # ~2 clauses per sentence is optimal
            min_threshold=0.60,
            target_threshold=0.75
        )

        # 4. Lexical density
        content_words = len([token for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ', 'ADV']])
        total_words = len([token for token in doc if token.is_alpha])
        density = content_words / total_words if total_words > 0 else 0

        density_score = normalize_score(
            value=density,
            metric_type='clarity',
            optimal_connections=0.6,  # 60% content words is optimal
            min_threshold=0.60,
            target_threshold=0.75
        )

        # Weighted final score
        weights = {
            'length': 0.3,
            'connectors': 0.3,
            'complexity': 0.2,
            'density': 0.2
        }

        clarity_score = (
            weights['length'] * length_score +
            weights['connectors'] * connector_score +
            weights['complexity'] * complexity_score +
            weights['density'] * density_score
        )

        details = {
            'length_score': length_score,
            'connector_score': connector_score,
            'complexity_score': complexity_score,
            'density_score': density_score,
            'avg_sentence_length': avg_length,
            'connectors_per_sentence': connectors_per_sentence,
            'density': density
        }

        # Diagnostic logging
        logger.info(f"""
        Clarity scores:
        - Length: {length_score:.2f} (avg={avg_length:.1f} words)
        - Connectors: {connector_score:.2f} (avg={connectors_per_sentence:.1f} per sentence)
        - Complexity: {complexity_score:.2f} (avg={complexity_raw:.1f} clauses)
        - Density: {density_score:.2f} ({density*100:.1f}% content words)
        - Final score: {clarity_score:.2f}
        """)

        return clarity_score, details

    except Exception as e:
        logger.error(f"Error in analyze_clarity: {str(e)}")
        return 0.0, {}

#########################################################################
def analyze_vocabulary_diversity(doc):
    """Improved analysis of vocabulary diversity and quality"""
    try:
        # 1. Basic diversity analysis
        unique_lemmas = {token.lemma_ for token in doc if token.is_alpha}
        total_words = len([token for token in doc if token.is_alpha])
        basic_diversity = len(unique_lemmas) / total_words if total_words > 0 else 0

        # 2. Register analysis
        academic_words = 0
        narrative_words = 0
        technical_terms = 0

        # Classify words by register
        for token in doc:
            if token.is_alpha:
                # Detect academic/technical terms
                if token.pos_ in ['NOUN', 'VERB', 'ADJ']:
                    if any(parent.pos_ == 'NOUN' for parent in token.ancestors):
                        technical_terms += 1
                # Detect narrative words
                if token.pos_ in ['VERB', 'ADV'] and token.dep_ in ['ROOT', 'advcl']:
                    narrative_words += 1

        # 3. Syntactic complexity analysis
        avg_sentence_length = sum(len(sent) for sent in doc.sents) / len(list(doc.sents))

        # 4. Compute the weighted score
        weights = {
            'diversity': 0.3,
            'technical': 0.3,
            'narrative': 0.2,
            'complexity': 0.2
        }

        scores = {
            'diversity': basic_diversity,
            'technical': technical_terms / total_words if total_words > 0 else 0,
            'narrative': narrative_words / total_words if total_words > 0 else 0,
            'complexity': min(1.0, avg_sentence_length / 20)  # Normalized to 20 words
        }

        # Weighted final score
        final_score = sum(weights[key] * scores[key] for key in weights)

        # Additional diagnostic information
        details = {
            'text_type': 'narrative' if scores['narrative'] > scores['technical'] else 'academic',
            'scores': scores
        }

        return final_score, details

    except Exception as e:
        logger.error(f"Error in analyze_vocabulary_diversity: {str(e)}")
        return 0.0, {}

#########################################################################
def analyze_cohesion(doc):
    """Analyzes textual cohesion"""
    try:
        sentences = list(doc.sents)
        if len(sentences) < 2:
            logger.warning("Text too short for cohesion analysis")
            return 0.0

        # 1. Lexical connection analysis
        lexical_connections = 0
        total_possible_connections = 0

        for i in range(len(sentences)-1):
            # Get significant lemmas (no stopwords)
            sent1_words = {token.lemma_ for token in sentences[i]
                           if token.is_alpha and not token.is_stop}
            sent2_words = {token.lemma_ for token in sentences[i+1]
                           if token.is_alpha and not token.is_stop}

            if sent1_words and sent2_words:  # Check that neither set is empty
                intersection = len(sent1_words.intersection(sent2_words))
                total_possible = min(len(sent1_words), len(sent2_words))

                if total_possible > 0:
                    lexical_score = intersection / total_possible
                    lexical_connections += lexical_score
                    total_possible_connections += 1

        # 2. Connector analysis
        connector_count = 0
        connector_types = {
            'CCONJ': 1.0,   # Coordinating
            'SCONJ': 1.2,   # Subordinating
            'ADV': 0.8      # Connective adverbs
        }

        for token in doc:
            if (token.pos_ in connector_types and
                token.dep_ in ['cc', 'mark', 'advmod'] and
                not token.is_stop):
                connector_count += connector_types[token.pos_]

        # 3. Computation of the normalized scores
        if total_possible_connections > 0:
            lexical_cohesion = lexical_connections / total_possible_connections
        else:
            lexical_cohesion = 0

        if len(sentences) > 1:
            connector_cohesion = min(1.0, connector_count / (len(sentences) - 1))
        else:
            connector_cohesion = 0

        # 4. Weighted final score
        weights = {
            'lexical': 0.7,
            'connectors': 0.3
        }

        cohesion_score = (
            weights['lexical'] * lexical_cohesion +
            weights['connectors'] * connector_cohesion
        )

        # 5. Diagnostic logging
        logger.info(f"""
        Cohesion analysis:
        - Lexical connections found: {lexical_connections}
        - Possible connections: {total_possible_connections}
        - Lexical cohesion score: {lexical_cohesion}
        - Connectors found: {connector_count}
        - Connector cohesion score: {connector_cohesion}
        - Final score: {cohesion_score}
        """)

        return cohesion_score

    except Exception as e:
        logger.error(f"Error in analyze_cohesion: {str(e)}")
        return 0.0

#########################################################################
def analyze_structure(doc):
    try:
        if len(doc) == 0:
            return 0.0

        structure_scores = []
        for token in doc:
            if token.dep_ == 'ROOT':
                result = get_dependency_depths(token)
                structure_scores.append(result['final_score'])

        if not structure_scores:
            return 0.0

        return min(1.0, sum(structure_scores) / len(structure_scores))

    except Exception as e:
        logger.error(f"Error in analyze_structure: {str(e)}")
        return 0.0

#########################################################################
# Auxiliary analysis functions
def get_dependency_depths(token, depth=0, analyzed_tokens=None):
    """
    Analyzes the depth and quality of dependency relations.

    Args:
        token: Token to analyze
        depth: Current depth in the tree
        analyzed_tokens: Set used to avoid cycles during the analysis

    Returns:
        dict: Detailed information about the dependencies
            - depths: List of depths
            - relations: Dictionary with the relation types found
            - complexity_score: Complexity score
    """
    if analyzed_tokens is None:
        analyzed_tokens = set()

    # Avoid cycles
    if token.i in analyzed_tokens:
        return {
            'depths': [],
            'relations': {},
            'complexity_score': 0
        }

    analyzed_tokens.add(token.i)

    # Weights for the different dependency types
    dependency_weights = {
        # Core dependencies
        'nsubj': 1.2,   # Nominal subject
        'obj': 1.1,     # Direct object
        'iobj': 1.1,    # Indirect object
        'ROOT': 1.3,    # Root

        # Modifiers
        'amod': 0.8,    # Adjectival modifier
        'advmod': 0.8,  # Adverbial modifier
        'nmod': 0.9,    # Nominal modifier

        # Complex structures
        'csubj': 1.4,   # Clausal subject
        'ccomp': 1.3,   # Clausal complement
        'xcomp': 1.2,   # Open clausal complement
        'advcl': 1.2,   # Adverbial clause

        # Coordination and subordination
        'conj': 1.1,    # Conjunct
        'cc': 0.7,      # Coordination
        'mark': 0.8,    # Marker

        # Others
        'det': 0.5,     # Determiner
        'case': 0.5,    # Case
        'punct': 0.1    # Punctuation
    }

    # Initialize the results
    current_result = {
        'depths': [depth],
        'relations': {token.dep_: 1},
        'complexity_score': dependency_weights.get(token.dep_, 0.5) * (depth + 1)
    }

    # Analyze children recursively
    for child in token.children:
        child_result = get_dependency_depths(child, depth + 1, analyzed_tokens)

        # Combine depths
        current_result['depths'].extend(child_result['depths'])

        # Combine relations
        for rel, count in child_result['relations'].items():
            current_result['relations'][rel] = current_result['relations'].get(rel, 0) + count

        # Accumulate the complexity score
        current_result['complexity_score'] += child_result['complexity_score']

    # Compute additional metrics
    current_result['max_depth'] = max(current_result['depths'])
    current_result['avg_depth'] = sum(current_result['depths']) / len(current_result['depths'])
    current_result['relation_diversity'] = len(current_result['relations'])

    # Compute a score weighted by structure type
    structure_bonus = 0

    # Bonus for complex structures
    if 'csubj' in current_result['relations'] or 'ccomp' in current_result['relations']:
        structure_bonus += 0.3

    # Bonus for balanced coordination
    if 'conj' in current_result['relations'] and 'cc' in current_result['relations']:
        structure_bonus += 0.2

    # Bonus for rich modification
    if len(set(['amod', 'advmod', 'nmod']) & set(current_result['relations'])) >= 2:
        structure_bonus += 0.2

    current_result['final_score'] = (
        current_result['complexity_score'] * (1 + structure_bonus)
    )

    return current_result

#########################################################################
def normalize_score(value, metric_type,
                    min_threshold=0.0, target_threshold=1.0,
                    range_factor=2.0, optimal_length=None,
                    optimal_connections=None, optimal_depth=None):
    """
    Normalizes a value considering metric-type-specific thresholds.

    Args:
        value: Value to normalize
        metric_type: Metric type ('vocabulary', 'structure', 'cohesion', 'clarity')
        min_threshold: Minimum acceptable value
        target_threshold: Target value
        range_factor: Factor used to adjust the range
        optimal_length: Optimal length (optional)
        optimal_connections: Optimal number of connections (optional)
        optimal_depth: Optimal structure depth (optional)

    Returns:
        float: Normalized value between 0 and 1
    """
    try:
        # Define the thresholds per metric type
        METRIC_THRESHOLDS = {
            'vocabulary': {
                'min': 0.60,
                'target': 0.75,
                'range_factor': 1.5
            },
            'structure': {
                'min': 0.65,
                'target': 0.80,
                'range_factor': 1.8
            },
            'cohesion': {
                'min': 0.55,
                'target': 0.70,
                'range_factor': 1.6
            },
            'clarity': {
                'min': 0.60,
                'target': 0.75,
                'range_factor': 1.7
            }
        }

        # Validate negative or zero values
        if value < 0:
            logger.warning(f"Negative value received: {value}")
            return 0.0

        # Handle the case where the value is zero
        if value == 0:
            logger.warning("Zero value received")
            return 0.0

        # Get the thresholds specific to the metric type
        thresholds = METRIC_THRESHOLDS.get(metric_type, {
            'min': min_threshold,
            'target': target_threshold,
            'range_factor': range_factor
        })

        # Identify the reference value to use
        if optimal_depth is not None:
            reference = optimal_depth
        elif optimal_connections is not None:
            reference = optimal_connections
        elif optimal_length is not None:
            reference = optimal_length
        else:
            reference = thresholds['target']

        # Validate the reference value
        if reference <= 0:
            logger.warning(f"Invalid reference value: {reference}")
            return 0.0

        # Compute the score based on the thresholds
        if value < thresholds['min']:
            # Value below the minimum
            score = (value / thresholds['min']) * 0.5  # At most 0.5 for values below the minimum
        elif value < thresholds['target']:
            # Value between the minimum and the target
            range_size = thresholds['target'] - thresholds['min']
            progress = (value - thresholds['min']) / range_size
            score = 0.5 + (progress * 0.5)  # Scales between 0.5 and 1.0
        else:
            # Value reaches or exceeds the target
            score = 1.0

        # Penalize values far above the target
        if value > (thresholds['target'] * thresholds['range_factor']):
            excess = (value - thresholds['target']) / (thresholds['target'] * thresholds['range_factor'])
            score = max(0.7, 1.0 - excess)  # Do not drop below 0.7 for high values

        # Make sure the result is between 0 and 1
        return max(0.0, min(1.0, score))

    except Exception as e:
        logger.error(f"Error in normalize_score: {str(e)}")
        return 0.0

#########################################################################
#########################################################################
- def generate_recommendations(metrics, text_type, lang_code='es'):
-     """
-     Generates personalized recommendations based on the text metrics and the text type.
-
-     Args:
-         metrics: Dictionary with the analyzed metrics
-         text_type: Text type ('academic_article', 'student_essay', 'general_communication')
-         lang_code: Language code for the recommendations (es, en, fr, pt)
-
-     Returns:
-         dict: Recommendations organized by category in the corresponding language
-     """
-     try:
-         # Get the thresholds according to the text type
-         thresholds = TEXT_TYPES[text_type]['thresholds']
-
-         # Check that the language is supported, falling back to Spanish
-         if lang_code not in RECOMMENDATIONS:
-             logger.warning(f"Language {lang_code} not supported for recommendations, using Spanish")
-             lang_code = 'es'
-
-         # Get the translations for the selected language
-         translations = RECOMMENDATIONS[lang_code]
-
-         # Initialize the recommendations dictionary
-         recommendations = {
-             'vocabulary': [],
-             'structure': [],
-             'cohesion': [],
-             'clarity': [],
-             'specific': [],
-             'priority': {
-                 'area': 'general',
-                 'tips': []
-             },
-             'text_type_name': translations['text_types'][text_type],
-             'dimension_names': translations['dimension_names'],
-             'ui_text': {
-                 'priority_intro': translations['priority_intro'],
-                 'detailed_recommendations': translations['detailed_recommendations'],
-                 'save_button': translations['save_button'],
-                 'save_success': translations['save_success'],
-                 'save_error': translations['save_error'],
-                 'area_priority': translations['area_priority']
-             }
-         }
-
-         # Determine the level for each dimension and assign recommendations
-         dimensions = ['vocabulary', 'structure', 'cohesion', 'clarity']
-         scores = {}
-
-         for dim in dimensions:
-             score = metrics[dim]['normalized_score']
-             scores[dim] = score
-
-             # Determine the level (low, medium, high)
-             if score < thresholds[dim]['min']:
-                 level = 'low'
-             elif score < thresholds[dim]['target']:
-                 level = 'medium'
-             else:
-                 level = 'high'
-
-             # Assign the recommendations for that level
-             recommendations[dim] = translations[dim][level]
-
-         # Assign the text-type-specific recommendations
-         recommendations['specific'] = translations[text_type]
-
-         # Determine the priority area (the one with the lowest score)
-         priority_dimension = min(scores, key=scores.get)
-         recommendations['priority']['area'] = priority_dimension
-         recommendations['priority']['tips'] = recommendations[priority_dimension]
-
-         logger.info(f"Generated recommendations in {lang_code} for text type {text_type}")
-         return recommendations
-
-     except Exception as e:
-         logger.error(f"Error in generate_recommendations: {str(e)}")
-         # Return generic messages in case of error
-         if lang_code == 'en':
-             return {
-                 'vocabulary': ["Try enriching your vocabulary"],
-                 'structure': ["Work on the structure of your sentences"],
-                 'cohesion': ["Improve the connection between your ideas"],
-                 'clarity': ["Try to express your ideas more clearly"],
-                 'specific': ["Adapt your text according to its purpose"],
-                 'priority': {
-                     'area': 'general',
-                     'tips': ["Seek specific feedback from a tutor or teacher"]
-                 },
-                 'dimension_names': {
-                     'vocabulary': 'Vocabulary',
-                     'structure': 'Structure',
-                     'cohesion': 'Cohesion',
-                     'clarity': 'Clarity',
-                     'general': 'General'
-                 },
-                 'ui_text': {
-                     'priority_intro': "This is where you should focus your efforts.",
-                     'detailed_recommendations': "Detailed recommendations",
-                     'save_button': "Save analysis",
-                     'save_success': "Analysis saved successfully",
-                     'save_error': "Error saving analysis",
-                     'area_priority': "Priority area"
-                 }
-             }
-         elif lang_code == 'fr':
-             return {
-                 'vocabulary': ["Essayez d'enrichir votre vocabulaire"],
-                 'structure': ["Travaillez sur la structure de vos phrases"],
-                 'cohesion': ["Améliorez la connexion entre vos idées"],
-                 'clarity': ["Essayez d'exprimer vos idées plus clairement"],
-                 'specific': ["Adaptez votre texte en fonction de son objectif"],
-                 'priority': {
-                     'area': 'general',
-                     'tips': ["Demandez des commentaires spécifiques à un tuteur ou un professeur"]
-                 },
-                 'dimension_names': {
-                     'vocabulary': 'Vocabulaire',
-                     'structure': 'Structure',
-                     'cohesion': 'Cohésion',
-                     'clarity': 'Clarté',
-                     'general': 'Général'
-                 },
-                 'ui_text': {
-                     'priority_intro': "C'est là que vous devriez concentrer vos efforts.",
-                     'detailed_recommendations': "Recommandations détaillées",
-                     'save_button': "Enregistrer l'analyse",
-                     'save_success': "Analyse enregistrée avec succès",
-                     'save_error': "Erreur lors de l'enregistrement de l'analyse",
-                     'area_priority': "Domaine prioritaire"
-                 }
-             }
-         elif lang_code == 'pt':
-             return {
-                 'vocabulary': ["Tente enriquecer seu vocabulário"],
-                 'structure': ["Trabalhe na estrutura de suas frases"],
-                 'cohesion': ["Melhore a conexão entre suas ideias"],
-                 'clarity': ["Tente expressar suas ideias com mais clareza"],
-                 'specific': ["Adapte seu texto de acordo com seu propósito"],
-                 'priority': {
-                     'area': 'general',
-                     'tips': ["Busque feedback específico de um tutor ou professor"]
-                 },
-                 'dimension_names': {
-                     'vocabulary': 'Vocabulário',
-                     'structure': 'Estrutura',
-                     'cohesion': 'Coesão',
-                     'clarity': 'Clareza',
-                     'general': 'Geral'
-                 },
-                 'ui_text': {
-                     'priority_intro': "É aqui que você deve concentrar seus esforços.",
-                     'detailed_recommendations': "Recomendações detalhadas",
-                     'save_button': "Salvar análise",
-                     'save_success': "Análise salva com sucesso",
-                     'save_error': "Erro ao salvar análise",
-                     'area_priority': "Área prioritária"
-                 }
-             }
-         else:  # Spanish by default
-             return {
-                 'vocabulary': ["Intenta enriquecer tu vocabulario"],
-                 'structure': ["Trabaja en la estructura de tus oraciones"],
-                 'cohesion': ["Mejora la conexión entre tus ideas"],
-                 'clarity': ["Busca expresar tus ideas con mayor claridad"],
-                 'specific': ["Adapta tu texto según su propósito"],
-                 'priority': {
-                     'area': 'general',
-                     'tips': ["Busca retroalimentación específica de un tutor o profesor"]
-                 },
-                 'dimension_names': {
-                     'vocabulary': 'Vocabulario',
-                     'structure': 'Estructura',
-                     'cohesion': 'Cohesión',
-                     'clarity': 'Claridad',
-                     'general': 'General'
-                 },
-                 'ui_text': {
-                     'priority_intro': "Esta es el área donde debes concentrar tus esfuerzos.",
-                     'detailed_recommendations': "Recomendaciones detalladas",
-                     'save_button': "Guardar análisis",
-                     'save_success': "Análisis guardado con éxito",
-                     'save_error': "Error al guardar el análisis",
-                     'area_priority': "Área prioritaria"
-                 }
-             }
-
-
- #########################################################################
- #########################################################################
- # Graph generation functions
- def generate_sentence_graphs(doc):
-     """Generates sentence structure visualizations"""
-     fig, ax = plt.subplots(figsize=(10, 6))
-     # Implement visualization
-     plt.close()
-     return fig
-
- def generate_word_connections(doc):
-     """Generates the word connection network"""
-     fig, ax = plt.subplots(figsize=(10, 6))
-     # Implement visualization
-     plt.close()
-     return fig
-
- def generate_connection_paths(doc):
-     """Generates connection patterns"""
-     fig, ax = plt.subplots(figsize=(10, 6))
-     # Implement visualization
-     plt.close()
-     return fig
-
- def create_vocabulary_network(doc):
-     """
-     Generates the vocabulary network graph.
-     """
-     G = nx.Graph()
-
-     # Create nodes for significant words
-     words = [token.text.lower() for token in doc if token.is_alpha and not token.is_stop]
-     word_freq = Counter(words)
-
-     # Add nodes sized by frequency
-     for word, freq in word_freq.items():
-         G.add_node(word, size=freq)
-
-     # Create connections based on co-occurrence
-     window_size = 5
-     for i in range(len(words) - window_size):
-         window = words[i:i+window_size]
-         for w1, w2 in combinations(set(window), 2):
-             if G.has_edge(w1, w2):
-                 G[w1][w2]['weight'] += 1
-             else:
-                 G.add_edge(w1, w2, weight=1)
-
-     # Create the visualization
-     fig, ax = plt.subplots(figsize=(12, 8))
-     pos = nx.spring_layout(G)
-
-     # Draw nodes
-     nx.draw_networkx_nodes(G, pos,
-                            node_size=[G.nodes[node]['size']*100 for node in G.nodes],
-                            node_color='lightblue',
-                            alpha=0.7)
-
-     # Draw connections
-     nx.draw_networkx_edges(G, pos,
-                            width=[G[u][v]['weight']*0.5 for u,v in G.edges],
-                            alpha=0.5)
-
-     # Add labels
-     nx.draw_networkx_labels(G, pos)
-
-     plt.title("Red de Vocabulario")
-     plt.axis('off')
-     return fig
-
- def create_syntax_complexity_graph(doc):
-     """
-     Generates the syntactic complexity arc diagram.
-     Shows the dependency structure with colors based on complexity.
-     """
-     try:
-         # Prepare the data for the visualization
-         sentences = list(doc.sents)
-         if not sentences:
-             return None
-
-         # Create the figure for the plot
-         fig, ax = plt.subplots(figsize=(12, len(sentences) * 2))
-
-         # Colors for the different depth levels
-         depth_colors = plt.cm.viridis(np.linspace(0, 1, 6))
-
-         y_offset = 0
-         max_x = 0
-
-         for sent in sentences:
-             words = [token.text for token in sent]
-             x_positions = range(len(words))
-             max_x = max(max_x, len(words))
-
-             # Draw words
-             plt.plot(x_positions, [y_offset] * len(words), 'k-', alpha=0.2)
-             plt.scatter(x_positions, [y_offset] * len(words), alpha=0)
-
-             # Add text
-             for i, word in enumerate(words):
-                 plt.annotate(word, (i, y_offset), xytext=(0, -10),
-                              textcoords='offset points', ha='center')
-
-             # Draw dependency arcs
-             for token in sent:
-                 if token.dep_ != "ROOT":
-                     # Compute the dependency depth
-                     depth = 0
-                     current = token
-                     while current.head != current:
-                         depth += 1
-                         current = current.head
-
-                     # Determine the positions for the arc
-                     start = token.i - sent[0].i
-                     end = token.head.i - sent[0].i
-
-                     # Arc height based on the distance between words
-                     height = 0.5 * abs(end - start)
-
-                     # Color based on the depth
-                     color = depth_colors[min(depth, len(depth_colors)-1)]
-
-                     # Create the arc
-                     arc = patches.Arc((min(start, end) + abs(end - start)/2, y_offset),
-                                       width=abs(end - start),
-                                       height=height,
-                                       angle=0,
-                                       theta1=0,
-                                       theta2=180,
-                                       color=color,
-                                       alpha=0.6)
-                     ax.add_patch(arc)
-
-             y_offset -= 2
-
-         # Configure the plot
-         plt.xlim(-1, max_x)
-         plt.ylim(y_offset - 1, 1)
-         plt.axis('off')
-         plt.title("Complejidad Sintáctica")
-
-         return fig
-
-     except Exception as e:
-         logger.error(f"Error in create_syntax_complexity_graph: {str(e)}")
-         return None
-
-
- def create_cohesion_heatmap(doc):
-     """Generates a heatmap showing cohesion between paragraphs/sentences."""
-     try:
-         sentences = list(doc.sents)
-         n_sentences = len(sentences)
-
-         if n_sentences < 2:
-             return None
-
-         similarity_matrix = np.zeros((n_sentences, n_sentences))
-
-         for i in range(n_sentences):
-             for j in range(n_sentences):
-                 sent1_lemmas = {token.lemma_ for token in sentences[i]
-                                 if token.is_alpha and not token.is_stop}
-                 sent2_lemmas = {token.lemma_ for token in sentences[j]
-                                 if token.is_alpha and not token.is_stop}
-
-                 if sent1_lemmas and sent2_lemmas:
-                     intersection = len(sent1_lemmas & sent2_lemmas)  # Fixed here
-                     union = len(sent1_lemmas | sent2_lemmas)  # And here
-                     similarity_matrix[i, j] = intersection / union if union > 0 else 0
-
-         # Create the visualization
-         fig, ax = plt.subplots(figsize=(10, 8))
-
-         sns.heatmap(similarity_matrix,
-                     cmap='YlOrRd',
-                     square=True,
-                     xticklabels=False,
-                     yticklabels=False,
-                     cbar_kws={'label': 'Cohesión'},
-                     ax=ax)
-
-         plt.title("Mapa de Cohesión Textual")
-         plt.xlabel("Oraciones")
-         plt.ylabel("Oraciones")
-
-         plt.tight_layout()
-         return fig
-
-     except Exception as e:
-         logger.error(f"Error in create_cohesion_heatmap: {str(e)}")
-         return None
1
+ #v3/modules/studentact/current_situation_analysis.py
2
+
3
+ import streamlit as st
4
+ import matplotlib.pyplot as plt
5
+ import networkx as nx
6
+ import seaborn as sns
7
+ from collections import Counter
8
+ from itertools import combinations
9
+ import numpy as np
10
+ import matplotlib.patches as patches
11
+ import logging
12
+
13
+ from translations.recommendations import RECOMMENDATIONS
14
+
15
+ # 2. Configuraci贸n b谩sica del logging
16
+ logging.basicConfig(
17
+ level=logging.INFO,
18
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
19
+ handlers=[
20
+ logging.StreamHandler(),
21
+ logging.FileHandler('app.log')
22
+ ]
23
+ )
24
+
25
+ # 3. Obtener el logger espec铆fico para este m贸dulo
26
+ logger = logging.getLogger(__name__)
27
+
28
+ #########################################################################
29
+
30
+ def correlate_metrics(scores):
31
+ """
32
+ Ajusta los scores para mantener correlaciones l贸gicas entre m茅tricas.
33
+
34
+ Args:
35
+ scores: dict con scores iniciales de vocabulario, estructura, cohesi贸n y claridad
36
+
37
+ Returns:
38
+ dict con scores ajustados
39
+ """
40
+ try:
41
+ # 1. Correlaci贸n estructura-cohesi贸n
42
+ # La cohesi贸n no puede ser menor que estructura * 0.7
43
+ min_cohesion = scores['structure']['normalized_score'] * 0.7
44
+ if scores['cohesion']['normalized_score'] < min_cohesion:
45
+ scores['cohesion']['normalized_score'] = min_cohesion
46
+
47
+ # 2. Correlaci贸n vocabulario-cohesi贸n
48
+ # La cohesi贸n l茅xica depende del vocabulario
49
+ vocab_influence = scores['vocabulary']['normalized_score'] * 0.6
50
+ scores['cohesion']['normalized_score'] = max(
51
+ scores['cohesion']['normalized_score'],
52
+ vocab_influence
53
+ )
54
+
55
+ # 3. Correlaci贸n cohesi贸n-claridad
56
+ # La claridad no puede superar cohesi贸n * 1.2
57
+ max_clarity = scores['cohesion']['normalized_score'] * 1.2
58
+ if scores['clarity']['normalized_score'] > max_clarity:
59
+ scores['clarity']['normalized_score'] = max_clarity
60
+
61
+ # 4. Correlaci贸n estructura-claridad
62
+ # La claridad no puede superar estructura * 1.1
63
+ struct_max_clarity = scores['structure']['normalized_score'] * 1.1
64
+ scores['clarity']['normalized_score'] = min(
65
+ scores['clarity']['normalized_score'],
66
+ struct_max_clarity
67
+ )
68
+
69
+ # Normalizar todos los scores entre 0 y 1
70
+ for metric in scores:
71
+ scores[metric]['normalized_score'] = max(0.0, min(1.0, scores[metric]['normalized_score']))
72
+
73
+ return scores
74
+
75
+ except Exception as e:
76
+ logger.error(f"Error en correlate_metrics: {str(e)}")
77
+ return scores
78
+
79
+ ##########################################################################
80
+
81
+ def analyze_text_dimensions(doc):
82
+ """
83
+ Analiza las dimensiones principales del texto manteniendo correlaciones l贸gicas.
84
+ """
85
+ try:
86
+ # Obtener scores iniciales
87
+ vocab_score, vocab_details = analyze_vocabulary_diversity(doc)
88
+ struct_score = analyze_structure(doc)
89
+ cohesion_score = analyze_cohesion(doc)
90
+ clarity_score, clarity_details = analyze_clarity(doc)
91
+
92
+ # Crear diccionario de scores inicial
93
+ scores = {
94
+ 'vocabulary': {
95
+ 'normalized_score': vocab_score,
96
+ 'details': vocab_details
97
+ },
98
+ 'structure': {
99
+ 'normalized_score': struct_score,
100
+ 'details': None
101
+ },
102
+ 'cohesion': {
103
+ 'normalized_score': cohesion_score,
104
+ 'details': None
105
+ },
106
+ 'clarity': {
107
+ 'normalized_score': clarity_score,
108
+ 'details': clarity_details
109
+ }
110
+ }
111
+
112
+ # Ajustar correlaciones entre m茅tricas
113
+ adjusted_scores = correlate_metrics(scores)
114
+
115
+ # Logging para diagn贸stico
116
+ logger.info(f"""
117
+ Scores originales vs ajustados:
118
+ Vocabulario: {vocab_score:.2f} -> {adjusted_scores['vocabulary']['normalized_score']:.2f}
119
+ Estructura: {struct_score:.2f} -> {adjusted_scores['structure']['normalized_score']:.2f}
120
+ Cohesi贸n: {cohesion_score:.2f} -> {adjusted_scores['cohesion']['normalized_score']:.2f}
121
+ Claridad: {clarity_score:.2f} -> {adjusted_scores['clarity']['normalized_score']:.2f}
122
+ """)
123
+
124
+ return adjusted_scores
125
+
126
+ except Exception as e:
127
+ logger.error(f"Error en analyze_text_dimensions: {str(e)}")
128
+ return {
129
+ 'vocabulary': {'normalized_score': 0.0, 'details': {}},
130
+ 'structure': {'normalized_score': 0.0, 'details': {}},
131
+ 'cohesion': {'normalized_score': 0.0, 'details': {}},
132
+ 'clarity': {'normalized_score': 0.0, 'details': {}}
133
+ }
134
+
135
+
136
+
137
+ #############################################################################################
138
+
139
+ def analyze_clarity(doc):
140
+ """
141
+ Analiza la claridad del texto considerando m煤ltiples factores.
142
+ """
143
+ try:
144
+ sentences = list(doc.sents)
145
+ if not sentences:
146
+ return 0.0, {}
147
+
148
+ # 1. Longitud de oraciones
149
+ sentence_lengths = [len(sent) for sent in sentences]
150
+ avg_length = sum(sentence_lengths) / len(sentences)
151
+
152
+ # Normalizar usando los umbrales definidos para clarity
153
+ length_score = normalize_score(
154
+ value=avg_length,
155
+ metric_type='clarity',
156
+ optimal_length=20, # Una oraci贸n ideal tiene ~20 palabras
157
+ min_threshold=0.60, # Consistente con METRIC_THRESHOLDS
158
+ target_threshold=0.75 # Consistente con METRIC_THRESHOLDS
159
+ )
160
+
161
+ # 2. An谩lisis de conectores
162
+ connector_count = 0
163
+ connector_weights = {
164
+ 'CCONJ': 1.0, # Coordinantes
165
+ 'SCONJ': 1.2, # Subordinantes
166
+ 'ADV': 0.8 # Adverbios conectivos
167
+ }
168
+
169
+ for token in doc:
170
+ if token.pos_ in connector_weights and token.dep_ in ['cc', 'mark', 'advmod']:
171
+ connector_count += connector_weights[token.pos_]
172
+
173
+ # Normalizar conectores por oraci贸n
174
+ connectors_per_sentence = connector_count / len(sentences) if sentences else 0
175
+ connector_score = normalize_score(
176
+ value=connectors_per_sentence,
177
+ metric_type='clarity',
178
+ optimal_connections=1.5, # ~1.5 conectores por oraci贸n es 贸ptimo
179
+ min_threshold=0.60,
180
+ target_threshold=0.75
181
+ )
182
+
183
+ # 3. Complejidad estructural
184
+ clause_count = 0
185
+ for sent in sentences:
186
+ verbs = [token for token in sent if token.pos_ == 'VERB']
187
+ clause_count += len(verbs)
188
+
189
+ complexity_raw = clause_count / len(sentences) if sentences else 0
190
+ complexity_score = normalize_score(
191
+ value=complexity_raw,
192
+ metric_type='clarity',
193
+ optimal_depth=2.0, # ~2 cl谩usulas por oraci贸n es 贸ptimo
194
+ min_threshold=0.60,
195
+ target_threshold=0.75
196
+ )
197
+
198
+ # 4. Densidad l茅xica
199
+ content_words = len([token for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ', 'ADV']])
200
+ total_words = len([token for token in doc if token.is_alpha])
201
+ density = content_words / total_words if total_words > 0 else 0
202
+
203
+ density_score = normalize_score(
204
+ value=density,
205
+ metric_type='clarity',
206
+ optimal_connections=0.6, # 60% de palabras de contenido es 贸ptimo
207
+ min_threshold=0.60,
208
+ target_threshold=0.75
209
+ )
210
+
211
+ # Score final ponderado
212
+ weights = {
213
+ 'length': 0.3,
214
+ 'connectors': 0.3,
215
+ 'complexity': 0.2,
216
+ 'density': 0.2
217
+ }
218
+
219
+ clarity_score = (
220
+ weights['length'] * length_score +
221
+ weights['connectors'] * connector_score +
222
+ weights['complexity'] * complexity_score +
223
+ weights['density'] * density_score
224
+ )
225
+
226
+ details = {
227
+ 'length_score': length_score,
228
+ 'connector_score': connector_score,
229
+ 'complexity_score': complexity_score,
230
+ 'density_score': density_score,
231
+ 'avg_sentence_length': avg_length,
232
+ 'connectors_per_sentence': connectors_per_sentence,
233
+ 'density': density
234
+ }
235
+
236
+ # Agregar logging para diagn贸stico
237
+ logger.info(f"""
238
+ Scores de Claridad:
239
+ - Longitud: {length_score:.2f} (avg={avg_length:.1f} palabras)
240
+ - Conectores: {connector_score:.2f} (avg={connectors_per_sentence:.1f} por oraci贸n)
241
+ - Complejidad: {complexity_score:.2f} (avg={complexity_raw:.1f} cl谩usulas)
242
+ - Densidad: {density_score:.2f} ({density*100:.1f}% palabras de contenido)
243
+ - Score Final: {clarity_score:.2f}
244
+ """)
245
+
246
+ return clarity_score, details
247
+
248
+ except Exception as e:
249
+ logger.error(f"Error en analyze_clarity: {str(e)}")
250
+ return 0.0, {}
251
+
252
+ #########################################################################
253
+ def analyze_vocabulary_diversity(doc):
254
+ """An谩lisis mejorado de la diversidad y calidad del vocabulario"""
255
+ try:
256
+ # 1. An谩lisis b谩sico de diversidad
257
+ unique_lemmas = {token.lemma_ for token in doc if token.is_alpha}
258
+ total_words = len([token for token in doc if token.is_alpha])
259
+ basic_diversity = len(unique_lemmas) / total_words if total_words > 0 else 0
260
+
261
+ # 2. An谩lisis de registro
262
+ academic_words = 0
263
+ narrative_words = 0
264
+ technical_terms = 0
265
+
266
+ # Clasificar palabras por registro
267
+ for token in doc:
268
+ if token.is_alpha:
269
+ # Detectar t茅rminos acad茅micos/t茅cnicos
270
+ if token.pos_ in ['NOUN', 'VERB', 'ADJ']:
271
+ if any(parent.pos_ == 'NOUN' for parent in token.ancestors):
272
+ technical_terms += 1
273
+ # Detectar palabras narrativas
274
+ if token.pos_ in ['VERB', 'ADV'] and token.dep_ in ['ROOT', 'advcl']:
275
+ narrative_words += 1
276
+
277
+ # 3. An谩lisis de complejidad sint谩ctica
278
+ avg_sentence_length = sum(len(sent) for sent in doc.sents) / len(list(doc.sents))
279
+
280
+ # 4. Calcular score ponderado
281
+ weights = {
282
+ 'diversity': 0.3,
283
+ 'technical': 0.3,
284
+ 'narrative': 0.2,
285
+ 'complexity': 0.2
286
+ }
287
+
288
+ scores = {
289
+ 'diversity': basic_diversity,
290
+ 'technical': technical_terms / total_words if total_words > 0 else 0,
291
+ 'narrative': narrative_words / total_words if total_words > 0 else 0,
292
+ 'complexity': min(1.0, avg_sentence_length / 20) # Normalizado a 20 palabras
293
+ }
294
+
295
+ # Score final ponderado
296
+ final_score = sum(weights[key] * scores[key] for key in weights)
297
+
298
+ # Informaci贸n adicional para diagn贸stico
299
+ details = {
300
+ 'text_type': 'narrative' if scores['narrative'] > scores['technical'] else 'academic',
301
+ 'scores': scores
302
+ }
303
+
304
+ return final_score, details
305
+
306
+ except Exception as e:
307
+ logger.error(f"Error en analyze_vocabulary_diversity: {str(e)}")
308
+ return 0.0, {}
309
+
310
+ #########################################################################
311
+ def analyze_cohesion(doc):
312
+ """Analiza la cohesi贸n textual"""
313
+ try:
314
+ sentences = list(doc.sents)
315
+ if len(sentences) < 2:
316
+ logger.warning("Texto demasiado corto para an谩lisis de cohesi贸n")
317
+ return 0.0
318
+
319
+ # 1. An谩lisis de conexiones l茅xicas
320
+ lexical_connections = 0
321
+ total_possible_connections = 0
322
+
323
+ for i in range(len(sentences)-1):
324
+ # Obtener lemmas significativos (no stopwords)
325
+ sent1_words = {token.lemma_ for token in sentences[i]
326
+ if token.is_alpha and not token.is_stop}
327
+ sent2_words = {token.lemma_ for token in sentences[i+1]
328
+ if token.is_alpha and not token.is_stop}
329
+
330
+ if sent1_words and sent2_words: # Verificar que ambos conjuntos no est茅n vac铆os
331
+ intersection = len(sent1_words.intersection(sent2_words))
332
+ total_possible = min(len(sent1_words), len(sent2_words))
333
+
334
+ if total_possible > 0:
335
+ lexical_score = intersection / total_possible
336
+ lexical_connections += lexical_score
337
+ total_possible_connections += 1
338
+
339
+ # 2. An谩lisis de conectores
340
+ connector_count = 0
341
+ connector_types = {
342
+ 'CCONJ': 1.0, # Coordinantes
343
+ 'SCONJ': 1.2, # Subordinantes
344
+ 'ADV': 0.8 # Adverbios conectivos
345
+ }
346
+
347
+ for token in doc:
348
+ if (token.pos_ in connector_types and
349
+ token.dep_ in ['cc', 'mark', 'advmod'] and
350
+ not token.is_stop):
351
+ connector_count += connector_types[token.pos_]
352
+
353
+ # 3. C谩lculo de scores normalizados
354
+ if total_possible_connections > 0:
355
+ lexical_cohesion = lexical_connections / total_possible_connections
356
+ else:
357
+ lexical_cohesion = 0
358
+
359
+ if len(sentences) > 1:
360
+ connector_cohesion = min(1.0, connector_count / (len(sentences) - 1))
361
+ else:
362
+ connector_cohesion = 0
363
+
364
+ # 4. Score final ponderado
365
+ weights = {
366
+ 'lexical': 0.7,
367
+ 'connectors': 0.3
368
+ }
369
+
370
+ cohesion_score = (
371
+ weights['lexical'] * lexical_cohesion +
372
+ weights['connectors'] * connector_cohesion
373
+ )
374
+
375
+ # 5. Logging para diagn贸stico
376
+ logger.info(f"""
377
+ An谩lisis de Cohesi贸n:
378
+ - Conexiones l茅xicas encontradas: {lexical_connections}
379
+ - Conexiones posibles: {total_possible_connections}
380
+ - Lexical cohesion score: {lexical_cohesion}
381
+ - Conectores encontrados: {connector_count}
382
+ - Connector cohesion score: {connector_cohesion}
383
+ - Score final: {cohesion_score}
384
+ """)
385
+
386
+ return cohesion_score
387
+
388
+ except Exception as e:
389
+ logger.error(f"Error en analyze_cohesion: {str(e)}")
390
+ return 0.0
391
+
392
+ #########################################################################
393
+ def analyze_structure(doc):
394
+ try:
395
+ if len(doc) == 0:
396
+ return 0.0
397
+
398
+ structure_scores = []
399
+ for token in doc:
400
+ if token.dep_ == 'ROOT':
401
+ result = get_dependency_depths(token)
402
+ structure_scores.append(result['final_score'])
403
+
404
+ if not structure_scores:
405
+ return 0.0
406
+
407
+ return min(1.0, sum(structure_scores) / len(structure_scores))
408
+
409
+ except Exception as e:
410
+ logger.error(f"Error en analyze_structure: {str(e)}")
411
+ return 0.0
412
+
413
+ #########################################################################
414
+ # Funciones auxiliares de an谩lisis
415
+ def get_dependency_depths(token, depth=0, analyzed_tokens=None):
416
+ """
417
+ Analiza la profundidad y calidad de las relaciones de dependencia.
418
+
419
+ Args:
420
+ token: Token a analizar
421
+ depth: Profundidad actual en el 谩rbol
422
+ analyzed_tokens: Set para evitar ciclos en el an谩lisis
423
+
424
+ Returns:
425
+ dict: Informaci贸n detallada sobre las dependencias
426
+ - depths: Lista de profundidades
427
+ - relations: Diccionario con tipos de relaciones encontradas
428
+ - complexity_score: Puntuaci贸n de complejidad
429
+ """
430
+ if analyzed_tokens is None:
431
+ analyzed_tokens = set()
432
+
433
+ # Evitar ciclos
434
+ if token.i in analyzed_tokens:
435
+ return {
436
+ 'depths': [],
437
+ 'relations': {},
438
+ 'complexity_score': 0
439
+ }
440
+
441
+ analyzed_tokens.add(token.i)
442
+
443
+ # Pesos para diferentes tipos de dependencias
444
+ dependency_weights = {
445
+ # Dependencias principales
446
+ 'nsubj': 1.2, # Sujeto nominal
447
+ 'obj': 1.1, # Objeto directo
448
+ 'iobj': 1.1, # Objeto indirecto
449
+ 'ROOT': 1.3, # Ra铆z
450
+
451
+ # Modificadores
452
+ 'amod': 0.8, # Modificador adjetival
453
+ 'advmod': 0.8, # Modificador adverbial
454
+ 'nmod': 0.9, # Modificador nominal
455
+
456
+ # Estructuras complejas
457
+ 'csubj': 1.4, # Cl谩usula como sujeto
458
+ 'ccomp': 1.3, # Complemento clausal
459
+ 'xcomp': 1.2, # Complemento clausal abierto
460
+ 'advcl': 1.2, # Cl谩usula adverbial
461
+
462
+ # Coordinaci贸n y subordinaci贸n
463
+ 'conj': 1.1, # Conjunci贸n
464
+ 'cc': 0.7, # Coordinaci贸n
465
+ 'mark': 0.8, # Marcador
466
+
467
+ # Otros
468
+ 'det': 0.5, # Determinante
469
+ 'case': 0.5, # Caso
470
+ 'punct': 0.1 # Puntuaci贸n
471
+ }
472
+
473
+ # Inicializar resultados
474
+ current_result = {
475
+ 'depths': [depth],
476
+ 'relations': {token.dep_: 1},
477
+ 'complexity_score': dependency_weights.get(token.dep_, 0.5) * (depth + 1)
478
+ }
479
+
480
+ # Analizar hijos recursivamente
481
+ for child in token.children:
482
+ child_result = get_dependency_depths(child, depth + 1, analyzed_tokens)
483
+
484
+ # Combinar profundidades
485
+ current_result['depths'].extend(child_result['depths'])
486
+
487
+ # Combinar relaciones
488
+ for rel, count in child_result['relations'].items():
489
+ current_result['relations'][rel] = current_result['relations'].get(rel, 0) + count
490
+
491
+ # Acumular score de complejidad
492
+ current_result['complexity_score'] += child_result['complexity_score']
493
+
494
+ # Calcular m茅tricas adicionales
495
+ current_result['max_depth'] = max(current_result['depths'])
496
+ current_result['avg_depth'] = sum(current_result['depths']) / len(current_result['depths'])
497
+ current_result['relation_diversity'] = len(current_result['relations'])
498
+
499
+ # Calcular score ponderado por tipo de estructura
500
+ structure_bonus = 0
501
+
502
+ # Bonus por estructuras complejas
503
+ if 'csubj' in current_result['relations'] or 'ccomp' in current_result['relations']:
504
+ structure_bonus += 0.3
505
+
506
+ # Bonus por coordinaci贸n balanceada
507
+ if 'conj' in current_result['relations'] and 'cc' in current_result['relations']:
508
+ structure_bonus += 0.2
509
+
510
+ # Bonus por modificaci贸n rica
511
+ if len(set(['amod', 'advmod', 'nmod']) & set(current_result['relations'])) >= 2:
512
+ structure_bonus += 0.2
513
+
514
+ current_result['final_score'] = (
515
+ current_result['complexity_score'] * (1 + structure_bonus)
516
+ )
517
+
518
+ return current_result
519
+
520
+ #########################################################################
521
+ def normalize_score(value, metric_type,
522
+ min_threshold=0.0, target_threshold=1.0,
523
+ range_factor=2.0, optimal_length=None,
524
+ optimal_connections=None, optimal_depth=None):
525
+ """
526
+ Normaliza un valor considerando umbrales espec铆ficos por tipo de m茅trica.
527
+
528
+ Args:
529
+ value: Valor a normalizar
530
+ metric_type: Tipo de m茅trica ('vocabulary', 'structure', 'cohesion', 'clarity')
531
+ min_threshold: Valor m铆nimo aceptable
532
+ target_threshold: Valor objetivo
533
+ range_factor: Factor para ajustar el rango
534
+ optimal_length: Longitud 贸ptima (opcional)
535
+ optimal_connections: N煤mero 贸ptimo de conexiones (opcional)
536
+ optimal_depth: Profundidad 贸ptima de estructura (opcional)
537
+
538
+ Returns:
539
+ float: Valor normalizado entre 0 y 1
540
+ """
541
+ try:
542
+ # Definir umbrales por tipo de m茅trica
543
+ METRIC_THRESHOLDS = {
544
+ 'vocabulary': {
545
+ 'min': 0.60,
546
+ 'target': 0.75,
547
+ 'range_factor': 1.5
548
+ },
549
+ 'structure': {
550
+ 'min': 0.65,
551
+ 'target': 0.80,
552
+ 'range_factor': 1.8
553
+ },
554
+ 'cohesion': {
555
+ 'min': 0.55,
556
+ 'target': 0.70,
557
+ 'range_factor': 1.6
558
+ },
559
+ 'clarity': {
560
+ 'min': 0.60,
561
+ 'target': 0.75,
562
+ 'range_factor': 1.7
563
+ }
564
+ }
565
+
566
+ # Validar valores negativos o cero
567
+ if value < 0:
568
+ logger.warning(f"Valor negativo recibido: {value}")
569
+ return 0.0
570
+
571
+ # Manejar caso donde el valor es cero
572
+ if value == 0:
573
+ logger.warning("Valor cero recibido")
574
+ return 0.0
575
+
576
+ # Obtener umbrales espec铆ficos para el tipo de m茅trica
577
+ thresholds = METRIC_THRESHOLDS.get(metric_type, {
578
+ 'min': min_threshold,
579
+ 'target': target_threshold,
580
+ 'range_factor': range_factor
581
+ })
582
+
583
+ # Identificar el valor de referencia a usar
584
+ if optimal_depth is not None:
585
+ reference = optimal_depth
586
+ elif optimal_connections is not None:
587
+ reference = optimal_connections
588
+ elif optimal_length is not None:
589
+ reference = optimal_length
590
+ else:
591
+ reference = thresholds['target']
592
+
593
+ # Validar valor de referencia
594
+ if reference <= 0:
595
+ logger.warning(f"Valor de referencia inv谩lido: {reference}")
596
+ return 0.0
597
+
598
+ # Calcular score basado en umbrales
599
+ if value < thresholds['min']:
600
+ # Valor por debajo del m铆nimo
601
+ score = (value / thresholds['min']) * 0.5 # M谩ximo 0.5 para valores bajo el m铆nimo
602
+ elif value < thresholds['target']:
603
+ # Valor entre m铆nimo y objetivo
604
+ range_size = thresholds['target'] - thresholds['min']
605
+ progress = (value - thresholds['min']) / range_size
606
+ score = 0.5 + (progress * 0.5) # Escala entre 0.5 y 1.0
607
+ else:
608
+ # Valor alcanza o supera el objetivo
609
+ score = 1.0
610
+
611
+ # Penalizar valores muy por encima del objetivo
612
+ if value > (thresholds['target'] * thresholds['range_factor']):
613
+ excess = (value - thresholds['target']) / (thresholds['target'] * thresholds['range_factor'])
614
+ score = max(0.7, 1.0 - excess) # No bajar de 0.7 para valores altos
615
+
616
+ # Asegurar que el resultado est茅 entre 0 y 1
617
+ return max(0.0, min(1.0, score))
618
+
619
+ except Exception as e:
620
+ logger.error(f"Error en normalize_score: {str(e)}")
621
+ return 0.0
622
+
623
+ #########################################################################
624
+ #########################################################################
625
+
626
+ def generate_recommendations(metrics, text_type, lang_code='es'):
+     """
+     Generates personalized recommendations based on the text metrics and the text type.
+
+     Args:
+         metrics: Dictionary with the analyzed metrics
+         text_type: Text type ('academic_article', 'student_essay', 'general_communication')
+         lang_code: Language code for the recommendations (es, en, uk)
+
+     Returns:
+         dict: Recommendations organized by category, in the corresponding language
+     """
+     try:
+         # Get the thresholds for this text type
+         thresholds = TEXT_TYPES[text_type]['thresholds']
+
+         # Check that the language is supported; fall back to Spanish
+         if lang_code not in RECOMMENDATIONS:
+             logger.warning(f"Language {lang_code} not supported for recommendations, falling back to Spanish")
+             lang_code = 'es'
+
+         # Get the translations for the selected language
+         translations = RECOMMENDATIONS[lang_code]
+
+         # Initialize the recommendations dictionary
+         recommendations = {
+             'vocabulary': [],
+             'structure': [],
+             'cohesion': [],
+             'clarity': [],
+             'specific': [],
+             'priority': {
+                 'area': 'general',
+                 'tips': []
+             },
+             'text_type_name': translations['text_types'][text_type],
+             'dimension_names': translations['dimension_names'],
+             'ui_text': {
+                 'priority_intro': translations['priority_intro'],
+                 'detailed_recommendations': translations['detailed_recommendations'],
+                 'save_button': translations['save_button'],
+                 'save_success': translations['save_success'],
+                 'save_error': translations['save_error'],
+                 'area_priority': translations['area_priority']
+             }
+         }
+
+         # Determine the level for each dimension and assign recommendations
+         dimensions = ['vocabulary', 'structure', 'cohesion', 'clarity']
+         scores = {}
+
+         for dim in dimensions:
+             score = metrics[dim]['normalized_score']
+             scores[dim] = score
+
+             # Determine the level (low, medium, high)
+             if score < thresholds[dim]['min']:
+                 level = 'low'
+             elif score < thresholds[dim]['target']:
+                 level = 'medium'
+             else:
+                 level = 'high'
+
+             # Assign the recommendations for that level
+             recommendations[dim] = translations[dim][level]
+
+         # Assign the text-type-specific recommendations
+         recommendations['specific'] = translations[text_type]
+
+         # Determine the priority area (the one with the lowest score)
+         priority_dimension = min(scores, key=scores.get)
+         recommendations['priority']['area'] = priority_dimension
+         recommendations['priority']['tips'] = recommendations[priority_dimension]
+
+         logger.info(f"Generated recommendations in {lang_code} for text type {text_type}")
+         return recommendations
+
+     except Exception as e:
+         logger.error(f"Error in generate_recommendations: {str(e)}")
+
+         # Fall back based on the current language rather than hard-coded cases;
+         # this handles Ukrainian and any future language as well
+         fallback_translations = {
+             'en': {
+                 'basic_recommendations': {
+                     'vocabulary': ["Try enriching your vocabulary"],
+                     'structure': ["Work on the structure of your sentences"],
+                     'cohesion': ["Improve the connection between your ideas"],
+                     'clarity': ["Try to express your ideas more clearly"],
+                     'specific': ["Adapt your text according to its purpose"],
+                 },
+                 'dimension_names': {
+                     'vocabulary': 'Vocabulary',
+                     'structure': 'Structure',
+                     'cohesion': 'Cohesion',
+                     'clarity': 'Clarity',
+                     'general': 'General'
+                 },
+                 'ui_text': {
+                     'priority_intro': "This is where you should focus your efforts.",
+                     'detailed_recommendations': "Detailed recommendations",
+                     'save_button': "Save analysis",
+                     'save_success': "Analysis saved successfully",
+                     'save_error': "Error saving analysis",
+                     'area_priority': "Priority area"
+                 }
+             },
+             'uk': {
+                 'basic_recommendations': {
+                     'vocabulary': ["Розширте свій словниковий запас"],
+                     'structure': ["Покращіть структуру ваших речень"],
+                     'cohesion': ["Покращіть зв'язок між вашими ідеями"],
+                     'clarity': ["Висловлюйте свої ідеї ясніше"],
+                     'specific': ["Адаптуйте свій текст відповідно до його мети"],
+                 },
+                 'dimension_names': {
+                     'vocabulary': 'Словниковий запас',
+                     'structure': 'Структура',
+                     'cohesion': 'Зв\'язність',
+                     'clarity': 'Ясність',
+                     'general': 'Загальне'
+                 },
+                 'ui_text': {
+                     'priority_intro': "Це область, де ви повинні зосередити свої зусилля.",
+                     'detailed_recommendations': "Детальні рекомендації",
+                     'save_button': "Зберегти аналіз",
+                     'save_success': "Аналіз успішно збережено",
+                     'save_error': "Помилка при збереженні аналізу",
+                     'area_priority': "Пріоритетна область"
+                 }
+             },
+             'es': {
+                 'basic_recommendations': {
+                     'vocabulary': ["Intenta enriquecer tu vocabulario"],
+                     'structure': ["Trabaja en la estructura de tus oraciones"],
+                     'cohesion': ["Mejora la conexión entre tus ideas"],
+                     'clarity': ["Busca expresar tus ideas con mayor claridad"],
+                     'specific': ["Adapta tu texto según su propósito"],
+                 },
+                 'dimension_names': {
+                     'vocabulary': 'Vocabulario',
+                     'structure': 'Estructura',
+                     'cohesion': 'Cohesión',
+                     'clarity': 'Claridad',
+                     'general': 'General'
+                 },
+                 'ui_text': {
+                     'priority_intro': "Esta es el área donde debes concentrar tus esfuerzos.",
+                     'detailed_recommendations': "Recomendaciones detalladas",
+                     'save_button': "Guardar análisis",
+                     'save_success': "Análisis guardado con éxito",
+                     'save_error': "Error al guardar el análisis",
+                     'area_priority': "Área prioritaria"
+                 }
+             }
+         }
+
+         # Use the current language if available, else English, else Spanish as a last resort
+         current_lang = fallback_translations.get(lang_code,
+                                                  fallback_translations.get('en',
+                                                                            fallback_translations['es']))
+
+         basic_recommendations = current_lang['basic_recommendations']
+
+         return {
+             'vocabulary': basic_recommendations['vocabulary'],
+             'structure': basic_recommendations['structure'],
+             'cohesion': basic_recommendations['cohesion'],
+             'clarity': basic_recommendations['clarity'],
+             'specific': basic_recommendations['specific'],
+             'priority': {
+                 'area': 'general',
+                 'tips': ["Busca retroalimentación específica de un tutor o profesor"]
+             },
+             'dimension_names': current_lang['dimension_names'],
+             'ui_text': current_lang['ui_text']
+         }
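A usage sketch for reference — the metrics shape follows the metrics[dim]['normalized_score'] accesses above, 'student_essay' is assumed to be a key of TEXT_TYPES, and the values are invented:

sample_metrics = {
    'vocabulary': {'normalized_score': 0.45, 'details': None},
    'structure': {'normalized_score': 0.80, 'details': None},
    'cohesion': {'normalized_score': 0.60, 'details': None},
    'clarity': {'normalized_score': 0.55, 'details': None},
}
recs = generate_recommendations(sample_metrics, 'student_essay', lang_code='en')
print(recs['priority']['area'])          # 'vocabulary' -- the lowest-scoring dimension
print(recs['ui_text']['area_priority'])  # localized label from RECOMMENDATIONS['en']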
+
+
+ #########################################################################
+ #########################################################################
+ # Chart generation functions
+ def generate_sentence_graphs(doc):
+     """Generates sentence-structure visualizations (placeholder)."""
+     fig, ax = plt.subplots(figsize=(10, 6))
+     # TODO: implement the visualization; an empty figure is returned for now
+     plt.close(fig)
+     return fig
+
+ def generate_word_connections(doc):
+     """Generates a word-connection network (placeholder)."""
+     fig, ax = plt.subplots(figsize=(10, 6))
+     # TODO: implement the visualization; an empty figure is returned for now
+     plt.close(fig)
+     return fig
+
+ def generate_connection_paths(doc):
+     """Generates connection patterns (placeholder)."""
+     fig, ax = plt.subplots(figsize=(10, 6))
+     # TODO: implement the visualization; an empty figure is returned for now
+     plt.close(fig)
+     return fig
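One possible direction for these placeholders — an illustrative sketch only, not the project's settled design — is a simple per-sentence length profile, using the matplotlib and spaCy objects already in scope:

def generate_sentence_graphs_sketch(doc):
    # Illustrative only: plots tokens per sentence as a bar chart.
    lengths = [len(sent) for sent in doc.sents]
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(range(1, len(lengths) + 1), lengths, color='steelblue', alpha=0.7)
    ax.set_xlabel("Oración")
    ax.set_ylabel("Tokens")
    ax.set_title("Longitud de oraciones")
    return fig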
+
+ def create_vocabulary_network(doc):
+     """
+     Builds the vocabulary network graph.
+     """
+     G = nx.Graph()
+
+     # Create nodes for the significant words
+     words = [token.text.lower() for token in doc if token.is_alpha and not token.is_stop]
+     word_freq = Counter(words)
+
+     # Add nodes sized by frequency
+     for word, freq in word_freq.items():
+         G.add_node(word, size=freq)
+
+     # Create edges based on co-occurrence within a sliding window
+     window_size = 5
+     for i in range(len(words) - window_size + 1):  # +1 so the final window is included
+         window = words[i:i+window_size]
+         for w1, w2 in combinations(set(window), 2):
+             if G.has_edge(w1, w2):
+                 G[w1][w2]['weight'] += 1
+             else:
+                 G.add_edge(w1, w2, weight=1)
+
+     # Build the visualization
+     fig, ax = plt.subplots(figsize=(12, 8))
+     pos = nx.spring_layout(G)
+
+     # Draw the nodes
+     nx.draw_networkx_nodes(G, pos,
+                            node_size=[G.nodes[node]['size']*100 for node in G.nodes],
+                            node_color='lightblue',
+                            alpha=0.7)
+
+     # Draw the edges
+     nx.draw_networkx_edges(G, pos,
+                            width=[G[u][v]['weight']*0.5 for u,v in G.edges],
+                            alpha=0.5)
+
+     # Add the labels
+     nx.draw_networkx_labels(G, pos)
+
+     plt.title("Red de Vocabulario")
+     plt.axis('off')
+     return fig
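The sliding-window co-occurrence counting above is easy to check on a toy token list. A standalone sketch using the same Counter and combinations imports:

toy_words = ['ai', 'text', 'analysis', 'ai', 'graph', 'text']
window_size = 3
pair_counts = Counter()
for i in range(len(toy_words) - window_size + 1):
    window = set(toy_words[i:i + window_size])  # deduplicate within the window
    pair_counts.update(combinations(sorted(window), 2))
print(pair_counts.most_common(2))
# e.g. [(('ai', 'analysis'), 3), (('ai', 'text'), 3)]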
+
+ def create_syntax_complexity_graph(doc):
+     """
+     Builds the syntactic-complexity arc diagram.
+     Shows the dependency structure, with arc colors keyed to dependency depth.
+     """
+     try:
+         # Prepare the data for the visualization
+         sentences = list(doc.sents)
+         if not sentences:
+             return None
+
+         # Create the figure
+         fig, ax = plt.subplots(figsize=(12, len(sentences) * 2))
+
+         # Colors for the different depth levels
+         depth_colors = plt.cm.viridis(np.linspace(0, 1, 6))
+
+         y_offset = 0
+         max_x = 0
+
+         for sent in sentences:
+             words = [token.text for token in sent]
+             x_positions = range(len(words))
+             max_x = max(max_x, len(words))
+
+             # Draw the words
+             plt.plot(x_positions, [y_offset] * len(words), 'k-', alpha=0.2)
+             plt.scatter(x_positions, [y_offset] * len(words), alpha=0)
+
+             # Add the text labels
+             for i, word in enumerate(words):
+                 plt.annotate(word, (i, y_offset), xytext=(0, -10),
+                              textcoords='offset points', ha='center')
+
+             # Draw the dependency arcs
+             for token in sent:
+                 if token.dep_ != "ROOT":
+                     # Compute the dependency depth (hops up to the root)
+                     depth = 0
+                     current = token
+                     while current.head != current:
+                         depth += 1
+                         current = current.head
+
+                     # Determine the arc endpoints
+                     start = token.i - sent[0].i
+                     end = token.head.i - sent[0].i
+
+                     # Arc height based on the distance between the words
+                     height = 0.5 * abs(end - start)
+
+                     # Color based on the depth
+                     color = depth_colors[min(depth, len(depth_colors)-1)]
+
+                     # Create the arc
+                     arc = patches.Arc((min(start, end) + abs(end - start)/2, y_offset),
+                                       width=abs(end - start),
+                                       height=height,
+                                       angle=0,
+                                       theta1=0,
+                                       theta2=180,
+                                       color=color,
+                                       alpha=0.6)
+                     ax.add_patch(arc)
+
+             y_offset -= 2
+
+         # Configure the chart
+         plt.xlim(-1, max_x)
+         plt.ylim(y_offset - 1, 1)
+         plt.axis('off')
+         plt.title("Complejidad Sintáctica")
+
+         return fig
+
+     except Exception as e:
+         logger.error(f"Error in create_syntax_complexity_graph: {str(e)}")
+         return None
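The while-loop that walks current.head simply measures how many hops a token sits from its sentence root. The same idea as a small helper, for reference:

def dependency_depth(token):
    # Hops from the token up to the sentence root.
    depth = 0
    while token.head != token:  # root tokens are their own head in spaCy
        depth += 1
        token = token.head
    return depth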
+
+
+ def create_cohesion_heatmap(doc):
+     """Builds a heat map showing sentence-to-sentence cohesion."""
+     try:
+         sentences = list(doc.sents)
+         n_sentences = len(sentences)
+
+         if n_sentences < 2:
+             return None
+
+         similarity_matrix = np.zeros((n_sentences, n_sentences))
+
+         for i in range(n_sentences):
+             for j in range(n_sentences):
+                 sent1_lemmas = {token.lemma_ for token in sentences[i]
+                                 if token.is_alpha and not token.is_stop}
+                 sent2_lemmas = {token.lemma_ for token in sentences[j]
+                                 if token.is_alpha and not token.is_stop}
+
+                 if sent1_lemmas and sent2_lemmas:
+                     intersection = len(sent1_lemmas & sent2_lemmas)  # set intersection
+                     union = len(sent1_lemmas | sent2_lemmas)  # set union
+                     similarity_matrix[i, j] = intersection / union if union > 0 else 0
+
+         # Build the visualization
+         fig, ax = plt.subplots(figsize=(10, 8))
+
+         sns.heatmap(similarity_matrix,
+                     cmap='YlOrRd',
+                     square=True,
+                     xticklabels=False,
+                     yticklabels=False,
+                     cbar_kws={'label': 'Cohesión'},
+                     ax=ax)
+
+         plt.title("Mapa de Cohesión Textual")
+         plt.xlabel("Oraciones")
+         plt.ylabel("Oraciones")
+
+         plt.tight_layout()
+         return fig
+
+     except Exception as e:
+         logger.error(f"Error in create_cohesion_heatmap: {str(e)}")
          return None
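The heat-map cells are plain Jaccard similarity over lemma sets, so two sentences sharing 2 of 5 distinct lemmas score 0.4. A standalone check with made-up lemma sets:

s1 = {'gato', 'dormir', 'sofá'}
s2 = {'gato', 'jugar', 'jardín', 'sofá'}
print(len(s1 & s2) / len(s1 | s2))  # 0.4 -- two shared lemmas out of five distinct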