AIdeaText committed on
Commit
2987273
verified
1 Parent(s): babae9f

Update src/modules/studentact/current_situation_analysis.py

src/modules/studentact/current_situation_analysis.py CHANGED
@@ -1,1009 +1,1009 @@
1
- #v3/modules/studentact/current_situation_analysis.py
2
-
3
- import streamlit as st
4
- import matplotlib.pyplot as plt
5
- import networkx as nx
6
- import seaborn as sns
7
- from collections import Counter
8
- from itertools import combinations
9
- import numpy as np
10
- import matplotlib.patches as patches
11
- import logging
12
-
13
-
14
- # 2. Configuración básica del logging
15
- logging.basicConfig(
16
- level=logging.INFO,
17
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
18
- handlers=[
19
- logging.StreamHandler(),
20
- logging.FileHandler('app.log')
21
- ]
22
- )
23
-
24
- # 3. Obtener el logger específico para este módulo
25
- logger = logging.getLogger(__name__)
26
-
27
- #########################################################################
28
-
29
- def correlate_metrics(scores):
30
- """
31
-     Ajusta los scores para mantener correlaciones lógicas entre métricas.
32
-
33
- Args:
34
-         scores: dict con scores iniciales de vocabulario, estructura, cohesión y claridad
35
-
36
- Returns:
37
- dict con scores ajustados
38
- """
39
- try:
40
-         # 1. Correlación estructura-cohesión
41
-         # La cohesión no puede ser menor que estructura * 0.7
42
- min_cohesion = scores['structure']['normalized_score'] * 0.7
43
- if scores['cohesion']['normalized_score'] < min_cohesion:
44
- scores['cohesion']['normalized_score'] = min_cohesion
45
-
46
-         # 2. Correlación vocabulario-cohesión
47
-         # La cohesión léxica depende del vocabulario
48
- vocab_influence = scores['vocabulary']['normalized_score'] * 0.6
49
- scores['cohesion']['normalized_score'] = max(
50
- scores['cohesion']['normalized_score'],
51
- vocab_influence
52
- )
53
-
54
-         # 3. Correlación cohesión-claridad
55
-         # La claridad no puede superar cohesión * 1.2
56
- max_clarity = scores['cohesion']['normalized_score'] * 1.2
57
- if scores['clarity']['normalized_score'] > max_clarity:
58
- scores['clarity']['normalized_score'] = max_clarity
59
-
60
-         # 4. Correlación estructura-claridad
61
- # La claridad no puede superar estructura * 1.1
62
- struct_max_clarity = scores['structure']['normalized_score'] * 1.1
63
- scores['clarity']['normalized_score'] = min(
64
- scores['clarity']['normalized_score'],
65
- struct_max_clarity
66
- )
67
-
68
- # Normalizar todos los scores entre 0 y 1
69
- for metric in scores:
70
- scores[metric]['normalized_score'] = max(0.0, min(1.0, scores[metric]['normalized_score']))
71
-
72
- return scores
73
-
74
- except Exception as e:
75
- logger.error(f"Error en correlate_metrics: {str(e)}")
76
- return scores
77
-
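
A minimal usage sketch (illustrative only; the input values are hypothetical) showing the dict shape correlate_metrics expects and the structure-cohesion floor being applied:

example_scores = {
    'vocabulary': {'normalized_score': 0.70, 'details': {}},
    'structure': {'normalized_score': 0.80, 'details': None},
    'cohesion': {'normalized_score': 0.40, 'details': None},
    'clarity': {'normalized_score': 0.90, 'details': None},
}
adjusted = correlate_metrics(example_scores)
# cohesion is raised to structure * 0.7 = 0.56,
# and clarity is then capped at cohesion * 1.2 ≈ 0.67
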
78
- ##########################################################################
79
-
80
- def analyze_text_dimensions(doc):
81
- """
82
-     Analiza las dimensiones principales del texto manteniendo correlaciones lógicas.
83
- """
84
- try:
85
- # Obtener scores iniciales
86
- vocab_score, vocab_details = analyze_vocabulary_diversity(doc)
87
- struct_score = analyze_structure(doc)
88
- cohesion_score = analyze_cohesion(doc)
89
- clarity_score, clarity_details = analyze_clarity(doc)
90
-
91
- # Crear diccionario de scores inicial
92
- scores = {
93
- 'vocabulary': {
94
- 'normalized_score': vocab_score,
95
- 'details': vocab_details
96
- },
97
- 'structure': {
98
- 'normalized_score': struct_score,
99
- 'details': None
100
- },
101
- 'cohesion': {
102
- 'normalized_score': cohesion_score,
103
- 'details': None
104
- },
105
- 'clarity': {
106
- 'normalized_score': clarity_score,
107
- 'details': clarity_details
108
- }
109
- }
110
-
111
-         # Ajustar correlaciones entre métricas
112
- adjusted_scores = correlate_metrics(scores)
113
-
114
-         # Logging para diagnóstico
115
- logger.info(f"""
116
- Scores originales vs ajustados:
117
- Vocabulario: {vocab_score:.2f} -> {adjusted_scores['vocabulary']['normalized_score']:.2f}
118
- Estructura: {struct_score:.2f} -> {adjusted_scores['structure']['normalized_score']:.2f}
119
-             Cohesión: {cohesion_score:.2f} -> {adjusted_scores['cohesion']['normalized_score']:.2f}
120
- Claridad: {clarity_score:.2f} -> {adjusted_scores['clarity']['normalized_score']:.2f}
121
- """)
122
-
123
- return adjusted_scores
124
-
125
- except Exception as e:
126
- logger.error(f"Error en analyze_text_dimensions: {str(e)}")
127
- return {
128
- 'vocabulary': {'normalized_score': 0.0, 'details': {}},
129
- 'structure': {'normalized_score': 0.0, 'details': {}},
130
- 'cohesion': {'normalized_score': 0.0, 'details': {}},
131
- 'clarity': {'normalized_score': 0.0, 'details': {}}
132
- }
133
-
134
-
135
-
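
An end-to-end sketch of how these dimension scores would be obtained from raw text; it assumes a Spanish spaCy pipeline such as es_core_news_sm is available, which this module does not load itself:

import spacy
nlp = spacy.load("es_core_news_sm")  # assumed model name, not defined in this module
doc = nlp("El análisis automático ayuda a los estudiantes. Les muestra qué pueden mejorar.")
dimensions = analyze_text_dimensions(doc)
print(dimensions['clarity']['normalized_score'])  # float between 0 and 1
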
136
- #############################################################################################
137
-
138
- def analyze_clarity(doc):
139
- """
140
-     Analiza la claridad del texto considerando múltiples factores.
141
- """
142
- try:
143
- sentences = list(doc.sents)
144
- if not sentences:
145
- return 0.0, {}
146
-
147
- # 1. Longitud de oraciones
148
- sentence_lengths = [len(sent) for sent in sentences]
149
- avg_length = sum(sentence_lengths) / len(sentences)
150
-
151
- # Normalizar usando los umbrales definidos para clarity
152
- length_score = normalize_score(
153
- value=avg_length,
154
- metric_type='clarity',
155
-             optimal_length=20, # Una oración ideal tiene ~20 palabras
156
- min_threshold=0.60, # Consistente con METRIC_THRESHOLDS
157
- target_threshold=0.75 # Consistente con METRIC_THRESHOLDS
158
- )
159
-
160
-         # 2. Análisis de conectores
161
- connector_count = 0
162
- connector_weights = {
163
- 'CCONJ': 1.0, # Coordinantes
164
- 'SCONJ': 1.2, # Subordinantes
165
- 'ADV': 0.8 # Adverbios conectivos
166
- }
167
-
168
- for token in doc:
169
- if token.pos_ in connector_weights and token.dep_ in ['cc', 'mark', 'advmod']:
170
- connector_count += connector_weights[token.pos_]
171
-
172
-         # Normalizar conectores por oración
173
- connectors_per_sentence = connector_count / len(sentences) if sentences else 0
174
- connector_score = normalize_score(
175
- value=connectors_per_sentence,
176
- metric_type='clarity',
177
-             optimal_connections=1.5, # ~1.5 conectores por oración es óptimo
178
- min_threshold=0.60,
179
- target_threshold=0.75
180
- )
181
-
182
- # 3. Complejidad estructural
183
- clause_count = 0
184
- for sent in sentences:
185
- verbs = [token for token in sent if token.pos_ == 'VERB']
186
- clause_count += len(verbs)
187
-
188
- complexity_raw = clause_count / len(sentences) if sentences else 0
189
- complexity_score = normalize_score(
190
- value=complexity_raw,
191
- metric_type='clarity',
192
-             optimal_depth=2.0, # ~2 cláusulas por oración es óptimo
193
- min_threshold=0.60,
194
- target_threshold=0.75
195
- )
196
-
197
-         # 4. Densidad léxica
198
- content_words = len([token for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ', 'ADV']])
199
- total_words = len([token for token in doc if token.is_alpha])
200
- density = content_words / total_words if total_words > 0 else 0
201
-
202
- density_score = normalize_score(
203
- value=density,
204
- metric_type='clarity',
205
-             optimal_connections=0.6, # 60% de palabras de contenido es óptimo
206
- min_threshold=0.60,
207
- target_threshold=0.75
208
- )
209
-
210
- # Score final ponderado
211
- weights = {
212
- 'length': 0.3,
213
- 'connectors': 0.3,
214
- 'complexity': 0.2,
215
- 'density': 0.2
216
- }
217
-
218
- clarity_score = (
219
- weights['length'] * length_score +
220
- weights['connectors'] * connector_score +
221
- weights['complexity'] * complexity_score +
222
- weights['density'] * density_score
223
- )
224
-
225
- details = {
226
- 'length_score': length_score,
227
- 'connector_score': connector_score,
228
- 'complexity_score': complexity_score,
229
- 'density_score': density_score,
230
- 'avg_sentence_length': avg_length,
231
- 'connectors_per_sentence': connectors_per_sentence,
232
- 'density': density
233
- }
234
-
235
-         # Agregar logging para diagnóstico
236
- logger.info(f"""
237
- Scores de Claridad:
238
- - Longitud: {length_score:.2f} (avg={avg_length:.1f} palabras)
239
-             - Conectores: {connector_score:.2f} (avg={connectors_per_sentence:.1f} por oración)
240
-             - Complejidad: {complexity_score:.2f} (avg={complexity_raw:.1f} cláusulas)
241
- - Densidad: {density_score:.2f} ({density*100:.1f}% palabras de contenido)
242
- - Score Final: {clarity_score:.2f}
243
- """)
244
-
245
- return clarity_score, details
246
-
247
- except Exception as e:
248
- logger.error(f"Error en analyze_clarity: {str(e)}")
249
- return 0.0, {}
250
-
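
The returned clarity value is simply the weighted sum of the four sub-scores; with hypothetical sub-scores of 0.8 (length), 0.7 (connectors), 0.9 (complexity) and 0.6 (density):

# 0.3*0.8 + 0.3*0.7 + 0.2*0.9 + 0.2*0.6 = 0.24 + 0.21 + 0.18 + 0.12 = 0.75
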
251
- #########################################################################
252
- def analyze_vocabulary_diversity(doc):
253
-     """Análisis mejorado de la diversidad y calidad del vocabulario"""
254
- try:
255
-         # 1. Análisis básico de diversidad
256
- unique_lemmas = {token.lemma_ for token in doc if token.is_alpha}
257
- total_words = len([token for token in doc if token.is_alpha])
258
- basic_diversity = len(unique_lemmas) / total_words if total_words > 0 else 0
259
-
260
-         # 2. Análisis de registro
261
- academic_words = 0
262
- narrative_words = 0
263
- technical_terms = 0
264
-
265
- # Clasificar palabras por registro
266
- for token in doc:
267
- if token.is_alpha:
268
-                 # Detectar términos académicos/técnicos
269
- if token.pos_ in ['NOUN', 'VERB', 'ADJ']:
270
- if any(parent.pos_ == 'NOUN' for parent in token.ancestors):
271
- technical_terms += 1
272
- # Detectar palabras narrativas
273
- if token.pos_ in ['VERB', 'ADV'] and token.dep_ in ['ROOT', 'advcl']:
274
- narrative_words += 1
275
-
276
-         # 3. Análisis de complejidad sintáctica
277
- avg_sentence_length = sum(len(sent) for sent in doc.sents) / len(list(doc.sents))
278
-
279
- # 4. Calcular score ponderado
280
- weights = {
281
- 'diversity': 0.3,
282
- 'technical': 0.3,
283
- 'narrative': 0.2,
284
- 'complexity': 0.2
285
- }
286
-
287
- scores = {
288
- 'diversity': basic_diversity,
289
- 'technical': technical_terms / total_words if total_words > 0 else 0,
290
- 'narrative': narrative_words / total_words if total_words > 0 else 0,
291
- 'complexity': min(1.0, avg_sentence_length / 20) # Normalizado a 20 palabras
292
- }
293
-
294
- # Score final ponderado
295
- final_score = sum(weights[key] * scores[key] for key in weights)
296
-
297
-         # Información adicional para diagnóstico
298
- details = {
299
- 'text_type': 'narrative' if scores['narrative'] > scores['technical'] else 'academic',
300
- 'scores': scores
301
- }
302
-
303
- return final_score, details
304
-
305
- except Exception as e:
306
- logger.error(f"Error en analyze_vocabulary_diversity: {str(e)}")
307
- return 0.0, {}
308
-
309
- #########################################################################
310
- def analyze_cohesion(doc):
311
-     """Analiza la cohesión textual"""
312
- try:
313
- sentences = list(doc.sents)
314
- if len(sentences) < 2:
315
-             logger.warning("Texto demasiado corto para análisis de cohesión")
316
- return 0.0
317
-
318
-         # 1. Análisis de conexiones léxicas
319
- lexical_connections = 0
320
- total_possible_connections = 0
321
-
322
- for i in range(len(sentences)-1):
323
- # Obtener lemmas significativos (no stopwords)
324
- sent1_words = {token.lemma_ for token in sentences[i]
325
- if token.is_alpha and not token.is_stop}
326
- sent2_words = {token.lemma_ for token in sentences[i+1]
327
- if token.is_alpha and not token.is_stop}
328
-
329
-             if sent1_words and sent2_words: # Verificar que ambos conjuntos no estén vacíos
330
- intersection = len(sent1_words.intersection(sent2_words))
331
- total_possible = min(len(sent1_words), len(sent2_words))
332
-
333
- if total_possible > 0:
334
- lexical_score = intersection / total_possible
335
- lexical_connections += lexical_score
336
- total_possible_connections += 1
337
-
338
-         # 2. Análisis de conectores
339
- connector_count = 0
340
- connector_types = {
341
- 'CCONJ': 1.0, # Coordinantes
342
- 'SCONJ': 1.2, # Subordinantes
343
- 'ADV': 0.8 # Adverbios conectivos
344
- }
345
-
346
- for token in doc:
347
- if (token.pos_ in connector_types and
348
- token.dep_ in ['cc', 'mark', 'advmod'] and
349
- not token.is_stop):
350
- connector_count += connector_types[token.pos_]
351
-
352
-         # 3. Cálculo de scores normalizados
353
- if total_possible_connections > 0:
354
- lexical_cohesion = lexical_connections / total_possible_connections
355
- else:
356
- lexical_cohesion = 0
357
-
358
- if len(sentences) > 1:
359
- connector_cohesion = min(1.0, connector_count / (len(sentences) - 1))
360
- else:
361
- connector_cohesion = 0
362
-
363
- # 4. Score final ponderado
364
- weights = {
365
- 'lexical': 0.7,
366
- 'connectors': 0.3
367
- }
368
-
369
- cohesion_score = (
370
- weights['lexical'] * lexical_cohesion +
371
- weights['connectors'] * connector_cohesion
372
- )
373
-
374
-         # 5. Logging para diagnóstico
375
- logger.info(f"""
376
-             Análisis de Cohesión:
377
-             - Conexiones léxicas encontradas: {lexical_connections}
378
- - Conexiones posibles: {total_possible_connections}
379
- - Lexical cohesion score: {lexical_cohesion}
380
- - Conectores encontrados: {connector_count}
381
- - Connector cohesion score: {connector_cohesion}
382
- - Score final: {cohesion_score}
383
- """)
384
-
385
- return cohesion_score
386
-
387
- except Exception as e:
388
- logger.error(f"Error en analyze_cohesion: {str(e)}")
389
- return 0.0
390
-
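
The lexical component is the average overlap between consecutive sentences, where each pair is scored as |intersection| / min(|set1|, |set2|); a hypothetical example:

# sent1 lemmas: {texto, analizar, estudiante}; sent2 lemmas: {estudiante, mejorar}
# pair score = 1 / min(3, 2) = 0.5; with a single sentence pair, lexical_cohesion = 0.5
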
391
- #########################################################################
392
- def analyze_structure(doc):
393
- try:
394
- if len(doc) == 0:
395
- return 0.0
396
-
397
- structure_scores = []
398
- for token in doc:
399
- if token.dep_ == 'ROOT':
400
- result = get_dependency_depths(token)
401
- structure_scores.append(result['final_score'])
402
-
403
- if not structure_scores:
404
- return 0.0
405
-
406
- return min(1.0, sum(structure_scores) / len(structure_scores))
407
-
408
- except Exception as e:
409
- logger.error(f"Error en analyze_structure: {str(e)}")
410
- return 0.0
411
-
412
- #########################################################################
413
- # Funciones auxiliares de análisis
414
- def get_dependency_depths(token, depth=0, analyzed_tokens=None):
415
- """
416
- Analiza la profundidad y calidad de las relaciones de dependencia.
417
-
418
- Args:
419
- token: Token a analizar
420
-         depth: Profundidad actual en el árbol
421
-         analyzed_tokens: Set para evitar ciclos en el análisis
422
-
423
- Returns:
424
-         dict: Información detallada sobre las dependencias
425
- - depths: Lista de profundidades
426
- - relations: Diccionario con tipos de relaciones encontradas
427
-             - complexity_score: Puntuación de complejidad
428
- """
429
- if analyzed_tokens is None:
430
- analyzed_tokens = set()
431
-
432
- # Evitar ciclos
433
- if token.i in analyzed_tokens:
434
- return {
435
- 'depths': [],
436
- 'relations': {},
437
- 'complexity_score': 0
438
- }
439
-
440
- analyzed_tokens.add(token.i)
441
-
442
- # Pesos para diferentes tipos de dependencias
443
- dependency_weights = {
444
- # Dependencias principales
445
- 'nsubj': 1.2, # Sujeto nominal
446
- 'obj': 1.1, # Objeto directo
447
- 'iobj': 1.1, # Objeto indirecto
448
-         'ROOT': 1.3, # Raíz
449
-
450
- # Modificadores
451
- 'amod': 0.8, # Modificador adjetival
452
- 'advmod': 0.8, # Modificador adverbial
453
- 'nmod': 0.9, # Modificador nominal
454
-
455
- # Estructuras complejas
456
-         'csubj': 1.4, # Cláusula como sujeto
457
- 'ccomp': 1.3, # Complemento clausal
458
- 'xcomp': 1.2, # Complemento clausal abierto
459
-         'advcl': 1.2, # Cláusula adverbial
460
-
461
-         # Coordinación y subordinación
462
-         'conj': 1.1, # Conjunción
463
-         'cc': 0.7, # Coordinación
464
- 'mark': 0.8, # Marcador
465
-
466
- # Otros
467
- 'det': 0.5, # Determinante
468
- 'case': 0.5, # Caso
469
-         'punct': 0.1 # Puntuación
470
- }
471
-
472
- # Inicializar resultados
473
- current_result = {
474
- 'depths': [depth],
475
- 'relations': {token.dep_: 1},
476
- 'complexity_score': dependency_weights.get(token.dep_, 0.5) * (depth + 1)
477
- }
478
-
479
- # Analizar hijos recursivamente
480
- for child in token.children:
481
- child_result = get_dependency_depths(child, depth + 1, analyzed_tokens)
482
-
483
- # Combinar profundidades
484
- current_result['depths'].extend(child_result['depths'])
485
-
486
- # Combinar relaciones
487
- for rel, count in child_result['relations'].items():
488
- current_result['relations'][rel] = current_result['relations'].get(rel, 0) + count
489
-
490
- # Acumular score de complejidad
491
- current_result['complexity_score'] += child_result['complexity_score']
492
-
493
-     # Calcular métricas adicionales
494
- current_result['max_depth'] = max(current_result['depths'])
495
- current_result['avg_depth'] = sum(current_result['depths']) / len(current_result['depths'])
496
- current_result['relation_diversity'] = len(current_result['relations'])
497
-
498
- # Calcular score ponderado por tipo de estructura
499
- structure_bonus = 0
500
-
501
- # Bonus por estructuras complejas
502
- if 'csubj' in current_result['relations'] or 'ccomp' in current_result['relations']:
503
- structure_bonus += 0.3
504
-
505
-     # Bonus por coordinación balanceada
506
- if 'conj' in current_result['relations'] and 'cc' in current_result['relations']:
507
- structure_bonus += 0.2
508
-
509
-     # Bonus por modificación rica
510
- if len(set(['amod', 'advmod', 'nmod']) & set(current_result['relations'])) >= 2:
511
- structure_bonus += 0.2
512
-
513
- current_result['final_score'] = (
514
- current_result['complexity_score'] * (1 + structure_bonus)
515
- )
516
-
517
- return current_result
518
-
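
An illustrative call (assumes doc is a parsed spaCy Doc, as in the sketch after analyze_text_dimensions):

root = [t for t in doc if t.dep_ == 'ROOT'][0]  # root token of the first sentence found
info = get_dependency_depths(root)
print(info['max_depth'], info['relation_diversity'], info['final_score'])
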
519
- #########################################################################
520
- def normalize_score(value, metric_type,
521
- min_threshold=0.0, target_threshold=1.0,
522
- range_factor=2.0, optimal_length=None,
523
- optimal_connections=None, optimal_depth=None):
524
- """
525
-     Normaliza un valor considerando umbrales específicos por tipo de métrica.
526
-
527
- Args:
528
- value: Valor a normalizar
529
-         metric_type: Tipo de métrica ('vocabulary', 'structure', 'cohesion', 'clarity')
530
-         min_threshold: Valor mínimo aceptable
531
- target_threshold: Valor objetivo
532
- range_factor: Factor para ajustar el rango
533
-         optimal_length: Longitud óptima (opcional)
534
-         optimal_connections: Número óptimo de conexiones (opcional)
535
-         optimal_depth: Profundidad óptima de estructura (opcional)
536
-
537
- Returns:
538
- float: Valor normalizado entre 0 y 1
539
- """
540
- try:
541
-         # Definir umbrales por tipo de métrica
542
- METRIC_THRESHOLDS = {
543
- 'vocabulary': {
544
- 'min': 0.60,
545
- 'target': 0.75,
546
- 'range_factor': 1.5
547
- },
548
- 'structure': {
549
- 'min': 0.65,
550
- 'target': 0.80,
551
- 'range_factor': 1.8
552
- },
553
- 'cohesion': {
554
- 'min': 0.55,
555
- 'target': 0.70,
556
- 'range_factor': 1.6
557
- },
558
- 'clarity': {
559
- 'min': 0.60,
560
- 'target': 0.75,
561
- 'range_factor': 1.7
562
- }
563
- }
564
-
565
- # Validar valores negativos o cero
566
- if value < 0:
567
- logger.warning(f"Valor negativo recibido: {value}")
568
- return 0.0
569
-
570
- # Manejar caso donde el valor es cero
571
- if value == 0:
572
- logger.warning("Valor cero recibido")
573
- return 0.0
574
-
575
-         # Obtener umbrales específicos para el tipo de métrica
576
- thresholds = METRIC_THRESHOLDS.get(metric_type, {
577
- 'min': min_threshold,
578
- 'target': target_threshold,
579
- 'range_factor': range_factor
580
- })
581
-
582
- # Identificar el valor de referencia a usar
583
- if optimal_depth is not None:
584
- reference = optimal_depth
585
- elif optimal_connections is not None:
586
- reference = optimal_connections
587
- elif optimal_length is not None:
588
- reference = optimal_length
589
- else:
590
- reference = thresholds['target']
591
-
592
- # Validar valor de referencia
593
- if reference <= 0:
594
-             logger.warning(f"Valor de referencia inválido: {reference}")
595
- return 0.0
596
-
597
- # Calcular score basado en umbrales
598
- if value < thresholds['min']:
599
-             # Valor por debajo del mínimo
600
-             score = (value / thresholds['min']) * 0.5 # Máximo 0.5 para valores bajo el mínimo
601
- elif value < thresholds['target']:
602
-             # Valor entre mínimo y objetivo
603
- range_size = thresholds['target'] - thresholds['min']
604
- progress = (value - thresholds['min']) / range_size
605
- score = 0.5 + (progress * 0.5) # Escala entre 0.5 y 1.0
606
- else:
607
- # Valor alcanza o supera el objetivo
608
- score = 1.0
609
-
610
- # Penalizar valores muy por encima del objetivo
611
- if value > (thresholds['target'] * thresholds['range_factor']):
612
- excess = (value - thresholds['target']) / (thresholds['target'] * thresholds['range_factor'])
613
- score = max(0.7, 1.0 - excess) # No bajar de 0.7 para valores altos
614
-
615
-         # Asegurar que el resultado esté entre 0 y 1
616
- return max(0.0, min(1.0, score))
617
-
618
- except Exception as e:
619
- logger.error(f"Error en normalize_score: {str(e)}")
620
- return 0.0
621
-
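
A worked example of the threshold mapping for the 'cohesion' metric (min=0.55, target=0.70); the input values are hypothetical:

normalize_score(value=0.62, metric_type='cohesion')  # mid band: 0.5 + (0.07 / 0.15) * 0.5 ≈ 0.73
normalize_score(value=0.40, metric_type='cohesion')  # below min: (0.40 / 0.55) * 0.5 ≈ 0.36
normalize_score(value=0.90, metric_type='cohesion')  # at or above target: 1.0
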
622
- #########################################################################
623
- #########################################################################
624
-
625
- def generate_recommendations(metrics, text_type, lang_code='es'):
626
-     Genera recomendaciones personalizadas basadas en las métricas del texto y el tipo de texto.
627
- Genera recomendaciones personalizadas basadas en las m茅tricas del texto y el tipo de texto.
628
-
629
-         metrics: Diccionario con las métricas analizadas
630
- metrics: Diccionario con las m茅tricas analizadas
631
-         lang_code: Código del idioma para las recomendaciones (es, en, uk)
632
- lang_code: C贸digo del idioma para las recomendaciones (es, en, uk)
633
-
634
-         dict: Recomendaciones organizadas por categoría en el idioma correspondiente
635
- dict: Recomendaciones organizadas por categor铆a en el idioma correspondiente
636
- """
637
- try:
638
-         # Añadir debug log para verificar el código de idioma recibido
639
- logger.info(f"generate_recommendations llamado con idioma: {lang_code}")
640
-
641
- # Comprobar que importamos RECOMMENDATIONS correctamente
642
- logger.info(f"Idiomas disponibles en RECOMMENDATIONS: {list(RECOMMENDATIONS.keys())}")
643
-
644
-         # Obtener umbrales según el tipo de texto
645
- thresholds = TEXT_TYPES[text_type]['thresholds']
646
-
647
-         # Verificar que el idioma esté soportado, usar español como respaldo
648
- if lang_code not in RECOMMENDATIONS:
649
-             logger.warning(f"Idioma {lang_code} no soportado para recomendaciones, usando español")
650
- lang_code = 'es'
651
-
652
- # Obtener traducciones para el idioma seleccionado
653
- translations = RECOMMENDATIONS[lang_code]
654
-
655
- # Inicializar diccionario de recomendaciones
656
- recommendations = {
657
- 'vocabulary': [],
658
- 'structure': [],
659
- 'cohesion': [],
660
- 'clarity': [],
661
- 'specific': [],
662
- 'priority': {
663
- 'area': 'general',
664
- 'tips': []
665
- },
666
- 'text_type_name': translations['text_types'][text_type],
667
- 'dimension_names': translations['dimension_names'],
668
- 'ui_text': {
669
- 'priority_intro': translations['priority_intro'],
670
- 'detailed_recommendations': translations['detailed_recommendations'],
671
- 'save_button': translations['save_button'],
672
- 'save_success': translations['save_success'],
673
- 'save_error': translations['save_error'],
674
- 'area_priority': translations['area_priority']
675
- }
676
- }
677
-
678
-         # Determinar nivel para cada dimensión y asignar recomendaciones
679
- dimensions = ['vocabulary', 'structure', 'cohesion', 'clarity']
680
- scores = {}
681
-
682
- for dim in dimensions:
683
- score = metrics[dim]['normalized_score']
684
- scores[dim] = score
685
-
686
- # Determinar nivel (bajo, medio, alto)
687
- if score < thresholds[dim]['min']:
688
- level = 'low'
689
- elif score < thresholds[dim]['target']:
690
- level = 'medium'
691
- else:
692
- level = 'high'
693
-
694
- # Asignar recomendaciones para ese nivel
695
- recommendations[dim] = translations[dim][level]
696
-
697
-         # Asignar recomendaciones específicas por tipo de texto
698
- recommendations['specific'] = translations[text_type]
699
-
700
-         # Determinar área prioritaria (la que tiene menor puntuación)
701
- priority_dimension = min(scores, key=scores.get)
702
- recommendations['priority']['area'] = priority_dimension
703
- recommendations['priority']['tips'] = recommendations[priority_dimension]
704
-
705
- logger.info(f"Generadas recomendaciones en {lang_code} para texto tipo {text_type}")
706
- return recommendations
707
-
708
- except Exception as e:
709
- logger.error(f"Error en generate_recommendations: {str(e)}")
710
-
711
- # Utilizar un enfoque basado en el idioma actual en lugar de casos codificados
712
- # Esto permite manejar ucraniano y cualquier otro idioma futuro
713
- fallback_translations = {
714
- 'en': {
715
- 'basic_recommendations': {
716
- 'vocabulary': ["Try enriching your vocabulary"],
717
- 'structure': ["Work on the structure of your sentences"],
718
- 'cohesion': ["Improve the connection between your ideas"],
719
- 'clarity': ["Try to express your ideas more clearly"],
720
- 'specific': ["Adapt your text according to its purpose"],
721
- },
722
- 'dimension_names': {
723
- 'vocabulary': 'Vocabulary',
724
- 'structure': 'Structure',
725
- 'cohesion': 'Cohesion',
726
- 'clarity': 'Clarity',
727
- 'general': 'General'
728
- },
729
- 'ui_text': {
730
- 'priority_intro': "This is where you should focus your efforts.",
731
- 'detailed_recommendations': "Detailed recommendations",
732
- 'save_button': "Save analysis",
733
- 'save_success': "Analysis saved successfully",
734
- 'save_error': "Error saving analysis",
735
- 'area_priority': "Priority area"
736
- }
737
- },
738
- 'uk': {
739
- 'basic_recommendations': {
740
-                     'vocabulary': ["Розширте свій словниковий запас"],
741
-                     'structure': ["Покращіть структуру ваших речень"],
742
-                     'cohesion': ["Покращіть зв'язок між вашими ідеями"],
743
-                     'clarity': ["Висловлюйте свої ідеї ясніше"],
744
-                     'specific': ["Адаптуйте свій текст відповідно до його мети"],
745
- },
746
- 'dimension_names': {
747
-                     'vocabulary': 'Словниковий запас',
748
-                     'structure': 'Структура',
749
-                     'cohesion': 'Зв\'язність',
750
-                     'clarity': 'Ясність',
751
-                     'general': 'Загальне'
752
- },
753
- 'ui_text': {
754
-                     'priority_intro': "Це область, де ви повинні зосередити свої зусилля.",
755
-                     'detailed_recommendations': "Детальні рекомендації",
756
-                     'save_button': "Зберегти аналіз",
757
-                     'save_success': "Аналіз успішно збережено",
758
-                     'save_error': "Помилка при збереженні аналізу",
759
-                     'area_priority': "Пріоритетна область"
760
- }
761
- },
762
- 'es': {
763
- 'basic_recommendations': {
764
- 'vocabulary': ["Intenta enriquecer tu vocabulario"],
765
- 'structure': ["Trabaja en la estructura de tus oraciones"],
766
-                     'cohesion': ["Mejora la conexión entre tus ideas"],
767
- 'clarity': ["Busca expresar tus ideas con mayor claridad"],
768
-                     'specific': ["Adapta tu texto según su propósito"],
769
- },
770
- 'dimension_names': {
771
- 'vocabulary': 'Vocabulario',
772
- 'structure': 'Estructura',
773
-                     'cohesion': 'Cohesión',
774
- 'clarity': 'Claridad',
775
- 'general': 'General'
776
- },
777
- 'ui_text': {
778
-                     'priority_intro': "Esta es el área donde debes concentrar tus esfuerzos.",
779
- 'detailed_recommendations': "Recomendaciones detalladas",
780
-                     'save_button': "Guardar análisis",
781
-                     'save_success': "Análisis guardado con éxito",
782
-                     'save_error': "Error al guardar el análisis",
783
-                     'area_priority': "Área prioritaria"
784
- }
785
- }
786
- }
787
-
788
-         # Usar el idioma actual si está disponible, o inglés, o español como última opción
789
- current_lang = fallback_translations.get(lang_code,
790
- fallback_translations.get('en',
791
- fallback_translations['es']))
792
-
793
- basic_recommendations = current_lang['basic_recommendations']
794
-
795
- return {
796
- 'vocabulary': basic_recommendations['vocabulary'],
797
- 'structure': basic_recommendations['structure'],
798
- 'cohesion': basic_recommendations['cohesion'],
799
- 'clarity': basic_recommendations['clarity'],
800
- 'specific': basic_recommendations['specific'],
801
- 'priority': {
802
- 'area': 'general',
803
-                 'tips': ["Busca retroalimentación específica de un tutor o profesor"]
804
- },
805
- 'dimension_names': current_lang['dimension_names'],
806
- 'ui_text': current_lang['ui_text']
807
- }
808
-
809
-
810
-
811
-
812
- #########################################################################
813
- #########################################################################
814
- # Funciones de generación de gráficos
815
- def generate_sentence_graphs(doc):
816
- """Genera visualizaciones de estructura de oraciones"""
817
- fig, ax = plt.subplots(figsize=(10, 6))
818
-     # Implementar visualización
819
- plt.close()
820
- return fig
821
-
822
- ############################################################################
823
- def generate_word_connections(doc):
824
- """Genera red de conexiones de palabras"""
825
- fig, ax = plt.subplots(figsize=(10, 6))
826
-     # Implementar visualización
827
- plt.close()
828
- return fig
829
-
830
- ############################################################################
831
- def generate_connection_paths(doc):
832
- """Genera patrones de conexi贸n"""
833
- fig, ax = plt.subplots(figsize=(10, 6))
834
-     # Implementar visualización
835
- plt.close()
836
- return fig
837
-
838
- ############################################################################
839
- def create_vocabulary_network(doc):
840
- """
841
- Genera el grafo de red de vocabulario.
842
- """
843
- G = nx.Graph()
844
-
845
- # Crear nodos para palabras significativas
846
- words = [token.text.lower() for token in doc if token.is_alpha and not token.is_stop]
847
- word_freq = Counter(words)
848
-
849
-     # Añadir nodos con tamaño basado en frecuencia
850
- for word, freq in word_freq.items():
851
- G.add_node(word, size=freq)
852
-
853
- # Crear conexiones basadas en co-ocurrencia
854
- window_size = 5
855
- for i in range(len(words) - window_size):
856
- window = words[i:i+window_size]
857
- for w1, w2 in combinations(set(window), 2):
858
- if G.has_edge(w1, w2):
859
- G[w1][w2]['weight'] += 1
860
- else:
861
- G.add_edge(w1, w2, weight=1)
862
-
863
-     # Crear visualización
864
- fig, ax = plt.subplots(figsize=(12, 8))
865
- pos = nx.spring_layout(G)
866
-
867
- # Dibujar nodos
868
- nx.draw_networkx_nodes(G, pos,
869
- node_size=[G.nodes[node]['size']*100 for node in G.nodes],
870
- node_color='lightblue',
871
- alpha=0.7)
872
-
873
- # Dibujar conexiones
874
- nx.draw_networkx_edges(G, pos,
875
- width=[G[u][v]['weight']*0.5 for u,v in G.edges],
876
- alpha=0.5)
877
-
878
-     # Añadir etiquetas
879
- nx.draw_networkx_labels(G, pos)
880
-
881
- plt.title("Red de Vocabulario")
882
- plt.axis('off')
883
- return fig
884
-
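
Since the module already imports streamlit, the returned figure can be shown directly in the app; a minimal sketch (doc is assumed to be a parsed spaCy Doc):

fig = create_vocabulary_network(doc)
st.pyplot(fig)  # render the matplotlib figure in the Streamlit app
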
885
- ############################################################################
886
- def create_syntax_complexity_graph(doc):
887
- """
888
-     Genera el diagrama de arco de complejidad sintáctica.
889
- Muestra la estructura de dependencias con colores basados en la complejidad.
890
- """
891
- try:
892
-         # Preparar datos para la visualización
893
- sentences = list(doc.sents)
894
- if not sentences:
895
- return None
896
-
897
-         # Crear figura para el gráfico
898
- fig, ax = plt.subplots(figsize=(12, len(sentences) * 2))
899
-
900
- # Colores para diferentes niveles de profundidad
901
- depth_colors = plt.cm.viridis(np.linspace(0, 1, 6))
902
-
903
- y_offset = 0
904
- max_x = 0
905
-
906
- for sent in sentences:
907
- words = [token.text for token in sent]
908
- x_positions = range(len(words))
909
- max_x = max(max_x, len(words))
910
-
911
- # Dibujar palabras
912
- plt.plot(x_positions, [y_offset] * len(words), 'k-', alpha=0.2)
913
- plt.scatter(x_positions, [y_offset] * len(words), alpha=0)
914
-
915
-             # Añadir texto
916
- for i, word in enumerate(words):
917
- plt.annotate(word, (i, y_offset), xytext=(0, -10),
918
- textcoords='offset points', ha='center')
919
-
920
- # Dibujar arcos de dependencia
921
- for token in sent:
922
- if token.dep_ != "ROOT":
923
- # Calcular profundidad de dependencia
924
- depth = 0
925
- current = token
926
- while current.head != current:
927
- depth += 1
928
- current = current.head
929
-
930
- # Determinar posiciones para el arco
931
- start = token.i - sent[0].i
932
- end = token.head.i - sent[0].i
933
-
934
- # Altura del arco basada en la distancia entre palabras
935
- height = 0.5 * abs(end - start)
936
-
937
- # Color basado en la profundidad
938
- color = depth_colors[min(depth, len(depth_colors)-1)]
939
-
940
- # Crear arco
941
- arc = patches.Arc((min(start, end) + abs(end - start)/2, y_offset),
942
- width=abs(end - start),
943
- height=height,
944
- angle=0,
945
- theta1=0,
946
- theta2=180,
947
- color=color,
948
- alpha=0.6)
949
- ax.add_patch(arc)
950
-
951
- y_offset -= 2
952
-
953
-         # Configurar el gráfico
954
- plt.xlim(-1, max_x)
955
- plt.ylim(y_offset - 1, 1)
956
- plt.axis('off')
957
-         plt.title("Complejidad Sintáctica")
958
-
959
- return fig
960
-
961
- except Exception as e:
962
- logger.error(f"Error en create_syntax_complexity_graph: {str(e)}")
963
- return None
964
-
965
- ############################################################################
966
- def create_cohesion_heatmap(doc):
967
-     """Genera un mapa de calor que muestra la cohesión entre párrafos/oraciones."""
968
- try:
969
- sentences = list(doc.sents)
970
- n_sentences = len(sentences)
971
-
972
- if n_sentences < 2:
973
- return None
974
-
975
- similarity_matrix = np.zeros((n_sentences, n_sentences))
976
-
977
- for i in range(n_sentences):
978
- for j in range(n_sentences):
979
- sent1_lemmas = {token.lemma_ for token in sentences[i]
980
- if token.is_alpha and not token.is_stop}
981
- sent2_lemmas = {token.lemma_ for token in sentences[j]
982
- if token.is_alpha and not token.is_stop}
983
-
984
- if sent1_lemmas and sent2_lemmas:
985
-                     intersection = len(sent1_lemmas & sent2_lemmas) # Corregido aquí
986
-                     union = len(sent1_lemmas | sent2_lemmas) # Y aquí
987
- similarity_matrix[i, j] = intersection / union if union > 0 else 0
988
-
989
-         # Crear visualización
990
- fig, ax = plt.subplots(figsize=(10, 8))
991
-
992
- sns.heatmap(similarity_matrix,
993
- cmap='YlOrRd',
994
- square=True,
995
- xticklabels=False,
996
- yticklabels=False,
997
-                     cbar_kws={'label': 'Cohesión'},
998
- ax=ax)
999
-
1000
-         plt.title("Mapa de Cohesión Textual")
1001
- plt.xlabel("Oraciones")
1002
- plt.ylabel("Oraciones")
1003
-
1004
- plt.tight_layout()
1005
- return fig
1006
-
1007
- except Exception as e:
1008
- logger.error(f"Error en create_cohesion_heatmap: {str(e)}")
1009
- return None
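
Each heatmap cell is the Jaccard similarity between the lemma sets of two sentences; a small hypothetical example:

# sent_i lemmas: {texto, claro, idea}; sent_j lemmas: {idea, conectar}
# similarity_matrix[i, j] = |intersection| / |union| = 1 / 4 = 0.25
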
 
1
+ #v3/modules/studentact/current_situation_analysis.py
2
+
3
+ import streamlit as st
4
+ import matplotlib.pyplot as plt
5
+ import networkx as nx
6
+ import seaborn as sns
7
+ from collections import Counter
8
+ from itertools import combinations
9
+ import numpy as np
10
+ import matplotlib.patches as patches
11
+ import logging
12
+ import os
13
+
14
+ # 2. Configuración básica del logging
15
+ logging.basicConfig(
16
+ level=logging.INFO,
17
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
18
+ handlers=[
19
+ logging.StreamHandler(),
20
+ logging.FileHandler('app.log')
21
+ ]
22
+ )
23
+
24
+ # 3. Obtener el logger específico para este módulo
25
+ logger = logging.getLogger(__name__)
26
+
27
+ #########################################################################
28
+
29
+ def correlate_metrics(scores):
30
+ """
31
+     Ajusta los scores para mantener correlaciones lógicas entre métricas.
32
+
33
+ Args:
34
+         scores: dict con scores iniciales de vocabulario, estructura, cohesión y claridad
35
+
36
+ Returns:
37
+ dict con scores ajustados
38
+ """
39
+ try:
40
+         # 1. Correlación estructura-cohesión
41
+         # La cohesión no puede ser menor que estructura * 0.7
42
+ min_cohesion = scores['structure']['normalized_score'] * 0.7
43
+ if scores['cohesion']['normalized_score'] < min_cohesion:
44
+ scores['cohesion']['normalized_score'] = min_cohesion
45
+
46
+         # 2. Correlación vocabulario-cohesión
47
+         # La cohesión léxica depende del vocabulario
48
+ vocab_influence = scores['vocabulary']['normalized_score'] * 0.6
49
+ scores['cohesion']['normalized_score'] = max(
50
+ scores['cohesion']['normalized_score'],
51
+ vocab_influence
52
+ )
53
+
54
+         # 3. Correlación cohesión-claridad
55
+         # La claridad no puede superar cohesión * 1.2
56
+ max_clarity = scores['cohesion']['normalized_score'] * 1.2
57
+ if scores['clarity']['normalized_score'] > max_clarity:
58
+ scores['clarity']['normalized_score'] = max_clarity
59
+
60
+         # 4. Correlación estructura-claridad
61
+ # La claridad no puede superar estructura * 1.1
62
+ struct_max_clarity = scores['structure']['normalized_score'] * 1.1
63
+ scores['clarity']['normalized_score'] = min(
64
+ scores['clarity']['normalized_score'],
65
+ struct_max_clarity
66
+ )
67
+
68
+ # Normalizar todos los scores entre 0 y 1
69
+ for metric in scores:
70
+ scores[metric]['normalized_score'] = max(0.0, min(1.0, scores[metric]['normalized_score']))
71
+
72
+ return scores
73
+
74
+ except Exception as e:
75
+ logger.error(f"Error en correlate_metrics: {str(e)}")
76
+ return scores
77
+
78
+ ##########################################################################
79
+
80
+ def analyze_text_dimensions(doc):
81
+ """
82
+ Analiza las dimensiones principales del texto manteniendo correlaciones l贸gicas.
83
+ """
84
+ try:
85
+ # Obtener scores iniciales
86
+ vocab_score, vocab_details = analyze_vocabulary_diversity(doc)
87
+ struct_score = analyze_structure(doc)
88
+ cohesion_score = analyze_cohesion(doc)
89
+ clarity_score, clarity_details = analyze_clarity(doc)
90
+
91
+ # Crear diccionario de scores inicial
92
+ scores = {
93
+ 'vocabulary': {
94
+ 'normalized_score': vocab_score,
95
+ 'details': vocab_details
96
+ },
97
+ 'structure': {
98
+ 'normalized_score': struct_score,
99
+ 'details': None
100
+ },
101
+ 'cohesion': {
102
+ 'normalized_score': cohesion_score,
103
+ 'details': None
104
+ },
105
+ 'clarity': {
106
+ 'normalized_score': clarity_score,
107
+ 'details': clarity_details
108
+ }
109
+ }
110
+
111
+ # Ajustar correlaciones entre m茅tricas
112
+ adjusted_scores = correlate_metrics(scores)
113
+
114
+ # Logging para diagn贸stico
115
+ logger.info(f"""
116
+ Scores originales vs ajustados:
117
+ Vocabulario: {vocab_score:.2f} -> {adjusted_scores['vocabulary']['normalized_score']:.2f}
118
+ Estructura: {struct_score:.2f} -> {adjusted_scores['structure']['normalized_score']:.2f}
119
+ Cohesi贸n: {cohesion_score:.2f} -> {adjusted_scores['cohesion']['normalized_score']:.2f}
120
+ Claridad: {clarity_score:.2f} -> {adjusted_scores['clarity']['normalized_score']:.2f}
121
+ """)
122
+
123
+ return adjusted_scores
124
+
125
+ except Exception as e:
126
+ logger.error(f"Error en analyze_text_dimensions: {str(e)}")
127
+ return {
128
+ 'vocabulary': {'normalized_score': 0.0, 'details': {}},
129
+ 'structure': {'normalized_score': 0.0, 'details': {}},
130
+ 'cohesion': {'normalized_score': 0.0, 'details': {}},
131
+ 'clarity': {'normalized_score': 0.0, 'details': {}}
132
+ }
133
+
134
+
135
+
136
+ #############################################################################################
137
+
138
+ def analyze_clarity(doc):
139
+ """
140
+ Analiza la claridad del texto considerando m煤ltiples factores.
141
+ """
142
+ try:
143
+ sentences = list(doc.sents)
144
+ if not sentences:
145
+ return 0.0, {}
146
+
147
+ # 1. Longitud de oraciones
148
+ sentence_lengths = [len(sent) for sent in sentences]
149
+ avg_length = sum(sentence_lengths) / len(sentences)
150
+
151
+ # Normalizar usando los umbrales definidos para clarity
152
+ length_score = normalize_score(
153
+ value=avg_length,
154
+ metric_type='clarity',
155
+ optimal_length=20, # Una oraci贸n ideal tiene ~20 palabras
156
+ min_threshold=0.60, # Consistente con METRIC_THRESHOLDS
157
+ target_threshold=0.75 # Consistente con METRIC_THRESHOLDS
158
+ )
159
+
160
+ # 2. An谩lisis de conectores
161
+ connector_count = 0
162
+ connector_weights = {
163
+ 'CCONJ': 1.0, # Coordinantes
164
+ 'SCONJ': 1.2, # Subordinantes
165
+ 'ADV': 0.8 # Adverbios conectivos
166
+ }
167
+
168
+ for token in doc:
169
+ if token.pos_ in connector_weights and token.dep_ in ['cc', 'mark', 'advmod']:
170
+ connector_count += connector_weights[token.pos_]
171
+
172
+ # Normalizar conectores por oraci贸n
173
+ connectors_per_sentence = connector_count / len(sentences) if sentences else 0
174
+ connector_score = normalize_score(
175
+ value=connectors_per_sentence,
176
+ metric_type='clarity',
177
+ optimal_connections=1.5, # ~1.5 conectores por oraci贸n es 贸ptimo
178
+ min_threshold=0.60,
179
+ target_threshold=0.75
180
+ )
181
+
182
+ # 3. Complejidad estructural
183
+ clause_count = 0
184
+ for sent in sentences:
185
+ verbs = [token for token in sent if token.pos_ == 'VERB']
186
+ clause_count += len(verbs)
187
+
188
+ complexity_raw = clause_count / len(sentences) if sentences else 0
189
+ complexity_score = normalize_score(
190
+ value=complexity_raw,
191
+ metric_type='clarity',
192
+ optimal_depth=2.0, # ~2 cl谩usulas por oraci贸n es 贸ptimo
193
+ min_threshold=0.60,
194
+ target_threshold=0.75
195
+ )
196
+
197
+ # 4. Densidad l茅xica
198
+ content_words = len([token for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ', 'ADV']])
199
+ total_words = len([token for token in doc if token.is_alpha])
200
+ density = content_words / total_words if total_words > 0 else 0
201
+
202
+ density_score = normalize_score(
203
+ value=density,
204
+ metric_type='clarity',
205
+ optimal_connections=0.6, # 60% de palabras de contenido es 贸ptimo
206
+ min_threshold=0.60,
207
+ target_threshold=0.75
208
+ )
209
+
210
+ # Score final ponderado
211
+ weights = {
212
+ 'length': 0.3,
213
+ 'connectors': 0.3,
214
+ 'complexity': 0.2,
215
+ 'density': 0.2
216
+ }
217
+
218
+ clarity_score = (
219
+ weights['length'] * length_score +
220
+ weights['connectors'] * connector_score +
221
+ weights['complexity'] * complexity_score +
222
+ weights['density'] * density_score
223
+ )
224
+
225
+ details = {
226
+ 'length_score': length_score,
227
+ 'connector_score': connector_score,
228
+ 'complexity_score': complexity_score,
229
+ 'density_score': density_score,
230
+ 'avg_sentence_length': avg_length,
231
+ 'connectors_per_sentence': connectors_per_sentence,
232
+ 'density': density
233
+ }
234
+
235
+ # Agregar logging para diagn贸stico
236
+ logger.info(f"""
237
+ Scores de Claridad:
238
+ - Longitud: {length_score:.2f} (avg={avg_length:.1f} palabras)
239
+ - Conectores: {connector_score:.2f} (avg={connectors_per_sentence:.1f} por oraci贸n)
240
+ - Complejidad: {complexity_score:.2f} (avg={complexity_raw:.1f} cl谩usulas)
241
+ - Densidad: {density_score:.2f} ({density*100:.1f}% palabras de contenido)
242
+ - Score Final: {clarity_score:.2f}
243
+ """)
244
+
245
+ return clarity_score, details
246
+
247
+ except Exception as e:
248
+ logger.error(f"Error en analyze_clarity: {str(e)}")
249
+ return 0.0, {}
250
+
251
+ #########################################################################
252
+ def analyze_vocabulary_diversity(doc):
253
+ """An谩lisis mejorado de la diversidad y calidad del vocabulario"""
254
+ try:
255
+ # 1. An谩lisis b谩sico de diversidad
256
+ unique_lemmas = {token.lemma_ for token in doc if token.is_alpha}
257
+ total_words = len([token for token in doc if token.is_alpha])
258
+ basic_diversity = len(unique_lemmas) / total_words if total_words > 0 else 0
259
+
260
+ # 2. An谩lisis de registro
261
+ academic_words = 0
262
+ narrative_words = 0
263
+ technical_terms = 0
264
+
265
+ # Clasificar palabras por registro
266
+ for token in doc:
267
+ if token.is_alpha:
268
+ # Detectar t茅rminos acad茅micos/t茅cnicos
269
+ if token.pos_ in ['NOUN', 'VERB', 'ADJ']:
270
+ if any(parent.pos_ == 'NOUN' for parent in token.ancestors):
271
+ technical_terms += 1
272
+ # Detectar palabras narrativas
273
+ if token.pos_ in ['VERB', 'ADV'] and token.dep_ in ['ROOT', 'advcl']:
274
+ narrative_words += 1
275
+
276
+ # 3. An谩lisis de complejidad sint谩ctica
277
+ avg_sentence_length = sum(len(sent) for sent in doc.sents) / len(list(doc.sents))
278
+
279
+ # 4. Calcular score ponderado
280
+ weights = {
281
+ 'diversity': 0.3,
282
+ 'technical': 0.3,
283
+ 'narrative': 0.2,
284
+ 'complexity': 0.2
285
+ }
286
+
287
+ scores = {
288
+ 'diversity': basic_diversity,
289
+ 'technical': technical_terms / total_words if total_words > 0 else 0,
290
+ 'narrative': narrative_words / total_words if total_words > 0 else 0,
291
+ 'complexity': min(1.0, avg_sentence_length / 20) # Normalizado a 20 palabras
292
+ }
293
+
294
+ # Score final ponderado
295
+ final_score = sum(weights[key] * scores[key] for key in weights)
296
+
297
+ # Informaci贸n adicional para diagn贸stico
298
+ details = {
299
+ 'text_type': 'narrative' if scores['narrative'] > scores['technical'] else 'academic',
300
+ 'scores': scores
301
+ }
302
+
303
+ return final_score, details
304
+
305
+ except Exception as e:
306
+ logger.error(f"Error en analyze_vocabulary_diversity: {str(e)}")
307
+ return 0.0, {}
308
+
309
+ #########################################################################
310
+ def analyze_cohesion(doc):
311
+ """Analiza la cohesi贸n textual"""
312
+ try:
313
+ sentences = list(doc.sents)
314
+ if len(sentences) < 2:
315
+ logger.warning("Texto demasiado corto para an谩lisis de cohesi贸n")
316
+ return 0.0
317
+
318
+ # 1. An谩lisis de conexiones l茅xicas
319
+ lexical_connections = 0
320
+ total_possible_connections = 0
321
+
322
+ for i in range(len(sentences)-1):
323
+ # Obtener lemmas significativos (no stopwords)
324
+ sent1_words = {token.lemma_ for token in sentences[i]
325
+ if token.is_alpha and not token.is_stop}
326
+ sent2_words = {token.lemma_ for token in sentences[i+1]
327
+ if token.is_alpha and not token.is_stop}
328
+
329
+ if sent1_words and sent2_words: # Verificar que ambos conjuntos no est茅n vac铆os
330
+ intersection = len(sent1_words.intersection(sent2_words))
331
+ total_possible = min(len(sent1_words), len(sent2_words))
332
+
333
+ if total_possible > 0:
334
+ lexical_score = intersection / total_possible
335
+ lexical_connections += lexical_score
336
+ total_possible_connections += 1
337
+
338
+ # 2. An谩lisis de conectores
339
+ connector_count = 0
340
+ connector_types = {
341
+ 'CCONJ': 1.0, # Coordinantes
342
+ 'SCONJ': 1.2, # Subordinantes
343
+ 'ADV': 0.8 # Adverbios conectivos
344
+ }
345
+
346
+ for token in doc:
347
+ if (token.pos_ in connector_types and
348
+ token.dep_ in ['cc', 'mark', 'advmod'] and
349
+ not token.is_stop):
350
+ connector_count += connector_types[token.pos_]
351
+
352
+ # 3. C谩lculo de scores normalizados
353
+ if total_possible_connections > 0:
354
+ lexical_cohesion = lexical_connections / total_possible_connections
355
+ else:
356
+ lexical_cohesion = 0
357
+
358
+ if len(sentences) > 1:
359
+ connector_cohesion = min(1.0, connector_count / (len(sentences) - 1))
360
+ else:
361
+ connector_cohesion = 0
362
+
363
+ # 4. Score final ponderado
364
+ weights = {
365
+ 'lexical': 0.7,
366
+ 'connectors': 0.3
367
+ }
368
+
369
+ cohesion_score = (
370
+ weights['lexical'] * lexical_cohesion +
371
+ weights['connectors'] * connector_cohesion
372
+ )
373
+
374
+ # 5. Logging para diagn贸stico
375
+ logger.info(f"""
376
+ An谩lisis de Cohesi贸n:
377
+ - Conexiones l茅xicas encontradas: {lexical_connections}
378
+ - Conexiones posibles: {total_possible_connections}
379
+ - Lexical cohesion score: {lexical_cohesion}
380
+ - Conectores encontrados: {connector_count}
381
+ - Connector cohesion score: {connector_cohesion}
382
+ - Score final: {cohesion_score}
383
+ """)
384
+
385
+ return cohesion_score
386
+
387
+ except Exception as e:
388
+ logger.error(f"Error en analyze_cohesion: {str(e)}")
389
+ return 0.0
390
+
391
+ #########################################################################
392
+ def analyze_structure(doc):
393
+ try:
394
+ if len(doc) == 0:
395
+ return 0.0
396
+
397
+ structure_scores = []
398
+ for token in doc:
399
+ if token.dep_ == 'ROOT':
400
+ result = get_dependency_depths(token)
401
+ structure_scores.append(result['final_score'])
402
+
403
+ if not structure_scores:
404
+ return 0.0
405
+
406
+ return min(1.0, sum(structure_scores) / len(structure_scores))
407
+
408
+ except Exception as e:
409
+ logger.error(f"Error en analyze_structure: {str(e)}")
410
+ return 0.0
411
+
412
+ #########################################################################
413
+ # Funciones auxiliares de an谩lisis
414
+ def get_dependency_depths(token, depth=0, analyzed_tokens=None):
415
+ """
416
+ Analiza la profundidad y calidad de las relaciones de dependencia.
417
+
418
+ Args:
419
+ token: Token a analizar
420
+ depth: Profundidad actual en el 谩rbol
421
+ analyzed_tokens: Set para evitar ciclos en el an谩lisis
422
+
423
+ Returns:
424
+ dict: Informaci贸n detallada sobre las dependencias
425
+ - depths: Lista de profundidades
426
+ - relations: Diccionario con tipos de relaciones encontradas
427
+ - complexity_score: Puntuaci贸n de complejidad
428
+ """
429
+ if analyzed_tokens is None:
430
+ analyzed_tokens = set()
431
+
432
+ # Evitar ciclos
433
+ if token.i in analyzed_tokens:
434
+ return {
435
+ 'depths': [],
436
+ 'relations': {},
437
+ 'complexity_score': 0
438
+ }
439
+
440
+ analyzed_tokens.add(token.i)
441
+
442
+ # Pesos para diferentes tipos de dependencias
443
+ dependency_weights = {
444
+ # Dependencias principales
445
+ 'nsubj': 1.2, # Sujeto nominal
446
+ 'obj': 1.1, # Objeto directo
447
+ 'iobj': 1.1, # Objeto indirecto
448
+ 'ROOT': 1.3, # Ra铆z
449
+
450
+ # Modificadores
451
+ 'amod': 0.8, # Modificador adjetival
452
+ 'advmod': 0.8, # Modificador adverbial
453
+ 'nmod': 0.9, # Modificador nominal
454
+
455
+ # Estructuras complejas
456
+ 'csubj': 1.4, # Cl谩usula como sujeto
457
+ 'ccomp': 1.3, # Complemento clausal
458
+ 'xcomp': 1.2, # Complemento clausal abierto
459
+ 'advcl': 1.2, # Cl谩usula adverbial
460
+
461
+ # Coordinaci贸n y subordinaci贸n
462
+ 'conj': 1.1, # Conjunci贸n
463
+ 'cc': 0.7, # Coordinaci贸n
464
+ 'mark': 0.8, # Marcador
465
+
466
+ # Otros
467
+ 'det': 0.5, # Determinante
468
+ 'case': 0.5, # Caso
469
+ 'punct': 0.1 # Puntuaci贸n
470
+ }
471
+
472
+ # Inicializar resultados
473
+ current_result = {
474
+ 'depths': [depth],
475
+ 'relations': {token.dep_: 1},
476
+ 'complexity_score': dependency_weights.get(token.dep_, 0.5) * (depth + 1)
477
+ }
478
+
479
+ # Analizar hijos recursivamente
480
+ for child in token.children:
481
+ child_result = get_dependency_depths(child, depth + 1, analyzed_tokens)
482
+
483
+ # Combinar profundidades
484
+ current_result['depths'].extend(child_result['depths'])
485
+
486
+ # Combinar relaciones
487
+ for rel, count in child_result['relations'].items():
488
+ current_result['relations'][rel] = current_result['relations'].get(rel, 0) + count
489
+
490
+ # Acumular score de complejidad
491
+ current_result['complexity_score'] += child_result['complexity_score']
492
+
493
+ # Calcular m茅tricas adicionales
494
+ current_result['max_depth'] = max(current_result['depths'])
495
+ current_result['avg_depth'] = sum(current_result['depths']) / len(current_result['depths'])
496
+ current_result['relation_diversity'] = len(current_result['relations'])
497
+
498
+ # Calcular score ponderado por tipo de estructura
499
+ structure_bonus = 0
500
+
501
+ # Bonus por estructuras complejas
502
+ if 'csubj' in current_result['relations'] or 'ccomp' in current_result['relations']:
503
+ structure_bonus += 0.3
504
+
505
+ # Bonus por coordinaci贸n balanceada
506
+ if 'conj' in current_result['relations'] and 'cc' in current_result['relations']:
507
+ structure_bonus += 0.2
508
+
509
+ # Bonus por modificaci贸n rica
510
+ if len(set(['amod', 'advmod', 'nmod']) & set(current_result['relations'])) >= 2:
511
+ structure_bonus += 0.2
512
+
513
+ current_result['final_score'] = (
514
+ current_result['complexity_score'] * (1 + structure_bonus)
515
+ )
516
+
517
+ return current_result
518
+
519
+ #########################################################################
520
+ def normalize_score(value, metric_type,
521
+ min_threshold=0.0, target_threshold=1.0,
522
+ range_factor=2.0, optimal_length=None,
523
+ optimal_connections=None, optimal_depth=None):
524
+ """
525
+ Normaliza un valor considerando umbrales espec铆ficos por tipo de m茅trica.
526
+
527
+ Args:
528
+ value: Valor a normalizar
529
+ metric_type: Tipo de m茅trica ('vocabulary', 'structure', 'cohesion', 'clarity')
530
+ min_threshold: Valor m铆nimo aceptable
531
+ target_threshold: Valor objetivo
532
+ range_factor: Factor para ajustar el rango
533
+ optimal_length: Longitud 贸ptima (opcional)
534
+ optimal_connections: N煤mero 贸ptimo de conexiones (opcional)
535
+ optimal_depth: Profundidad 贸ptima de estructura (opcional)
536
+
537
+ Returns:
538
+ float: Valor normalizado entre 0 y 1
539
+ """
540
+ try:
541
+ # Definir umbrales por tipo de m茅trica
542
+ METRIC_THRESHOLDS = {
543
+ 'vocabulary': {
544
+ 'min': 0.60,
545
+ 'target': 0.75,
546
+ 'range_factor': 1.5
547
+ },
548
+ 'structure': {
549
+ 'min': 0.65,
550
+ 'target': 0.80,
551
+ 'range_factor': 1.8
552
+ },
553
+ 'cohesion': {
554
+ 'min': 0.55,
555
+ 'target': 0.70,
556
+ 'range_factor': 1.6
557
+ },
558
+ 'clarity': {
559
+ 'min': 0.60,
560
+ 'target': 0.75,
561
+ 'range_factor': 1.7
562
+ }
563
+ }
564
+
565
+ # Validar valores negativos o cero
566
+ if value < 0:
567
+ logger.warning(f"Valor negativo recibido: {value}")
568
+ return 0.0
569
+
570
+ # Manejar caso donde el valor es cero
571
+ if value == 0:
572
+ logger.warning("Valor cero recibido")
573
+ return 0.0
574
+
575
+ # Obtener umbrales espec铆ficos para el tipo de m茅trica
576
+ thresholds = METRIC_THRESHOLDS.get(metric_type, {
577
+ 'min': min_threshold,
578
+ 'target': target_threshold,
579
+ 'range_factor': range_factor
580
+ })
581
+
582
+ # Identificar el valor de referencia a usar
583
+ if optimal_depth is not None:
584
+ reference = optimal_depth
585
+ elif optimal_connections is not None:
586
+ reference = optimal_connections
587
+ elif optimal_length is not None:
588
+ reference = optimal_length
589
+ else:
590
+ reference = thresholds['target']
591
+
592
+ # Validar valor de referencia
593
+ if reference <= 0:
594
+ logger.warning(f"Valor de referencia inv谩lido: {reference}")
595
+ return 0.0
596
+
597
+ # Calcular score basado en umbrales
598
+ if value < thresholds['min']:
599
+ # Valor por debajo del m铆nimo
600
+ score = (value / thresholds['min']) * 0.5 # M谩ximo 0.5 para valores bajo el m铆nimo
601
+ elif value < thresholds['target']:
602
+ # Valor entre m铆nimo y objetivo
603
+ range_size = thresholds['target'] - thresholds['min']
604
+ progress = (value - thresholds['min']) / range_size
605
+ score = 0.5 + (progress * 0.5) # Escala entre 0.5 y 1.0
606
+ else:
607
+ # Valor alcanza o supera el objetivo
608
+ score = 1.0
609
+
610
+ # Penalizar valores muy por encima del objetivo
611
+ if value > (thresholds['target'] * thresholds['range_factor']):
612
+ excess = (value - thresholds['target']) / (thresholds['target'] * thresholds['range_factor'])
613
+ score = max(0.7, 1.0 - excess) # No bajar de 0.7 para valores altos
614
+
615
+ # Asegurar que el resultado est茅 entre 0 y 1
616
+ return max(0.0, min(1.0, score))
617
+
618
+ except Exception as e:
619
+ logger.error(f"Error en normalize_score: {str(e)}")
620
+ return 0.0
621
+
622
+ #########################################################################
623
+ #########################################################################
624
+
625
+ def generate_recommendations(metrics, text_type, lang_code='es'):
626
+ """
627
+ Genera recomendaciones personalizadas basadas en las m茅tricas del texto y el tipo de texto.
628
+
629
+ Args:
630
+ metrics: Diccionario con las m茅tricas analizadas
631
+ text_type: Tipo de texto ('academic_article', 'student_essay', 'general_communication')
632
+ lang_code: C贸digo del idioma para las recomendaciones (es, en, uk)
633
+
634
+ Returns:
635
+ dict: Recomendaciones organizadas por categor铆a en el idioma correspondiente
636
+ """
637
+ try:
638
+ # A帽adir debug log para verificar el c贸digo de idioma recibido
639
+ logger.info(f"generate_recommendations llamado con idioma: {lang_code}")
640
+
641
+ # Comprobar que importamos RECOMMENDATIONS correctamente
642
+ logger.info(f"Idiomas disponibles en RECOMMENDATIONS: {list(RECOMMENDATIONS.keys())}")
643
+
644
+ # Obtener umbrales seg煤n el tipo de texto
645
+ thresholds = TEXT_TYPES[text_type]['thresholds']
646
+
647
+ # Verificar que el idioma est茅 soportado, usar espa帽ol como respaldo
648
+ if lang_code not in RECOMMENDATIONS:
649
+ logger.warning(f"Idioma {lang_code} no soportado para recomendaciones, usando espa帽ol")
650
+ lang_code = 'es'
651
+
652
+ # Obtener traducciones para el idioma seleccionado
653
+ translations = RECOMMENDATIONS[lang_code]
654
+
655
+ # Initialize the recommendations dictionary
656
+ recommendations = {
657
+ 'vocabulary': [],
658
+ 'structure': [],
659
+ 'cohesion': [],
660
+ 'clarity': [],
661
+ 'specific': [],
662
+ 'priority': {
663
+ 'area': 'general',
664
+ 'tips': []
665
+ },
666
+ 'text_type_name': translations['text_types'][text_type],
667
+ 'dimension_names': translations['dimension_names'],
668
+ 'ui_text': {
669
+ 'priority_intro': translations['priority_intro'],
670
+ 'detailed_recommendations': translations['detailed_recommendations'],
671
+ 'save_button': translations['save_button'],
672
+ 'save_success': translations['save_success'],
673
+ 'save_error': translations['save_error'],
674
+ 'area_priority': translations['area_priority']
675
+ }
676
+ }
677
+
678
+ # Determine the level for each dimension and assign recommendations
679
+ dimensions = ['vocabulary', 'structure', 'cohesion', 'clarity']
680
+ scores = {}
681
+
682
+ for dim in dimensions:
683
+ score = metrics[dim]['normalized_score']
684
+ scores[dim] = score
685
+
686
+ # Determine the level (low, medium, high)
687
+ if score < thresholds[dim]['min']:
688
+ level = 'low'
689
+ elif score < thresholds[dim]['target']:
690
+ level = 'medium'
691
+ else:
692
+ level = 'high'
693
+
694
+ # Assign the recommendations for that level
695
+ recommendations[dim] = translations[dim][level]
696
+
697
+ # Assign text-type-specific recommendations
698
+ recommendations['specific'] = translations[text_type]
699
+
700
+ # Determine the priority area (the one with the lowest score)
701
+ priority_dimension = min(scores, key=scores.get)
702
+ recommendations['priority']['area'] = priority_dimension
703
+ recommendations['priority']['tips'] = recommendations[priority_dimension]
704
+
705
+ logger.info(f"Generadas recomendaciones en {lang_code} para texto tipo {text_type}")
706
+ return recommendations
707
+
708
+ except Exception as e:
709
+ logger.error(f"Error en generate_recommendations: {str(e)}")
710
+
711
+ # Use an approach based on the current language instead of hard-coded cases
712
+ # This also covers Ukrainian and any other future language
713
+ fallback_translations = {
714
+ 'en': {
715
+ 'basic_recommendations': {
716
+ 'vocabulary': ["Try enriching your vocabulary"],
717
+ 'structure': ["Work on the structure of your sentences"],
718
+ 'cohesion': ["Improve the connection between your ideas"],
719
+ 'clarity': ["Try to express your ideas more clearly"],
720
+ 'specific': ["Adapt your text according to its purpose"],
721
+ },
722
+ 'dimension_names': {
723
+ 'vocabulary': 'Vocabulary',
724
+ 'structure': 'Structure',
725
+ 'cohesion': 'Cohesion',
726
+ 'clarity': 'Clarity',
727
+ 'general': 'General'
728
+ },
729
+ 'ui_text': {
730
+ 'priority_intro': "This is where you should focus your efforts.",
731
+ 'detailed_recommendations': "Detailed recommendations",
732
+ 'save_button': "Save analysis",
733
+ 'save_success': "Analysis saved successfully",
734
+ 'save_error': "Error saving analysis",
735
+ 'area_priority': "Priority area"
736
+ }
737
+ },
738
+ 'uk': {
739
+ 'basic_recommendations': {
740
+ 'vocabulary': ["Розширте свій словниковий запас"],
741
+ 'structure': ["Покращіть структуру ваших речень"],
742
+ 'cohesion': ["Покращіть зв'язок між вашими ідеями"],
743
+ 'clarity': ["Висловлюйте свої ідеї ясніше"],
744
+ 'specific': ["Адаптуйте свій текст відповідно до його мети"],
745
+ },
746
+ 'dimension_names': {
747
+ 'vocabulary': 'Словниковий запас',
748
+ 'structure': 'Структура',
749
+ 'cohesion': 'Зв\'язність',
750
+ 'clarity': 'Ясність',
751
+ 'general': 'Загальне'
752
+ },
753
+ 'ui_text': {
754
+ 'priority_intro': "Це область, де ви повинні зосередити свої зусилля.",
755
+ 'detailed_recommendations': "Детальні рекомендації",
756
+ 'save_button': "Зберегти аналіз",
757
+ 'save_success': "Аналіз успішно збережено",
758
+ 'save_error': "Помилка при збереженні аналізу",
759
+ 'area_priority': "Пріоритетна область"
760
+ }
761
+ },
762
+ 'es': {
763
+ 'basic_recommendations': {
764
+ 'vocabulary': ["Intenta enriquecer tu vocabulario"],
765
+ 'structure': ["Trabaja en la estructura de tus oraciones"],
766
+ 'cohesion': ["Mejora la conexión entre tus ideas"],
767
+ 'clarity': ["Busca expresar tus ideas con mayor claridad"],
768
+ 'specific': ["Adapta tu texto según su propósito"],
769
+ },
770
+ 'dimension_names': {
771
+ 'vocabulary': 'Vocabulario',
772
+ 'structure': 'Estructura',
773
+ 'cohesion': 'Cohesión',
774
+ 'clarity': 'Claridad',
775
+ 'general': 'General'
776
+ },
777
+ 'ui_text': {
778
+ 'priority_intro': "Esta es el área donde debes concentrar tus esfuerzos.",
779
+ 'detailed_recommendations': "Recomendaciones detalladas",
780
+ 'save_button': "Guardar análisis",
781
+ 'save_success': "Análisis guardado con éxito",
782
+ 'save_error': "Error al guardar el análisis",
783
+ 'area_priority': "Área prioritaria"
784
+ }
785
+ }
786
+ }
787
+
788
+ # Use the current language if available, then English, with Spanish as the last resort
789
+ current_lang = fallback_translations.get(lang_code,
790
+ fallback_translations.get('en',
791
+ fallback_translations['es']))
792
+
793
+ basic_recommendations = current_lang['basic_recommendations']
794
+
795
+ return {
796
+ 'vocabulary': basic_recommendations['vocabulary'],
797
+ 'structure': basic_recommendations['structure'],
798
+ 'cohesion': basic_recommendations['cohesion'],
799
+ 'clarity': basic_recommendations['clarity'],
800
+ 'specific': basic_recommendations['specific'],
801
+ 'priority': {
802
+ 'area': 'general',
803
+ 'tips': basic_recommendations['specific'] # language-appropriate generic tip
804
+ },
805
+ 'dimension_names': current_lang['dimension_names'],
806
+ 'ui_text': current_lang['ui_text'],
+ 'text_type_name': text_type # raw key; full translations are unavailable in this fallback
807
+ }
808
+
809
+
810
+
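+ # Usage sketch (hypothetical 'metrics' shaped like the analyze_text_dimensions output):
+ # recs = generate_recommendations(metrics, 'student_essay', lang_code='en')
+ # st.write(recs['ui_text']['area_priority'], recs['dimension_names'][recs['priority']['area']])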
811
+
812
+ #########################################################################
813
+ #########################################################################
814
+ # Chart generation functions
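+ # NOTE: the next three generators are placeholders; each currently returns an empty figure.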
815
+ def generate_sentence_graphs(doc):
816
+ """Generates sentence-structure visualizations"""
817
+ fig, ax = plt.subplots(figsize=(10, 6))
818
+ # TODO: implement the visualization
819
+ plt.close()
820
+ return fig
821
+
822
+ ############################################################################
823
+ def generate_word_connections(doc):
824
+ """Generates a word-connection network"""
825
+ fig, ax = plt.subplots(figsize=(10, 6))
826
+ # TODO: implement the visualization
827
+ plt.close()
828
+ return fig
829
+
830
+ ############################################################################
831
+ def generate_connection_paths(doc):
832
+ """Generates connection patterns"""
833
+ fig, ax = plt.subplots(figsize=(10, 6))
834
+ # TODO: implement the visualization
835
+ plt.close()
836
+ return fig
837
+
838
+ ############################################################################
839
+ def create_vocabulary_network(doc):
840
+ """
841
+ Generates the vocabulary network graph.
842
+ """
843
+ G = nx.Graph()
844
+
845
+ # Create nodes for meaningful words
846
+ words = [token.text.lower() for token in doc if token.is_alpha and not token.is_stop]
847
+ word_freq = Counter(words)
848
+
849
+ # Add nodes sized by frequency
850
+ for word, freq in word_freq.items():
851
+ G.add_node(word, size=freq)
852
+
853
+ # Create connections based on co-occurrence
854
+ window_size = 5
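+ # Words appearing within the same 5-token window get an edge (or a heavier one)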
855
+ for i in range(len(words) - window_size + 1): # include the final window
856
+ window = words[i:i+window_size]
857
+ for w1, w2 in combinations(set(window), 2):
858
+ if G.has_edge(w1, w2):
859
+ G[w1][w2]['weight'] += 1
860
+ else:
861
+ G.add_edge(w1, w2, weight=1)
862
+
863
+ # Create the visualization
864
+ fig, ax = plt.subplots(figsize=(12, 8))
865
+ pos = nx.spring_layout(G)
866
+
867
+ # Draw nodes (size proportional to frequency)
868
+ nx.draw_networkx_nodes(G, pos,
869
+ node_size=[G.nodes[node]['size']*100 for node in G.nodes],
870
+ node_color='lightblue',
871
+ alpha=0.7)
872
+
873
+ # Draw edges (width proportional to co-occurrence weight)
874
+ nx.draw_networkx_edges(G, pos,
875
+ width=[G[u][v]['weight']*0.5 for u,v in G.edges],
876
+ alpha=0.5)
877
+
878
+ # Add labels
879
+ nx.draw_networkx_labels(G, pos)
880
+
881
+ plt.title("Red de Vocabulario")
882
+ plt.axis('off')
883
+ return fig
884
+
885
+ ############################################################################
886
+ def create_syntax_complexity_graph(doc):
887
+ """
888
+ Generates the syntactic-complexity arc diagram.
889
+ Shows the dependency structure with colors based on complexity.
890
+ """
891
+ try:
892
+ # Prepare the data for the visualization
893
+ sentences = list(doc.sents)
894
+ if not sentences:
895
+ return None
896
+
897
+ # Create the figure for the chart
898
+ fig, ax = plt.subplots(figsize=(12, len(sentences) * 2))
899
+
900
+ # Colors for the different depth levels
901
+ depth_colors = plt.cm.viridis(np.linspace(0, 1, 6))
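+ # Six depth levels are distinguished; deeper tokens reuse the last color (capped via min() below)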
902
+
903
+ y_offset = 0
904
+ max_x = 0
905
+
906
+ for sent in sentences:
907
+ words = [token.text for token in sent]
908
+ x_positions = range(len(words))
909
+ max_x = max(max_x, len(words))
910
+
911
+ # Draw the words
912
+ plt.plot(x_positions, [y_offset] * len(words), 'k-', alpha=0.2)
913
+ plt.scatter(x_positions, [y_offset] * len(words), alpha=0)
914
+
915
+ # Add the word labels
916
+ for i, word in enumerate(words):
917
+ plt.annotate(word, (i, y_offset), xytext=(0, -10),
918
+ textcoords='offset points', ha='center')
919
+
920
+ # Draw dependency arcs
921
+ for token in sent:
922
+ if token.dep_ != "ROOT":
923
+ # Compute dependency depth (hops from the token up to the sentence ROOT)
924
+ depth = 0
925
+ current = token
926
+ while current.head != current:
927
+ depth += 1
928
+ current = current.head
929
+
930
+ # Determine the arc endpoints
931
+ start = token.i - sent[0].i
932
+ end = token.head.i - sent[0].i
933
+
934
+ # Arc height based on the distance between words
935
+ height = 0.5 * abs(end - start)
936
+
937
+ # Color based on depth
938
+ color = depth_colors[min(depth, len(depth_colors)-1)]
939
+
940
+ # Create the arc
941
+ arc = patches.Arc((min(start, end) + abs(end - start)/2, y_offset),
942
+ width=abs(end - start),
943
+ height=height,
944
+ angle=0,
945
+ theta1=0,
946
+ theta2=180,
947
+ color=color,
948
+ alpha=0.6)
949
+ ax.add_patch(arc)
950
+
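+ # Move the baseline down so the next sentence is drawn on its own row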
951
+ y_offset -= 2
952
+
953
+ # Configure the chart
954
+ plt.xlim(-1, max_x)
955
+ plt.ylim(y_offset - 1, 1)
956
+ plt.axis('off')
957
+ plt.title("Complejidad Sintáctica")
958
+
959
+ return fig
960
+
961
+ except Exception as e:
962
+ logger.error(f"Error en create_syntax_complexity_graph: {str(e)}")
963
+ return None
964
+
965
+ ############################################################################
966
+ def create_cohesion_heatmap(doc):
967
+ """Generates a heat map showing the cohesion between paragraphs/sentences."""
968
+ try:
969
+ sentences = list(doc.sents)
970
+ n_sentences = len(sentences)
971
+
972
+ if n_sentences < 2:
973
+ return None
974
+
975
+ similarity_matrix = np.zeros((n_sentences, n_sentences))
976
+
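+ # Fill the matrix with the pairwise Jaccard overlap of content-word lemmas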
977
+ for i in range(n_sentences):
978
+ for j in range(n_sentences):
979
+ sent1_lemmas = {token.lemma_ for token in sentences[i]
980
+ if token.is_alpha and not token.is_stop}
981
+ sent2_lemmas = {token.lemma_ for token in sentences[j]
982
+ if token.is_alpha and not token.is_stop}
983
+
984
+ if sent1_lemmas and sent2_lemmas:
985
+ intersection = len(sent1_lemmas & sent2_lemmas) # shared lemmas
986
+ union = len(sent1_lemmas | sent2_lemmas) # total distinct lemmas
987
+ similarity_matrix[i, j] = intersection / union if union > 0 else 0
988
+
989
+ # Create the visualization
990
+ fig, ax = plt.subplots(figsize=(10, 8))
991
+
992
+ sns.heatmap(similarity_matrix,
993
+ cmap='YlOrRd',
994
+ square=True,
995
+ xticklabels=False,
996
+ yticklabels=False,
997
+ cbar_kws={'label': 'Cohesión'},
998
+ ax=ax)
999
+
1000
+ plt.title("Mapa de Cohesión Textual")
1001
+ plt.xlabel("Oraciones")
1002
+ plt.ylabel("Oraciones")
1003
+
1004
+ plt.tight_layout()
1005
+ return fig
1006
+
1007
+ except Exception as e:
1008
+ logger.error(f"Error en create_cohesion_heatmap: {str(e)}")
1009
+ return None
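+ # These figure builders are presumably rendered in the Streamlit UI (e.g. with st.pyplot(fig));
+ # callers should close the returned figures after rendering to free matplotlib memory.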