AIdeaText committed on
Commit
ee7b3c9
verified
1 Parent(s): d8f54ea

Update modules/morphosyntax/morphosyntax_interface.py

Browse files
modules/morphosyntax/morphosyntax_interface.py CHANGED
@@ -1,356 +1,150 @@
1
  # modules/morphosyntax/morphosyntax_interface.py
2
 
3
  import streamlit as st
4
- from streamlit_float import *
5
- from streamlit_antd_components import *
6
- from streamlit.components.v1 import html
7
- import spacy
8
- from spacy import displacy
9
- import spacy_streamlit
10
- import pandas as pd
11
- import base64
12
  import re
 
 
13
 
14
- # Importaciones locales
15
- from .morphosyntax_process import (
16
- process_morphosyntactic_input,
17
- format_analysis_results,
18
- perform_advanced_morphosyntactic_analysis,
19
- get_repeated_words_colors,
20
- highlight_repeated_words,
21
- POS_COLORS,
22
- POS_TRANSLATIONS
23
- )
24
-
25
- from ..utils.widget_utils import generate_unique_key
26
 
27
  from ..database.morphosyntax_iterative_mongo_db import (
28
  store_student_morphosyntax_base,
29
  store_student_morphosyntax_iteration,
30
- get_student_morphosyntax_analysis,
31
- update_student_morphosyntax_analysis,
32
- delete_student_morphosyntax_analysis,
33
- get_student_morphosyntax_data
34
  )
35
 
36
- import logging
37
  logger = logging.getLogger(__name__)
38
 
39
- ###########################################################################
40
  def initialize_arc_analysis_state():
41
  """Inicializa el estado del análisis de arcos y el caché si no existen."""
42
- if 'arc_analysis_state' not in st.session_state:
43
  st.session_state.arc_analysis_state = {
44
- 'base_id': None, # ID del análisis base (ObjectId)
45
- 'original_text': '', # Texto original
46
- 'original_analysis': None, # Dict con resultados del análisis base
47
- 'iteration_text': '', # Texto de iteración
48
- 'iteration_analysis': None,# Dict con resultados de la iteración
49
- 'analysis_count': 0
50
  }
51
- logger.info("Estado de análisis de arcos inicializado")
52
-
53
- # Inicializar caché de análisis
54
- if 'analysis_cache' not in st.session_state:
55
- st.session_state.analysis_cache = {}
56
- logger.info("Caché de análisis inicializado")
57
-
58
 
59
  def reset_morpho_state():
60
  """Resetea el estado del análisis morfosintáctico en sesión."""
61
- if 'arc_analysis_state' in st.session_state:
62
- st.session_state.arc_analysis_state = {
63
- 'base_id': None,
64
- 'original_text': '',
65
- 'original_analysis': None,
66
- 'iteration_text': '',
67
- 'iteration_analysis': None,
68
- 'analysis_count': 0
69
- }
70
-
71
-
72
- def display_original_analysis(container, analysis, lang_code, morpho_t):
73
- """Muestra el análisis original en el contenedor especificado."""
74
- with container:
75
- st.subheader("Análisis Original")
76
- display_morphosyntax_results(analysis, lang_code, morpho_t)
77
-
78
-
79
- def display_iteration_analysis(container, analysis, lang_code, morpho_t):
80
- """Muestra el análisis de cambios en el contenedor especificado."""
81
- with container:
82
- st.subheader("Análisis de Cambios")
83
- display_morphosyntax_results(analysis, lang_code, morpho_t)
84
-
85
-
86
- def display_arc_diagram(doc, analysis):
87
- """Muestra un diagrama de arco sin título."""
88
  try:
89
  for sent in doc.sents:
90
- svg_html = displacy.render(
91
- sent,
92
  style="dep",
93
- options={
94
- "distance": 100,
95
- "arrow_spacing": 20,
96
- "word_spacing": 30
97
- }
98
  )
99
- # Ajustar tamaño y posición
100
- svg_html = svg_html.replace('height="375"', 'height="200"')
101
- svg_html = re.sub(
102
  r'<svg[^>]*>',
103
  lambda m: m.group(0).replace('height="450"', 'height="300"'),
104
- svg_html
105
  )
106
- svg_html = re.sub(
107
  r'<g [^>]*transform="translate\((\d+),(\d+)\)"',
108
  lambda m: f'<g transform="translate({m.group(1)},50)"',
109
- svg_html
110
  )
111
-
112
- # Envolver en contenedor con estilo
113
- svg_html = f'<div class="arc-diagram-container">{svg_html}</div>'
114
- st.write(svg_html, unsafe_allow_html=True)
115
 
116
  except Exception as e:
117
  logger.error(f"Error en display_arc_diagram: {str(e)}")
118
 
119
-
120
- def cache_analysis_results(key, result):
121
- """Almacena resultados de análisis en caché."""
122
- if 'analysis_cache' not in st.session_state:
123
- initialize_arc_analysis_state()
124
- st.session_state.analysis_cache[key] = result
125
- logger.info(f"Resultado almacenado en caché con clave: {key}")
126
-
127
-
128
- def get_cached_analysis(key):
129
- """Recupera resultados de análisis del caché."""
130
- if 'analysis_cache' not in st.session_state:
131
- initialize_arc_analysis_state()
132
- return None
133
- return st.session_state.analysis_cache.get(key)
134
-
135
-
136
- def display_morphosyntax_interface(lang_code, nlp_models, morpho_t):
137
- """
138
- Interfaz principal para el análisis morfosintáctico.
139
- """
140
- try:
141
- # CSS para layout estable
142
- st.markdown("""
143
- <style>
144
- .stTextArea textarea {
145
- font-size: 1rem;
146
- line-height: 1.5;
147
- min-height: 100px !important;
148
- height: 100px !important;
149
- }
150
- .arc-diagram-container {
151
- width: 100%;
152
- padding: 0.5rem;
153
- margin: 0.5rem 0;
154
- }
155
- .divider {
156
- height: 3px;
157
- border: none;
158
- background-color: #333;
159
- margin: 2rem 0;
160
- }
161
- </style>
162
- """, unsafe_allow_html=True)
163
-
164
- # Inicializar estados si no existen
165
- initialize_arc_analysis_state()
166
-
167
- # Crear subtabs
168
- subtabs = st.tabs([
169
- "Análisis de Diagramas de Arco",
170
- "Análisis de Categorías",
171
- "Análisis Morfológico"
172
- ])
173
-
174
- # -------------------- Subtab 0: Diagramas de Arco --------------------
175
- with subtabs[0]:
176
- # Botón de reset
177
- col1, col2, col3 = st.columns([2, 1, 2])
178
- with col1:
179
- if st.button("Nuevo Análisis", type="secondary", use_container_width=True):
180
- reset_morpho_state()
181
- # Forzar un refresco completo solo en este caso
182
- st.experimental_rerun()
183
-
184
- # Container principal para análisis base
185
- analysis_container = st.container()
186
- with analysis_container:
187
- # Entrada de texto original
188
- text_input_key = f"original_text_{st.session_state.arc_analysis_state['analysis_count']}"
189
- text_input = st.text_area(
190
- "Texto original",
191
- value=st.session_state.arc_analysis_state.get('original_text', ''),
192
- key=text_input_key,
193
- height=100
194
  )
195
-
196
- # Botón de análisis
197
- col1, col2, col3 = st.columns([2, 1, 2])
198
- with col1:
199
- analyze_button = st.button(
200
- "Analizar Texto",
201
- type="primary",
202
- use_container_width=True
203
- )
204
-
205
- # Procesar texto original
206
- if analyze_button and text_input.strip():
207
- try:
208
- if not getattr(st.session_state, 'username', None):
209
- st.error("No se detectó username en session_state. Verifica tu login.")
210
- return
211
-
212
- doc = nlp_models[lang_code](text_input)
213
- analysis = perform_advanced_morphosyntactic_analysis(
214
- text_input,
215
- nlp_models[lang_code]
216
- )
217
-
218
- # Guardar en BD (retornará ObjectId)
219
- base_id = store_student_morphosyntax_base(
220
- st.session_state.username,
221
- text_input,
222
- analysis['arc_diagrams']
223
- )
224
-
225
- if base_id:
226
- # Actualizar el estado en session_state
227
- st.session_state.arc_analysis_state.update({
228
- 'base_id': base_id, # Guardamos el ObjectId
229
- 'original_text': text_input,
230
- 'original_analysis': analysis,
231
- 'analysis_count': st.session_state.arc_analysis_state['analysis_count'] + 1,
232
- # Reiniciamos la iteración
233
- 'iteration_text': '',
234
- 'iteration_analysis': None
235
- })
236
-
237
- # Mostrar diagrama base
238
- display_arc_diagram(doc, analysis)
239
-
240
- # Línea divisora
241
- st.markdown('<hr class="divider">', unsafe_allow_html=True)
242
-
243
- # Sección para la iteración
244
- st.subheader("Iteración / Cambios")
245
- with st.form("iteration_form"):
246
- # key para la iteración
247
- iteration_text_key = f"iteration_text_{st.session_state.arc_analysis_state['analysis_count']}"
248
-
249
- # En lugar de default = text_input, ahora dejamos vacío por defecto
250
- iteration_text = st.text_area(
251
- "Texto de iteración",
252
- value=st.session_state.arc_analysis_state.get('iteration_text', ''),
253
- key=iteration_text_key,
254
- height=100
255
- )
256
-
257
- # Botón de submit en el formulario
258
- col1, col2, col3 = st.columns([2,1,2])
259
- with col1:
260
- submitted = st.form_submit_button(
261
- "Analizar Cambios",
262
- type="primary",
263
- use_container_width=True
264
- )
265
-
266
- if submitted and iteration_text.strip():
267
- try:
268
- doc_iter = nlp_models[lang_code](iteration_text)
269
- analysis_iter = perform_advanced_morphosyntactic_analysis(
270
- iteration_text,
271
- nlp_models[lang_code]
272
- )
273
-
274
- iteration_id = store_student_morphosyntax_iteration(
275
- st.session_state.username,
276
- base_id, # este es ObjectId
277
- text_input, # Texto original
278
- iteration_text, # Texto de iteración
279
- analysis_iter['arc_diagrams']
280
- )
281
-
282
- if iteration_id:
283
- # Actualizamos el estado de la iteración
284
- st.session_state.arc_analysis_state.update({
285
- 'iteration_text': iteration_text,
286
- 'iteration_analysis': analysis_iter
287
- })
288
- # Mostrar diagrama de iteración
289
- display_arc_diagram(doc_iter, analysis_iter)
290
-
291
- except Exception as e:
292
- st.error("Error procesando iteración")
293
- logger.error(f"Error en iteración: {str(e)}")
294
-
295
- except Exception as e:
296
- st.error("Error procesando análisis base")
297
- logger.error(f"Error base: {str(e)}")
298
-
299
- # -------------------- Subtab 1: Análisis de Categorías ----------------
300
- with subtabs[1]:
301
- st.info("Análisis de Categorías en desarrollo...")
302
-
303
- # -------------------- Subtab 2: Análisis Morfológico ------------------
304
- with subtabs[2]:
305
- st.info("Análisis Morfológico en desarrollo...")
306
-
307
- except Exception as e:
308
- st.error("Error en la interfaz de morfosintaxis")
309
- logger.error(f"Error general en la interfaz: {str(e)}")
310
-
311
-
312
- def display_morphosyntax_results(result, lang_code, morpho_t):
313
- """
314
- Muestra solo el diagrama de arco.
315
- Args:
316
- result: Diccionario con el documento procesado y su análisis
317
- lang_code: Código del idioma
318
- morpho_t: Diccionario de traducciones (opcional)
319
- """
320
- if not result:
321
- return
322
- try:
323
- doc = result['doc']
324
- sentences = list(doc.sents)
325
- for i, sent in enumerate(sentences):
326
- try:
327
- st.subheader(f"{morpho_t.get('sentence', 'Sentence')} {i+1}")
328
- svg_html = displacy.render(
329
- sent,
330
- style="dep",
331
- options={
332
- "distance": 100,
333
- "arrow_spacing": 20,
334
- "word_spacing": 30
335
- }
336
  )
337
- svg_html = svg_html.replace('height="375"', 'height="200"')
338
- svg_html = re.sub(
339
- r'<svg[^>]*>',
340
- lambda m: m.group(0).replace('height="450"', 'height="300"'),
341
- svg_html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  )
343
- svg_html = re.sub(
344
- r'<g [^>]*transform="translate\((\d+),(\d+)\)"',
345
- lambda m: f'<g transform="translate({m.group(1)},50)"',
346
- svg_html
 
 
 
 
 
347
  )
348
- svg_html = f'<div class="arc-diagram-container">{svg_html}</div>'
349
- st.write(svg_html, unsafe_allow_html=True)
350
- except Exception as exc:
351
- logger.error(f"Error mostrando diagrama de la oración {i}: {str(exc)}")
352
- continue
353
- except Exception as e:
354
- logger.error(f"Error en display_morphosyntax_results: {str(e)}")
355
-
 
 
356
 
 
1
  # modules/morphosyntax/morphosyntax_interface.py
2
 
3
  import streamlit as st
 
 
 
 
 
 
 
 
4
  import re
5
+ import logging
6
+ from spacy import displacy
7
 
8
+ # Importa tu pipeline de spacy, p. ej.:
9
+ # nlp_models = {"es": spacy.load("es_core_news_sm")}
 
 
 
 
 
 
 
 
 
 
10
 
11
  from ..database.morphosyntax_iterative_mongo_db import (
12
  store_student_morphosyntax_base,
13
  store_student_morphosyntax_iteration,
 
 
 
 
14
  )
15
 
 
16
  logger = logging.getLogger(__name__)
17
 
 
18
  def initialize_arc_analysis_state():
19
  """Inicializa el estado del análisis de arcos y el caché si no existen."""
20
+ if "arc_analysis_state" not in st.session_state:
21
  st.session_state.arc_analysis_state = {
22
+ "base_id": None,
23
+ "original_text": "",
24
+ "iteration_text": "",
25
+ "analysis_count": 0,
 
 
26
  }
 
 
 
 
 
 
 
27
 
28
  def reset_morpho_state():
29
  """Resetea el estado del análisis morfosintáctico en sesión."""
30
+ st.session_state.arc_analysis_state = {
31
+ "base_id": None,
32
+ "original_text": "",
33
+ "iteration_text": "",
34
+ "analysis_count": 0,
35
+ }
36
+
37
+ def display_arc_diagram(doc):
38
+ """Muestra un diagrama de arco."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  try:
40
  for sent in doc.sents:
41
+ rendered = displacy.render(
42
+ sent,
43
  style="dep",
44
+ options={"distance": 100, "arrow_spacing": 20, "word_spacing": 30}
 
 
 
 
45
  )
46
+ # Ajustes del SVG
47
+ rendered = rendered.replace('height="375"', 'height="200"')
48
+ rendered = re.sub(
49
  r'<svg[^>]*>',
50
  lambda m: m.group(0).replace('height="450"', 'height="300"'),
51
+ rendered
52
  )
53
+ rendered = re.sub(
54
  r'<g [^>]*transform="translate\((\d+),(\d+)\)"',
55
  lambda m: f'<g transform="translate({m.group(1)},50)"',
56
+ rendered
57
  )
58
+ st.write(f'<div class="arc-diagram-container">{rendered}</div>', unsafe_allow_html=True)
 
 
 
59
 
60
  except Exception as e:
61
  logger.error(f"Error en display_arc_diagram: {str(e)}")
62
 
63
+ def display_morphosyntax_interface(lang_code, nlp_models):
64
+ st.title("Análisis Morfosintáctico")
65
+
66
+ # 1) Inicializar estados
67
+ initialize_arc_analysis_state()
68
+
69
+ # 2) Tabs
70
+ tabs = st.tabs(["Arco", "Categorías", "Morfológico"])
71
+
72
+ # ==================== TAB 0: Análisis de Arco =========================
73
+ with tabs[0]:
74
+ st.write("Texto base e iteraciones...")
75
+
76
+ # Botón Nuevo Análisis
77
+ if st.button("Nuevo Análisis", key="new_analysis"):
78
+ reset_morpho_state()
79
+ st.experimental_rerun()
80
+
81
+ # Texto base
82
+ st.session_state.arc_analysis_state["original_text"] = st.text_area(
83
+ "Texto Original",
84
+ value=st.session_state.arc_analysis_state["original_text"],
85
+ key="base_text_input"
86
+ )
87
+
88
+ # Botón para analizar texto base
89
+ if st.button("Analizar Texto Base", key="analyze_base"):
90
+ # Lógica de Spacy
91
+ if not st.session_state.arc_analysis_state["original_text"].strip():
92
+ st.warning("Ingrese texto original para analizar.")
93
+ else:
94
+ doc = nlp_models[lang_code](
95
+ st.session_state.arc_analysis_state["original_text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  )
97
+ # Realizar análisis (displacy, etc.)
98
+ display_arc_diagram(doc)
99
+
100
+ # Guardar en DB
101
+ base_id = store_student_morphosyntax_base(
102
+ st.session_state.username,
103
+ st.session_state.arc_analysis_state["original_text"],
104
+ arc_diagrams=None # Pon el dict real con arcos
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  )
106
+ if base_id:
107
+ st.session_state.arc_analysis_state["base_id"] = base_id
108
+ st.success(f"Análisis base guardado. base_id={base_id}")
109
+
110
+ st.markdown("---")
111
+
112
+ # Texto de iteración
113
+ st.session_state.arc_analysis_state["iteration_text"] = st.text_area(
114
+ "Texto de Iteración",
115
+ value=st.session_state.arc_analysis_state["iteration_text"],
116
+ key="iteration_text_input"
117
+ )
118
+
119
+ # Botón para analizar iteración
120
+ if st.button("Analizar Cambios", key="analyze_iteration"):
121
+ if not st.session_state.arc_analysis_state["iteration_text"].strip():
122
+ st.warning("Ingrese texto de iteración para analizar.")
123
+ elif not st.session_state.arc_analysis_state["base_id"]:
124
+ st.error("No existe un base_id aún (analice el texto base primero).")
125
+ else:
126
+ # Realizar el análisis con Spacy
127
+ doc_iter = nlp_models[lang_code](
128
+ st.session_state.arc_analysis_state["iteration_text"]
129
  )
130
+ display_arc_diagram(doc_iter)
131
+
132
+ # Guardar iteración en DB
133
+ iteration_id = store_student_morphosyntax_iteration(
134
+ st.session_state.username,
135
+ st.session_state.arc_analysis_state["base_id"],
136
+ st.session_state.arc_analysis_state["original_text"],
137
+ st.session_state.arc_analysis_state["iteration_text"],
138
+ arc_diagrams=None # Pon el dict real con arcos
139
  )
140
+ if iteration_id:
141
+ st.success(f"Iteración guardada. iteration_id={iteration_id}")
142
+
143
+ # ==================== TAB 1: Análisis de Categorías ====================
144
+ with tabs[1]:
145
+ st.info("En desarrollo...")
146
+
147
+ # ==================== TAB 2: Análisis Morfológico ======================
148
+ with tabs[2]:
149
+ st.info("En desarrollo...")
150