v3 / modules /morphosyntax /morphosyntax_interface.py
AIdeaText's picture
Update modules/morphosyntax/morphosyntax_interface.py
b58af47 verified
raw
history blame
12.1 kB
# modules/morphosyntax/morphosyntax_interface.py
import streamlit as st
from streamlit_float import *
from streamlit_antd_components import *
from streamlit.components.v1 import html
import spacy
from spacy import displacy
import spacy_streamlit
import pandas as pd
import base64
import re
# Importaciones locales
from .morphosyntax_process import (
process_morphosyntactic_input,
format_analysis_results,
perform_advanced_morphosyntactic_analysis,
get_repeated_words_colors,
highlight_repeated_words,
POS_COLORS,
POS_TRANSLATIONS
)
from ..utils.widget_utils import generate_unique_key
from ..database.morphosyntax_iterative_mongo_db import (
store_student_morphosyntax_base,
store_student_morphosyntax_iteration,
get_student_morphosyntax_analysis,
update_student_morphosyntax_analysis,
delete_student_morphosyntax_analysis,
get_student_morphosyntax_data
)
import logging
logger = logging.getLogger(__name__)
###########################################################################
def initialize_arc_analysis_state():
    """Create the arc-analysis state and the analysis cache when missing.

    Both entries live in ``st.session_state``. An entry that already exists
    is left untouched; a log line is written only for an entry created by
    this call.
    """
    session = st.session_state

    arc_state_missing = 'arc_analysis_state' not in session
    if arc_state_missing:
        session.arc_analysis_state = dict(
            base_id=None,
            original_text='',
            iteration_text='',
            analysis_count=0,
        )
        logger.info("Estado de an谩lisis de arcos inicializado")

    # The analysis cache is set up independently of the arc state.
    if 'analysis_cache' not in session:
        session.analysis_cache = {}
        logger.info("Cach茅 de an谩lisis inicializado")
###########################################################################
def reset_morpho_state():
    """Restore the arc-analysis state to its blank defaults.

    Does nothing when ``arc_analysis_state`` has not been created in
    ``st.session_state`` yet. The ``analysis_cache`` entry is not touched.
    """
    session = st.session_state
    if 'arc_analysis_state' not in session:
        return

    blank_state = {
        'base_id': None,
        'original_text': '',
        'iteration_text': '',
        'analysis_count': 0,
    }
    session.arc_analysis_state = blank_state
###########################################################################
def display_original_analysis(container, analysis, lang_code, morpho_t):
    """Render the original analysis inside ``container``.

    Writes a fixed subheader and then delegates to
    ``display_morphosyntax_results`` with the remaining arguments.
    """
    heading = "An谩lisis Original"
    with container:
        st.subheader(heading)
        display_morphosyntax_results(
            analysis,
            lang_code,
            morpho_t,
        )
def display_iteration_analysis(container, analysis, lang_code, morpho_t):
    """Render the analysis of the changed text inside ``container``.

    Writes a fixed subheader and then delegates to
    ``display_morphosyntax_results`` with the remaining arguments.
    """
    heading = "An谩lisis de Cambios"
    with container:
        st.subheader(heading)
        display_morphosyntax_results(
            analysis,
            lang_code,
            morpho_t,
        )
def display_arc_diagram(doc, analysis):
    """Render an untitled dependency-arc diagram for each sentence of ``doc``.

    Each sentence is rendered with displaCy's ``dep`` style. The markup is
    then patched (height attributes rewritten, ``<g ... translate(x,y)``
    openings replaced by ``<g transform="translate(x,50)"``), wrapped in an
    ``arc-diagram-container`` div and written to the page as raw HTML.

    Args:
        doc: Parsed document exposing ``.sents``.
        analysis: Not read by this function.

    Any exception is logged and swallowed; sentences after the failing one
    are not rendered.
    """
    svg_open_tag = re.compile(r'<svg[^>]*>')
    group_translate = re.compile(r'<g [^>]*transform="translate\((\d+),(\d+)\)"')

    def _shrink_svg_tag(m):
        # Only the opening <svg ...> tag is rewritten.
        return m.group(0).replace('height="450"', 'height="300"')

    def _pin_group_offset(m):
        # Keep the x offset, force the y offset to 50.
        return f'<g transform="translate({m.group(1)},50)"'

    try:
        for sentence in doc.sents:
            svg = displacy.render(
                sentence,
                style="dep",
                options=dict(distance=100, arrow_spacing=20, word_spacing=30),
            )

            # Adjust size and position
            svg = svg.replace('height="375"', 'height="200"')
            svg = svg_open_tag.sub(_shrink_svg_tag, svg)
            rendered = group_translate.sub(_pin_group_offset, svg)

            # Wrap in the styled container
            rendered = f'<div class="arc-diagram-container">{rendered}</div>'
            st.write(rendered, unsafe_allow_html=True)
    except Exception as e:
        logger.error(f"Error en display_arc_diagram: {str(e)}")
###########################################################################
def cache_analysis_results(key, result):
    """Store ``result`` under ``key`` in the session's analysis cache.

    The cache dictionary is created in ``st.session_state`` first when it
    does not exist yet. The key is written to the log after storing.
    """
    session = st.session_state
    if 'analysis_cache' not in session:
        session.analysis_cache = {}

    cache = session.analysis_cache
    cache[key] = result
    logger.info(f"Resultado almacenado en cach茅 con clave: {key}")
def get_cached_analysis(key):
    """Return the cached analysis stored under ``key``.

    Returns ``None`` when the cache has not been created in
    ``st.session_state`` or when it holds no entry for ``key``.
    """
    session = st.session_state
    if 'analysis_cache' in session:
        return session.analysis_cache.get(key)
    return None
###########################################################################
# CSS injected on every render: fixed-height text areas, arc-diagram wrapper
# and the divider drawn between the base analysis and the iteration.
_MORPHOSYNTAX_INTERFACE_CSS = """
<style>
.stTextArea textarea {
font-size: 1rem;
line-height: 1.5;
min-height: 100px !important;
height: 100px !important;
}
.arc-diagram-container {
width: 100%;
padding: 0.5rem;
margin: 0.5rem 0;
}
.divider {
height: 3px;
border: none;
background-color: #333;
margin: 2rem 0;
}
</style>
"""


def _run_base_analysis(text_input, lang_code, nlp_models, state):
    """Analyze and store the base text; update ``state`` only on success."""
    try:
        analysis = perform_advanced_morphosyntactic_analysis(
            text_input,
            nlp_models[lang_code]
        )
        # Save to Mongo
        base_id = store_student_morphosyntax_base(
            st.session_state.username,
            text_input,
            analysis['arc_diagrams']
        )
        if base_id:
            state['base_id'] = base_id
            state['original_text'] = text_input
            # A previous iteration belongs to the previous base text.
            state['iteration_text'] = ''
            state['analysis_count'] += 1
    except Exception as e:
        st.error("Error procesando an谩lisis base")
        logger.error(f"Error: {str(e)}")


def _run_iteration_analysis(iteration_text, lang_code, nlp_models, state):
    """Analyze and store an iteration of the persisted base text."""
    try:
        analysis_iter = perform_advanced_morphosyntactic_analysis(
            iteration_text,
            nlp_models[lang_code]
        )
        # The persisted base text is used (not the current content of the
        # base text box) so the iteration is paired with what was stored
        # under base_id.
        iteration_id = store_student_morphosyntax_iteration(
            st.session_state.username,
            state['base_id'],
            state['original_text'],
            iteration_text,
            analysis_iter['arc_diagrams']
        )
        if iteration_id:
            state['iteration_text'] = iteration_text
    except Exception as e:
        st.error("Error procesando iteraci贸n")
        logger.error(f"Error en iteraci贸n: {str(e)}")


def _show_stored_arc_diagram(text, lang_code, nlp_models):
    """Parse ``text`` with the model for ``lang_code`` and draw its arcs."""
    doc = nlp_models[lang_code](text)
    # display_arc_diagram does not read its second argument.
    display_arc_diagram(doc, None)


def _render_arc_diagram_tab(lang_code, nlp_models):
    """Render the arc-diagram tab: base text, its diagram and the iteration."""
    # NOTE(review): the accented characters in the UI strings of this block
    # look mis-encoded — confirm the file encoding. They are kept
    # byte-identical here.
    col1, _, _ = st.columns([2,1,2])
    with col1:
        if st.button("Nuevo An谩lisis", type="secondary", use_container_width=True):
            reset_morpho_state()

    # Container for the whole process
    with st.container():
        # Fetched after the reset button on purpose: reset_morpho_state()
        # replaces the state dictionary with a new object.
        state = st.session_state.arc_analysis_state

        # Base text input
        text_input = st.text_area(
            "Ingrese su texto",
            value=state.get('original_text', ''),
            key=f"original_text_{state['analysis_count']}",
            height=100
        )
        analyze_button = st.button(
            "Analizar Texto",
            type="primary",
            use_container_width=True
        )
        if analyze_button and text_input.strip():
            _run_base_analysis(text_input, lang_code, nlp_models, state)

        # Everything below is driven by the persisted state, not by
        # analyze_button. st.button() is True only during the rerun caused
        # by its own click, so widgets created under `if analyze_button:`
        # disappear as soon as any other widget (such as the iteration
        # button) triggers a rerun, and their handlers never run.
        if not state.get('base_id'):
            return

        try:
            _show_stored_arc_diagram(state['original_text'], lang_code, nlp_models)
        except Exception as e:
            st.error("Error procesando an谩lisis base")
            logger.error(f"Error: {str(e)}")

        st.markdown('<hr class="divider">', unsafe_allow_html=True)

        # Text box for the iteration
        st.subheader("Iteraci贸n / Cambios:")
        iteration_text = st.text_area(
            "Ingrese cambios o versi贸n modificada",
            value=state.get('iteration_text', ''),
            key="iteration_text",
            height=100
        )
        iteration_button = st.button(
            "Analizar Cambios",
            type="primary",
            use_container_width=True
        )
        if iteration_button and iteration_text.strip():
            _run_iteration_analysis(iteration_text, lang_code, nlp_models, state)

        # Show the diagram of the last stored iteration, if any.
        if state.get('iteration_text'):
            try:
                _show_stored_arc_diagram(state['iteration_text'], lang_code, nlp_models)
            except Exception as e:
                st.error("Error procesando iteraci贸n")
                logger.error(f"Error en iteraci贸n: {str(e)}")


def display_morphosyntax_interface(lang_code, nlp_models, morpho_t):
    """Render the morphosyntax interface with its three tabs.

    The first tab lets the user analyze a base text, shows its arc diagram
    and then offers an iteration box whose text is analyzed and stored
    against the base analysis. The base diagram and the iteration section
    are rendered from ``st.session_state.arc_analysis_state``, so they stay
    visible across Streamlit reruns and the iteration button works. The
    other two tabs only show an "in development" notice.

    Args:
        lang_code: Key used to pick the model from ``nlp_models``.
        nlp_models: Mapping from language code to a callable model.
        morpho_t: Translation dictionary; accepted for interface
            compatibility and not read here.

    Any exception raised while building the interface is reported with
    ``st.error`` and logged; nothing is re-raised.
    """
    try:
        # CSS for a stable layout
        st.markdown(_MORPHOSYNTAX_INTERFACE_CSS, unsafe_allow_html=True)

        # Initialize the session entries if they do not exist
        initialize_arc_analysis_state()

        # -- Tabs: three tabs are defined
        subtabs = st.tabs([
            "An谩lisis de Diagramas de Arco",
            "An谩lisis de Categor铆as",
            "An谩lisis Morfol贸gico"
        ])

        # =========== Tab 1: arc diagram ===============
        with subtabs[0]:
            _render_arc_diagram_tab(lang_code, nlp_models)

        # =========== Tab 2: categories (placeholder) ============
        with subtabs[1]:
            st.info("An谩lisis de Categor铆as en desarrollo...")

        # =========== Tab 3: morphological analysis (placeholder) ============
        with subtabs[2]:
            st.info("An谩lisis Morfol贸gico en desarrollo...")
    except Exception as e:
        st.error("Error en la interfaz")
        logger.error(f"Error general en la interfaz: {str(e)}")
###########################################################################
def display_morphosyntax_results(result, lang_code, morpho_t):
    """
    Render the dependency-arc diagram of every sentence in an analysis.

    Args:
        result: Mapping whose ``'doc'`` entry is the processed document;
            ``None`` makes the function return without rendering.
        lang_code: Language code (not read by this function).
        morpho_t: Translation dictionary; its ``'sentence'`` entry labels
            each diagram, falling back to ``'Sentence'``.

    A failure on one sentence is logged and the remaining sentences are
    still rendered; a failure outside the per-sentence step is logged and
    ends the function.
    """
    if result is None:
        return

    svg_open_tag = re.compile(r'<svg[^>]*>')
    group_translate = re.compile(r'<g [^>]*transform="translate\((\d+),(\d+)\)"')

    def _shrink_svg_tag(m):
        # Only the opening <svg ...> tag is rewritten.
        return m.group(0).replace('height="450"', 'height="300"')

    def _pin_group_offset(m):
        # Keep the x offset, force the y offset to 50.
        return f'<g transform="translate({m.group(1)},50)"'

    try:
        doc = result['doc']
        # Sentences are materialized before anything is rendered.
        all_sentences = list(doc.sents)
        for i, sent in enumerate(all_sentences):
            try:
                st.subheader(f"{morpho_t.get('sentence', 'Sentence')} {i+1}")
                svg = displacy.render(
                    sent,
                    style="dep",
                    options=dict(distance=100, arrow_spacing=20, word_spacing=30),
                )
                svg = svg.replace('height="375"', 'height="200"')
                svg = svg_open_tag.sub(_shrink_svg_tag, svg)
                rendered = group_translate.sub(_pin_group_offset, svg)
                rendered = f'<div class="arc-diagram-container">{rendered}</div>'
                st.write(rendered, unsafe_allow_html=True)
            except Exception as e:
                logger.error(f"Error en diagrama {i}: {str(e)}")
    except Exception as e:
        logger.error(f"Error en display_morphosyntax_results: {str(e)}")