# v3/modules/morphosyntax/morphosyntax_interface.py
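"""
Streamlit interface for AIdeaText's morphosyntactic analysis module.

Renders a text input, runs spaCy-based analysis on the submitted text, stores
the result via the MongoDB helpers, and displays repeated words, arc diagrams,
sentence structure, part-of-speech statistics, and a morphological table.
"""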
import streamlit as st
from streamlit_float import *
from streamlit_antd_components import *
from streamlit.components.v1 import html
import spacy
from spacy import displacy
import spacy_streamlit
import pandas as pd
import base64
import re
from .morphosyntax_process import (
    process_morphosyntactic_input,
    format_analysis_results,
    perform_advanced_morphosyntactic_analysis,
    get_repeated_words_colors,
    highlight_repeated_words,
    POS_COLORS,
    POS_TRANSLATIONS
)
from ..utils.widget_utils import generate_unique_key
from ..database.morphosintax_mongo_db import store_student_morphosyntax_result
from ..database.chat_mongo_db import store_chat_history, get_chat_history
import logging
logger = logging.getLogger(__name__)
def display_morphosyntax_interface(lang_code, nlp_models, morpho_t):
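    """
    Render the morphosyntax tab: a text input, an analyze button, and,
    after a successful analysis, the results produced by
    display_morphosyntax_results.

    Args:
        lang_code: Language code used to select the spaCy model and translations.
        nlp_models: Mapping of language codes to loaded spaCy models.
        morpho_t: Dictionary of UI translation strings for this module.
    """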
    try:
        # CSS to improve layout stability and prevent jumps
        st.markdown("""
            <style>
            .stTextArea textarea {
                font-size: 1rem;
                line-height: 1.5;
                resize: vertical;
            }
            .block-container {
                padding-top: 1rem;
                padding-bottom: 1rem;
            }
            .stExpander {
                border: none;
                box-shadow: 0 1px 2px rgba(0,0,0,0.1);
                margin-bottom: 1rem;
            }
            .legend-container {
                position: sticky;
                top: 0;
                background: white;
                z-index: 100;
                padding: 0.5rem 0;
                border-bottom: 1px solid #eee;
            }
            </style>
        """, unsafe_allow_html=True)

        # 1. Initialize session state
        if 'morphosyntax_state' not in st.session_state:
            st.session_state.morphosyntax_state = {
                'input_text': "",
                'analysis_count': 0,
                'last_analysis': None,
                'current_tab': 0
            }

        # 2. Main container with sticky layout
        with st.container():
            # Text input field
            input_key = f"morpho_input_{st.session_state.morphosyntax_state['analysis_count']}"
            sentence_input = st.text_area(
                morpho_t.get('morpho_input_label', 'Enter text to analyze'),
                height=150,
                placeholder=morpho_t.get('morpho_input_placeholder', 'Enter your text here...'),
                key=input_key,
                on_change=lambda: None  # Prevents unnecessary reruns
            )

            # 3. Analyze button
            col1, col2, col3 = st.columns([2, 1, 2])
            with col1:
                analyze_button = st.button(
                    morpho_t.get('morpho_analyze_button', 'Analyze Morphosyntax'),
                    key=f"morpho_button_{st.session_state.morphosyntax_state['analysis_count']}",
                    type="primary",
                    icon="🔍",
                    disabled=not bool(sentence_input.strip()),
                    use_container_width=True
                )

        # 4. Run the analysis
        if analyze_button and sentence_input.strip():
            try:
                with st.spinner(morpho_t.get('processing', 'Processing...')):
                    doc = nlp_models[lang_code](sentence_input)
                    advanced_analysis = perform_advanced_morphosyntactic_analysis(
                        sentence_input,
                        nlp_models[lang_code]
                    )

                    st.session_state.morphosyntax_result = {
                        'doc': doc,
                        'advanced_analysis': advanced_analysis
                    }
                    st.session_state.morphosyntax_state['analysis_count'] += 1

                    # Save the result
                    if store_student_morphosyntax_result(
                        username=st.session_state.username,
                        text=sentence_input,
                        arc_diagrams=advanced_analysis['arc_diagrams']
                    ):
                        st.success(morpho_t.get('success_message', 'Analysis saved successfully'))
                        st.session_state.morphosyntax_state['current_tab'] = 0
                        display_morphosyntax_results(
                            st.session_state.morphosyntax_result,
                            lang_code,
                            morpho_t
                        )
                    else:
                        st.error(morpho_t.get('error_message', 'Error saving analysis'))

            except Exception as e:
                logger.error(f"Error in morphosyntactic analysis: {str(e)}")
                st.error(morpho_t.get('error_processing', f'Error processing text: {str(e)}'))

        # 5. Show previous results
        elif 'morphosyntax_result' in st.session_state and st.session_state.morphosyntax_result:
            display_morphosyntax_results(
                st.session_state.morphosyntax_result,
                lang_code,
                morpho_t
            )
        elif not sentence_input.strip():
            st.info(morpho_t.get('morpho_initial_message', 'Enter text to begin analysis'))

    except Exception as e:
        logger.error(f"General error in display_morphosyntax_interface: {str(e)}")
        st.error("An error occurred. Please try again.")
def display_morphosyntax_results(result, lang_code, morpho_t):
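    """
    Display the results of a morphosyntactic analysis: the POS color legend,
    repeated words, per-sentence arc diagrams, sentence structure, POS
    statistics, and the morphological analysis table.
    """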
    if result is None:
        st.warning(morpho_t.get('no_results', 'No results available'))
        return

    doc = result['doc']
    advanced_analysis = result['advanced_analysis']

    # Legend pinned at the top
    with st.container():
        st.markdown(f"##### {morpho_t.get('legend', 'Legend: Grammatical categories')}")
        legend_html = "<div class='legend-container'><div style='display: flex; flex-wrap: wrap;'>"
        for pos, color in POS_COLORS.items():
            if pos in POS_TRANSLATIONS[lang_code]:
                legend_html += f"<div style='margin-right: 10px;'><span style='background-color: {color}; padding: 2px 5px;'>{POS_TRANSLATIONS[lang_code][pos]}</span></div>"
        legend_html += "</div></div>"
        st.markdown(legend_html, unsafe_allow_html=True)

    # Repeated words
    with st.expander(morpho_t.get('repeated_words', 'Repeated words'), expanded=True):
        word_colors = get_repeated_words_colors(doc)
        highlighted_text = highlight_repeated_words(doc, word_colors)
        st.markdown(highlighted_text, unsafe_allow_html=True)

    # Syntactic analysis (arc diagrams)
    with st.expander(morpho_t.get('arc_diagram', 'Syntactic analysis: Arc diagram'), expanded=True):
        sentences = list(doc.sents)
        for i, sent in enumerate(sentences):
            st.subheader(f"{morpho_t.get('sentence', 'Sentence')} {i+1}")
            html = displacy.render(sent, style="dep", options={"distance": 100})
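            # Shrink the SVG produced by displaCy and shift the drawing upwards
            # so each arc diagram takes less vertical space on the page.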
            html = html.replace('height="375"', 'height="200"')
            html = re.sub(r'<svg[^>]*>', lambda m: m.group(0).replace('height="450"', 'height="300"'), html)
            html = re.sub(r'<g [^>]*transform="translate\((\d+),(\d+)\)"',
                          lambda m: f'<g transform="translate({m.group(1)},50)"', html)
            st.write(html, unsafe_allow_html=True)

    # Sentence structure
    with st.expander(morpho_t.get('sentence_structure', 'Sentence structure'), expanded=True):
        for i, sent_analysis in enumerate(advanced_analysis['sentence_structure']):
            sentence_str = (
                f"**{morpho_t.get('sentence', 'Sentence')} {i+1}** "
                f"{morpho_t.get('root', 'Root')}: {sent_analysis['root']} ({sent_analysis['root_pos']}) -- "
                f"{morpho_t.get('subjects', 'Subjects')}: {', '.join(sent_analysis['subjects'])} -- "
                f"{morpho_t.get('objects', 'Objects')}: {', '.join(sent_analysis['objects'])} -- "
                f"{morpho_t.get('verbs', 'Verbs')}: {', '.join(sent_analysis['verbs'])}"
            )
            st.markdown(sentence_str)

    # Part-of-speech analysis
    with st.expander(morpho_t.get('pos_analysis', 'Part of speech'), expanded=True):
        pos_df = pd.DataFrame(advanced_analysis['pos_analysis'])
        pos_df['pos'] = pos_df['pos'].map(lambda x: POS_TRANSLATIONS[lang_code].get(x, x))
        pos_df = pos_df.rename(columns={
            'pos': morpho_t.get('grammatical_category', 'Grammatical category'),
            'count': morpho_t.get('count', 'Count'),
            'percentage': morpho_t.get('percentage', 'Percentage'),
            'examples': morpho_t.get('examples', 'Examples')
        })
        st.dataframe(pos_df, use_container_width=True)

    # Morphological analysis
    with st.expander(morpho_t.get('morphological_analysis', 'Morphological Analysis'), expanded=True):
        morph_df = pd.DataFrame(advanced_analysis['morphological_analysis'])
        column_mapping = {
            'text': morpho_t.get('word', 'Word'),
            'lemma': morpho_t.get('lemma', 'Lemma'),
            'pos': morpho_t.get('grammatical_category', 'Grammatical category'),
            'dep': morpho_t.get('dependency', 'Dependency'),
            'morph': morpho_t.get('morphology', 'Morphology')
        }
        morph_df = morph_df.rename(columns=column_mapping)

        # Translate grammatical categories
        grammatical_category = morpho_t.get('grammatical_category', 'Grammatical category')
        morph_df[grammatical_category] = morph_df[grammatical_category].map(
            lambda x: POS_TRANSLATIONS[lang_code].get(x, x)
        )

        # Apply dependency and morphology translations
        dependency = morpho_t.get('dependency', 'Dependency')
        morphology = morpho_t.get('morphology', 'Morphology')
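        # NOTE: dep_translations and morph_translations are not defined or
        # imported in this module; they are assumed to be provided elsewhere
        # (for example, imported alongside POS_TRANSLATIONS from the project's
        # translation tables). They must be available before this point.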
        def translate_morph(morph_string, lang_code):
            for key, value in morph_translations[lang_code].items():
                morph_string = morph_string.replace(key, value)
            return morph_string

        morph_df[dependency] = morph_df[dependency].map(
            lambda x: dep_translations[lang_code].get(x, x)
        )
        morph_df[morphology] = morph_df[morphology].apply(
            lambda x: translate_morph(x, lang_code)
        )

        st.dataframe(morph_df, use_container_width=True)