|
|
|
import os |
|
# Runtime switches for native libraries; set here so they are already in the
# environment when the heavier imports further down are executed.
os.environ.update({
    'TF_ENABLE_ONEDNN_OPTS': '0',
    'KMP_DUPLICATE_LIB_OK': 'TRUE',
})
|
|
|
import streamlit as st |
|
import spacy |
|
from spacy import displacy |
|
import re |
|
|
|
|
|
# Page-level settings (browser tab title, icon, wide layout). This call is
# placed ahead of every other Streamlit command in the script.
st.set_page_config(page_title="AIdeaText", page_icon="random", layout="wide")
|
|
|
from modules.syntax_analysis import highlight_repeated_words, get_repeated_words_colors, POS_COLORS, POS_TRANSLATIONS, visualize_syntax |
|
|
|
@st.cache_resource
def load_spacy_models():
    """Load one large spaCy pipeline per supported language.

    Returns:
        A dict mapping the language code ('es', 'en', 'fr') to the pipeline
        returned by ``spacy.load`` for that language. The function is wrapped
        in ``st.cache_resource``.
    """
    model_names = (
        ('es', "es_core_news_lg"),
        ('en', "en_core_web_lg"),
        ('fr', "fr_core_news_lg"),
    )
    return {code: spacy.load(name) for code, name in model_names}
|
|
|
|
|
# spaCy pipelines keyed by language code.
nlp_models = load_spacy_models()

# Label shown in the sidebar -> language code used to index the pipelines
# and the UI strings.
languages = {'Español': 'es', 'English': 'en', 'Français': 'fr'}

selected_lang = st.sidebar.selectbox(
    "Select Language / Seleccione el idioma / Choisissez la langue",
    list(languages),
)
lang_code = languages[selected_lang]
|
|
|
|
|
# UI strings per language code. Every language defines the same set of keys,
# which the page code below looks up by name.
translations = {
    # Spanish
    "es": {
        "title": "AIdeaText - Análisis morfológico y sintáctico",
        "input_label": "Ingrese un texto para analizar (máx. 5,000 palabras):",
        "input_placeholder": "Escriba o pegue su texto aquí...",
        "analyze_button": "Analizar texto",
        "repeated_words": "Palabras repetidas",
        "legend": "Leyenda: Categorías gramaticales",
        "arc_diagram": "Análisis sintáctico: Diagrama de arco",
        "network_diagram": "Análisis sintáctico: Diagrama de red",
        "sentence": "Oración",
    },
    # English
    "en": {
        "title": "AIdeaText - Morphological and Syntactic Analysis",
        "input_label": "Enter a text to analyze (max 5,000 words):",
        "input_placeholder": "Type or paste your text here...",
        "analyze_button": "Analyze text",
        "repeated_words": "Repeated words",
        "legend": "Legend: Grammatical categories",
        "arc_diagram": "Syntactic analysis: Arc diagram",
        "network_diagram": "Syntactic analysis: Network diagram",
        "sentence": "Sentence",
    },
    # French
    "fr": {
        "title": "AIdeaText - Analyse morphologique et syntaxique",
        "input_label": "Entrez un texte à analyser (max 5 000 mots) :",
        "input_placeholder": "Tapez ou collez votre texte ici...",
        "analyze_button": "Analyser le texte",
        "repeated_words": "Mots répétés",
        "legend": "Légende : Catégories grammaticales",
        "arc_diagram": "Analyse syntaxique : Diagramme en arc",
        "network_diagram": "Analyse syntaxique : Diagramme de réseau",
        "sentence": "Phrase",
    },
}
|
|
|
|
|
# UI strings for the language picked in the sidebar.
t = translations[lang_code]

st.markdown(f"### {t['title']}")

sentence_input = st.text_area(t['input_label'], height=150, placeholder=t['input_placeholder'])

# Patterns used to post-process displaCy's SVG markup; built once here rather
# than inside the per-sentence loop.
SVG_OPEN_TAG_RE = re.compile(r'<svg[^>]*>')
GROUP_TRANSLATE_RE = re.compile(r'<g [^>]*transform="translate\((\d+),(\d+)\)"')

if st.button(t['analyze_button']):
    # Skip empty and whitespace-only submissions: there is nothing to analyze,
    # and running the pipeline on blanks would only render empty diagrams.
    if sentence_input and sentence_input.strip():
        doc = nlp_models[lang_code](sentence_input)

        # Repeated-word highlighting followed by the colour legend.
        # NOTE(review): the original indentation was lost in this file; the
        # legend is assumed to live inside this expander -- confirm.
        with st.expander(t['repeated_words'], expanded=True):
            word_colors = get_repeated_words_colors(doc)
            highlighted_text = highlight_repeated_words(doc, word_colors)
            st.markdown(highlighted_text, unsafe_allow_html=True)

            st.markdown(f"##### {t['legend']}")
            # One coloured chip per POS tag that has a translated label.
            legend_items = "".join(
                f"<div style='margin-right: 10px;'><span style='background-color: {color}; padding: 2px 5px;'>{POS_TRANSLATIONS[pos]}</span></div>"
                for pos, color in POS_COLORS.items()
                if pos in POS_TRANSLATIONS
            )
            legend_html = f"<div style='display: flex; flex-wrap: wrap;'>{legend_items}</div>"
            st.markdown(legend_html, unsafe_allow_html=True)

        # One dependency arc diagram per sentence.
        with st.expander(t['arc_diagram'], expanded=True):
            for number, sent in enumerate(doc.sents, start=1):
                st.subheader(f"{t['sentence']} {number}")
                # jupyter=False guarantees the markup comes back as a string
                # instead of depending on displaCy's environment detection.
                html = displacy.render(sent, style="dep", options={"distance": 100}, jupyter=False)
                # Shrink the rendered SVG: lower the fixed heights and pin the
                # vertical offset of translated groups to 50.
                html = html.replace('height="375"', 'height="200"')
                html = SVG_OPEN_TAG_RE.sub(
                    lambda m: m.group(0).replace('height="450"', 'height="300"'),
                    html,
                )
                html = GROUP_TRANSLATE_RE.sub(r'<g transform="translate(\1,50)"', html)
                st.write(html, unsafe_allow_html=True)

        # Network view of the same text; visualize_syntax receives the raw
        # text and the pipeline, and its result is handed to st.pyplot.
        with st.expander(t['network_diagram'], expanded=True):
            fig = visualize_syntax(sentence_input, nlp_models[lang_code])
            st.pyplot(fig)