# app.py import os os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' os.environ['KMP_DUPLICATE_LIB_OK']='TRUE' import streamlit as st import spacy from spacy import displacy import re import pydantic import numpy as np import thinc st.write(f"spaCy version: {spacy.__version__}") st.write(f"Pydantic version: {pydantic.__version__}") st.write(f"NumPy version: {np.__version__}") st.write(f"Thinc version: {thinc.__version__}") from modules.syntax_analysis import highlight_repeated_words, get_repeated_words_colors, POS_COLORS, POS_TRANSLATIONS, visualize_syntax #@st.cache_resource #def load_spacy_model(): # return spacy.load("es_core_news_lg") @st.cache_resource def load_spacy_model(): try: nlp = spacy.load("es_core_news_lg") except IOError: st.info("Downloading spaCy model...") spacy.cli.download("es_core_news_lg") nlp = spacy.load("es_core_news_lg") return nlp # Load spaCy model nlp = spacy.load("models/spacy/es_core_news_lg") # Configure the page to use the full width st.set_page_config(layout="wide", page_title="AIdeaText") st.markdown("### AIdeaText - Advanced Text Analysis Tool") # First horizontal band: Text Input sentence_input = st.text_area("Ingresa un texto para analizar (max 5,000 words):", height=150) if st.button("Analizar texto"): if sentence_input: doc = nlp(sentence_input) # Second horizontal band: Highlighted Repeated Words with st.expander("Palabras repetidas", expanded=True): #st.markdown("#### Palabras repetidas") #st.write("En esta sección, se indican las palabras repetidas por categoría gramatical.") word_colors = get_repeated_words_colors(doc) highlighted_text = highlight_repeated_words(doc, word_colors) st.markdown(highlighted_text, unsafe_allow_html=True) # Legend for grammatical categories st.markdown("##### Legenda: Categorías gramaticales") legend_html = "

" for pos, color in POS_COLORS.items(): if pos in POS_TRANSLATIONS: legend_html += f"

{POS_TRANSLATIONS[pos]}

" legend_html += "

" st.markdown(legend_html, unsafe_allow_html=True) # Third horizontal band: Arc Diagram with st.expander("Análisis sintáctico: Diagrama de arco", expanded=True): #st.write("This section displays the syntactic structure of each sentence using arc diagrams.") sentences = list(doc.sents) for i, sent in enumerate(sentences): st.subheader(f"Sentence {i+1}") html = displacy.render(sent, style="dep", options={"distance": 100}) # Reduce the height of the SVG html = html.replace('height="375"', 'height="200"') # Reduce the top margin of the SVG html = re.sub(r']*>', lambda m: m.group(0).replace('height="450"', 'height="300"'), html) html = re.sub(r']*transform="translate\((\d+),(\d+)\)"', lambda m: f'