File size: 4,596 Bytes
5b5c9f7
 
 
 
 
 
e0f08c6
 
 
ba1dfbc
 
 
 
 
e0f08c6
 
5b5c9f7
 
 
a43be03
e0f08c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b5c9f7
e0f08c6
5b5c9f7
e0f08c6
 
5b5c9f7
 
 
 
e0f08c6
 
 
 
 
 
 
 
 
 
 
5b5c9f7
 
e0f08c6
5b5c9f7
 
 
e0f08c6
5b5c9f7
 
e0f08c6
 
 
5b5c9f7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# app.py
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'

import streamlit as st
import spacy
from spacy import displacy
import re

# Page-level configuration: browser-tab title, a randomly chosen emoji icon,
# and the full-width ("wide") layout.
# NOTE(review): this call is placed ahead of the project-module import below,
# presumably so it runs before any other st.* command — confirm against the
# Streamlit version in use.
st.set_page_config(
    layout="wide",
    page_icon="random",
    page_title="AIdeaText",
)

from modules.syntax_analysis import highlight_repeated_words, get_repeated_words_colors, POS_COLORS, POS_TRANSLATIONS, visualize_syntax

@st.cache_resource
def load_spacy_models():
    """Load the large spaCy pipelines for Spanish, English and French.

    Returns:
        A dict mapping the language codes 'es', 'en' and 'fr' to the
        corresponding loaded spaCy pipeline.
    """
    # (language code, spaCy package name) pairs, loaded in this order.
    model_names = (
        ('es', "es_core_news_lg"),
        ('en', "en_core_web_lg"),
        ('fr', "fr_core_news_lg"),
    )
    return {code: spacy.load(package) for code, package in model_names}

# Obtain the spaCy pipelines, keyed by language code
# (load_spacy_models is wrapped in st.cache_resource).
nlp_models = load_spacy_models()

# Sidebar language picker: maps the display name shown to the user to the
# language code used to index both the models and the UI translations.
languages = {
    'Español': 'es',
    'English': 'en',
    'Français': 'fr',
}
selected_lang = st.sidebar.selectbox(
    "Select Language / Seleccione el idioma / Choisissez la langue",
    list(languages),
)
lang_code = languages[selected_lang]

# UI strings for every supported language, keyed first by language code and
# then by message id. Each language provides the same set of message ids.
translations = dict(
    es=dict(
        title="AIdeaText - Análisis morfológico y sintáctico",
        input_label="Ingrese un texto para analizar (máx. 5,000 palabras):",
        input_placeholder="Escriba o pegue su texto aquí...",
        analyze_button="Analizar texto",
        repeated_words="Palabras repetidas",
        legend="Leyenda: Categorías gramaticales",
        arc_diagram="Análisis sintáctico: Diagrama de arco",
        network_diagram="Análisis sintáctico: Diagrama de red",
        sentence="Oración",
    ),
    en=dict(
        title="AIdeaText - Morphological and Syntactic Analysis",
        input_label="Enter a text to analyze (max 5,000 words):",
        input_placeholder="Type or paste your text here...",
        analyze_button="Analyze text",
        repeated_words="Repeated words",
        legend="Legend: Grammatical categories",
        arc_diagram="Syntactic analysis: Arc diagram",
        network_diagram="Syntactic analysis: Network diagram",
        sentence="Sentence",
    ),
    fr=dict(
        title="AIdeaText - Analyse morphologique et syntaxique",
        input_label="Entrez un texte à analyser (max 5 000 mots) :",
        input_placeholder="Tapez ou collez votre texte ici...",
        analyze_button="Analyser le texte",
        repeated_words="Mots répétés",
        legend="Légende : Catégories grammaticales",
        arc_diagram="Analyse syntaxique : Diagramme en arc",
        network_diagram="Analyse syntaxique : Diagramme de réseau",
        sentence="Phrase",
    ),
)

# Use translations: UI strings for the language picked in the sidebar
t = translations[lang_code]

# Patterns used to post-process displaCy's SVG markup. Compiled once here
# instead of being rebuilt for every sentence in the loop below.
_SVG_OPEN_TAG_RE = re.compile(r'<svg[^>]*>')
_G_TRANSLATE_RE = re.compile(r'<g [^>]*transform="translate\((\d+),(\d+)\)"')

st.markdown(f"### {t['title']}")

# Text Input with instructions
sentence_input = st.text_area(t['input_label'], height=150, placeholder=t['input_placeholder'])

if st.button(t['analyze_button']):
    # A whitespace-only string is truthy, so test the stripped text: otherwise
    # blank input would run the whole pipeline and render empty diagrams.
    if sentence_input and sentence_input.strip():
        doc = nlp_models[lang_code](sentence_input)

        # Highlighted Repeated Words
        with st.expander(t['repeated_words'], expanded=True):
            word_colors = get_repeated_words_colors(doc)
            highlighted_text = highlight_repeated_words(doc, word_colors)
            st.markdown(highlighted_text, unsafe_allow_html=True)

        # Legend for grammatical categories: one colored chip per POS tag
        # that has an entry in POS_TRANSLATIONS.
        st.markdown(f"##### {t['legend']}")
        legend_items = "".join(
            f"<div style='margin-right: 10px;'><span style='background-color: {color}; padding: 2px 5px;'>{POS_TRANSLATIONS[pos]}</span></div>"
            for pos, color in POS_COLORS.items()
            if pos in POS_TRANSLATIONS
        )
        legend_html = f"<div style='display: flex; flex-wrap: wrap;'>{legend_items}</div>"
        st.markdown(legend_html, unsafe_allow_html=True)

        # Arc Diagram: one dependency diagram per sentence
        with st.expander(t['arc_diagram'], expanded=True):
            for number, sent in enumerate(doc.sents, start=1):
                st.subheader(f"{t['sentence']} {number}")
                # jupyter=False forces displaCy to return the markup as a
                # string instead of relying on notebook auto-detection; the
                # string edits below need an actual str.
                html = displacy.render(sent, style="dep", options={"distance": 100}, jupyter=False)
                # Shrink the fixed heights displaCy emits and pull translated
                # groups up to y=50 so each diagram takes less vertical space.
                html = html.replace('height="375"', 'height="200"')
                html = _SVG_OPEN_TAG_RE.sub(
                    lambda m: m.group(0).replace('height="450"', 'height="300"'),
                    html,
                )
                html = _G_TRANSLATE_RE.sub(
                    lambda m: f'<g transform="translate({m.group(1)},50)"',
                    html,
                )
                st.write(html, unsafe_allow_html=True)

        # Network graph
        with st.expander(t['network_diagram'], expanded=True):
            fig = visualize_syntax(sentence_input, nlp_models[lang_code])
            st.pyplot(fig)
    else:
        # Nothing to analyze: give feedback instead of silently doing nothing.
        # The localized placeholder ("Type or paste your text here...") is
        # reused as the prompt so no additional translation key is required.
        st.warning(t['input_placeholder'])