Update app.py
Browse files
app.py
CHANGED
@@ -1,96 +1,118 @@
|
|
1 |
-
|
2 |
# app.py
|
3 |
import os
|
4 |
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
|
5 |
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
|
6 |
|
7 |
import streamlit as st
|
|
|
|
|
|
|
8 |
|
9 |
# Configure the page to use the full width
|
10 |
st.set_page_config(
|
11 |
page_title="AIdeaText",
|
12 |
layout="wide",
|
13 |
-
page_icon="random"
|
14 |
-
|
15 |
-
|
16 |
-
import spacy
|
17 |
-
from spacy import displacy
|
18 |
-
import re
|
19 |
-
import pydantic
|
20 |
-
import numpy as np
|
21 |
-
import thinc
|
22 |
-
|
23 |
-
#st.write(f"spaCy version: {spacy.__version__}")
|
24 |
-
#st.write(f"Pydantic version: {pydantic.__version__}")
|
25 |
-
#st.write(f"NumPy version: {np.__version__}")
|
26 |
-
#st.write(f"Thinc version: {thinc.__version__}")
|
27 |
|
28 |
from modules.syntax_analysis import highlight_repeated_words, get_repeated_words_colors, POS_COLORS, POS_TRANSLATIONS, visualize_syntax
|
29 |
|
30 |
@st.cache_resource
|
31 |
-
def
|
32 |
-
return
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
#
|
39 |
-
|
40 |
-
|
41 |
-
#
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
st.
|
48 |
-
|
49 |
-
|
50 |
-
#
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
if sentence_input:
|
55 |
-
doc =
|
56 |
|
57 |
-
#
|
58 |
-
with st.expander(
|
59 |
-
#st.markdown("#### Palabras repetidas")
|
60 |
-
#st.write("En esta sección, se indican las palabras repetidas por categoría gramatical.")
|
61 |
word_colors = get_repeated_words_colors(doc)
|
62 |
highlighted_text = highlight_repeated_words(doc, word_colors)
|
63 |
st.markdown(highlighted_text, unsafe_allow_html=True)
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
#
|
75 |
-
with st.expander(
|
76 |
-
#st.write("This section displays the syntactic structure of each sentence using arc diagrams.")
|
77 |
-
|
78 |
sentences = list(doc.sents)
|
79 |
for i, sent in enumerate(sentences):
|
80 |
-
st.subheader(f"
|
81 |
html = displacy.render(sent, style="dep", options={"distance": 100})
|
82 |
-
# Reduce the height of the SVG
|
83 |
html = html.replace('height="375"', 'height="200"')
|
84 |
-
# Reduce the top margin of the SVG
|
85 |
html = re.sub(r'<svg[^>]*>', lambda m: m.group(0).replace('height="450"', 'height="300"'), html)
|
86 |
-
html = re.sub(r'<g [^>]*transform="translate\((\d+),(\d+)\)"',
|
87 |
-
lambda m: f'<g transform="translate({m.group(1)},50)"', html)
|
88 |
st.write(html, unsafe_allow_html=True)
|
89 |
|
90 |
-
#
|
91 |
-
with st.expander(
|
92 |
-
|
93 |
-
#st.write("Esta sección muestra la estructura sintáctica del texto completo usando un diagrama de red.")
|
94 |
-
|
95 |
-
fig = visualize_syntax(sentence_input)
|
96 |
st.pyplot(fig)
|
|
|
|
|
1 |
# app.py
|
2 |
import os
|
3 |
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
|
4 |
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
|
5 |
|
6 |
import streamlit as st
|
7 |
+
import spacy
|
8 |
+
from spacy import displacy
|
9 |
+
import re
|
10 |
|
11 |
# Configure the page: browser-tab title, wide layout so the diagrams can use
# the full page width, and the "random" page icon setting.
st.set_page_config(page_title="AIdeaText", layout="wide", page_icon="random")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
from modules.syntax_analysis import highlight_repeated_words, get_repeated_words_colors, POS_COLORS, POS_TRANSLATIONS, visualize_syntax
|
19 |
|
20 |
@st.cache_resource
def load_spacy_models():
    """Load the spaCy pipelines for every supported language.

    The function is wrapped in ``st.cache_resource``.

    Returns:
        dict: language code (``'es'``, ``'en'``, ``'fr'``) mapped to the
        pipeline returned by ``spacy.load`` for that language.
    """
    # (language code, spaCy model package) pairs, loaded in this order.
    model_names = (
        ('es', "es_core_news_lg"),
        ('en', "en_core_web_lg"),
        ('fr', "fr_core_news_lg"),
    )
    return {code: spacy.load(package) for code, package in model_names}
|
27 |
+
|
28 |
+
# Load spaCy models (one pipeline per language code).
nlp_models = load_spacy_models()

# Language selection: the name shown in the sidebar maps to the language code
# used as the key into nlp_models.
languages = {'Español': 'es', 'English': 'en', 'Français': 'fr'}
selected_lang = st.sidebar.selectbox(
    "Select Language / Seleccione el idioma / Choisissez la langue",
    list(languages),
)
lang_code = languages[selected_lang]
|
39 |
+
|
40 |
+
# Translations: UI strings keyed first by language code, then by string id.
# Every language provides the same set of string ids.
translations = {
    # Spanish
    'es': {
        'title': "AIdeaText - Análisis morfológico y sintáctico",
        'input_label': "Ingrese un texto para analizar (máx. 5,000 palabras):",
        'input_placeholder': "Escriba o pegue su texto aquí...",
        'analyze_button': "Analizar texto",
        'repeated_words': "Palabras repetidas",
        'legend': "Leyenda: Categorías gramaticales",
        'arc_diagram': "Análisis sintáctico: Diagrama de arco",
        'network_diagram': "Análisis sintáctico: Diagrama de red",
        'sentence': "Oración",
    },
    # English
    'en': {
        'title': "AIdeaText - Morphological and Syntactic Analysis",
        'input_label': "Enter a text to analyze (max 5,000 words):",
        'input_placeholder': "Type or paste your text here...",
        'analyze_button': "Analyze text",
        'repeated_words': "Repeated words",
        'legend': "Legend: Grammatical categories",
        'arc_diagram': "Syntactic analysis: Arc diagram",
        'network_diagram': "Syntactic analysis: Network diagram",
        'sentence': "Sentence",
    },
    # French
    'fr': {
        'title': "AIdeaText - Analyse morphologique et syntaxique",
        'input_label': "Entrez un texte à analyser (max 5 000 mots) :",
        'input_placeholder': "Tapez ou collez votre texte ici...",
        'analyze_button': "Analyser le texte",
        'repeated_words': "Mots répétés",
        'legend': "Légende : Catégories grammaticales",
        'arc_diagram': "Analyse syntaxique : Diagramme en arc",
        'network_diagram': "Analyse syntaxique : Diagramme de réseau",
        'sentence': "Phrase",
    },
}
|
76 |
+
|
77 |
+
# UI strings for the language picked in the sidebar.
t = translations[lang_code]

# Page heading.
st.markdown("### " + t['title'])

# Text input; label and placeholder come from the active translation.
sentence_input = st.text_area(
    label=t['input_label'],
    height=150,
    placeholder=t['input_placeholder'],
)
|
84 |
+
|
85 |
+
# Run the analysis only when the button was pressed AND the text area holds
# something other than whitespace (a whitespace-only text used to be analysed).
if st.button(t['analyze_button']) and sentence_input.strip():
    nlp = nlp_models[lang_code]
    # The raw (unstripped) text is parsed so it matches what is later passed
    # to visualize_syntax.
    doc = nlp(sentence_input)

    # Highlighted Repeated Words
    with st.expander(t['repeated_words'], expanded=True):
        word_colors = get_repeated_words_colors(doc)
        highlighted_text = highlight_repeated_words(doc, word_colors)
        st.markdown(highlighted_text, unsafe_allow_html=True)

        # Legend for grammatical categories
        # NOTE(review): indentation was not recoverable from the source
        # rendering; the legend is assumed to live inside this expander —
        # confirm against the real file.
        st.markdown(f"##### {t['legend']}")
        # One coloured label per POS tag that has a translation; joined in a
        # single pass instead of repeated string concatenation.
        legend_items = "".join(
            f"<div style='margin-right: 10px;'><span style='background-color: {color}; padding: 2px 5px;'>{POS_TRANSLATIONS[pos]}</span></div>"
            for pos, color in POS_COLORS.items()
            if pos in POS_TRANSLATIONS
        )
        legend_html = f"<div style='display: flex; flex-wrap: wrap;'>{legend_items}</div>"
        st.markdown(legend_html, unsafe_allow_html=True)

    # Arc Diagram
    with st.expander(t['arc_diagram'], expanded=True):
        # Patterns are loop-invariant, so compile them once up front.
        svg_tag = re.compile(r'<svg[^>]*>')
        group_translate = re.compile(r'<g [^>]*transform="translate\((\d+),(\d+)\)"')

        for number, sent in enumerate(doc.sents, start=1):
            st.subheader(f"{t['sentence']} {number}")
            html = displacy.render(sent, style="dep", options={"distance": 100})
            # Shrink the SVG: each literal height below is rewritten only if
            # it appears in the rendered markup.
            html = html.replace('height="375"', 'height="200"')
            html = svg_tag.sub(
                lambda m: m.group(0).replace('height="450"', 'height="300"'),
                html,
            )
            # Pin the vertical offset of translated <g> elements to 50 while
            # keeping their horizontal offset.
            html = group_translate.sub(
                lambda m: f'<g transform="translate({m.group(1)},50)"',
                html,
            )
            st.write(html, unsafe_allow_html=True)

    # Network graph
    with st.expander(t['network_diagram'], expanded=True):
        fig = visualize_syntax(sentence_input, nlp)
        st.pyplot(fig)
|