Spaces:

Jacobo
/

syntax

Running

App Files Files Community

Jacobo committed on Jun 15, 2024

Commit

4869be9

verified ·

1 Parent(s): c7cb00a

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -28

app.py CHANGED Viewed

@@ -1,17 +1,10 @@
-from typing import Optional
 import spacy
 from spacy import displacy
-from spacy.language import Language
 import streamlit as st
-from spacy_streamlit import visualize_parser
-from spacy_streamlit import visualize_tokens
-from spacy_streamlit import visualize_ner
 import base64
 from PIL import Image
-#import pandas
 st.set_page_config(layout="wide")
@@ -19,7 +12,7 @@ st.image("logo.png", use_column_width=False, width=150)
 st.title("Ancient Greek Syntax and Named Entities")
-st.markdown("Welcome to our analyzer. Here you can parse the parts of speech (POS) and the syntactic relationships of any ancient Greek sentence. This analysis is done by our language models trained with transformers and the NLP library spaCy.  Below, you can choose which model do you want to use (each model may produce a different analysis).  Documentation about the linguistic terms used by our models to annotate your sentences can be found here.  If you have any questions, please contact us at [email protected]")
 st.header("Select a model:")
 spacy_model = st.selectbox("Model", ["grc_proiel_lg","grc_proiel_trf","grc_proiel_sm","grc_perseus_lg","grc_perseus_trf","grc_perseus_sm","grc_ner_trf"])
@@ -27,21 +20,19 @@ spacy_model = st.selectbox("Model", ["grc_proiel_lg","grc_proiel_trf","grc_proie
 st.header("Enter text:")
 text = st.text_area("Greek text","ἐπὶ τοῦτον δὴ τὸν Ἄμασιν Καμβύσης ὁ Κύρου ἐστρατεύετο, ἄγων καί ἄλλους τῶν ἦρχε καὶ Ἑλλήνων Ἴωνάς τε καὶ Αἰολέας.")
-#config = {"punct_chars": [".", ";", "·"]}
 nlp = spacy.load(spacy_model)
-#nlp.add_pipe("sentencizer", config=config, before="parser")
-# Get the pipeline order
 doc = nlp(text)
 def get_html(html: str):
     """Convert HTML so it can be rendered."""
     WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
-    # Newlines seem to mess with the rendering
     html = html.replace("\n", " ")
     return WRAPPER.format(html)
@@ -66,23 +57,18 @@ def visualize_parser(
     )
     options = {
         "collapse_punct": cols[1].checkbox(
-            "Collapse punct", value=True, key=f"{key}_parser_collapse_punct"
         ),
         "compact": cols[3].checkbox("Compact mode", value=True, key=f"{key}_parser_compact"),
     }
     docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
     for sent in docs:
         html = displacy.render(sent, options=options, style="dep")
-        # Double newlines seem to mess with the rendering
         html = html.replace("\n\n", "\n")
         if split_sents and len(docs) > 1:
             st.markdown(f"> {sent.text}")
         st.write(get_svg(html), unsafe_allow_html=True)
-#displacy.render(doc, style="ent")
 visualize_parser(doc)
 visualize_ner(
@@ -92,9 +78,4 @@ visualize_ner(
     title="Persons, locations, groups, gods, and languages",
 )
-#pd.set_option('display.max_colwidth', None)
 visualize_tokens(doc, attrs=["text", "lemma_", "pos_", "dep_","ent_type_"], title="Table view:", key="tokens")

+from typing import Optional
 import spacy
 from spacy import displacy
 import streamlit as st
+from spacy_streamlit import visualize_parser, visualize_tokens, visualize_ner
 import base64
 from PIL import Image
 st.set_page_config(layout="wide")
 st.title("Ancient Greek Syntax and Named Entities")
+st.markdown("Welcome to our analyzer. Here you can parse the parts of speech (POS) and the syntactic relationships of any ancient Greek sentence. This analysis is done by our language models trained with transformers and the NLP library spaCy. Below, you can choose which model do you want to use (each model may produce a different analysis). Documentation about the linguistic terms used by our models to annotate your sentences can be found here. If you have any questions, please contact us at [email protected]")
 st.header("Select a model:")
 spacy_model = st.selectbox("Model", ["grc_proiel_lg","grc_proiel_trf","grc_proiel_sm","grc_perseus_lg","grc_perseus_trf","grc_perseus_sm","grc_ner_trf"])
 st.header("Enter text:")
 text = st.text_area("Greek text","ἐπὶ τοῦτον δὴ τὸν Ἄμασιν Καμβύσης ὁ Κύρου ἐστρατεύετο, ἄγων καί ἄλλους τῶν ἦρχε καὶ Ἑλλήνων Ἴωνάς τε καὶ Αἰολέας.")
 nlp = spacy.load(spacy_model)
+# Add the NER pipeline from grc_ner_trf if the selected model is grc_proiel_trf or grc_perseus_trf
+if spacy_model in ["grc_proiel_trf", "grc_perseus_trf"]:
+    ner = spacy.load("grc_ner_trf")
+    for pipe_name, pipe in ner.pipeline:
+        nlp.add_pipe(pipe, name=pipe_name, source=ner)
 doc = nlp(text)
 def get_html(html: str):
     """Return *html* on a single line, wrapped in a styled container div.

     Every newline in the markup is replaced by a space, then the result is
     placed inside a bordered, horizontally scrollable ``<div>``.
     """
     container = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
     single_line = " ".join(html.split("\n"))
     return container.format(single_line)
     )
     options = {
         "collapse_punct": cols[1].checkbox(
+            "Collapse punct", value=True, key=f"{key}_parser_collapse_punct"
         ),
         "compact": cols[3].checkbox("Compact mode", value=True, key=f"{key}_parser_compact"),
     }
     docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
     for sent in docs:
         html = displacy.render(sent, options=options, style="dep")
         html = html.replace("\n\n", "\n")
         if split_sents and len(docs) > 1:
             st.markdown(f"> {sent.text}")
         st.write(get_svg(html), unsafe_allow_html=True)
 visualize_parser(doc)
 visualize_ner(
     title="Persons, locations, groups, gods, and languages",
 )
 visualize_tokens(doc, attrs=["text", "lemma_", "pos_", "dep_","ent_type_"], title="Table view:", key="tokens")