Spaces:

Jacobo
/

syntax

Running

App Files Files Community

syntax / app.py

Jacobo

Update app.py

15ffe15 verified about 1 year ago

raw

history blame

3.58 kB

	from typing import Optional
	import spacy
	from spacy import displacy
	import streamlit as st
	from spacy_streamlit import visualize_parser, visualize_tokens, visualize_ner
	import base64
	from PIL import Image

	# Page-level configuration; called before any other Streamlit element.
	st.set_page_config(layout="wide")

	# Branding and introductory copy shown at the top of the page.
	st.image("logo.png", use_column_width=False, width=150)
	st.title("Ancient Greek Syntax and Named Entities")

	INTRO_TEXT = (
	    "Welcome to our analyzer. Here you can parse the parts of speech (POS) and "
	    "the syntactic relationships of any ancient Greek sentence. This analysis "
	    "is done by our language models trained with transformers and the NLP "
	    "library spaCy. Below, you can choose which model do you want to use (each "
	    "model may produce a different analysis). Documentation about the "
	    "linguistic terms used by our models to annotate your sentences can be "
	    "found here. If you have any questions, please contact us at "
	    "[email protected]"
	)
	st.markdown(INTRO_TEXT)

	# Names of the spaCy pipelines offered in the model picker, in display order.
	MODEL_CHOICES = [
	    "grc_proiel_lg",
	    "grc_proiel_trf",
	    "grc_proiel_sm",
	    "grc_perseus_lg",
	    "grc_perseus_trf",
	    "grc_perseus_sm",
	    "grc_ner_trf",
	]
	st.header("Select a model:")
	spacy_model = st.selectbox("Model", MODEL_CHOICES)

	# Sample sentence pre-filled in the text area.
	DEFAULT_TEXT = "ἐπὶ τοῦτον δὴ τὸν Ἄμασιν Καμβύσης ὁ Κύρου ἐστρατεύετο, ἄγων καί ἄλλους τῶν ἦρχε καὶ Ἑλλήνων Ἴωνάς τε καὶ Αἰολέας."
	st.header("Enter text:")
	text = st.text_area("Greek text", DEFAULT_TEXT)

	# Load the pipeline chosen in the model picker.
	# NOTE(review): this runs at module top level; if Streamlit re-executes the
	# script on every interaction, the model is presumably reloaded each time —
	# consider caching the loaded pipeline.
	nlp = spacy.load(spacy_model)

	# For the two transformer parsing models, add every component of grc_ner_trf
	# that the selected pipeline does not already have.
	if spacy_model in ("grc_proiel_trf", "grc_perseus_trf"):
	    ner = spacy.load("grc_ner_trf")
	    for pipe_name in ner.pipe_names:
	        # Components whose name already exists in nlp are skipped, so nlp's
	        # own version of that component is the one that runs.
	        # NOTE(review): if both pipelines contain a same-named shared
	        # component, the sourced ones will use nlp's copy — confirm intended.
	        if pipe_name not in nlp.pipe_names:
	            nlp.add_pipe(pipe_name, source=ner)

	# Run the (possibly extended) pipeline on the user's text.
	doc = nlp(text)

	def get_html(html: str) -> str:
	    """Wrap an HTML fragment in a bordered, horizontally scrollable container.

	    Every newline in ``html`` is replaced with a single space before the
	    fragment is inserted into the wrapper ``<div>``.

	    Args:
	        html: Markup to embed.

	    Returns:
	        The wrapper ``<div>`` containing the flattened markup.
	    """
	    WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
	    html = html.replace("\n", " ")
	    return WRAPPER.format(html)

	def get_svg(svg: str, style: str = "", wrap: bool = True) -> str:
	    """Embed an SVG document in an ``<img>`` tag as a base64 data URI.

	    Args:
	        svg: SVG markup; it is UTF-8 encoded before base64 encoding.
	        style: Value placed verbatim in the ``style`` attribute of the tag.
	        wrap: When true, the ``<img>`` tag is passed through ``get_html``
	            before being returned; otherwise the bare tag is returned.

	    Returns:
	        The ``<img>`` tag, optionally wrapped by ``get_html``.
	    """
	    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
	    html = f'<img src="data:image/svg+xml;base64,{b64}" style="{style}"/>'
	    return get_html(html) if wrap else html

	def visualize_parser(
	    doc: spacy.tokens.Doc,
	    *,
	    title: Optional[str] = "Dependency parse & part of speech:",
	    key: Optional[str] = None,
	) -> None:
	    """Render displaCy dependency parses of ``doc`` on the Streamlit page.

	    Shows three checkboxes (split sentences, collapse punctuation, compact
	    mode) and then writes one rendered parse per sentence, or a single
	    parse of the whole document.

	    Args:
	        doc: The processed document to visualize.
	        title: Header shown above the controls; skipped when falsy.
	        key: Prefix used to build the widget keys of the checkboxes.
	    """
	    if title:
	        st.header(title)
	    cols = st.columns(4)
	    split_sents = cols[0].checkbox(
	        "Split sentences", value=True, key=f"{key}_parser_split_sents"
	    )
	    options = {
	        "collapse_punct": cols[1].checkbox(
	            "Collapse punct", value=True, key=f"{key}_parser_collapse_punct"
	        ),
	        # cols[2] is not used; the compact toggle lives in the fourth column.
	        "compact": cols[3].checkbox(
	            "Compact mode", value=True, key=f"{key}_parser_compact"
	        ),
	    }
	    # doc.sents raises when no sentence boundaries are set (e.g. a pipeline
	    # without a parser or sentencizer), so only split when they exist and
	    # otherwise fall back to rendering the whole document.
	    can_split = split_sents and doc.has_annotation("SENT_START")
	    docs = [span.as_doc() for span in doc.sents] if can_split else [doc]
	    for sent in docs:
	        html = displacy.render(sent, options=options, style="dep")
	        # Collapse blank lines in the SVG markup; presumably they interfere
	        # with rendering — TODO confirm.
	        html = html.replace("\n\n", "\n")
	        if split_sents and len(docs) > 1:
	            # Quote the sentence text above its own diagram.
	            st.markdown(f"> {sent.text}")
	        st.write(get_svg(html), unsafe_allow_html=True)

	# Dependency parse section.
	visualize_parser(doc)

	# Named-entity section, limited to the listed entity labels and shown
	# without the accompanying table.
	ner_labels = ["PERSON", "LOC", "NORP", "GOD", "LANGUAGE"]
	visualize_ner(
	    doc,
	    labels=ner_labels,
	    show_table=False,
	    title="Persons, locations, groups, gods, and languages",
	)

	# Per-token table with the selected token attributes as columns.
	token_columns = ["text", "lemma_", "pos_", "dep_", "ent_type_"]
	visualize_tokens(doc, attrs=token_columns, title="Table view:", key="tokens")