|
import streamlit as st |
|
from transformers import pipeline, AutoModelForMaskedLM, AutoTokenizer |
|
from cltk.data.fetch import FetchCorpus |
|
import builtins |
|
import stanza |
|
|
|
|
|
# Keep a handle on the real ``input`` so it can be put back once the
# unattended downloads further down have finished.
_original_input = builtins.input


def _always_yes(prompt="") -> str:
    """Stand-in for ``input`` that answers every prompt with "Y".

    The prompt is echoed to stdout followed by the canned answer, so the
    console output still shows which question was auto-confirmed.

    Args:
        prompt: Text the caller would have shown to the user.

    Returns:
        The string ``"Y"``, unconditionally.
    """
    answer = "Y"
    print(prompt, answer)
    return answer
|
|
|
@st.cache_resource(show_spinner=False)
def _download_latin_resources():
    """Download the Stanza and CLTK Latin models without user interaction.

    ``builtins.input`` is temporarily replaced by ``_always_yes`` so that any
    confirmation prompt issued during the downloads is answered
    automatically. The previous ``input`` is restored in a ``finally`` block,
    so a failed download can no longer leave the process with a patched
    ``input``.

    The function is wrapped in ``st.cache_resource`` because Streamlit
    re-executes the whole script on every interaction: without the cache the
    downloads (and the process-wide ``input`` patch) would be repeated on
    each rerun. If a download raises, nothing is cached and the next rerun
    retries.

    Returns:
        The ``FetchCorpus`` instance used to import the CLTK Latin models.
    """
    # Capture the current ``input`` here, at patch time, so the restore does
    # not depend on state captured by another script run.
    saved_input = builtins.input
    builtins.input = _always_yes
    try:
        stanza.download("la")
        downloader = FetchCorpus(language="lat")
        downloader.import_corpus("lat_models_cltk")
    finally:
        builtins.input = saved_input
    return downloader


corpus_downloader = _download_latin_resources()
|
|
|
# CLTK is an optional dependency: start from "no analyser" and only replace
# the placeholder when both the import and the pipeline construction succeed.
nlp_lat = None
try:
    from cltk import NLP

    nlp_lat = NLP(language="lat")
except ImportError:
    # CLTK (or something it imports) is unavailable; leave ``nlp_lat`` as
    # None so the morphological analysis section is skipped later on.
    pass
|
|
|
|
|
# Seed the text box on the first run of a session only; on later reruns the
# stored value (typed by the user or set by an example button) is kept.
if "input_text_value" not in st.session_state:
    st.session_state.input_text_value = (
        "Lorem ipsum dolor sit amet, [MASK] adipiscing elit."
    )
|
|
|
|
|
# Sample sentences offered in the UI; each contains a single "[MASK]"
# placeholder marking the word to be predicted.
examples = [
    "Asdrubal, frater Annibalis, qui secundo Punico bello [MASK] ingentibus copiis ab Hispania veniens ...",
    "hanno et mago qui [MASK] punico bello cornelium consulem aput liparas ceperunt ...",
    "Lorem ipsum dolor sit amet, [MASK] adipiscing elit.",
    "Populus Romanus cum Macedonibus [MASK] ter gessit",
]
|
|
|
# Page header and instructions.
st.title("Completamento di parole in testi Latino Antico con Analisi Morfologica")
st.write("Esempi di testo (clicca sul bottone per copiare la frase nel campo di input):")

# One row per example: the sentence in a wide column and, next to it, a
# button that copies the sentence into the text box.
for number, sentence in enumerate(examples, start=1):
    text_col, button_col = st.columns([4, 1])
    text_col.write(f"Esempio {number}: {sentence}")
    if button_col.button(f"Usa {number}"):
        # The text box below is bound to this session key, so writing the
        # key here (before the widget is created in this run) pre-fills it.
        st.session_state["input_text_value"] = sentence

# Text box whose content lives in session state under "input_text_value".
input_text = st.text_input("Testo:", key="input_text_value")
|
|
|
|
|
|
|
ROBERTA_MODEL_NAME = "Cicciokr/Roberta-Base-Latin-Uncased"


@st.cache_resource(show_spinner=False)
def _load_roberta_fill_mask(model_name):
    """Load the masked-language model once and reuse it across reruns.

    Streamlit re-executes the script on every interaction; without caching,
    the tokenizer and model weights would be reloaded each time a button is
    clicked or the text changes.

    Args:
        model_name: Hugging Face Hub identifier of the model to load.

    Returns:
        A ``(tokenizer, model, fill_mask_pipeline)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForMaskedLM.from_pretrained(model_name)
    fill_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer)
    return tokenizer, model, fill_mask


tokenizer_roberta, model_roberta, fill_mask_roberta = _load_roberta_fill_mask(
    ROBERTA_MODEL_NAME
)
|
|
|
|
|
if input_text:
    # The UI uses the BERT-style "[MASK]" placeholder; translate it to the
    # mask token the loaded tokenizer actually expects.
    mask_token = tokenizer_roberta.mask_token
    input_text_roberta = input_text.replace("[MASK]", mask_token)

    if input_text_roberta.count(mask_token) != 1:
        # With no mask the pipeline raises, and with several masks it returns
        # one list per mask, which the display code below does not handle.
        # Tell the user instead of crashing the page.
        st.warning("Il testo deve contenere esattamente un segnaposto [MASK].")
    else:
        predictions_roberta = fill_mask_roberta(input_text_roberta)

        st.subheader("Risultati delle previsioni (RoBERTa):")
        for pred in predictions_roberta:
            st.write(
                f"Token: {pred['token_str']} - "
                f"Probabilità: {pred['score']:.4f}\n"
                f"Sequence: {pred['sequence']}\n"
            )
            st.write("---")

        if nlp_lat is not None:
            st.subheader("Analisi Morfologica con CLTK (opzionale)")

            for i, pred in enumerate(predictions_roberta, start=1):
                # RoBERTa tokens usually carry a leading space; strip it so
                # the substituted sentence does not get a doubled space.
                predicted_text = input_text_roberta.replace(
                    mask_token, pred["token_str"].strip()
                )
                # Run the CLTK pipeline through its ``analyze`` method.
                doc = nlp_lat.analyze(text=predicted_text)

                st.write(f"Frase {i}: {predicted_text}")
                # ``doc.words`` holds the annotated word objects;
                # ``doc.tokens`` is only a list of plain strings.
                for word in doc.words:
                    st.write(
                        f"- Token: {word.string}\n"
                        f" - Lemma: {word.lemma}\n"
                        f" - UPOS: {word.upos}\n"
                        f" - Morfologia: {word.features}\n"
                    )
                st.write("---")
        else:
            st.warning("CLTK non installato (o non importato). Esegui 'pip install cltk' per abilitare l'analisi.")