Cicciokr committed on
Commit 0ef6009 · verified · 1 Parent(s): 64c52ec

Update app.py

Files changed (1)
  1. app.py +9 -31
app.py CHANGED
@@ -6,7 +6,7 @@ import streamlit as st
 from transformers import pipeline, AutoModelForMaskedLM, AutoTokenizer


-st.title("Completamento del testo in Latino con Latin BERT")
+st.title("Completamento di parole in testi Latino Antico")
 st.write("Inserisci un testo con il token [MASK] per vedere le previsioni del modello.")


@@ -15,9 +15,6 @@ st.write("Asdrubal, frater Annibalis, qui secundo Punico bello [MASK] ingentibus
 st.write("hanno et mago qui [MASK]  punico bello cornelium consulem aput liparas ceperunt > primo");
 st.write("Lorem ipsum dolor sit amet, [MASK] adipiscing elit. > consectetur");
 st.write("Populus Romanus cum Macedonibus [MASK] ter gessit => bellum");
-#Asdrubal, frater Annibalis, qui secundo Punico bello [MASK] ingentibus copiis ab Hispania veniens => cum
-#hanno et mago qui [MASK]  punico bello cornelium consulem aput liparas ceperunt => primo
-#Lorem ipsum dolor sit amet, [MASK] adipiscing elit. => consectetur
 input_text = st.text_input("Testo:", value="Lorem ipsum dolor sit amet, [MASK] adipiscing elit.")

 # Model based on BERT
@@ -25,39 +22,20 @@ input_text = st.text_input("Testo:", value="Lorem ipsum dolor sit amet, [MASK] a
 #Hugging face LuisAVasquez/simple-latin-bert-uncased
 #modelname_lv = "LuisAVasquez/simple-latin-bert-uncased"
 #https://github.com/dbamman/latin-bert
-modelname = "./models/bert-base-latin-uncased"

 #ClassCat/roberta-base-latin-v2
-tokenizer_robertaclasscat = AutoTokenizer.from_pretrained("Cicciokr/xlm-roberta-latin")
-model_robertaclasscat = AutoModelForMaskedLM.from_pretrained("Cicciokr/xlm-roberta-latin")
-fill_mask_robertaclasscat = pipeline("fill-mask", model=model_robertaclasscat, tokenizer=tokenizer_robertaclasscat)
-
-
-
-
-tokenizer_robertapstroe = AutoTokenizer.from_pretrained("pstroe/roberta-base-latin-cased")
-model_robertapstroe = AutoModelForMaskedLM.from_pretrained("pstroe/roberta-base-latin-cased")
-fill_mask_robertapstroe = pipeline("fill-mask", model=model_robertapstroe, tokenizer=tokenizer_robertapstroe)
-
-tokenizer = AutoTokenizer.from_pretrained(modelname)
-model = AutoModelForMaskedLM.from_pretrained(modelname)
-fill_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer)
-
+tokenizer_roberta = AutoTokenizer.from_pretrained("Cicciokr/Roberta-Base-Latin-Uncased")
+model_roberta = AutoModelForMaskedLM.from_pretrained("Cicciokr/Roberta-Base-Latin-Uncased")
+fill_mask_roberta = pipeline("fill-mask", model=model_robertaclasscat, tokenizer=tokenizer_robertaclasscat)


 if input_text:
     predictions = fill_mask(input_text)
-    st.subheader("Risultati delle previsioni con Bert:")
-    for pred in predictions:
-        st.write(f"**Parola**: {pred['token_str']}, **Probabilità**: {pred['score']:.4f}, **Sequence**: {pred['sequence']}")
+
     input_text_roberta = input_text.replace("[MASK]", "<mask>")
-    predictions_robertaclasscat = fill_mask_robertaclasscat(input_text_roberta)
-    st.subheader("Risultati delle previsioni con Roberta ClassCat:")
-    for pred_robertaclasscat in predictions_robertaclasscat:
-        st.write(f"**Parola**: {pred_robertaclasscat['token_str']}, **Probabilità**: {pred_robertaclasscat['score']:.4f}, **Sequence**: {pred_robertaclasscat['sequence']}")
-    predictions_robertapstroe = fill_mask_robertapstroe(input_text_roberta)
-    st.subheader("Risultati delle previsioni con Roberta Pstroe:")
-    for pred_robertapstroe in predictions_robertapstroe:
-        st.write(f"**Parola**: {pred_robertapstroe['token_str']}, **Probabilità**: {pred_robertapstroe['score']:.4f}, **Sequence**: {pred_robertapstroe['sequence']}")
+    predictions_roberta = fill_mask_roberta(input_text_roberta)
+    st.subheader("Risultati delle previsioni:")
+    for pred_roberta in predictions_roberta:
+        st.write(f"**Parola**: {pred_roberta['token_str']}, **Probabilità**: {pred_roberta['score']:.4f}, **Sequence**: {pred_roberta['sequence']}")
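Note: as committed, the new code leaves two stale references: the added line builds fill_mask_roberta from the deleted model_robertaclasscat/tokenizer_robertaclasscat variables, and the retained line predictions = fill_mask(input_text) calls the fill_mask pipeline this commit removes, so the app would raise a NameError at runtime. Below is a minimal sketch of how the post-commit app.py could look with those references repaired, assuming the intent was to keep only the Cicciokr/Roberta-Base-Latin-Uncased pipeline; the load_fill_mask helper and the st.cache_resource decorator are editorial assumptions, not part of the commit.

import streamlit as st
from transformers import pipeline, AutoModelForMaskedLM, AutoTokenizer

st.title("Completamento di parole in testi Latino Antico")
st.write("Inserisci un testo con il token [MASK] per vedere le previsioni del modello.")

@st.cache_resource
def load_fill_mask():
    # Hypothetical helper (not in the commit): load the model once and
    # reuse it across Streamlit reruns instead of reloading on every input.
    tokenizer = AutoTokenizer.from_pretrained("Cicciokr/Roberta-Base-Latin-Uncased")
    model = AutoModelForMaskedLM.from_pretrained("Cicciokr/Roberta-Base-Latin-Uncased")
    return pipeline("fill-mask", model=model, tokenizer=tokenizer)

fill_mask_roberta = load_fill_mask()

input_text = st.text_input("Testo:", value="Lorem ipsum dolor sit amet, [MASK] adipiscing elit.")

if input_text:
    # RoBERTa tokenizers use <mask> as the mask token rather than BERT's [MASK],
    # hence the replacement before calling the pipeline.
    input_text_roberta = input_text.replace("[MASK]", "<mask>")
    predictions_roberta = fill_mask_roberta(input_text_roberta)
    st.subheader("Risultati delle previsioni:")
    for pred in predictions_roberta:
        st.write(f"**Parola**: {pred['token_str']}, **Probabilità**: {pred['score']:.4f}, **Sequence**: {pred['sequence']}")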