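"""Streamlit demo for the BERTIN Spanish masked language models.

Lets the user pick a BERTIN checkpoint, fill in a <mask> token in a
Spanish prompt, and view an English translation of the top result.
"""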
import random
from mtranslate import translate
import streamlit as st
from transformers import AutoTokenizer, AutoModelForMaskedLM, pipeline


LOGO = "https://raw.githubusercontent.com/nlp-en-es/assets/main/logo.png"

MODELS = {
    "RoBERTa Base": {
        "url": "bertin-project/bertin-roberta-base-spanish"
    },
    "RoBERTa Base Gaussian": {
        "url": "bertin-project/bertin-base-gaussian"
    },
    "RoBERTa Base Random": {
        "url": "bertin-project/bertin-base-random"
    },
    "RoBERTa Base Stepwise": {
        "url": "bertin-project/bertin-base-stepwise"
    },
    "RoBERTa Base Gaussian Experiment": {
        "url": "bertin-project/bertin-base-gaussian-exp-512seqlen"
    },
    "RoBERTa Base Random Experiment": {
        "url": "bertin-project/bertin-base-random-exp-512seqlen"
    }
}

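# Sample Spanish prompts, each containing one <mask> token for the model to fill.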
PROMPT_LIST = [
    "Fui a la librería a comprar un <mask>.",
    "¡Qué buen <mask> hace hoy!",
    "Hoy empiezan las vacaciones así que vamos a la <mask>.",
    "Mi color favorito es el <mask>.",
    "Voy a <mask> porque estoy muy cansada.",
    "Mañana vienen mis amigos de <mask>.",
    "¿Te apetece venir a <mask> conmigo?",
    "En verano hace mucho <mask>.",
    "En el bosque había <mask>."
]


@st.cache(show_spinner=False, allow_output_mutation=True)
def load_model(model_url):
    # Cache the pipeline per model URL so the model and tokenizer are
    # loaded once and reused across reruns, rather than being reloaded
    # every time the input text changes.
    model = AutoModelForMaskedLM.from_pretrained(model_url)
    tokenizer = AutoTokenizer.from_pretrained(model_url)
    return pipeline("fill-mask", model=model, tokenizer=tokenizer)
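
# A minimal sketch of the expected output (scores are hypothetical):
# the fill-mask pipeline returns a list of candidate fills sorted by score, e.g.
#   nlp = load_model("bertin-project/bertin-roberta-base-spanish")
#   nlp("Mi color favorito es el <mask>.")
#   -> [{"sequence": "Mi color favorito es el azul.", "score": 0.23,
#        "token": ..., "token_str": "azul"}, ...]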


# Page
st.set_page_config(page_title="BERTIN Demo", page_icon=LOGO)
st.title("BERTIN")

# Sidebar
st.sidebar.image(LOGO)

# Body
st.markdown(
    """
    BERTIN is a series of BERT-based models for Spanish.
    The models were trained with Flax on TPUs sponsored by Google as part of the
    [Flax/JAX Community Week](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104)
    organised by Hugging Face.
    """
)

model_name = st.selectbox("Model", list(MODELS.keys()))
model_url = MODELS[model_name]["url"]

prompt = st.selectbox("Prompt", ["Random", "Custom"])
if prompt == "Custom":
    prompt_box = "Enter your masked text here..."
else:
    prompt_box = random.choice(PROMPT_LIST)
text = st.text_area("Enter text", prompt_box)

if st.button("Fill the mask"):
    if "<mask>" not in text:
        # The pipeline raises an exception if no mask token is present,
        # e.g. when the "Custom" placeholder text is submitted unchanged.
        st.error("Please include a <mask> token in the text.")
    else:
        with st.spinner(text="Filling the mask..."):
            st.subheader("Result")
            nlp = load_model(model_url)
            result = nlp(text)
            result_sequence = result[0]["sequence"]
            st.write(result_sequence)
            # mtranslate's argument order is (text, to_language, from_language).
            st.write("_English translation:_", translate(result_sequence, "en", "es"))
            st.write(result)

st.markdown(
    """
    ### Team members
    - Javier de la Rosa ([versae](https://huggingface.co/versae))
    - Eduardo González ([edugp](https://huggingface.co/edugp))
    - Paulo Villegas ([paulo](https://huggingface.co/paulo))
    - Pablo González de Prado ([Pablogps](https://huggingface.co/Pablogps))
    - Manu Romero ([mrm8488](https://huggingface.co/mrm8488))
    - María Grandury ([mariagrandury](https://huggingface.co/mariagrandury))
        
    ### More information
    You can find more information about these models
    [here](https://huggingface.co/bertin-project/bertin-roberta-base-spanish).
    """
)
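
# To run the demo locally (assuming this file is saved as app.py):
#   pip install streamlit transformers torch mtranslate
#   streamlit run app.py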