File size: 5,798 Bytes
905ea5f
 
 
 
 
 
 
 
 
 
 
1bbcde4
905ea5f
 
 
 
 
486464f
905ea5f
 
486464f
905ea5f
486464f
9d3f381
486464f
905ea5f
 
 
 
 
 
 
 
 
 
 
486464f
905ea5f
 
 
 
486464f
905ea5f
 
 
 
 
 
 
d8464b4
a160dea
905ea5f
 
 
 
 
 
 
 
 
 
 
 
 
 
486464f
905ea5f
247d1f4
905ea5f
 
 
 
 
d8464b4
a160dea
905ea5f
 
 
486464f
905ea5f
247d1f4
905ea5f
 
 
 
 
d8464b4
a160dea
905ea5f
 
 
 
486464f
 
905ea5f
a160dea
905ea5f
 
 
 
 
 
486464f
905ea5f
 
486464f
 
 
 
905ea5f
486464f
905ea5f
 
 
 
 
 
486464f
905ea5f
 
d8464b4
905ea5f
 
 
 
 
 
 
 
 
 
 
486464f
905ea5f
486464f
905ea5f
486464f
905ea5f
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import datetime

import openai
import streamlit as st
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TextClassificationPipeline,
    pipeline,
)

from filters import (
    clean_html,
    remove_twitter_handles,
    remove_urls,
)

# Sidebar header + page title/description.  Mojibake from a UTF-8/GBK
# mis-decode is repaired here ("Español", "aplicación está"), and the
# library name is corrected to "Hugging Face".
st.sidebar.markdown("## Modelos cargados")


st.title("Clasificador de sentimientos para Tweets en Español")
st.write(
    "Utilizo la biblioteca Hugging Face Transformers para clasificar el sentimiento \
    de tweets enviados como entrada en positivos, neutrales o negativos. \
    Esta aplicación está construida usando [Streamlit](https://docs.streamlit.io/en/stable/getting_started.html)."
)

# Fine-tuned Spanish tweet sentiment classifiers published on the Hugging
# Face Hub.  The "-cleaned-ds" variants were fine-tuned on the cleaned
# dataset; the plain variants on the raw one.
_HUB_USER = "francisco-perez-sorrosal"
_BASE_MODELS = [
    "distilbert-base-uncased-finetuned-with-spanish-tweets-clf",
    "distilbert-base-multilingual-cased-finetuned-with-spanish-tweets-clf",
    "dccuchile-distilbert-base-spanish-uncased-finetuned-with-spanish-tweets-clf",
]
models = [
    f"{_HUB_USER}/{name}{suffix}"
    for suffix in ("", "-cleaned-ds")
    for name in _BASE_MODELS
]

# Checkbox label mojibake repaired ("¿...?").
load_all_models = st.checkbox("¿Cargar todos los modelos?")

# One-time initialization: populate st.session_state.pipelines on the first
# script run.  Loads only models[0] unless "load all" is ticked; subsequent
# reloads go through update_model() via the button callback below.
if "pipelines" not in st.session_state:
    st.session_state.pipelines = []
    for model in models:
        with st.spinner(f"Cargando modelo {model}"):
            pipe = pipeline(
                "text-classification",
                model=AutoModelForSequenceClassification.from_pretrained(model),
                tokenizer=AutoTokenizer.from_pretrained(model),
                # NOTE(review): return_all_scores is deprecated upstream in
                # favor of top_k=None, but the two differ in output nesting;
                # kept as-is because the submit handler indexes result[0].
                return_all_scores=True,
            )
        # Echo the loaded model's tokenizer/config to the sidebar.
        st.sidebar.subheader(pipe.model.config.name_or_path)
        st.sidebar.write(f"Tokenizer:\n{pipe.tokenizer}")
        st.sidebar.write(f"Modelo:\n{pipe.model.config}")
        st.session_state.pipelines.append(pipe)
        if not load_all_models:
            break
    # Sentinel "never updated" timestamp; replaced on the first reload.
    st.session_state.last_updated = datetime.time(0, 0)


def update_model(
    local_model_id,
    model_id,
):
    """Reload the classification pipeline(s) into the Streamlit session state.

    Button callback.  Replaces ``st.session_state.pipelines`` with either a
    single pipeline (the selected/local model) or one pipeline per entry in
    ``models`` when ``load_all_models`` is ticked, then stamps
    ``st.session_state.last_updated``.

    Args:
        local_model_id: Optional path/id of a model on local disk; when
            non-empty it takes precedence over ``model_id``.
        model_id: Hub id of the model chosen in the selectbox.
    """

    def _load_pipeline(name):
        # Build one text-classification pipeline and echo its tokenizer and
        # config to the sidebar (previously duplicated three times).
        pipe = pipeline(
            "text-classification",
            model=AutoModelForSequenceClassification.from_pretrained(name),
            tokenizer=AutoTokenizer.from_pretrained(name),
            # NOTE(review): deprecated upstream; kept so the submit handler's
            # result[0] indexing keeps working.
            return_all_scores=True,
        )
        st.sidebar.subheader(pipe.model.config.name_or_path)
        st.sidebar.write(f"Tokenizer:\n{pipe.tokenizer}")
        st.sidebar.write(f"Modelo:\n{pipe.model.config}")
        return pipe

    st.session_state.pipelines = []
    if not load_all_models:
        if local_model_id:
            model_id = local_model_id
        st.text(f"Cargando modelo {model_id}")
        st.session_state.pipelines.append(_load_pipeline(model_id))
    else:
        for model in models:
            with st.spinner(f"Cargando modelo {model}"):
                pipe = _load_pipeline(model)
            st.session_state.pipelines.append(pipe)
    st.session_state.last_updated = datetime.datetime.now().time()


# Model selection widgets: a Hub model from the dropdown, plus an optional
# local path that takes precedence inside the update_model callback.
model_id = st.selectbox(f"Elige un modelo {load_all_models}", models)
# f-prefix removed: the label has no placeholders (flake8 F541).
local_model_id = st.text_input("Elige un modelo del disco local")
st.button(
    "Cargar modelo/s",
    on_click=update_model,
    args=(
        local_model_id,
        model_id,
    ),
)
st.write("Ultima actualización = ", st.session_state.last_updated)


# Input form: tweet text plus options to pre-clean it and to also ask
# ChatGPT for a classification.  Checkbox labels had mojibake ("¿...?").
form = st.form(key="sentimient")
tweet_text = form.text_area("Introduce tu texto")
clean_input = form.checkbox("¿Limpiar el texto?")
ask_chatgpt = form.checkbox("¿Preguntarle a ChatGPT?")
# Mask the API key on screen — it is a secret.
openai_key = form.text_input("OpenAI Key", type="password")
submit = form.form_submit_button("Cargar")

if submit:
    # Optionally strip Twitter handles, URLs and HTML before classifying.
    # Each filter takes/returns a {"text": ...} record.
    if clean_input:
        tweet_text = remove_twitter_handles({"text": tweet_text})["text"]
        tweet_text = remove_urls({"text": tweet_text})["text"]
        tweet_text = clean_html({"text": tweet_text})["text"]
    st.write(f"Enviando este texto al modelo: {tweet_text}")

    # Run every loaded pipeline and report its top-scoring label.
    for classifier in st.session_state.pipelines:
        st.subheader(f"Model\n{classifier.model.config.name_or_path}")
        result = classifier(tweet_text)
        st.json(result, expanded=False)
        # result[0]: one {"label", "score"} dict per class (return_all_scores).
        predictions = result[0]
        if predictions:
            best = max(predictions, key=lambda p: p["score"])
            label, score = best["label"], best["score"]
        else:
            # Fallback mirrors the original sentinel values.
            label, score = "N/A", -1

        # Color-code: P -> green, NEU -> yellow, anything else -> red.
        if label == "P":
            st.success(f"{label} sentimiento (puntuación: {score})")
        elif label == "NEU":
            st.warning(f"{label} sentimiento (puntuación: {score})")
        else:
            st.error(f"{label} sentimiento (puntuación: {score})")

    if ask_chatgpt:
        openai.api_key = openai_key
        prompt = f"""
        Classify the sentiment of the following tweet text into positive (P), neutral (NEU) or negative (N).
        The tweet text will appear after the ":" symbol at the end of the paragraph. The result will contain
        each label, either P, NEU or N and its corresponding softmax score, or probability with the following
        format <LABEL>/<SCORE>: {tweet_text}
        """
        # BUG FIX: the built prompt was previously discarded — a hard-coded
        # "What is the pandas library?" string was sent instead.
        completion = openai.Completion.create(
            # NOTE(review): legacy Completions API and retired engine name;
            # confirm against the installed openai SDK version.
            engine="text-davinci-003",
            prompt=prompt,
            max_tokens=1000,
        )
        st.write(completion.choices[0]["text"])