File size: 5,798 Bytes
905ea5f
 
 
 
 
 
 
 
 
 
 
1bbcde4
905ea5f
 
 
 
 
486464f
905ea5f
 
486464f
905ea5f
486464f
9d3f381
486464f
905ea5f
 
 
 
 
 
 
 
 
 
 
486464f
905ea5f
 
 
 
486464f
905ea5f
 
 
 
 
 
 
d8464b4
a160dea
905ea5f
 
 
 
 
 
 
 
 
 
 
 
 
 
486464f
905ea5f
247d1f4
905ea5f
 
 
 
 
d8464b4
a160dea
905ea5f
 
 
486464f
905ea5f
247d1f4
905ea5f
 
 
 
 
d8464b4
a160dea
905ea5f
 
 
 
486464f
 
905ea5f
a160dea
905ea5f
 
 
 
 
 
486464f
905ea5f
 
486464f
 
 
 
905ea5f
486464f
905ea5f
 
 
 
 
 
486464f
905ea5f
 
d8464b4
905ea5f
 
 
 
 
 
 
 
 
 
 
486464f
905ea5f
486464f
905ea5f
486464f
905ea5f
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import datetime

import openai
import streamlit as st
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TextClassificationPipeline,
    pipeline,
)

from filters import (
    clean_html,
    remove_twitter_handles,
    remove_urls,
)

# Sidebar header + page title/description.  Mojibake from a UTF-8/GBK
# mis-decode is repaired here ("Español", "aplicación está"), and the
# library name is corrected to "Hugging Face".
st.sidebar.markdown("## Modelos cargados")


st.title("Clasificador de sentimientos para Tweets en Español")
st.write(
    "Utilizo la biblioteca Hugging Face Transformers para clasificar el sentimiento \
    de tweets enviados como entrada en positivos, neutrales o negativos. \
    Esta aplicación está construida usando [Streamlit](https://docs.streamlit.io/en/stable/getting_started.html)."
)

# Fine-tuned Spanish tweet sentiment classifiers published on the Hugging
# Face Hub.  The "-cleaned-ds" variants were fine-tuned on the cleaned
# dataset; the plain variants on the raw one.
_HUB_USER = "francisco-perez-sorrosal"
_BASE_MODELS = [
    "distilbert-base-uncased-finetuned-with-spanish-tweets-clf",
    "distilbert-base-multilingual-cased-finetuned-with-spanish-tweets-clf",
    "dccuchile-distilbert-base-spanish-uncased-finetuned-with-spanish-tweets-clf",
]
models = [
    f"{_HUB_USER}/{name}{suffix}"
    for suffix in ("", "-cleaned-ds")
    for name in _BASE_MODELS
]

# Checkbox label mojibake repaired ("¿...?").
load_all_models = st.checkbox("¿Cargar todos los modelos?")

# One-time initialization: populate st.session_state.pipelines on the first
# script run.  Loads only models[0] unless "load all" is ticked; subsequent
# reloads go through update_model() via the button callback below.
if "pipelines" not in st.session_state:
    st.session_state.pipelines = []
    for model in models:
        with st.spinner(f"Cargando modelo {model}"):
            pipe = pipeline(
                "text-classification",
                model=AutoModelForSequenceClassification.from_pretrained(model),
                tokenizer=AutoTokenizer.from_pretrained(model),
                # NOTE(review): return_all_scores is deprecated upstream in
                # favor of top_k=None, but the two differ in output nesting;
                # kept as-is because the submit handler indexes result[0].
                return_all_scores=True,
            )
        # Echo the loaded model's tokenizer/config to the sidebar.
        st.sidebar.subheader(pipe.model.config.name_or_path)
        st.sidebar.write(f"Tokenizer:\n{pipe.tokenizer}")
        st.sidebar.write(f"Modelo:\n{pipe.model.config}")
        st.session_state.pipelines.append(pipe)
        if not load_all_models:
            break
    # Sentinel "never updated" timestamp; replaced on the first reload.
    st.session_state.last_updated = datetime.time(0, 0)


def update_model(
    local_model_id,
    model_id,
):
    """Reload the classification pipeline(s) into the Streamlit session state.

    Button callback.  Replaces ``st.session_state.pipelines`` with either a
    single pipeline (the selected/local model) or one pipeline per entry in
    ``models`` when ``load_all_models`` is ticked, then stamps
    ``st.session_state.last_updated``.

    Args:
        local_model_id: Optional path/id of a model on local disk; when
            non-empty it takes precedence over ``model_id``.
        model_id: Hub id of the model chosen in the selectbox.
    """

    def _load_pipeline(name):
        # Build one text-classification pipeline and echo its tokenizer and
        # config to the sidebar (previously duplicated three times).
        pipe = pipeline(
            "text-classification",
            model=AutoModelForSequenceClassification.from_pretrained(name),
            tokenizer=AutoTokenizer.from_pretrained(name),
            # NOTE(review): deprecated upstream; kept so the submit handler's
            # result[0] indexing keeps working.
            return_all_scores=True,
        )
        st.sidebar.subheader(pipe.model.config.name_or_path)
        st.sidebar.write(f"Tokenizer:\n{pipe.tokenizer}")
        st.sidebar.write(f"Modelo:\n{pipe.model.config}")
        return pipe

    st.session_state.pipelines = []
    if not load_all_models:
        if local_model_id:
            model_id = local_model_id
        st.text(f"Cargando modelo {model_id}")
        st.session_state.pipelines.append(_load_pipeline(model_id))
    else:
        for model in models:
            with st.spinner(f"Cargando modelo {model}"):
                pipe = _load_pipeline(model)
            st.session_state.pipelines.append(pipe)
    st.session_state.last_updated = datetime.datetime.now().time()


# Model selection widgets: a Hub model from the dropdown, plus an optional
# local path that takes precedence inside the update_model callback.
model_id = st.selectbox(f"Elige un modelo {load_all_models}", models)
# f-prefix removed: the label has no placeholders (flake8 F541).
local_model_id = st.text_input("Elige un modelo del disco local")
st.button(
    "Cargar modelo/s",
    on_click=update_model,
    args=(
        local_model_id,
        model_id,
    ),
)
st.write("Ultima actualización = ", st.session_state.last_updated)


# Input form: tweet text plus options to pre-clean it and to also ask
# ChatGPT for a classification.  Checkbox labels had mojibake ("¿...?").
form = st.form(key="sentimient")
tweet_text = form.text_area("Introduce tu texto")
clean_input = form.checkbox("¿Limpiar el texto?")
ask_chatgpt = form.checkbox("¿Preguntarle a ChatGPT?")
# Mask the API key on screen — it is a secret.
openai_key = form.text_input("OpenAI Key", type="password")
submit = form.form_submit_button("Cargar")

if submit:
    # Optionally strip Twitter handles, URLs and HTML before classifying.
    # Each filter takes/returns a {"text": ...} record.
    if clean_input:
        tweet_text = remove_twitter_handles({"text": tweet_text})["text"]
        tweet_text = remove_urls({"text": tweet_text})["text"]
        tweet_text = clean_html({"text": tweet_text})["text"]
    st.write(f"Enviando este texto al modelo: {tweet_text}")

    # Run every loaded pipeline and report its top-scoring label.
    for classifier in st.session_state.pipelines:
        st.subheader(f"Model\n{classifier.model.config.name_or_path}")
        result = classifier(tweet_text)
        st.json(result, expanded=False)
        # result[0]: one {"label", "score"} dict per class (return_all_scores).
        predictions = result[0]
        if predictions:
            best = max(predictions, key=lambda p: p["score"])
            label, score = best["label"], best["score"]
        else:
            # Fallback mirrors the original sentinel values.
            label, score = "N/A", -1

        # Color-code: P -> green, NEU -> yellow, anything else -> red.
        if label == "P":
            st.success(f"{label} sentimiento (puntuación: {score})")
        elif label == "NEU":
            st.warning(f"{label} sentimiento (puntuación: {score})")
        else:
            st.error(f"{label} sentimiento (puntuación: {score})")

    if ask_chatgpt:
        openai.api_key = openai_key
        prompt = f"""
        Classify the sentiment of the following tweet text into positive (P), neutral (NEU) or negative (N).
        The tweet text will appear after the ":" symbol at the end of the paragraph. The result will contain
        each label, either P, NEU or N and its corresponding softmax score, or probability with the following
        format <LABEL>/<SCORE>: {tweet_text}
        """
        # BUG FIX: the built prompt was previously discarded — a hard-coded
        # "What is the pandas library?" string was sent instead.
        completion = openai.Completion.create(
            # NOTE(review): legacy Completions API and retired engine name;
            # confirm against the installed openai SDK version.
            engine="text-davinci-003",
            prompt=prompt,
            max_tokens=1000,
        )
        st.write(completion.choices[0]["text"])