|
import datetime |
|
|
|
import openai |
|
import streamlit as st |
|
from transformers import ( |
|
AutoModelForSequenceClassification, |
|
AutoTokenizer, |
|
TextClassificationPipeline, |
|
pipeline, |
|
) |
|
|
|
from filters import ( |
|
clean_html, |
|
remove_twitter_handles, |
|
remove_urls, |
|
) |
|
|
|
# Sidebar section: each pipeline loaded below reports its tokenizer/config here.
st.sidebar.markdown("## Modelos cargados")


# Page title and short description. Fixes mojibake ("Espa帽ol" -> "Español",
# "aplicaci贸n est谩" -> "aplicación está"), the "Huggin Face" typo, and the
# stray space before the period in "negativos .".
st.title("Clasificador de sentimientos para Tweets en Español")
st.write(
    "Utilizo la biblioteca Hugging Face Transformers para clasificar el sentimiento \
de tweets enviados como entrada en positivos, neutrales o negativos. \
Esta aplicación está construida usando [Streamlit](https://docs.streamlit.io/en/stable/getting_started.html)."
)
|
|
|
# Sentiment classifiers (positive/neutral/negative) fine-tuned on Spanish
# tweets, identified by their Hugging Face Hub repo ids. The "-cleaned-ds"
# variants appear to be trained on a cleaned dataset — TODO confirm upstream.
models = [
    "francisco-perez-sorrosal/distilbert-base-uncased-finetuned-with-spanish-tweets-clf",
    "francisco-perez-sorrosal/distilbert-base-multilingual-cased-finetuned-with-spanish-tweets-clf",
    "francisco-perez-sorrosal/dccuchile-distilbert-base-spanish-uncased-finetuned-with-spanish-tweets-clf",
    "francisco-perez-sorrosal/distilbert-base-uncased-finetuned-with-spanish-tweets-clf-cleaned-ds",
    "francisco-perez-sorrosal/distilbert-base-multilingual-cased-finetuned-with-spanish-tweets-clf-cleaned-ds",
    "francisco-perez-sorrosal/dccuchile-distilbert-base-spanish-uncased-finetuned-with-spanish-tweets-clf-cleaned-ds",
]
|
|
|
# Whether to eagerly load every model in `models` (slow) or only the first.
# Fixes mojibake in the label ("驴" -> "¿").
load_all_models = st.checkbox("¿Cargar todos los modelos?")
|
|
|
# One-time initialization: load the classification pipeline(s) into the
# Streamlit session so script reruns can reuse them instead of reloading.
if "pipelines" not in st.session_state:
    st.session_state.pipelines = []
    for model_name in models:
        with st.spinner(f"Cargando modelo {model_name}"):
            # Build the HF text-classification pipeline; return_all_scores
            # makes it emit every label's score, not just the top one.
            clf_model = AutoModelForSequenceClassification.from_pretrained(model_name)
            clf_tokenizer = AutoTokenizer.from_pretrained(model_name)
            pipe = pipeline(
                "text-classification",
                model=clf_model,
                tokenizer=clf_tokenizer,
                return_all_scores=True,
            )
            # Report the loaded model's details in the sidebar.
            st.sidebar.subheader(pipe.model.config.name_or_path)
            st.sidebar.write(f"Tokenizer:\n{pipe.tokenizer}")
            st.sidebar.write(f"Modelo:\n{pipe.model.config}")
            st.session_state.pipelines.append(pipe)
        # Unless the user asked for all models, stop after the first one.
        if not load_all_models:
            break
    # Placeholder timestamp; update_model() overwrites it on manual reloads.
    st.session_state.last_updated = datetime.time(0, 0)
|
|
|
|
|
def update_model(
    local_model_id,
    model_id,
):
    """Reload the sentiment pipeline(s) stored in ``st.session_state``.

    Callback for the "Cargar modelo/s" button. When ``load_all_models`` is
    unchecked, loads a single model — preferring ``local_model_id`` over
    ``model_id`` when it is non-empty; otherwise loads every model in
    ``models``. Finishes by refreshing ``st.session_state.last_updated``.

    Args:
        local_model_id: Optional path/identifier of a model on local disk;
            takes precedence over ``model_id`` when non-empty.
        model_id: Hub identifier selected in the dropdown.
    """

    def _load_and_report(name):
        # Build one text-classification pipeline and describe it in the
        # sidebar; previously this code was duplicated verbatim in both
        # branches below.
        pipe = pipeline(
            "text-classification",
            model=AutoModelForSequenceClassification.from_pretrained(name),
            tokenizer=AutoTokenizer.from_pretrained(name),
            # NOTE(review): deprecated in newer transformers in favor of
            # top_k=None; kept for compatibility with the installed version.
            return_all_scores=True,
        )
        st.sidebar.subheader(pipe.model.config.name_or_path)
        st.sidebar.write(f"Tokenizer:\n{pipe.tokenizer}")
        st.sidebar.write(f"Modelo:\n{pipe.model.config}")
        st.session_state.pipelines.append(pipe)

    st.session_state.pipelines = []
    if not load_all_models:
        # Single-model reload: a local path, if given, wins over the dropdown.
        if local_model_id:
            model_id = local_model_id
        st.text(f"Cargando modelo {model_id}")
        _load_and_report(model_id)
    else:
        for model in models:
            with st.spinner(f"Cargando modelo {model}"):
                _load_and_report(model)
    st.session_state.last_updated = datetime.datetime.now().time()
|
|
|
|
|
# Model-selection widgets: a Hub dropdown plus a free-text field for a local
# checkpoint path (the local path wins when both are provided — see
# update_model). NOTE(review): the dropdown label embeds the boolean
# load_all_models value; looks like leftover debugging — confirm before removing.
model_id = st.selectbox(f"Elige un modelo {load_all_models}", models)
local_model_id = st.text_input("Elige un modelo del disco local")
st.button(
    "Cargar modelo/s",
    on_click=update_model,
    args=(
        local_model_id,
        model_id,
    ),
)
# Fixes mojibake/orthography in the label ("Ultima actualizaci贸n").
st.write("Última actualización = ", st.session_state.last_updated)
|
|
|
|
|
# Input form: text to classify plus preprocessing/ChatGPT options.
# Checkbox labels had mojibake ("驴" -> "¿"); the form key "sentimient" is a
# typo but is a persisted widget key, so it is deliberately left unchanged.
form = st.form(key="sentimient")
tweet_text = form.text_area("Introduce tu texto")
clean_input = form.checkbox("¿Limpiar el texto?")
ask_chatgpt = form.checkbox("¿Preguntarle a ChatGPT?")
openai_key = form.text_input("OpenAI Key")
submit = form.form_submit_button("Cargar")
|
|
|
if submit:
    if clean_input:
        # Strip @handles, URLs and HTML markup before classification.
        tweet_text = remove_twitter_handles({"text": tweet_text})["text"]
        tweet_text = remove_urls({"text": tweet_text})["text"]
        tweet_text = clean_html({"text": tweet_text})["text"]
    st.write(f"Enviando este texto al modelo: {tweet_text}")

    for classifier in st.session_state.pipelines:
        st.subheader(f"Model\n{classifier.model.config.name_or_path}")
        # With return_all_scores=True the pipeline returns, for one input,
        # a list holding one list of {"label": ..., "score": ...} dicts.
        result = classifier(tweet_text)
        st.json(result, expanded=False)
        predictions = result[0]
        # Highest-scoring label; defaults preserve the original "N/A"/-1
        # fallback when the prediction list is empty.
        best = max(predictions, key=lambda p: p["score"], default=None)
        label = best["label"] if best is not None else "N/A"
        score = best["score"] if best is not None else -1

        # Colour-code the verdict: P=positive, NEU=neutral, anything else
        # (N or N/A) shown as an error. "puntuaci贸n" mojibake fixed.
        if label == "P":
            st.success(f"{label} sentimiento (puntuación: {score})")
        elif label == "NEU":
            st.warning(f"{label} sentimiento (puntuación: {score})")
        else:
            st.error(f"{label} sentimiento (puntuación: {score})")

    if ask_chatgpt:
        openai.api_key = openai_key
        prompt = f"""
        Classify the sentiment of the following tweet text into positive (P), neutral (NEU) or negative (N).
        The tweet text will appear after the ":" symbol at the end of the paragraph. The result will contain
        each label, either P, NEU or N and its corresponding softmax score, or probability with the following
        format <LABEL>/<SCORE>: {tweet_text}
        """
        # BUG FIX: the original passed a hard-coded, unrelated prompt
        # ("What is the pandas library?") and never used the `prompt` built
        # above; now the classification prompt is actually sent.
        completion = openai.Completion.create(
            engine="text-davinci-003", prompt=prompt, max_tokens=1000
        )
        st.write(completion.choices[0]["text"])
|
|