import datetime
import openai
import streamlit as st
from transformers import (
AutoModelForSequenceClassification,
AutoTokenizer,
TextClassificationPipeline,
pipeline,
)
from filters import (
clean_html,
remove_twitter_handles,
remove_urls,
)
# --- Page header, model catalog, and first-run model loading ---------------
st.sidebar.markdown("## Modelos cargados")
st.title("Clasificador de sentimientos para Tweets en Español")
st.write(
    "Utilizo la biblioteca Hugging Face Transformers para clasificar el sentimiento \
de tweets enviados como entrada en positivos, neutrales o negativos. \
Esta aplicación está construida usando [Streamlit](https://docs.streamlit.io/en/stable/getting_started.html)."
)

# Fine-tuned checkpoints available for selection (Hugging Face Hub ids).
models = [
    "francisco-perez-sorrosal/distilbert-base-uncased-finetuned-with-spanish-tweets-clf",
    "francisco-perez-sorrosal/distilbert-base-multilingual-cased-finetuned-with-spanish-tweets-clf",
    "francisco-perez-sorrosal/dccuchile-distilbert-base-spanish-uncased-finetuned-with-spanish-tweets-clf",
    "francisco-perez-sorrosal/distilbert-base-uncased-finetuned-with-spanish-tweets-clf-cleaned-ds",
    "francisco-perez-sorrosal/distilbert-base-multilingual-cased-finetuned-with-spanish-tweets-clf-cleaned-ds",
    "francisco-perez-sorrosal/dccuchile-distilbert-base-spanish-uncased-finetuned-with-spanish-tweets-clf-cleaned-ds",
]

load_all_models = st.checkbox("¿Cargar todos los modelos?")

# First run of the session: load one model (or all of them when requested)
# and cache the pipelines in session_state so reruns don't reload from disk.
if "pipelines" not in st.session_state:
    st.session_state.pipelines = []
    for model in models:
        with st.spinner(f"Cargando modelo {model}"):
            pipe = pipeline(
                "text-classification",
                model=AutoModelForSequenceClassification.from_pretrained(model),
                tokenizer=AutoTokenizer.from_pretrained(model),
                # NOTE(review): return_all_scores is deprecated in newer
                # transformers releases; top_k=None is the replacement, but the
                # two produce differently nested outputs — confirm before migrating.
                return_all_scores=True,
            )
            st.sidebar.subheader(pipe.model.config.name_or_path)
            st.sidebar.write(f"Tokenizer:\n{pipe.tokenizer}")
            st.sidebar.write(f"Modelo:\n{pipe.model.config}")
            st.session_state.pipelines.append(pipe)
            # Unless the user asked for every model, stop after the first one.
            if not load_all_models:
                break
    # Sentinel "never updated" timestamp; replaced by update_model() later.
    st.session_state.last_updated = datetime.time(0, 0)
def update_model(
    local_model_id,
    model_id,
):
    """Reload the classification pipeline(s) into ``st.session_state``.

    Args:
        local_model_id: Optional model path on local disk; when non-empty it
            takes precedence over ``model_id``.
        model_id: Hub id of the model chosen in the selectbox.

    Side effects:
        Replaces ``st.session_state.pipelines`` and refreshes
        ``st.session_state.last_updated``.
    """

    def _load(model_ref):
        # Load one model+tokenizer pair, show its details in the sidebar,
        # and register the resulting pipeline. (Previously this code was
        # duplicated verbatim in both branches below.)
        pipe = pipeline(
            "text-classification",
            model=AutoModelForSequenceClassification.from_pretrained(model_ref),
            tokenizer=AutoTokenizer.from_pretrained(model_ref),
            # NOTE(review): deprecated in newer transformers; top_k=None is
            # the replacement but changes output nesting — confirm first.
            return_all_scores=True,
        )
        st.sidebar.subheader(pipe.model.config.name_or_path)
        st.sidebar.write(f"Tokenizer:\n{pipe.tokenizer}")
        st.sidebar.write(f"Modelo:\n{pipe.model.config}")
        st.session_state.pipelines.append(pipe)

    st.session_state.pipelines = []
    if not load_all_models:
        # A local path, when given, overrides the selectbox choice.
        if local_model_id:
            model_id = local_model_id
        st.text(f"Cargando modelo {model_id}")
        _load(model_id)
    else:
        for model in models:
            with st.spinner(f"Cargando modelo {model}"):
                _load(model)
    st.session_state.last_updated = datetime.datetime.now().time()
# --- Model-selection widgets and the tweet-input form ----------------------
model_id = st.selectbox(f"Elige un modelo {load_all_models}", models)
# Plain string: the original f-string had no placeholders (lint F541).
local_model_id = st.text_input("Elige un modelo del disco local")
st.button(
    "Cargar modelo/s",
    on_click=update_model,
    args=(
        local_model_id,
        model_id,
    ),
)
st.write("Ultima actualización = ", st.session_state.last_updated)

# The form batches the text area, options, and API key into one submission.
# NOTE(review): key "sentimient" looks like a typo for "sentiment", but it is
# a widget identity key — renaming it would reset stored widget state.
form = st.form(key="sentimient")
tweet_text = form.text_area("Introduce tu texto")
clean_input = form.checkbox("¿Limpiar el texto?")
ask_chatgpt = form.checkbox("¿Preguntarle a ChatGPT?")
openai_key = form.text_input("OpenAI Key")
submit = form.form_submit_button("Cargar")
if submit:
    # Optionally normalize the raw tweet before classification. Each filter
    # takes and returns a {"text": ...} mapping.
    if clean_input:
        tweet_text = remove_twitter_handles({"text": tweet_text})["text"]
        tweet_text = remove_urls({"text": tweet_text})["text"]
        tweet_text = clean_html({"text": tweet_text})["text"]
    st.write(f"Enviando este texto al modelo: {tweet_text}")

    for classifier in st.session_state.pipelines:
        st.subheader(f"Model\n{classifier.model.config.name_or_path}")
        result = classifier(tweet_text)
        st.json(result, expanded=False)
        # With return_all_scores=True the pipeline returns one list of
        # {label, score} dicts per input; we sent a single text.
        predictions = result[0]
        # Pick the highest-scoring label; keep the original "N/A"/-1
        # fallback for an empty prediction list.
        best = max(predictions, key=lambda p: p["score"], default=None)
        if best is None:
            label, score = "N/A", -1
        else:
            label, score = best["label"], best["score"]
        # P -> green, NEU -> yellow, anything else (N / N/A) -> red.
        if label == "P":
            st.success(f"{label} sentimiento (puntuación: {score})")
        elif label == "NEU":
            st.warning(f"{label} sentimiento (puntuación: {score})")
        else:
            st.error(f"{label} sentimiento (puntuación: {score})")

    if ask_chatgpt:
        openai.api_key = openai_key
        prompt = f"""
        Classify the sentiment of the following tweet text into positive (P), neutral (NEU) or negative (N).
        The tweet text will appear after the ":" symbol at the end of the paragraph. The result will contain
        each label, either P, NEU or N and its corresponding softmax score, or probability with the following
        format <LABEL>/<SCORE>: {tweet_text}
        """
        # BUG FIX: the constructed prompt was previously discarded and a
        # hard-coded placeholder question ("What is the pandas library?")
        # was sent instead; send the real classification prompt.
        completion = openai.Completion.create(
            engine="text-davinci-003", prompt=prompt, max_tokens=1000
        )
        st.write(completion.choices[0]["text"])