import datetime

import openai
import streamlit as st
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TextClassificationPipeline,
    pipeline,
)

from filters import (
    clean_html,
    remove_twitter_handles,
    remove_urls,
)

st.sidebar.markdown("## Modelos cargados")

st.title("Clasificador de sentimientos para Tweets en Español")
st.write(
    "Utilizo la biblioteca Hugging Face Transformers para clasificar el sentimiento \
de tweets enviados como entrada en positivos, neutrales o negativos. \
Esta aplicación está construida usando [Streamlit](https://docs.streamlit.io/en/stable/getting_started.html)."
)

models = [
    "francisco-perez-sorrosal/distilbert-base-uncased-finetuned-with-spanish-tweets-clf",
    "francisco-perez-sorrosal/distilbert-base-multilingual-cased-finetuned-with-spanish-tweets-clf",
    "francisco-perez-sorrosal/dccuchile-distilbert-base-spanish-uncased-finetuned-with-spanish-tweets-clf",
    "francisco-perez-sorrosal/distilbert-base-uncased-finetuned-with-spanish-tweets-clf-cleaned-ds",
    "francisco-perez-sorrosal/distilbert-base-multilingual-cased-finetuned-with-spanish-tweets-clf-cleaned-ds",
    "francisco-perez-sorrosal/dccuchile-distilbert-base-spanish-uncased-finetuned-with-spanish-tweets-clf-cleaned-ds",
]

load_all_models = st.checkbox("¿Cargar todos los modelos?")

# Initial load: populate the session state with the first model, or with all of them
# if the checkbox above is ticked.
if "pipelines" not in st.session_state:
    st.session_state.pipelines = []
    for model in models:
        with st.spinner(f"Cargando modelo {model}"):
            pipe = pipeline(
                "text-classification",
                model=AutoModelForSequenceClassification.from_pretrained(model),
                tokenizer=AutoTokenizer.from_pretrained(model),
                return_all_scores=True,
            )
            st.sidebar.subheader(pipe.model.config.name_or_path)
            st.sidebar.write(f"Tokenizer:\n{pipe.tokenizer}")
            st.sidebar.write(f"Modelo:\n{pipe.model.config}")
            st.session_state.pipelines.append(pipe)
        if not load_all_models:
            break
    st.session_state.last_updated = datetime.time(0, 0)


def update_model(
    local_model_id,
    model_id,
):
    st.session_state.pipelines = []
    if not load_all_models:
        # A model id typed into the local-disk text box takes precedence over the selectbox.
        if local_model_id:
            model_id = local_model_id
        st.text(f"Cargando modelo {model_id}")
        pipe = pipeline(
            "text-classification",
            model=AutoModelForSequenceClassification.from_pretrained(model_id),
            tokenizer=AutoTokenizer.from_pretrained(model_id),
            return_all_scores=True,
        )
        st.sidebar.subheader(pipe.model.config.name_or_path)
        st.sidebar.write(f"Tokenizer:\n{pipe.tokenizer}")
        st.sidebar.write(f"Modelo:\n{pipe.model.config}")
        st.session_state.pipelines.append(pipe)
    else:
        for model in models:
            with st.spinner(f"Cargando modelo {model}"):
                pipe = pipeline(
                    "text-classification",
                    model=AutoModelForSequenceClassification.from_pretrained(model),
                    tokenizer=AutoTokenizer.from_pretrained(model),
                    return_all_scores=True,
                )
                st.sidebar.subheader(pipe.model.config.name_or_path)
                st.sidebar.write(f"Tokenizer:\n{pipe.tokenizer}")
                st.sidebar.write(f"Modelo:\n{pipe.model.config}")
                st.session_state.pipelines.append(pipe)
    st.session_state.last_updated = datetime.datetime.now().time()


model_id = st.selectbox(f"Elige un modelo {load_all_models}", models)
local_model_id = st.text_input("Elige un modelo del disco local")

st.button(
    "Cargar modelo/s",
    on_click=update_model,
    args=(
        local_model_id,
        model_id,
    ),
)

st.write("Última actualización = ", st.session_state.last_updated)

form = st.form(key="sentimient")
tweet_text = form.text_area("Introduce tu texto")
clean_input = form.checkbox("¿Limpiar el texto?")
ask_chatgpt = form.checkbox("¿Preguntarle a ChatGPT?")
openai_key = form.text_input("OpenAI Key")
submit = form.form_submit_button("Cargar")
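# Note on the pipeline output parsed below: with return_all_scores=True, each pipeline call
# returns one entry per input text, and each entry is a list of label/score dicts. For a
# single tweet the result should look roughly like this (scores here are made-up example
# values, not real model output):
#   [[{"label": "P", "score": 0.91}, {"label": "NEU", "score": 0.06}, {"label": "N", "score": 0.03}]]
# which is why the block below indexes result[0] and keeps the highest-scoring label.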
if submit:
    if clean_input:
        tweet_text = remove_twitter_handles({"text": tweet_text})["text"]
        tweet_text = remove_urls({"text": tweet_text})["text"]
        tweet_text = clean_html({"text": tweet_text})["text"]
    st.write(f"Enviando este texto al modelo: {tweet_text}")

    for classifier in st.session_state.pipelines:
        st.subheader(f"Model\n{classifier.model.config.name_or_path}")
        result = classifier(tweet_text)
        st.json(result, expanded=False)
        predictions = result[0]
        label = "N/A"
        score = -1
        for p in predictions:
            if p["score"] > score:
                label = p["label"]
                score = p["score"]
        if label == "P":
            st.success(f"{label} sentimiento (puntuación: {score})")
        elif label == "NEU":
            st.warning(f"{label} sentimiento (puntuación: {score})")
        else:
            st.error(f"{label} sentimiento (puntuación: {score})")

    if ask_chatgpt:
        openai.api_key = openai_key
        prompt = f"""
        Classify the sentiment of the following tweet text into positive (P), neutral (NEU) or negative (N).
        The tweet text will appear after the ":" symbol at the end of the paragraph.
        The result will contain each label, either P, NEU or N and its corresponding softmax score, or probability with the following format