|
import datetime |
|
|
|
import openai |
|
import streamlit as st |
|
from transformers import ( |
|
AutoModelForSequenceClassification, |
|
AutoTokenizer, |
|
TextClassificationPipeline, |
|
pipeline, |
|
) |
|
|
|
from filters import ( |
|
clean_html, |
|
remove_twitter_handles, |
|
remove_urls, |
|
) |
|
|
|
# Sidebar section: each pipeline loaded below reports its tokenizer/config here.
st.sidebar.markdown("## Modelos cargados")


# Page title and short description. Fixes mojibake ("Espa帽ol" -> "Español",
# "aplicaci贸n est谩" -> "aplicación está"), the "Huggin Face" typo, and the
# stray space before the period in "negativos .".
st.title("Clasificador de sentimientos para Tweets en Español")
st.write(
    "Utilizo la biblioteca Hugging Face Transformers para clasificar el sentimiento \
de tweets enviados como entrada en positivos, neutrales o negativos. \
Esta aplicación está construida usando [Streamlit](https://docs.streamlit.io/en/stable/getting_started.html)."
)
|
|
|
# Sentiment classifiers (positive/neutral/negative) fine-tuned on Spanish
# tweets, identified by their Hugging Face Hub repo ids. The "-cleaned-ds"
# variants appear to be trained on a cleaned dataset — TODO confirm upstream.
models = [
    "francisco-perez-sorrosal/distilbert-base-uncased-finetuned-with-spanish-tweets-clf",
    "francisco-perez-sorrosal/distilbert-base-multilingual-cased-finetuned-with-spanish-tweets-clf",
    "francisco-perez-sorrosal/dccuchile-distilbert-base-spanish-uncased-finetuned-with-spanish-tweets-clf",
    "francisco-perez-sorrosal/distilbert-base-uncased-finetuned-with-spanish-tweets-clf-cleaned-ds",
    "francisco-perez-sorrosal/distilbert-base-multilingual-cased-finetuned-with-spanish-tweets-clf-cleaned-ds",
    "francisco-perez-sorrosal/dccuchile-distilbert-base-spanish-uncased-finetuned-with-spanish-tweets-clf-cleaned-ds",
]
|
|
|
# Whether to eagerly load every model in `models` (slow) or only the first.
# Fixes mojibake in the label ("驴" -> "¿").
load_all_models = st.checkbox("¿Cargar todos los modelos?")
|
|
|
# One-time initialization: load the classification pipeline(s) into the
# Streamlit session so script reruns can reuse them instead of reloading.
if "pipelines" not in st.session_state:
    st.session_state.pipelines = []
    for model_name in models:
        with st.spinner(f"Cargando modelo {model_name}"):
            # Build the HF text-classification pipeline; return_all_scores
            # makes it emit every label's score, not just the top one.
            clf_model = AutoModelForSequenceClassification.from_pretrained(model_name)
            clf_tokenizer = AutoTokenizer.from_pretrained(model_name)
            pipe = pipeline(
                "text-classification",
                model=clf_model,
                tokenizer=clf_tokenizer,
                return_all_scores=True,
            )
            # Report the loaded model's details in the sidebar.
            st.sidebar.subheader(pipe.model.config.name_or_path)
            st.sidebar.write(f"Tokenizer:\n{pipe.tokenizer}")
            st.sidebar.write(f"Modelo:\n{pipe.model.config}")
            st.session_state.pipelines.append(pipe)
        # Unless the user asked for all models, stop after the first one.
        if not load_all_models:
            break
    # Placeholder timestamp; update_model() overwrites it on manual reloads.
    st.session_state.last_updated = datetime.time(0, 0)
|
|
|
|
|
def update_model(
    local_model_id,
    model_id,
):
    """Reload the sentiment pipeline(s) stored in ``st.session_state``.

    Callback for the "Cargar modelo/s" button. When ``load_all_models`` is
    unchecked, loads a single model — preferring ``local_model_id`` over
    ``model_id`` when it is non-empty; otherwise loads every model in
    ``models``. Finishes by refreshing ``st.session_state.last_updated``.

    Args:
        local_model_id: Optional path/identifier of a model on local disk;
            takes precedence over ``model_id`` when non-empty.
        model_id: Hub identifier selected in the dropdown.
    """

    def _load_and_report(name):
        # Build one text-classification pipeline and describe it in the
        # sidebar; previously this code was duplicated verbatim in both
        # branches below.
        pipe = pipeline(
            "text-classification",
            model=AutoModelForSequenceClassification.from_pretrained(name),
            tokenizer=AutoTokenizer.from_pretrained(name),
            # NOTE(review): deprecated in newer transformers in favor of
            # top_k=None; kept for compatibility with the installed version.
            return_all_scores=True,
        )
        st.sidebar.subheader(pipe.model.config.name_or_path)
        st.sidebar.write(f"Tokenizer:\n{pipe.tokenizer}")
        st.sidebar.write(f"Modelo:\n{pipe.model.config}")
        st.session_state.pipelines.append(pipe)

    st.session_state.pipelines = []
    if not load_all_models:
        # Single-model reload: a local path, if given, wins over the dropdown.
        if local_model_id:
            model_id = local_model_id
        st.text(f"Cargando modelo {model_id}")
        _load_and_report(model_id)
    else:
        for model in models:
            with st.spinner(f"Cargando modelo {model}"):
                _load_and_report(model)
    st.session_state.last_updated = datetime.datetime.now().time()
|
|
|
|
|
# Model-selection widgets: a Hub dropdown plus a free-text field for a local
# checkpoint path (the local path wins when both are provided — see
# update_model). NOTE(review): the dropdown label embeds the boolean
# load_all_models value; looks like leftover debugging — confirm before removing.
model_id = st.selectbox(f"Elige un modelo {load_all_models}", models)
local_model_id = st.text_input("Elige un modelo del disco local")
st.button(
    "Cargar modelo/s",
    on_click=update_model,
    args=(
        local_model_id,
        model_id,
    ),
)
# Fixes mojibake/orthography in the label ("Ultima actualizaci贸n").
st.write("Última actualización = ", st.session_state.last_updated)
|
|
|
|
|
# Input form: text to classify plus preprocessing/ChatGPT options.
# Checkbox labels had mojibake ("驴" -> "¿"); the form key "sentimient" is a
# typo but is a persisted widget key, so it is deliberately left unchanged.
form = st.form(key="sentimient")
tweet_text = form.text_area("Introduce tu texto")
clean_input = form.checkbox("¿Limpiar el texto?")
ask_chatgpt = form.checkbox("¿Preguntarle a ChatGPT?")
openai_key = form.text_input("OpenAI Key")
submit = form.form_submit_button("Cargar")
|
|
|
if submit:
    if clean_input:
        # Strip @handles, URLs and HTML markup before classification.
        tweet_text = remove_twitter_handles({"text": tweet_text})["text"]
        tweet_text = remove_urls({"text": tweet_text})["text"]
        tweet_text = clean_html({"text": tweet_text})["text"]
    st.write(f"Enviando este texto al modelo: {tweet_text}")

    for classifier in st.session_state.pipelines:
        st.subheader(f"Model\n{classifier.model.config.name_or_path}")
        # With return_all_scores=True the pipeline returns, for one input,
        # a list holding one list of {"label": ..., "score": ...} dicts.
        result = classifier(tweet_text)
        st.json(result, expanded=False)
        predictions = result[0]
        # Highest-scoring label; defaults preserve the original "N/A"/-1
        # fallback when the prediction list is empty.
        best = max(predictions, key=lambda p: p["score"], default=None)
        label = best["label"] if best is not None else "N/A"
        score = best["score"] if best is not None else -1

        # Colour-code the verdict: P=positive, NEU=neutral, anything else
        # (N or N/A) shown as an error. "puntuaci贸n" mojibake fixed.
        if label == "P":
            st.success(f"{label} sentimiento (puntuación: {score})")
        elif label == "NEU":
            st.warning(f"{label} sentimiento (puntuación: {score})")
        else:
            st.error(f"{label} sentimiento (puntuación: {score})")

    if ask_chatgpt:
        openai.api_key = openai_key
        prompt = f"""
        Classify the sentiment of the following tweet text into positive (P), neutral (NEU) or negative (N).
        The tweet text will appear after the ":" symbol at the end of the paragraph. The result will contain
        each label, either P, NEU or N and its corresponding softmax score, or probability with the following
        format <LABEL>/<SCORE>: {tweet_text}
        """
        # BUG FIX: the original passed a hard-coded, unrelated prompt
        # ("What is the pandas library?") and never used the `prompt` built
        # above; now the classification prompt is actually sent.
        completion = openai.Completion.create(
            engine="text-davinci-003", prompt=prompt, max_tokens=1000
        )
        st.write(completion.choices[0]["text"])
|
|