Spaces:

Iker
/

ClickbaitFighter

Running on Zero

App Files Files Community

ClickbaitFighter / app.py

Iker

Fix bug

011d233 over 1 year ago

raw

history blame

8.07 kB

	import copy
	import datetime
	import json
	import os
	from typing import Iterator, Tuple

	import gradio as gr
	from huggingface_hub import hf_hub_download

	# CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir
	# CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir
	from llama_cpp import Llama
	from transformers import AutoTokenizer

	from cache_system import CacheHandler
	from download_url import download_text_and_title
	from prompts import clickbait_prompt, clickbait_summary_prompt, summary_prompt

	# Hub credential: the TOKEN_FROM_SECRET environment variable when it is set and
	# non-empty, otherwise the literal True, which is forwarded unchanged to the
	# Hub helpers below (presumably "use the locally stored token" - confirm
	# against the huggingface_hub documentation).
	_secret_token = os.environ.get("TOKEN_FROM_SECRET")
	auth_token = _secret_token if _secret_token else True

	print(f"CPU cores: {os.cpu_count()}.")

	# Locate the GGUF weights; repository and file name can be overridden through
	# the REPO_ID and MODEL_FILE environment variables.
	_model_repo = os.environ.get("REPO_ID", "Iker/ClickbaitFighter-10B")
	_model_file = os.environ.get("MODEL_FILE", "ClickbaitFighter-10B_q4_k_m.gguf")
	_model_path = hf_hub_download(
	    repo_id=_model_repo,
	    filename=_model_file,
	    token=auth_token,
	)

	# NOTE(review): n_ctx=0 presumably lets llama.cpp take the context size from
	# the model file - confirm against the llama-cpp-python documentation.
	llm = Llama(
	    model_path=_model_path,
	    n_ctx=0,
	    n_gpu_layers=-1,  # change n_gpu_layers if you have more or less VRAM
	    n_threads=8,
	)

	# The tokenizer is only used to render the chat template around the prompt;
	# its repository is fixed and does not follow the REPO_ID override.
	tokenizer = AutoTokenizer.from_pretrained(
	    "Iker/ClickbaitFighter-10B",
	    add_eos_token=True,
	    token=auth_token,
	    use_fast=True,
	)


	def generate_prompt(
	    tittle: str,
	    body: str,
	    mode: str = "finetune",
	) -> str:
	    """
	    Build the instruction text sent to the model for one article.

	    Args:
	        tittle (`str`):
	            Headline of the article (the parameter keeps its existing spelling
	            so keyword callers continue to work).
	        body (`str`):
	            Full text of the article.
	        mode (`str`):
	            Prompt template to use: 'clickbait', 'summary' or
	            'clickbait-summary'. The default value, 'finetune', is not one of
	            these, so a call that omits `mode` raises `ValueError`.
	    Returns:
	        `str`: The prompt produced by the template function selected by `mode`.
	    Raises:
	        `ValueError`: If `mode` is not one of the three supported values.
	    """
	    # Reject unknown modes up front so the dispatch below only sees valid ones.
	    if mode not in ("clickbait", "summary", "clickbait-summary"):
	        raise ValueError(
	            "Invalid mode. Valid modes are 'clickbait', 'summary' and 'clickbait-summary'"
	        )

	    if mode == "clickbait":
	        return clickbait_prompt(tittle, body)
	    if mode == "summary":
	        return summary_prompt(tittle, body)
	    return clickbait_summary_prompt(tittle, body)


	def finish_generation(text: str) -> str:
	    """Return `text` followed by a blank line and the feedback request footer."""
	    template = "{}\n\n⬇️ Ayuda a mejorar la herramienta marcando si el resumen es correcto o no.⬇️"
	    return template.format(text)


	def generate_text(
	    url: str, mode: int, progress=gr.Progress(track_tqdm=False)
	) -> Iterator[Tuple[str, str, str]]:
	    """
	    Stream the headline, summary and article text for a news URL.

	    This is a generator. Every item it yields is a 3-tuple matching the three
	    output components of the interface: `(title, summary, article_text)`.
	    While the model is generating, the summary element grows chunk by chunk;
	    the last item carries the summary with the feedback footer appended by
	    `finish_generation`.

	    Args:
	        url (`str`):
	            Address of the article to summarise.
	        mode (`int`):
	            Summary level. 0 selects the 'summary' prompt, 100 selects the
	            'clickbait' prompt and any other value selects 'clickbait-summary'.
	        progress:
	            Gradio progress tracker used to report the current stage.
	    Yields:
	        `(title, summary, article_text)` tuples. When the article cannot be
	        downloaded, a single `(error message, "❌❌❌", "Error")` tuple is
	        yielded instead.
	    """
	    # 1) Download the article
	    progress(0, desc="🤖 Accediendo a la noticia")

	    # Serve a previously generated summary when the cache holds a complete entry.
	    title, text, summary = cache_handler.get_from_cache(url, mode)
	    if title is not None and text is not None and summary is not None:
	        yield title, finish_generation(summary), text
	        return

	    try:
	        title, text = download_text_and_title(url)
	    except Exception as err:
	        # Best effort: any failure is reported to the user through the error
	        # tuple below, but the cause is logged instead of silently dropped.
	        print(f"Could not download {url!r}: {err!r}")
	        title, text = None, None

	    if title is None or text is None:
	        yield (
	            "🤖 No he podido acceder a la notica, asegurate que la URL es correcta y que es posible acceder a la noticia desde un navegador.",
	            "❌❌❌",
	            "Error",
	        )
	        return

	    progress(0.5, desc="🤖 Leyendo noticia")

	    # 2) Generate the prompt
	    prompt_mode = {0: "summary", 100: "clickbait"}.get(mode, "clickbait-summary")
	    input_prompt = generate_prompt(title, text, prompt_mode)
	    input_prompt = tokenizer.apply_chat_template(
	        [{"role": "user", "content": input_prompt}],
	        tokenize=False,
	        add_generation_prompt=True,
	    )

	    # 3) Stream the completion
	    output = llm(
	        input_prompt,
	        temperature=0.15,
	        top_p=0.1,
	        top_k=40,
	        repeat_penalty=1.1,
	        max_tokens=256,
	        # Each stop sequence must be its own list element. Without the commas
	        # Python concatenates adjacent literals into one string
	        # ("<s></s>\n[/INST][INST]") that can never match.
	        # NOTE(review): "\n" is now an effective stop, so generation ends at
	        # the first line break - confirm this suits every summary mode.
	        stop=[
	            "<s>",
	            "</s>",
	            "\n",
	            "[/INST]",
	            "[INST]",
	            "### User:",
	            "### Assistant:",
	            "###",
	        ],
	        stream=True,
	    )

	    summary = ""
	    for chunk in output:
	        summary += chunk["choices"][0]["text"]
	        yield title, summary, text

	    # Cache the raw summary (without the footer) so a later cache hit can
	    # append the footer itself.
	    cache_handler.add_to_cache(
	        url=url, title=title, text=text, summary_type=mode, summary=summary
	    )
	    yield title, finish_generation(summary), text


	# Shared state used by generate_text and by the flagging buttons.
	cache_handler = CacheHandler(max_cache_size=1000)
	hf_writer = gr.HuggingFaceDatasetSaver(auth_token, "Iker/Clickbait-News")

	# --- Input components (created in the order they are passed to the interface) ---
	url_input = gr.Textbox(
	    label="🌐 URL de la noticia",
	    info="Introduce la URL de la noticia que deseas resumir.",
	    value="https://www.heraldo.es/noticias/salud/2024/01/08/atun-alimento-grasa-muscular-ayuda-combatir-colesterol-1702116.html",
	    interactive=True,
	)
	# Three slider positions (0, 50, 100); generate_text maps them to prompt modes.
	summary_level_input = gr.Slider(
	    minimum=0,
	    maximum=100,
	    step=50,
	    value=50,
	    label="🎚️ Nivel de resumen",
	    info="""¿Hasta qué punto quieres resumir la noticia?

	Si solo deseas un resumen, selecciona 0.

	Si buscas un resumen y desmontar el clickbait, elige 50.

	Para obtener solo la respuesta al clickbait, selecciona 100""",
	    interactive=True,
	)

	# --- Output components: headline, summary and the (hidden) full article text ---
	title_output = gr.Textbox(
	    label="📰 Titular de la noticia",
	    interactive=False,
	    placeholder="Aquí aparecerá el título de la noticia",
	)
	summary_output = gr.Textbox(
	    label="🗒️ Resumen",
	    interactive=False,
	    placeholder="Aquí aparecerá el resumen de la noticia.",
	)
	# Not visible and not rendered; it still receives the third element that
	# generate_text yields (the full article text).
	article_output = gr.Textbox(
	    label="Noticia completa",
	    visible=False,
	    render=False,
	    interactive=False,
	    placeholder="Aquí aparecerá el resumen de la noticia.",
	)

	# --- Static page texts ---
	page_description = """Esta Inteligencia Artificial es capaz de generar un resumen de una sola frase que revela la verdad detrás de un titular sensacionalista o clickbait. Solo tienes que introducir la URL de la noticia. La IA accederá a la noticia, la leerá y en cuestión de segundos generará un resumen de una sola frase que revele la verdad detrás del titular.

	🎚 Ajusta el nivel de resumen con el control deslizante. Cuanto maś alto, más corto será el resumen.

	🗒 La IA no es capaz de acceder a todas las webs, por ejemplo, si introduces un enlace a una noticia que requiere suscripción, la IA no podrá acceder a ella. Algunas webs pueden tener tecnologías para bloquear bots.

	⌚ La IA se encuentra corriendo en un hardware bastante modesto, por lo que puede tardar hasta un minuto en generar el resumen. Si muchos usuarios usan la app a la vez, tendrás que esperar tu turno.

	💸 Este es un projecto sin ánimo de lucro, no se genera ningún tipo de ingreso con esta app. Los datos, la IA y el código se publicarán para su uso en la investigación académica. No puedes usar esta app para ningún uso comercial.

	🧪 El modelo se encuentra en fase de desarrollo, si quieres ayudar a mejorarlo puedes usar los botones 👍 y 👎 para valorar el resumen. ¡Gracias por tu ayuda!"""
	page_article = "Esta Inteligencia Artificial ha sido generada por Iker García-Ferrero. Puedes saber más sobre mi trabajo en mi [página web](https://ikergarcia1996.github.io/Iker-Garcia-Ferrero/) o mi perfil de [X](https://twitter.com/iker_garciaf). Puedes ponerte en contacto conmigo a través de correo electrónico (ver web) y X."

	# --- Interface wiring: generate_text streams into the three outputs above ---
	demo = gr.Interface(
	    generate_text,
	    inputs=[url_input, summary_level_input],
	    outputs=[title_output, summary_output, article_output],
	    title="⚔️ Clickbait Fighter! ⚔️",
	    thumbnail="logo2.png",
	    theme="JohnSmith9982/small_and_pretty",
	    description=page_description,
	    article=page_article,
	    cache_examples=False,
	    concurrency_limit=1,
	    # Manual flagging: the two buttons pass "correct" / "incorrect" to hf_writer.
	    allow_flagging="manual",
	    flagging_options=[("👍", "correct"), ("👎", "incorrect")],
	    flagging_callback=hf_writer,
	)

	demo.queue(max_size=None)
	demo.launch(share=False)