# LLama3 / app.py
import os
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import torch
# Authenticate with the token stored as a Space secret
hf_token = os.getenv("HF_API_TOKEN")
if hf_token:  # guard against a missing secret so login() does not prompt interactively
    login(hf_token)
# Load the model and tokenizer, placing the model on the GPU only when one is available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
model_name = "DeepESP/gpt2-spanish"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
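# (Optional) On GPU one could load the model in half precision for faster inference.
# This is a sketch, not part of the original app; fp16 only applies when device == "cuda":
#   model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)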
def chat_with_gpt2_spanish(input_text):
    # Tokenize the prompt and move it to the same device as the model
    inputs = tokenizer(
        input_text, return_tensors="pt", truncation=True, max_length=512
    ).to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,  # cap the reply length rather than prompt + reply
        do_sample=True,      # sampling is required for temperature/top_p to take effect
        temperature=0.7,
        top_p=0.9,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id,  # GPT-2 defines no pad token
    )
    # Decode only the newly generated tokens so the prompt is not echoed back
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response
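# Quick sanity check for local runs (the prompt below is just an illustrative example):
#   print(chat_with_gpt2_spanish("Hola, ¿cómo estás?"))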
# Create the Gradio interface
iface = gr.Interface(
    fn=chat_with_gpt2_spanish,
    inputs="text",
    outputs="text",
    title="Chat con GPT-2 en Español",
    description="Interfaz simple para comunicarte con el modelo GPT-2 en español.",
)
iface.launch()
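# On Hugging Face Spaces the app is served automatically; for a local run one could
# instead pass explicit server settings (a sketch using standard Gradio arguments):
#   iface.launch(server_name="0.0.0.0", server_port=7860)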