import os
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
# Read the access token directly from the Space's secrets (stored under the name 'reparfinal')
huggingface_token = os.environ.get('reparfinal')
if huggingface_token is None:
    raise ValueError("The Hugging Face token is not configured in the Space's secrets")

# Log in to the Hugging Face Hub
login(huggingface_token)
# Configure the model
model_id = "meta-llama/Llama-2-7b-chat-hf"  # switched to the 7B model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # requires the accelerate package
)
def respond_to_query(user_input):
    # Prompt format expected by Llama 2 Chat
    prompt = f"[INST] {user_input} [/INST]"
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        inputs,
        max_new_tokens=256,
        do_sample=True,
        top_p=0.95,
        top_k=50,
        temperature=0.7,
        repetition_penalty=1.1,
    )
    # Slice off the prompt tokens so only the model's reply is returned,
    # instead of echoing the full prompt back to the user
    return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
# Gradio interface
interface = gr.Interface(
    fn=respond_to_query,
    inputs=gr.Textbox(label="Your question"),
    outputs=gr.Textbox(label="Answer"),
    title="Chatbot with Llama-2-7b",
    description="Ask a question and the model will answer",
)

if __name__ == "__main__":
    interface.launch()
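
# Usage note (a sketch, kept as comments so it is not executed with the app):
# once the Space is running, the same endpoint can be queried programmatically
# with the gradio_client package. The Space path below is hypothetical;
# replace it with the actual "username/space-name" of this Space.
#
#   from gradio_client import Client
#
#   client = Client("your-username/llama2-chatbot")  # hypothetical Space path
#   answer = client.predict("What is the capital of France?", api_name="/predict")
#   print(answer)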