import os
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login

# Read the token directly from the Hugging Face Space secret
huggingface_token = os.environ.get('reparfinal')
if huggingface_token is None:
    raise ValueError("The Hugging Face token is not configured in the Space secrets")

# Log in to the Hugging Face Hub (required to download the gated Llama 2 weights)
login(huggingface_token)

# Set up the model
model_id = "meta-llama/Llama-2-7b-chat-hf"  # switched to the 7B chat model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
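
# Note: the full 7B model in bfloat16 needs roughly 14 GB of GPU memory.
# On smaller hardware, one alternative (an assumption, not used in this Space)
# is 4-bit quantization with bitsandbytes:
#
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       model_id,
#       quantization_config=BitsAndBytesConfig(
#           load_in_4bit=True,
#           bnb_4bit_compute_dtype=torch.bfloat16,
#       ),
#       device_map="auto",
#   )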

def respond_to_query(user_input):
    # Llama 2 Chat expects the instruction wrapped in [INST] ... [/INST]
    prompt = f"[INST] {user_input} [/INST]"
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    
    outputs = model.generate(
        inputs,
        max_new_tokens=256,
        do_sample=True,
        top_p=0.95,
        top_k=50,
        temperature=0.7,
        repetition_penalty=1.1
    )
    
    # Decode only the newly generated tokens so the prompt is not echoed back
    return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
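
# The function above handles a single turn only. For multi-turn chat, the
# documented Llama 2 format wraps each past exchange as
# "<s>[INST] user [/INST] assistant </s>" before the new question.
# A hypothetical sketch (build_prompt and history are not part of this file):
#
#   def build_prompt(history, user_input):
#       prompt = ""
#       for user_msg, assistant_msg in history:
#           prompt += f"<s>[INST] {user_msg} [/INST] {assistant_msg} </s>"
#       return prompt + f"<s>[INST] {user_input} [/INST]"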

# Gradio interface
interface = gr.Interface(
    fn=respond_to_query,
    inputs=gr.Textbox(label="Your question"),
    outputs=gr.Textbox(label="Answer"),
    title="Llama-2-7b Chatbot",
    description="Ask a question and the model will answer"
)

if __name__ == "__main__":
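    # Note: with a single shared model, simultaneous users contend for the GPU;
    # enabling Gradio's built-in queue, e.g. interface.queue().launch(), is one
    # way to serialize requests (left here as a suggestion, not enabled).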
    interface.launch()