jorgencio committed on
Commit
95c5b4a
1 Parent(s): c44fcc8

changing model

Files changed (1)
  1. app.py +22 -14
app.py CHANGED
@@ -43,29 +43,31 @@ async def personaje_detalle(request: Request, personaje: str):
 
 
 
-model_name = "nakodanei/Blue-Orchid-2x7b"
+# Switch to the new model
+model_name = "allura-org/MoE-Girl_400MA_1BT"
 
 # Global initialization
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    device_map="auto",
-    torch_dtype=torch.float16
+    device_map="auto",          # Automatically use the GPU when available
+    torch_dtype=torch.float16   # Use FP16 for efficiency on GPUs
 )
 
 @app.post("/personajes/{personaje}/chat", response_class=HTMLResponse)
 async def chat_with_character(request: Request, personaje: str, user_input: str):
-    prompt = f"""
-    ### Instruction:
-    You are {personaje}, a sexy girl who has been dating the user for 2 months.
-
-    ### Input:
-    User: {user_input}
-
-    ### Response:
-    Character:
-    """
+    # Build the dynamic prompt in the expected chat format
+    prompt = f"""<|im_start|>system
+You are {personaje}, a sexy girl who has been dating the user for 2 months.<|im_end|>
+<|im_start|>user
+{user_input}<|im_end|>
+<|im_start|>assistant
+"""
+
+    # Tokenize the prompt
     inputs = tokenizer(prompt, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
+
+    # Generate the response
     outputs = model.generate(
         **inputs,
         max_new_tokens=500,
@@ -73,6 +75,12 @@ async def chat_with_character(request: Request, personaje: str, user_input: str)
         do_sample=True,
         temperature=0.7
     )
+
+    # Decode the response
     generated_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-    return {"response": generated_response}
+    # Filter the response to extract only the assistant's text
+    response_text = generated_response.split("<|im_start|>assistant")[1].strip().split("<|im_end|>")[0].strip()
+
+    # Return the response to the user
+    return {"response": response_text}
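
The committed prompt hand-writes the ChatML tags. A minimal sketch of an alternative, assuming the tokenizer for allura-org/MoE-Girl_400MA_1BT ships a ChatML chat template (worth verifying before relying on it): `tokenizer.apply_chat_template` builds the same scaffolding and keeps the format in sync with the model. The character name and message below are illustrative placeholders.

```python
# Sketch: build the ChatML prompt from a message list via the tokenizer's
# chat template (transformers >= 4.34), instead of hand-formatting
# <|im_start|>/<|im_end|> tags. Assumes the tokenizer ships a template.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("allura-org/MoE-Girl_400MA_1BT")

personaje = "Luna"                    # illustrative character name
user_input = "Hi! How was your day?"  # illustrative user message

messages = [
    {"role": "system",
     "content": f"You are {personaje}, a sexy girl who has been dating the user for 2 months."},
    {"role": "user", "content": user_input},
]

# add_generation_prompt=True appends the opening assistant tag so the
# model continues the conversation as the assistant.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
```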
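
The new extraction step splits the decoded string on "<|im_start|>assistant", which is brittle: if the tokenizer registers the ChatML tags as special tokens, `skip_special_tokens=True` strips them from the decoded text and `split(...)[1]` raises an IndexError. A common, more robust alternative (a sketch, not what the commit does) is to decode only the tokens generated after the prompt; `inputs` and `outputs` are the tensors from the endpoint above, and `extract_reply` is an illustrative helper name.

```python
def extract_reply(tokenizer, inputs, outputs) -> str:
    """Decode only the tokens the model generated after the prompt."""
    prompt_len = inputs["input_ids"].shape[1]  # number of prompt tokens
    new_tokens = outputs[0][prompt_len:]       # tokens added by generate()
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Usage inside chat_with_character, replacing the string-splitting step:
# response_text = extract_reply(tokenizer, inputs, outputs)
```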