Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -60,14 +60,16 @@ class Llama3Demo:
|
|
60 |
|
61 |
|
62 |
##################################################################
|
63 |
-
def generate_response(self, prompt: str, max_new_tokens: int = 512
|
64 |
-
|
65 |
-
<|
|
66 |
-
|
|
|
|
|
|
|
67 |
|
68 |
inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device)
|
69 |
|
70 |
-
# Asegurar que tenemos un pad_token_id válido
|
71 |
if self.tokenizer.pad_token_id is None:
|
72 |
self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
|
73 |
|
@@ -76,10 +78,12 @@ class Llama3Demo:
|
|
76 |
**inputs,
|
77 |
max_new_tokens=max_new_tokens,
|
78 |
num_return_sequences=1,
|
79 |
-
temperature=
|
80 |
do_sample=True,
|
81 |
-
top_p=
|
82 |
-
|
|
|
|
|
83 |
)
|
84 |
|
85 |
torch.cuda.empty_cache()
|
|
|
60 |
|
61 |
|
62 |
##################################################################
|
63 |
+
def generate_response(self, prompt: str, max_new_tokens: int = 512, temperature: float = 0.6,
|
64 |
+
top_p: float = 0.85, repetition_penalty: float = 1.2, top_k: int = 50) -> str:
|
65 |
+
formatted_prompt = f"""<|system|>You are a helpful AI assistant. Always provide accurate,
|
66 |
+
detailed, and well-reasoned responses. If you're unsure about something, acknowledge the uncertainty.
|
67 |
+
Break down complex topics into clear explanations.</s>
|
68 |
+
<|user|>{prompt}</s>
|
69 |
+
<|assistant|>"""
|
70 |
|
71 |
inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device)
|
72 |
|
|
|
73 |
if self.tokenizer.pad_token_id is None:
|
74 |
self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
|
75 |
|
|
|
78 |
**inputs,
|
79 |
max_new_tokens=max_new_tokens,
|
80 |
num_return_sequences=1,
|
81 |
+
temperature=temperature,
|
82 |
do_sample=True,
|
83 |
+
top_p=top_p,
|
84 |
+
top_k=top_k,
|
85 |
+
repetition_penalty=repetition_penalty,
|
86 |
+
pad_token_id=self.tokenizer.pad_token_id
|
87 |
)
|
88 |
|
89 |
torch.cuda.empty_cache()
|