Update app.py
Browse files
app.py
CHANGED
@@ -30,11 +30,7 @@ footer {
|
|
30 |
# Initialize the tokenizer
|
31 |
tokenizer = AutoTokenizer.from_pretrained(model)
|
32 |
|
33 |
-
|
34 |
-
# max_tokens is for the maximum length for generation.
|
35 |
-
|
36 |
-
# Input the model name or path. Can be GPTQ or AWQ models.
|
37 |
-
llm = LLM(model=model, kv_cache_dtype="fp8_e5m2")
|
38 |
|
39 |
@spaces.GPU
|
40 |
def generate(message, history, system, max_tokens, temperature, top_p, top_k, penalty):
|
|
|
30 |
# Initialize the tokenizer
|
31 |
tokenizer = AutoTokenizer.from_pretrained(model)
|
32 |
|
33 |
+
llm = LLM(model=model)
|
|
|
|
|
|
|
|
|
34 |
|
35 |
@spaces.GPU
|
36 |
def generate(message, history, system, max_tokens, temperature, top_p, top_k, penalty):
|