vilarin committed on
Commit
98ca206
·
verified ·
1 Parent(s): 8b4873c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -5
app.py CHANGED
@@ -30,11 +30,7 @@ footer {
30
  # Initialize the tokenizer
31
  tokenizer = AutoTokenizer.from_pretrained(model)
32
 
33
- # Pass the default decoding hyperparameters of Qwen2-7B-Instruct
34
- # max_tokens is for the maximum length for generation.
35
-
36
- # Input the model name or path. Can be GPTQ or AWQ models.
37
- llm = LLM(model=model, kv_cache_dtype="fp8_e5m2")
38
 
39
  @spaces.GPU
40
  def generate(message, history, system, max_tokens, temperature, top_p, top_k, penalty):
 
30
  # Initialize the tokenizer
31
  tokenizer = AutoTokenizer.from_pretrained(model)
32
 
33
+ llm = LLM(model=model)
 
 
 
 
34
 
35
  @spaces.GPU
36
  def generate(message, history, system, max_tokens, temperature, top_p, top_k, penalty):