nafisneehal committed on
Commit
d0ef7ad
·
verified ·
1 Parent(s): aff925f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -2
app.py CHANGED
@@ -93,11 +93,22 @@ def generate_response(system_instruction, user_input):
93
  )
94
  ], return_tensors = "pt").to("cuda")
95
 
 
 
 
 
 
 
 
 
 
 
 
96
  with torch.no_grad():
97
- outputs = model.generate(**inputs, max_new_tokens = 1000, use_cache = True)
98
 
99
  decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
100
- assistant_response = decoded_output
101
 
102
  # tokenizer.batch_decode(outputs)
103
  # # Generate model response
 
93
  )
94
  ], return_tensors = "pt").to("cuda")
95
 
96
+ meta_config = {
97
+ "do_sample": True,
98
+ "temperature": 0.1,
99
+ "top_p": 0.9,
100
+ "max_new_tokens": 256,
101
+ "repetition_penalty": 1.2,
102
+ "use_cache": True
103
+ }
104
+
105
+ generation_config = GenerationConfig(**meta_config)
106
+
107
  with torch.no_grad():
108
+ outputs = model.generate(**inputs, generation_config=generation_config)
109
 
110
  decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
111
+ assistant_response = decoded_output.split("### Response:")[-1].strip()
112
 
113
  # tokenizer.batch_decode(outputs)
114
  # # Generate model response