redael committed on
Commit
6f1cdd7
·
verified ·
1 Parent(s): 6765159

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -14,7 +14,7 @@ model.to(device)
14
  if device.type == 'cuda':
15
  model = model.half() # Use FP16 precision
16
 
17
- def generate_response(prompt, model, tokenizer, max_length=100, num_beams=1, temperature=0.7, top_p=0.9, repetition_penalty=1.0):
18
  # Prepare the prompt
19
  prompt = f"User: {prompt}\nAssistant:"
20
  inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)
@@ -26,7 +26,7 @@ def generate_response(prompt, model, tokenizer, max_length=100, num_beams=1, tem
26
  num_beams=num_beams, # Use a lower number of beams
27
  temperature=temperature,
28
  top_p=top_p,
29
- repetition_penalty=repetition_penalty,
30
  early_stopping=True
31
  )
32
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -50,7 +50,7 @@ def respond(message, history: list[tuple[str, str]]):
50
  conversation += f"User: {message}\nAssistant:"
51
 
52
  # Fixed values for generation parameters
53
- max_tokens = 100 # Reduce max tokens if possible
54
  temperature = 0.7
55
  top_p = 0.9
56
 
 
14
  if device.type == 'cuda':
15
  model = model.half() # Use FP16 precision
16
 
17
+ def generate_response(prompt, model, tokenizer, max_length=100, num_beams=1, temperature=0.7, top_p=0.9, repetition_penalty=2.0):
18
  # Prepare the prompt
19
  prompt = f"User: {prompt}\nAssistant:"
20
  inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)
 
26
  num_beams=num_beams, # Use a lower number of beams
27
  temperature=temperature,
28
  top_p=top_p,
29
+ repetition_penalty=repetition_penalty, # Increased repetition penalty
30
  early_stopping=True
31
  )
32
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
50
  conversation += f"User: {message}\nAssistant:"
51
 
52
  # Fixed values for generation parameters
53
+ max_tokens = 100 # Adjusted max tokens
54
  temperature = 0.7
55
  top_p = 0.9
56