mdacampora committed
Commit b2eedf8 · 1 Parent(s): acb3080

Update app.py

Files changed (1): app.py +27 -16
app.py CHANGED
@@ -16,23 +16,34 @@ tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
  model = PeftModel.from_pretrained(model, peft_model_id)


- def make_inference(conversation):
-     conversation_history = conversation
-     response = ""
-     while True:
-         batch = tokenizer(
-             f"### Problem:\n{conversation_history}\n{response}",
-             return_tensors="pt",
-         )
-         with torch.cuda.amp.autocast():
-             output_tokens = model.generate(**batch, max_new_tokens=50)
-         new_response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
-         if new_response.strip() == "":
-             break
-         response = f"\n{new_response}"
-         conversation_history += response
-     return conversation_history
+ def make_inference(problem):
+     batch = tokenizer(
+         f"User: {problem}",
+         return_tensors="pt",
+     )
+
+     with torch.cuda.amp.autocast():
+         output_tokens = model.generate(**batch, max_new_tokens=50)
+
+     return tokenizer.decode(output_tokens[0], skip_special_tokens=True)
+
+
+ # def make_inference(conversation):
+ #     conversation_history = conversation
+ #     response = ""
+ #     while True:
+ #         batch = tokenizer(
+ #             f"### Problem:\n{conversation_history}\n{response}",
+ #             return_tensors="pt",
+ #         )
+ #         with torch.cuda.amp.autocast():
+ #             output_tokens = model.generate(**batch, max_new_tokens=50)
+ #         new_response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
+ #         if new_response.strip() == "":
+ #             break
+ #         response = f"\n{new_response}"
+ #         conversation_history += response
+ #     return conversation_history


  if __name__ == "__main__":
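
For context, this commit swaps the looping, conversation-accumulating make_inference for a single-shot call. Below is a minimal runnable sketch of the post-commit flow, assuming the PEFT adapter is loaded as in the context lines above; the adapter id and the choice of AutoModelForCausalLM are illustrative assumptions, since the diff does not show the model-loading line.

# Minimal sketch of the post-commit inference flow (not the full app.py).
# Assumptions: peft_model_id is a hypothetical placeholder, and
# AutoModelForCausalLM is a guess at the base-model class not shown here.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig, PeftModel

peft_model_id = "someuser/some-peft-adapter"  # hypothetical adapter id
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(model, peft_model_id)

def make_inference(problem):
    # One-shot prompt; the removed version instead re-generated in a loop
    # until the model produced an empty continuation.
    batch = tokenizer(f"User: {problem}", return_tensors="pt")
    with torch.cuda.amp.autocast():
        output_tokens = model.generate(**batch, max_new_tokens=50)
    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)

if __name__ == "__main__":
    print(make_inference("How do I reset my password?"))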