mdacampora committed
Commit 67a414c · 1 Parent(s): 285c2de

Update app.py

Files changed (1)
  1. app.py +8 -20
app.py CHANGED
@@ -16,30 +16,18 @@ tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 model = PeftModel.from_pretrained(model, peft_model_id)
 
 
-# def make_inference(problem, transcript):
-#     batch = tokenizer(
-#         f"### Problem:\n{problem}: \n\n### Transcript:",
-#         return_tensors="pt",
-#     )
-
-#     with torch.cuda.amp.autocast():
-#         output_tokens = model.generate(**batch, max_new_tokens=50)
-
-#     return tokenizer.decode(output_tokens[0], skip_special_tokens=True)
-
-def make_inference(conversation):
-    batch = tokenizer(
-        f"User:{conversation}\n",
-        return_tensors="pt",
-    )
 
+def make_inference(conversations):
+    context = ""
+    for conversation in conversations:
+        context += f"{conversation}\n\n"
+    prompt = f"### Conversation:\n{context}"
+    batch = tokenizer(prompt, return_tensors="pt")
     with torch.cuda.amp.autocast():
         output_tokens = model.generate(**batch, max_new_tokens=50)
-
     response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
-
-    #updated_conversation = f"{conversation}\n\n{response}"
-    return response
+    updated_conversation = f"{context}\n{response}"
+    return updated_conversation
 
 
 if __name__ == "__main__":
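
For context, a minimal, self-contained sketch of how the new revision would run end to end. The setup (PeftConfig, base model, tokenizer) is reconstructed from the hunk's diff context; the adapter id and the example conversation are placeholders, not values from the actual app.py.

# Sketch of the updated app.py logic, assuming the setup implied by the
# diff context (peft + transformers, a causal-LM base model). The adapter
# id below is a placeholder; the real value is defined earlier in app.py.
import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "your-username/your-peft-adapter"  # placeholder
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(model, peft_model_id)

def make_inference(conversations):
    # Fold every prior turn into one prompt, generate a short continuation,
    # and return the accumulated transcript plus the decoded output.
    context = ""
    for conversation in conversations:
        context += f"{conversation}\n\n"
    prompt = f"### Conversation:\n{context}"
    batch = tokenizer(prompt, return_tensors="pt")
    with torch.cuda.amp.autocast():
        output_tokens = model.generate(**batch, max_new_tokens=50)
    response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    updated_conversation = f"{context}\n{response}"
    return updated_conversation

# Example call: each list element is one prior turn.
print(make_inference(["User: Hi, can you help me track my order?"]))

One caveat on the design: for a decoder-only model, generate() returns the prompt tokens followed by the continuation, so the decoded response already repeats the prompt and the returned transcript duplicates the context. Slicing output_tokens[0][batch["input_ids"].shape[1]:] before decoding would yield only the newly generated text.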