mdacampora committed
Commit f375845
1 Parent(s): 14d5482

Update app.py

Files changed (1)
  1. app.py +20 -36
app.py CHANGED
@@ -17,47 +17,31 @@ model = PeftModel.from_pretrained(model, peft_model_id)
 
 
 def make_inference(conversation):
-    # Split the conversation into individual turns
-    turns = conversation.split("\n\n")
-
-    # Initialize an empty list to hold the updated conversation
-    updated_conversation = []
-
-    # Generate a response for each turn in the conversation
-    for i, turn in enumerate(turns):
-        if i % 2 == 0:
-            # If the turn is from the customer, append it to the updated conversation
-            updated_conversation.append(f"{turn}\n")
-        else:
-            # If the turn is from the chatbot, generate a response and append it to the updated conversation
-            batch = tokenizer(
-                f"{turn}",
-                return_tensors="pt",
-                max_length=1024,
-                truncation=True,
-                padding="max_length",
-                add_special_tokens=True,
-                history=updated_conversation[-2:],
-            )
-
-            with torch.cuda.amp.autocast():
-                output_tokens = model.generate(
-                    **batch, max_new_tokens=1024, do_sample=True
-                )
-
-            response = tokenizer.decode(
-                output_tokens[0], skip_special_tokens=True
-            )
-
-            updated_conversation.append(f"{turn}\n\n{response}\n")
-
-    # Join the updated conversation into a single string
-    updated_conversation = "".join(updated_conversation)
-
+    # split the conversation by newlines and remove empty strings
+    context = list(filter(None, conversation.split("\n")))
+
+    # concatenate the context into a single string with separator "\n"
+    context_str = "\n".join(context)
+
+    # generate response from the model
+    batch = tokenizer(
+        f"### Problem:\n{context_str}\n",
+        return_tensors="pt",
+    )
+
+    with torch.cuda.amp.autocast():
+        output_tokens = model.generate(**batch, max_new_tokens=50)
+
+    response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
+
+    # update conversation history with bot's response
+    updated_conversation = f"{context_str}\n{response}"
+
     return updated_conversation
 
 
 
+
 # def make_inference(conversation):
 # conversation_history = conversation
 # response = ""