mdacampora committed · Commit f375845
Parent(s): 14d5482
Update app.py
app.py CHANGED
@@ -17,47 +17,31 @@ model = PeftModel.from_pretrained(model, peft_model_id)
 
 
 def make_inference(conversation):
-    #
-
-
-    # Initialize an empty list to hold the updated conversation
-    updated_conversation = []
-
-    # Generate a response for each turn in the conversation
-    for i, turn in enumerate(turns):
-        if i % 2 == 0:
-            # If the turn is from the customer, append it to the updated conversation
-            updated_conversation.append(f"{turn}\n")
-        else:
-            # If the turn is from the chatbot, generate a response and append it to the updated conversation
-            batch = tokenizer(
-                f"{turn}",
-                return_tensors="pt",
-                max_length=1024,
-                truncation=True,
-                padding="max_length",
-                add_special_tokens=True,
-                history=updated_conversation[-2:],
-            )
-
-            with torch.cuda.amp.autocast():
-                output_tokens = model.generate(
-                    **batch, max_new_tokens=1024, do_sample=True
-                )
-
-            response = tokenizer.decode(
-                output_tokens[0], skip_special_tokens=True
-            )
-
-            updated_conversation.append(f"{turn}\n\n{response}\n")
-
-    # Join the updated conversation into a single string
-    updated_conversation = "".join(updated_conversation)
-
+    # split the conversation by newlines and remove empty strings
+    context = list(filter(None, conversation.split("\n")))
+
+    # concatenate the context into a single string with separator "\n"
+    context_str = "\n".join(context)
+
+    # generate response from the model
+    batch = tokenizer(
+        f"### Problem:\n{context_str}\n",
+        return_tensors="pt",
+    )
+
+    with torch.cuda.amp.autocast():
+        output_tokens = model.generate(**batch, max_new_tokens=50)
+
+    response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
+
+    # update conversation history with bot's response
+    updated_conversation = f"{context_str}\n{response}"
+
     return updated_conversation
 
 
 
+
 # def make_inference(conversation):
 #     conversation_history = conversation
 #     response = ""
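
For readers who want to try the post-commit function outside the diff, here is a minimal standalone sketch. It is a hypothetical reconstruction, not the actual app.py: the adapter id, the loading lines above the hunk (everything except the `model = PeftModel.from_pretrained(model, peft_model_id)` call visible in the hunk header), a causal-LM base, and the demo transcript are all assumptions.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig, PeftModel

# Hypothetical adapter id -- the real value lives in the part of app.py
# this diff does not show.
peft_model_id = "mdacampora/some-peft-adapter"

# Load the base model named in the adapter config, then wrap it with the
# PEFT adapter, matching the hunk-header line
# `model = PeftModel.from_pretrained(model, peft_model_id)`.
config = PeftConfig.from_pretrained(peft_model_id)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(model, peft_model_id)


def make_inference(conversation):
    # split the conversation by newlines and remove empty strings
    context = list(filter(None, conversation.split("\n")))

    # concatenate the context into a single string with separator "\n"
    context_str = "\n".join(context)

    # tokenize the whole conversation as one "### Problem:" prompt
    batch = tokenizer(f"### Problem:\n{context_str}\n", return_tensors="pt")

    # mixed-precision generation; falls back (with a warning) on CPU-only hosts
    with torch.cuda.amp.autocast():
        output_tokens = model.generate(**batch, max_new_tokens=50)

    response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)

    # append the decoded output to the conversation history
    return f"{context_str}\n{response}"


if __name__ == "__main__":
    # Invented demo input; any newline-separated transcript works.
    print(make_inference("Customer: Hi, my order arrived damaged.\n"))

Compared with the removed version, which looped over a `turns` name that is never defined in the function (a plausible cause of the Space's runtime-error status) and passed a `history=` keyword that standard transformers tokenizers do not accept, the new code makes a single generate call over the flattened transcript and caps the reply at 50 new tokens.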