mdacampora committed
Commit f375845
1 Parent(s): 14d5482

Update app.py

Files changed (1)
  1. app.py +20 -36
app.py CHANGED
@@ -17,47 +17,31 @@ model = PeftModel.from_pretrained(model, peft_model_id)
 
 
 def make_inference(conversation):
-    # Split the conversation into individual turns
-    turns = conversation.split("\n\n")
-
-    # Initialize an empty list to hold the updated conversation
-    updated_conversation = []
-
-    # Generate a response for each turn in the conversation
-    for i, turn in enumerate(turns):
-        if i % 2 == 0:
-            # If the turn is from the customer, append it to the updated conversation
-            updated_conversation.append(f"{turn}\n")
-        else:
-            # If the turn is from the chatbot, generate a response and append it to the updated conversation
-            batch = tokenizer(
-                f"{turn}",
-                return_tensors="pt",
-                max_length=1024,
-                truncation=True,
-                padding="max_length",
-                add_special_tokens=True,
-                history=updated_conversation[-2:],
-            )
-
-            with torch.cuda.amp.autocast():
-                output_tokens = model.generate(
-                    **batch, max_new_tokens=1024, do_sample=True
-                )
-
-            response = tokenizer.decode(
-                output_tokens[0], skip_special_tokens=True
-            )
-
-            updated_conversation.append(f"{turn}\n\n{response}\n")
-
-    # Join the updated conversation into a single string
-    updated_conversation = "".join(updated_conversation)
-
+    # split the conversation by newlines and remove empty strings
+    context = list(filter(None, conversation.split("\n")))
+
+    # concatenate the context into a single string with separator "\n"
+    context_str = "\n".join(context)
+
+    # generate response from the model
+    batch = tokenizer(
+        f"### Problem:\n{context_str}\n",
+        return_tensors="pt",
+    )
+
+    with torch.cuda.amp.autocast():
+        output_tokens = model.generate(**batch, max_new_tokens=50)
+
+    response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
+
+    # update conversation history with bot's response
+    updated_conversation = f"{context_str}\n{response}"
+
     return updated_conversation
 
 
 
+
 # def make_inference(conversation):
 # conversation_history = conversation
 # response = ""