Tonic committed
Commit 06271b6
1 Parent(s): 6ab3fad

Update app.py

Files changed (1):
  app.py +9 -9
app.py CHANGED
@@ -54,23 +54,23 @@ class FalconChatBot:
     def predict(self, system_prompt, user_message, assistant_message, history, temperature, max_new_tokens, top_p, repetition_penalty):

         # Process the history to remove special commands
-        processed_history = self.process_history(history)

         # Combine the user and assistant messages into a conversation
-        conversation = f"{system_prompt}\nFalcon: {assistant_message if assistant_message else ''} User: {user_message}\nFalcon:\n"

         # Encode the conversation using the tokenizer
-        input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=False)

         # Generate a response using the Falcon model
-        response_text = peft_model.generate(input_ids=input_ids, max_length=max_length, use_cache=True, early_stopping=True, bos_token_id=peft_model.config.bos_token_id, eos_token_id=peft_model.config.eos_token_id, pad_token_id=peft_model.config.eos_token_id, temperature=0.4, do_sample=True)

         # Generate the formatted conversation in Falcon message format
-        conversation = f"{system_prompt}\n"
-        for message in processed_history:
-            user_message = message["user"]
-            assistant_message = message["assistant"]
-            conversation += f"Falcon:{' ' + assistant_message if assistant_message else ''} User: {user_message}\n Falcon:\n"

         return response_text
 
 
     def predict(self, system_prompt, user_message, assistant_message, history, temperature, max_new_tokens, top_p, repetition_penalty):

         # Process the history to remove special commands
+        processed_history = self.process_history(history)

         # Combine the user and assistant messages into a conversation
+        conversation = f"{system_prompt}\nFalcon: {assistant_message if assistant_message else ''} User: {user_message}\nFalcon:\n"

         # Encode the conversation using the tokenizer
+        input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=False)

         # Generate a response using the Falcon model
+        response_text = peft_model.generate(input_ids=input_ids, max_length=max_length, use_cache=True, early_stopping=True, bos_token_id=peft_model.config.bos_token_id, eos_token_id=peft_model.config.eos_token_id, pad_token_id=peft_model.config.eos_token_id, temperature=0.4, do_sample=True)

         # Generate the formatted conversation in Falcon message format
+        conversation = f"{system_prompt}\n"
+        for message in processed_history:
+            user_message = message["user"]
+            assistant_message = message["assistant"]
+            conversation += f"Falcon:{' ' + assistant_message if assistant_message else ''} User: {user_message}\n Falcon:\n"

         return response_text
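
Note on the code as committed: even after this change, the method as shown has several apparent issues. max_length is not defined anywhere in the hunk; generate() hard-codes temperature=0.4 while the temperature, max_new_tokens, top_p, and repetition_penalty parameters go unused; the loop over processed_history overwrites the user_message and assistant_message arguments and builds a conversation string only after generate() has already run, then discards it; and generate() returns token IDs, not text. Below is a minimal sketch of what the method appears to be aiming at, assuming tokenizer and peft_model are module-level Hugging Face objects as the code implies; the past_user/past_assistant loop names are illustrative:

    def predict(self, system_prompt, user_message, assistant_message, history,
                temperature, max_new_tokens, top_p, repetition_penalty):
        # Process the history to remove special commands
        processed_history = self.process_history(history)

        # Build the full conversation in Falcon message format, including the
        # processed history, before generating (the committed version builds
        # this string after generate() and never uses it)
        conversation = f"{system_prompt}\n"
        for message in processed_history:
            # illustrative names; the commit reuses (and overwrites) the
            # user_message/assistant_message arguments here
            past_user = message["user"]
            past_assistant = message["assistant"]
            conversation += (
                f"Falcon:{' ' + past_assistant if past_assistant else ''}"
                f" User: {past_user}\nFalcon:\n"
            )
        conversation += (
            f"Falcon: {assistant_message if assistant_message else ''}"
            f" User: {user_message}\nFalcon:\n"
        )

        # Encode the conversation using the tokenizer
        input_ids = tokenizer.encode(conversation, return_tensors="pt",
                                     add_special_tokens=False)

        # Generate with the sampling parameters wired through instead of a
        # hard-coded temperature, and max_new_tokens in place of the
        # undefined max_length
        output_ids = peft_model.generate(
            input_ids=input_ids,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            use_cache=True,
            eos_token_id=peft_model.config.eos_token_id,
            pad_token_id=peft_model.config.eos_token_id,
        )

        # generate() returns token IDs; decode only the newly generated part
        response_text = tokenizer.decode(
            output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True
        )
        return response_text

Decoding only the tokens past input_ids.shape[-1] keeps the prompt out of the returned reply, which returning the raw generate() output would have included.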