rodrisouza committed on
Commit
1f73e37
·
verified ·
1 Parent(s): 5be5f75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -102,8 +102,9 @@ def interact(user_input, history, interaction_count, model_name):
102
  # Determine the device to use (either CUDA if available, or CPU)
103
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
104
 
105
- # Ensure the model is on the correct device
106
- model.to(device)
 
107
 
108
  if interaction_count >= MAX_INTERACTIONS:
109
  user_input += ". Thank you for your questions. Our session is now over. Goodbye!"
@@ -117,7 +118,7 @@ def interact(user_input, history, interaction_count, model_name):
117
 
118
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
119
 
120
- # Move input tensor to the same device as the model
121
  input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
122
  chat_history_ids = model.generate(input_ids, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id, temperature=0.1)
123
  response = tokenizer.decode(chat_history_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
@@ -137,6 +138,7 @@ def interact(user_input, history, interaction_count, model_name):
137
  print(f"Error during interaction: {e}")
138
  raise gr.Error(f"An error occurred during interaction: {str(e)}")
139
 
 
140
  # Function to send selected story and initial message
141
  def send_selected_story(title, model_name, system_prompt):
142
  global chat_history
 
102
  # Determine the device to use (either CUDA if available, or CPU)
103
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
104
 
105
+ # Only move the model to the device if it's not a quantized model
106
+ if model_name not in quantized_models:
107
+ model = model.to(device)
108
 
109
  if interaction_count >= MAX_INTERACTIONS:
110
  user_input += ". Thank you for your questions. Our session is now over. Goodbye!"
 
118
 
119
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
120
 
121
+ # Move input tensor to the correct device
122
  input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
123
  chat_history_ids = model.generate(input_ids, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id, temperature=0.1)
124
  response = tokenizer.decode(chat_history_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
 
138
  print(f"Error during interaction: {e}")
139
  raise gr.Error(f"An error occurred during interaction: {str(e)}")
140
 
141
+
142
  # Function to send selected story and initial message
143
  def send_selected_story(title, model_name, system_prompt):
144
  global chat_history