rodrisouza committed on
Commit f9160fd · verified · 1 Parent(s): eabbb32

Update app.py

Files changed (1)
  1. app.py +31 -8
app.py CHANGED
@@ -4,7 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import pandas as pd
 from datetime import datetime, timedelta, timezone
 import torch
-from config import hugging_face_token, init_google_sheets_client, models, default_model_name, user_names, google_sheets_name, MAX_INTERACTIONS
+from config import hugging_face_token, init_google_sheets_client, models, quantized_models, default_model_name, user_names, google_sheets_name, MAX_INTERACTIONS
 import spaces
 
 # Hack for ZeroGPU
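
Note: the import change above pulls in quantized_models alongside models. Both presumably map display names to Hugging Face repo IDs in config.py; a minimal sketch of what those entries might look like (the names and repo IDs below are hypothetical, not the Space's actual config):

    # Hypothetical config.py entries -- the real names and repo IDs
    # live in the Space's config and may differ.
    models = {
        "Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
    }
    quantized_models = {
        "Llama-3-70B-Instruct (4-bit)": "some-org/Llama-3-70B-Instruct-4bit",  # hypothetical repo
    }
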
@@ -48,14 +48,28 @@ def load_model(model_name):
             del model
             torch.cuda.empty_cache()
 
-        tokenizer = AutoTokenizer.from_pretrained(models[model_name], padding_side='left', token=hugging_face_token, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(
+            models[model_name],
+            padding_side='left',
+            token=hugging_face_token,
+            trust_remote_code=True
+        )
 
         # Ensure the padding token is set
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
             tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})
 
-        model = AutoModelForCausalLM.from_pretrained(models[model_name], token=hugging_face_token, trust_remote_code=True).to("cuda")
+        model = AutoModelForCausalLM.from_pretrained(
+            models[model_name],
+            token=hugging_face_token,
+            trust_remote_code=True
+        )
+
+        # Only move to CUDA if it's not a quantized model
+        if model_name not in quantized_models:
+            model = model.to("cuda")
+
         selected_model = model_name
     except Exception as e:
         print(f"Error loading model {model_name}: {e}")
@@ -70,12 +84,18 @@ chat_history = []
 
 # Function to handle interaction with model
 @spaces.GPU
-def interact(user_input, history, interaction_count):
+def interact(user_input, history, interaction_count, model_name):
     global tokenizer, model
     try:
         if tokenizer is None or model is None:
             raise ValueError("Tokenizer or model is not initialized.")
 
+        # Determine the device to use (either CUDA if available, or CPU)
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+        # Ensure the model is on the correct device
+        model.to(device)
+
         if interaction_count >= MAX_INTERACTIONS:
             user_input += ". Thank you for your questions. Our session is now over. Goodbye!"
 
@@ -88,8 +108,8 @@ def interact(user_input, history, interaction_count):
 
         prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
-        # Generate response using selected model
-        input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to("cuda")
+        # Move input tensor to the same device as the model
+        input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
         chat_history_ids = model.generate(input_ids, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id, temperature=0.1)
         response = tokenizer.decode(chat_history_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
 
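Note: together, the two hunks above make interact() device-agnostic: pick CUDA when available, fall back to CPU, and keep the model and its inputs on the same device before generate(). A condensed sketch of the pattern (assumes model, tokenizer, and prompt are already defined):

    import torch

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)  # keep model and inputs co-located
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    output_ids = model.generate(input_ids, max_new_tokens=100)

One caveat: the unconditional model.to(device) inside interact() could itself raise for a bitsandbytes-quantized model, so the quantized_models guard used in load_model() may be needed here as well.
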
@@ -131,7 +151,7 @@ Here is the story:
         chat_history = []  # Reset chat history
         chat_history.append({"role": "system", "content": combined_message})
         question_prompt = "Please ask a simple question about the story to encourage interaction."
-        _, formatted_history, chat_history, interaction_count = interact(question_prompt, chat_history, interaction_count)
+        _, formatted_history, chat_history, interaction_count = interact(question_prompt, chat_history, interaction_count, model_name)
 
         return formatted_history, chat_history, gr.update(value=[]), story["story"]
     else:
@@ -182,6 +202,9 @@ def load_user_guide():
     with open('user_guide.txt', 'r') as file:
         return file.read()
 
+# Combine both model dictionaries
+all_models = {**models, **quantized_models}
+
 # Create the chat interface using Gradio Blocks
 with gr.Blocks() as demo:
     with gr.Tabs():
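
Note: {**models, **quantized_models} builds a single dict via unpacking; on duplicate keys the right-hand operand wins. For example:

    a = {"x": 1, "y": 2}
    b = {"y": 20, "z": 30}
    merged = {**a, **b}  # {'x': 1, 'y': 20, 'z': 30} -- b's 'y' wins

This merged all_models dict is what feeds the model dropdown in the next hunk.
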
@@ -190,7 +213,7 @@ with gr.Blocks() as demo:
 
             gr.Markdown("## Context")
             with gr.Group():
-                model_dropdown = gr.Dropdown(choices=list(models.keys()), label="Select Model", value=selected_model)
+                model_dropdown = gr.Dropdown(choices=list(all_models.keys()), label="Select Model", value=default_model_name)
                 user_dropdown = gr.Dropdown(choices=user_names, label="Select User Name")
                 initial_story = stories[0]["title"] if stories else None
                 story_dropdown = gr.Dropdown(choices=[story["title"] for story in stories], label="Select Story", value=initial_story)
 