nafisneehal committed (verified)
Commit 30332c0 · Parent(s): e380cfe

Update app.py

Files changed (1): app.py (+11, -13)

app.py CHANGED
@@ -31,7 +31,8 @@ class ModelManager:
         self.current_model = None
         self.current_tokenizer = None
         self.current_model_name = None
-        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        # Don't initialize CUDA in __init__
+        self.device = None
 
     def load_model(self, model_name):
         """Load model and free previous model's memory"""
@@ -46,16 +47,17 @@ class ModelManager:
                 model_name,
                 load_in_4bit=False,
                 torch_dtype=torch.bfloat16,
-                device_map={"": self.device}  # Changed this line
-            ).to(self.device)  # Added explicit device movement
+                device_map="auto"  # Let the model decide device mapping
+            )
             self.current_model_name = model_name
-            return f"Successfully loaded model: {model_name} on {self.device}"
+            return f"Successfully loaded model: {model_name}"
         except Exception as e:
             return f"Error loading model: {str(e)}"
 
     def generate(self, prompt):
         """Helper method for generation"""
-        inputs = self.current_tokenizer(prompt, return_tensors="pt").to(self.device)
+        inputs = self.current_tokenizer(prompt, return_tensors="pt")
+        # Let device mapping happen automatically
         return inputs
 
 
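For reference, device_map="auto" hands weight placement to accelerate, so the explicit .to(self.device) removed above is no longer needed (and would conflict with sharded placement). A self-contained sketch of this loading style, with an illustrative checkpoint name:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Llama-2-7b-hf"  # illustrative, not this app's model list
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # accelerate decides where each weight lives
)
# Caveat: tokenized inputs still start on CPU, so callers usually move
# them to the model's first device before generate():
inputs = tokenizer("Hello", return_tensors="pt").to(model.device)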
 
@@ -79,7 +81,7 @@ Each response should be formatted as follows:
 }
 Ensure EVERY response strictly follows this JSON structure."""
 
-@spaces.GPU
+@spaces.GPU  # This decorator handles the GPU allocation
 def generate_response(model_name, system_instruction, user_input):
     """Generate response with GPU support and JSON formatting"""
     if model_manager.current_model_name != model_name:
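The @spaces.GPU decorator kept in this hunk comes from the spaces package that Hugging Face provides inside ZeroGPU Spaces; a GPU is attached only for the duration of the decorated call. A minimal usage sketch (the function body is illustrative):

import spaces  # available inside Hugging Face ZeroGPU Spaces

@spaces.GPU  # GPU allocated only while this call runs
def run_inference(prompt: str) -> str:
    # all CUDA work belongs here, not at module import time
    ...

Per the ZeroGPU docs, a duration hint such as @spaces.GPU(duration=120) can also be passed for longer calls.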
@@ -88,7 +90,6 @@ def generate_response(model_name, system_instruction, user_input):
     if model_manager.current_model is None:
         return json.dumps({"error": "No model loaded. Please load a model first."}, indent=2)
 
-    # Prepare the prompt with explicit JSON formatting
     prompt = f"""### Instruction:
 {system_instruction}
 Remember to ALWAYS format your response as valid JSON.
@@ -98,7 +99,6 @@ Remember to ALWAYS format your response as valid JSON.
 {{"""
 
     try:
-        # Get tokenized inputs using helper method
         inputs = model_manager.generate(prompt)
 
         meta_config = {
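One subtlety in the prompt this hunk touches: inside an f-string, {{ renders as a single literal brace, so the prompt deliberately ends with an opened { to prime the model into continuing a JSON object rather than starting with free text. A tiny illustration:

tail = f"""### Response:
{{"""
assert tail.endswith("{")  # the model completes the already-opened object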
@@ -112,15 +112,14 @@ Remember to ALWAYS format your response as valid JSON.
         }
         generation_config = GenerationConfig(**meta_config)
 
-        # Generate response
         with torch.no_grad():
             outputs = model_manager.current_model.generate(
                 **inputs,
                 generation_config=generation_config
-            ).to(model_manager.device)  # Ensure outputs are on correct device
+            )
 
         decoded_output = model_manager.current_tokenizer.batch_decode(
-            outputs.to(model_manager.device),
+            outputs,
             skip_special_tokens=True
         )[0]
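For reference, generate() here is driven by a transformers GenerationConfig built from the meta_config dict defined just above, and its output tensor already lives on the model's device, which is presumably why the two .to(model_manager.device) calls could be dropped. A sketch with illustrative values, reusing model, tokenizer and inputs from the loading sketch earlier:

import torch
from transformers import GenerationConfig

generation_config = GenerationConfig(
    max_new_tokens=256,  # illustrative values, not the app's meta_config
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)
with torch.no_grad():  # no gradients needed for inference
    outputs = model.generate(**inputs, generation_config=generation_config)
text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]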
 
@@ -140,8 +139,7 @@ Remember to ALWAYS format your response as valid JSON.
     except Exception as e:
         return json.dumps({
             "error": f"Error generating response: {str(e)}",
-            "details": "An unexpected error occurred during generation",
-            "device_info": f"Model device: {model_manager.device}, Input device: {inputs.input_ids.device if inputs else 'unknown'}"
+            "details": "An unexpected error occurred during generation"
         }, indent=2)
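Since batch_decode returns the prompt together with the completion, downstream code typically slices off everything after the response marker and validates it as JSON before returning it. A hypothetical helper along those lines (not part of this commit):

import json

def to_json_payload(decoded_output: str) -> str:
    # The prompt primes the reply with "{", so the text after the last
    # "### Response:" marker should already be a JSON object.
    candidate = decoded_output.split("### Response:")[-1].strip()
    try:
        return json.dumps(json.loads(candidate), indent=2)
    except json.JSONDecodeError:
        return json.dumps({"error": "Model did not return valid JSON"}, indent=2)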