VenkateshRoshan committed
Commit 49130e6 · 1 Parent(s): 7115bd6

instance_type updated

Files changed (2):
  1. app.py +3 -42
  2. src/deploy_sagemaker.py +1 -1
app.py CHANGED
@@ -85,6 +85,7 @@ class CustomerSupportBot:
         }
         return usage
 
+
 def create_chat_interface():
     bot = CustomerSupportBot(model_path="/app/models")
 
@@ -163,53 +164,13 @@ def create_chat_interface():
     # Add keyboard shortcut for submit
     msg.change(lambda x: gr.update(interactive=bool(x.strip())), inputs=[msg], outputs=[submit])
 
-    # Add health check endpoint
-    @interface.route("/ping", methods=["GET"])
-    def ping():
-        try:
-            # Check if model and tokenizer are loaded
-            if not hasattr(bot, 'model') or not hasattr(bot, 'tokenizer'):
-                return {"status": "unhealthy", "reason": "Model or tokenizer not loaded"}, 503
-
-            # Check if CUDA is available and model is on the correct device
-            if torch.cuda.is_available():
-                if not bot.model.device.type == 'cuda':
-                    return {"status": "unhealthy", "reason": "Model not on GPU"}, 503
-
-            # Check memory usage
-            usage = bot.monitor_resources()
-            if usage["RAM (GB)"] > 30:  # Example threshold
-                return {"status": "unhealthy", "reason": "High memory usage"}, 503
-
-            # Try a quick model inference to ensure it's working
-            try:
-                test_response = bot.generate_response("Test message")
-                if test_response.startswith("An error occurred"):
-                    return {"status": "unhealthy", "reason": "Model inference failed"}, 503
-            except Exception as e:
-                return {"status": "unhealthy", "reason": f"Model inference error: {str(e)}"}, 503
-
-            return {
-                "status": "healthy",
-                "model_loaded": True,
-                "device": bot.device,
-                "resources": usage
-            }
-        except Exception as e:
-            return {"status": "unhealthy", "reason": str(e)}, 503
-
-    # Add secondary health endpoint
-    @interface.route("/health", methods=["GET"])
-    def health():
-        return {"status": "healthy"}
-
     return interface
 
 if __name__ == "__main__":
     demo = create_chat_interface()
     demo.launch(
-        share=False,
+        share=True,
         server_name="0.0.0.0",  # Makes the server accessible from other machines
         server_port=7860,  # Specify the port
         debug=True
-    )
+    )
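The removed block registered /ping and /health handlers with an `@interface.route(...)` decorator, but Gradio Blocks objects expose no such method, so those routes could never have been served; the commit also flips `share=True`, which asks Gradio to open a temporary public gradio.live tunnel in addition to the local server. If a health probe is still wanted (SageMaker containers, for instance, are probed on /ping), one conventional pattern is to declare the route on a FastAPI app and mount the Gradio UI onto it. A minimal sketch, assuming app.py's `CustomerSupportBot` and `create_chat_interface` and nothing else from this repo:

# Sketch only: serving a /ping health check beside the Gradio UI.
# CustomerSupportBot and create_chat_interface come from app.py above;
# the FastAPI wiring here is illustrative, not this repo's code.
import gradio as gr
import uvicorn
from fastapi import FastAPI
from fastapi.responses import JSONResponse

from app import CustomerSupportBot, create_chat_interface

# Note: create_chat_interface() builds its own bot internally; a real
# refactor would share one instance instead of loading the model twice.
bot = CustomerSupportBot(model_path="/app/models")
demo = create_chat_interface()

app = FastAPI()

@app.get("/ping")
def ping():
    # Mirror the removed handler's contract: 503 until the model is loaded.
    if not hasattr(bot, "model") or not hasattr(bot, "tokenizer"):
        return JSONResponse(
            {"status": "unhealthy", "reason": "Model or tokenizer not loaded"},
            status_code=503,
        )
    return {"status": "healthy", "model_loaded": True}

# Mount the chat interface at the root of the same server, so the UI and
# the health route both answer on port 7860.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)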
src/deploy_sagemaker.py CHANGED
@@ -38,7 +38,7 @@ def deploy_app(acc_id, region_name, role_arn, ecr_repo_name, endpoint_name="cust
     logger.info(f"Starting deployment of Gradio app to SageMaker endpoint {endpoint_name}...")
     predictor = model.deploy(
         initial_instance_count=1,
-        instance_type="ml.g4dn.2xlarge",
+        instance_type="ml.g4dn.xlarge",
         endpoint_name=endpoint_name
     )
     logger.info(f"Gradio app deployed successfully to endpoint: {endpoint_name}")
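Both g4dn sizes carry a single NVIDIA T4 GPU, so moving from ml.g4dn.2xlarge (8 vCPUs, 32 GiB RAM) to ml.g4dn.xlarge (4 vCPUs, 16 GiB RAM) keeps the same GPU while roughly halving CPU, memory, and hourly cost. For context, here is a minimal sketch of how the changed argument sits in a SageMaker Python SDK deploy; the image-URI layout and the default endpoint name (truncated to "cust..." in the hunk header) are assumptions, not this repo's exact code:

# Sketch only: the deploy call around the changed instance_type.
import sagemaker
from sagemaker.model import Model

def deploy_app(acc_id, region_name, role_arn, ecr_repo_name,
               endpoint_name="customer-support-bot"):  # hypothetical default
    # ECR image URI assembled from the function's arguments (assumed layout).
    image_uri = f"{acc_id}.dkr.ecr.{region_name}.amazonaws.com/{ecr_repo_name}:latest"
    model = Model(
        image_uri=image_uri,
        role=role_arn,
        sagemaker_session=sagemaker.Session(),
    )
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type="ml.g4dn.xlarge",  # was ml.g4dn.2xlarge before this commit
        endpoint_name=endpoint_name,
    )
    return predictor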