Spaces · Runtime error
VenkateshRoshan committed · Commit 49130e6 · 1 parent: 7115bd6

instance_type updated

Browse files:
- app.py (+3 -42)
- src/deploy_sagemaker.py (+1 -1)
app.py
CHANGED
@@ -85,6 +85,7 @@ class CustomerSupportBot:
         }
         return usage
 
+
 def create_chat_interface():
     bot = CustomerSupportBot(model_path="/app/models")
 
@@ -163,53 +164,13 @@ def create_chat_interface():
     # Add keyboard shortcut for submit
     msg.change(lambda x: gr.update(interactive=bool(x.strip())), inputs=[msg], outputs=[submit])
 
-    # Add health check endpoint
-    @interface.route("/ping", methods=["GET"])
-    def ping():
-        try:
-            # Check if model and tokenizer are loaded
-            if not hasattr(bot, 'model') or not hasattr(bot, 'tokenizer'):
-                return {"status": "unhealthy", "reason": "Model or tokenizer not loaded"}, 503
-
-            # Check if CUDA is available and model is on the correct device
-            if torch.cuda.is_available():
-                if not bot.model.device.type == 'cuda':
-                    return {"status": "unhealthy", "reason": "Model not on GPU"}, 503
-
-            # Check memory usage
-            usage = bot.monitor_resources()
-            if usage["RAM (GB)"] > 30:  # Example threshold
-                return {"status": "unhealthy", "reason": "High memory usage"}, 503
-
-            # Try a quick model inference to ensure it's working
-            try:
-                test_response = bot.generate_response("Test message")
-                if test_response.startswith("An error occurred"):
-                    return {"status": "unhealthy", "reason": "Model inference failed"}, 503
-            except Exception as e:
-                return {"status": "unhealthy", "reason": f"Model inference error: {str(e)}"}, 503
-
-            return {
-                "status": "healthy",
-                "model_loaded": True,
-                "device": bot.device,
-                "resources": usage
-            }
-        except Exception as e:
-            return {"status": "unhealthy", "reason": str(e)}, 503
-
-    # Add secondary health endpoint
-    @interface.route("/health", methods=["GET"])
-    def health():
-        return {"status": "healthy"}
-
     return interface
 
 if __name__ == "__main__":
     demo = create_chat_interface()
     demo.launch(
-        share=
+        share=True,
         server_name="0.0.0.0",  # Makes the server accessible from other machines
         server_port=7860,  # Specify the port
         debug=True
-    )
+    )
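The deleted block explains why this commit is mostly removals: it attached Flask-style routes to the Gradio Blocks object, but gr.Blocks has no route() decorator, so building the interface would raise an AttributeError (plausibly the "Runtime error" this Space shows). If a health check is still wanted, note that SageMaker's bring-your-own-container hosting contract does require the container to answer GET /ping with a 200; one workable pattern is to mount the Gradio UI inside a FastAPI app. A minimal sketch, assuming app.py exposes create_chat_interface and using a hypothetical server.py module; the probe logic is illustrative only, not the author's:

# server.py (hypothetical): mount the chat UI in FastAPI to regain /ping.
# Run with: uvicorn server:app --host 0.0.0.0 --port 7860
import gradio as gr
from fastapi import FastAPI
from fastapi.responses import JSONResponse

from app import create_chat_interface  # assumes app.py exposes this factory

api = FastAPI()
demo = create_chat_interface()  # builds the gr.Blocks chat interface

@api.get("/ping")
def ping() -> JSONResponse:
    # Liveness probe: SageMaker treats any 200 response as healthy.
    return JSONResponse({"status": "healthy"})

# Serve the Gradio UI at the root path of the same FastAPI server.
app = gr.mount_gradio_app(api, demo, path="/")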
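On the launch() change: server_name="0.0.0.0" binds all interfaces so the app is reachable through the container's mapped port, and 7860 is the port Hugging Face Spaces routes to by default. share=True additionally requests a temporary public *.gradio.live tunnel, which is redundant once the app is hosted on Spaces or behind a SageMaker endpoint. A sketch of an equivalent local smoke test with the tunnel off, again assuming the create_chat_interface factory:

# Local smoke test for the Gradio app, without a public share tunnel.
from app import create_chat_interface

demo = create_chat_interface()
demo.launch(
    server_name="0.0.0.0",  # bind all interfaces (container-friendly)
    server_port=7860,       # the default port Hugging Face Spaces expects
    share=False,            # skip the *.gradio.live tunnel when self-hosting
    debug=True,             # print tracebacks to the console
)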
src/deploy_sagemaker.py
CHANGED
@@ -38,7 +38,7 @@ def deploy_app(acc_id, region_name, role_arn, ecr_repo_name, endpoint_name="cust
     logger.info(f"Starting deployment of Gradio app to SageMaker endpoint {endpoint_name}...")
     predictor = model.deploy(
         initial_instance_count=1,
-        instance_type="ml.g4dn.
+        instance_type="ml.g4dn.xlarge",
         endpoint_name=endpoint_name
     )
     logger.info(f"Gradio app deployed successfully to endpoint: {endpoint_name}")
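The fix pins the endpoint to ml.g4dn.xlarge, the smallest g4dn size (4 vCPUs, 16 GiB host RAM, one NVIDIA T4 with 16 GB of GPU memory), which matches the app's CUDA device checks. For context, a minimal sketch of the deploy call this hunk sits in; the image URI, role ARN, and endpoint name are placeholders standing in for deploy_app()'s parameters:

# Sketch of the surrounding SageMaker deployment (placeholder identifiers).
import sagemaker
from sagemaker.model import Model

model = Model(
    image_uri="123456789012.dkr.ecr.us-east-1.amazonaws.com/customer-support-bot:latest",
    role="arn:aws:iam::123456789012:role/SageMakerExecutionRole",
    sagemaker_session=sagemaker.Session(),
)

predictor = model.deploy(
    initial_instance_count=1,               # a single instance behind the endpoint
    instance_type="ml.g4dn.xlarge",         # 4 vCPUs, 16 GiB RAM, 1x NVIDIA T4
    endpoint_name="customer-support-bot",   # placeholder endpoint name
)

Note that the endpoint only goes InService once the container answers GET /ping, which is why the health-check question discussed above still matters after this commit.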