Spaces:

abven
/

Customer-Support-Chatbot

Runtime error

App Files Files Community

VenkateshRoshan committed on Nov 10, 2024

Commit

e1d0160

1 Parent(s): aff35cb

deployment and app files updated

Browse files

Files changed (4) hide show

app.py +43 -28
requirements.txt +4 -4
src/config.py +59 -0
src/deploy_sagemaker.py +91 -0

app.py CHANGED Viewed

@@ -1,38 +1,55 @@
 import psutil
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 import os
 from typing import List, Tuple
 class CustomerSupportBot:
-    def __init__(self, model_path="models/customer_support_gpt"):
         self.process = psutil.Process(os.getpid())
-        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
-        self.model = AutoModelForCausalLM.from_pretrained(model_path)
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.model = self.model.to(self.device)
     def generate_response(self, message: str) -> str:
         try:
             input_text = f"Instruction: {message}\nResponse:"
-            inputs = self.tokenizer(input_text, return_tensors="pt").to(self.device)
-            with torch.no_grad():
-                outputs = self.model.generate(
-                    **inputs,
-                    max_length=50,
-                    temperature=0.7,
-                    num_return_sequences=1,
-                    pad_token_id=self.tokenizer.pad_token_id,
-                    eos_token_id=self.tokenizer.eos_token_id,
-                    do_sample=True,
-                    top_p=0.95,
-                    top_k=50
-                )
-            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-            return response.split("Response:")[-1].strip()
         except Exception as e:
             return f"An error occurred: {str(e)}"
@@ -41,8 +58,6 @@ class CustomerSupportBot:
             "CPU (%)": self.process.cpu_percent(interval=1),
             "RAM (GB)": self.process.memory_info().rss / (1024 ** 3)
         }
-        if torch.cuda.is_available():
-            usage["GPU (GB)"] = torch.cuda.memory_allocated(0) / (1024 ** 3)
         return usage
 def create_chat_interface():
@@ -77,7 +92,7 @@ def create_chat_interface():
         chatbot = gr.Chatbot(
             label="Chat History",
-            height=400,
             elem_classes="message-box"
         )
@@ -128,8 +143,8 @@ def create_chat_interface():
 if __name__ == "__main__":
     demo = create_chat_interface()
     demo.launch(
-        share=False,
         server_name="0.0.0.0",  # Makes the server accessible from other machines
         server_port=7860,  # Specify the port
         debug=True
-    )

+import json  # Add this import
 import psutil
 import torch
+import boto3
+from transformers import AutoTokenizer
 import gradio as gr
 import os
 from typing import List, Tuple
 class CustomerSupportBot:
+    def __init__(self, endpoint_name="customer-support-gpt-2024-11-10-00-30-03-555"):
         self.process = psutil.Process(os.getpid())
+        model_name = "EleutherAI/gpt-neo-125M"
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        # self.tokenizer = AutoTokenizer.from_pretrained("gpt2")  # Use the tokenizer appropriate to your model
+        self.endpoint_name = endpoint_name
+        self.sagemaker_runtime = boto3.client('runtime.sagemaker')
     def generate_response(self, message: str) -> str:
         try:
             input_text = f"Instruction: {message}\nResponse:"
+            # Prepare payload for SageMaker endpoint
+            payload = {
+                # "inputs": inputs['input_ids'].tolist()[0],
+                'inputs': input_text,
+                # You can include other parameters if needed (e.g., attention_mask)
+            }
+            print(f'Payload: {payload}')
+            # Convert the payload to a JSON string before sending
+            json_payload = json.dumps(payload)  # Use json.dumps() to serialize the payload
+            print(f'JSON Payload: {json_payload}')
+            # Call the SageMaker endpoint for inference
+            response = self.sagemaker_runtime.invoke_endpoint(
+                EndpointName=self.endpoint_name,
+                ContentType='application/json',
+                Body=json_payload  # Send the JSON string here
+            )
+            print(f'Response: {response}')
+            # Process the response
+            result = response['Body'].read().decode('utf-8')
+            parsed_result = json.loads(result)
+            # Extract the generated text from the first element in the list
+            generated_text = parsed_result[0]['generated_text']
+            # Split the string to get the response part after 'Response:'
+            response = generated_text.split('Response:')[1].strip()
+            # return the extracted response
+            return response
         except Exception as e:
             return f"An error occurred: {str(e)}"
             "CPU (%)": self.process.cpu_percent(interval=1),
             "RAM (GB)": self.process.memory_info().rss / (1024 ** 3)
         }
         return usage
 def create_chat_interface():
         chatbot = gr.Chatbot(
             label="Chat History",
+            height=500,
             elem_classes="message-box"
         )
 if __name__ == "__main__":
     demo = create_chat_interface()
     demo.launch(
+        share=True,
         server_name="0.0.0.0",  # Makes the server accessible from other machines
         server_port=7860,  # Specify the port
         debug=True
+    )

requirements.txt CHANGED Viewed

@@ -1,10 +1,10 @@
-transformers
 torch
-fastapi
-uvicorn
 mlflow
 boto3
 pytest
 pydantic
 datasets
-psutil

+transformers==4.37.0
 torch
 mlflow
 boto3
 pytest
 pydantic
 datasets
+psutil
+sagemaker
+gradio

src/config.py ADDED Viewed

	@@ -0,0 +1,59 @@

+from pathlib import Path
+import os
+from typing import Dict, Any
+class Config:  # namespace of class-level constants: paths, model, training, generation, Gradio, MLflow, AWS, DVC
+    # Project structure (every path is derived from ROOT_DIR)
+    ROOT_DIR = Path(__file__).parent.parent  # two directory levels above this file
+    DATA_DIR = ROOT_DIR / "data"
+    RAW_DATA_DIR = DATA_DIR / "raw"
+    PROCESSED_DATA_DIR = DATA_DIR / "processed"
+    MODELS_DIR = ROOT_DIR / "models"
+    MODEL_PATH = MODELS_DIR / "customer_support_gpt"
+    # Model configurations
+    MODEL_NAME = "EleutherAI/gpt-neo-125M"
+    MAX_LENGTH = 256
+    # Training configurations
+    TRAIN_CONFIG: Dict[str, Any] = {
+        "batch_size": 4,
+        "learning_rate": 2e-5,
+        "epochs": 3,
+        "weight_decay": 0.01,
+        "max_length": MAX_LENGTH,  # reuses MAX_LENGTH defined above
+    }
+    # Generation configurations
+    GENERATION_CONFIG: Dict[str, Any] = {
+        "max_length": 100,  # NOTE(review): differs from MAX_LENGTH (256) - confirm this is intentional
+        "temperature": 0.7,
+        "top_p": 0.95,
+        "top_k": 50,
+        "do_sample": True
+    }
+    # Gradio configurations
+    GRADIO_CONFIG: Dict[str, Any] = {
+        "title": "Customer Support Chatbot",
+        "description": "Ask your questions to the customer support bot!",
+        "examples": [
+            "How do I reset my password?",
+            "What are your shipping policies?",
+            "I want to return a product."
+        ],
+        "share": False
+    }
+    # MLflow configurations
+    MLFLOW_TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://localhost:5000")  # env var wins; otherwise localhost:5000
+    EXPERIMENT_NAME = "customer-support-chatbot"
+    # AWS/SageMaker configurations (each value is read from the environment when the class body runs)
+    AWS_REGION = os.getenv("AWS_REGION", "us-east-1")
+    S3_BUCKET = os.getenv("S3_BUCKET", "customer-support-chatbot")
+    SAGEMAKER_ROLE = os.getenv("SAGEMAKER_ROLE")  # None when the env var is unset
+    # DVC configurations
+    DVC_REMOTE_NAME = "s3-storage"
+    DVC_REMOTE_URL = f"s3://{S3_BUCKET}/dvc"  # built from S3_BUCKET above

src/deploy_sagemaker.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import boto3
+from pathlib import Path
+import sagemaker
+from sagemaker.huggingface import HuggingFaceModel
+import transformers
+import torch
+import logging
+import tarfile
+import os
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def create_model_tar():
+    model_path = Path("models/customer_support_gpt")
+    tar_path = "model.tar.gz"
+    with tarfile.open(tar_path, "w:gz") as tar:
+        for file_path in model_path.glob("*"):
+            if file_path.is_file():
+                logger.info(f"Adding {file_path} to tar archive")
+                tar.add(file_path, arcname=file_path.name)
+    return tar_path
+try:
+    # Initialize s3 client
+    s3 = boto3.client("s3")
+    bucket_name = 'customer-support-gpt'
+    # Create and upload tar.gz
+    tar_path = create_model_tar()
+    s3_key = "models/model.tar.gz"  # Changed path
+    logger.info(f"Uploading model.tar.gz to s3://{bucket_name}/{s3_key}")
+    s3.upload_file(tar_path, bucket_name, s3_key)
+    # Initialize SageMaker session
+    sagemaker_session = sagemaker.Session()
+    role = 'arn:aws:iam::841162707028:role/service-role/AmazonSageMaker-ExecutionRole-20241109T160615'
+    # Verify IAM role
+    iam = boto3.client('iam')
+    try:
+        iam.get_role(RoleName=role.split('/')[-1])
+        logger.info(f"Successfully verified IAM role: {role}")
+    except iam.exceptions.NoSuchEntityException:
+        logger.error(f"IAM role not found: {role}")
+        raise
+    # Point to the tar.gz file
+    model_artifacts = f's3://{bucket_name}/{s3_key}'
+    print(f'Model artifacts: {model_artifacts}')
+    env = {
+        "model_path": "/opt/ml/model",
+        "max_length": "256",
+        "generation_config": '{"max_length":100,"temperature":0.7,"top_p":0.95,"top_k":50,"do_sample":true}'
+    }
+    try:
+        huggingface_model = HuggingFaceModel(
+            model_data=model_artifacts,
+            role=role,
+            transformers_version="4.37.0",  # Explicit version
+            pytorch_version="2.1.0",        # Matching your version
+            py_version="py310",             # Keep py310
+            env=env,
+            name="customer-support-gpt"
+        )
+        logger.info("Starting model deployment...")
+        predictor = huggingface_model.deploy(
+            initial_instance_count=1,
+            instance_type="ml.m5.xlarge",
+            wait=True
+        )
+        logger.info("Model deployed successfully!")
+    except Exception as e:
+        logger.error(f"Error during model deployment: {str(e)}")
+        raise
+except Exception as e:
+    logger.error(f"Deployment failed: {str(e)}")
+    raise
+finally:
+    # Clean up the local tar file
+    if os.path.exists(tar_path):
+        os.remove(tar_path)