VenkateshRoshan committed
Commit e1d0160 · Parent: aff35cb

deployment and app files updated

Files changed (4):
  1. app.py +43 -28
  2. requirements.txt +4 -4
  3. src/config.py +59 -0
  4. src/deploy_sagemaker.py +91 -0
app.py CHANGED
@@ -1,38 +1,55 @@
+import json
 import psutil
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
+import boto3
+from transformers import AutoTokenizer
 import gradio as gr
 import os
 from typing import List, Tuple
 
 class CustomerSupportBot:
-    def __init__(self, model_path="models/customer_support_gpt"):
+    def __init__(self, endpoint_name="customer-support-gpt-2024-11-10-00-30-03-555"):
         self.process = psutil.Process(os.getpid())
-        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
-        self.model = AutoModelForCausalLM.from_pretrained(model_path)
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.model = self.model.to(self.device)
+        # The tokenizer must match the model served by the endpoint
+        model_name = "EleutherAI/gpt-neo-125M"
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.endpoint_name = endpoint_name
+        self.sagemaker_runtime = boto3.client('runtime.sagemaker')
 
     def generate_response(self, message: str) -> str:
         try:
             input_text = f"Instruction: {message}\nResponse:"
-            inputs = self.tokenizer(input_text, return_tensors="pt").to(self.device)
 
-            with torch.no_grad():
-                outputs = self.model.generate(
-                    **inputs,
-                    max_length=50,
-                    temperature=0.7,
-                    num_return_sequences=1,
-                    pad_token_id=self.tokenizer.pad_token_id,
-                    eos_token_id=self.tokenizer.eos_token_id,
-                    do_sample=True,
-                    top_p=0.95,
-                    top_k=50
-                )
-
-            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-            return response.split("Response:")[-1].strip()
+            # Serialize the payload to a JSON string before sending
+            payload = {'inputs': input_text}
+            json_payload = json.dumps(payload)
+            print(f'Payload: {json_payload}')
+
+            # Call the SageMaker endpoint for inference
+            response = self.sagemaker_runtime.invoke_endpoint(
+                EndpointName=self.endpoint_name,
+                ContentType='application/json',
+                Body=json_payload
+            )
+
+            # Parse the response body and extract the generated text
+            result = response['Body'].read().decode('utf-8')
+            parsed_result = json.loads(result)
+            generated_text = parsed_result[0]['generated_text']
+
+            # Return only the part after 'Response:'
+            return generated_text.split('Response:')[-1].strip()
         except Exception as e:
             return f"An error occurred: {str(e)}"
 
@@ -41,8 +58,6 @@ class CustomerSupportBot:
             "CPU (%)": self.process.cpu_percent(interval=1),
             "RAM (GB)": self.process.memory_info().rss / (1024 ** 3)
         }
-        if torch.cuda.is_available():
-            usage["GPU (GB)"] = torch.cuda.memory_allocated(0) / (1024 ** 3)
         return usage
 
 def create_chat_interface():
@@ -77,7 +92,7 @@ def create_chat_interface():
 
     chatbot = gr.Chatbot(
         label="Chat History",
-        height=400,
+        height=500,
         elem_classes="message-box"
     )
 
@@ -128,8 +143,8 @@ def create_chat_interface():
 if __name__ == "__main__":
     demo = create_chat_interface()
     demo.launch(
-        share=False,
+        share=True,
         server_name="0.0.0.0",  # Makes the server accessible from other machines
         server_port=7860,  # Specify the port
         debug=True
-    )
+    )
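Note: the reworked generate_response assumes the Hugging Face inference container's default text-generation response shape, a JSON list like [{"generated_text": "..."}]. A minimal sketch for smoke-testing the deployed endpoint outside Gradio (same hard-coded endpoint name as above; region and credentials are assumed to come from the standard boto3 configuration):

import json
import boto3

# Invoke the endpoint once and print the raw generation.
runtime = boto3.client('runtime.sagemaker')
payload = json.dumps({'inputs': 'Instruction: How do I reset my password?\nResponse:'})
resp = runtime.invoke_endpoint(
    EndpointName='customer-support-gpt-2024-11-10-00-30-03-555',
    ContentType='application/json',
    Body=payload,
)
print(json.loads(resp['Body'].read())[0]['generated_text'])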
requirements.txt CHANGED
@@ -1,10 +1,10 @@
-transformers
+transformers==4.37
 torch
-fastapi
-uvicorn
 mlflow
 boto3
 pytest
 pydantic
 datasets
-psutil
+psutil
+sagemaker
+gradio
src/config.py ADDED
@@ -0,0 +1,59 @@
+from pathlib import Path
+import os
+from typing import Dict, Any
+
+class Config:
+    # Project structure
+    ROOT_DIR = Path(__file__).parent.parent
+    DATA_DIR = ROOT_DIR / "data"
+    RAW_DATA_DIR = DATA_DIR / "raw"
+    PROCESSED_DATA_DIR = DATA_DIR / "processed"
+    MODELS_DIR = ROOT_DIR / "models"
+    MODEL_PATH = MODELS_DIR / "customer_support_gpt"
+
+    # Model configurations
+    MODEL_NAME = "EleutherAI/gpt-neo-125M"
+    MAX_LENGTH = 256
+
+    # Training configurations
+    TRAIN_CONFIG: Dict[str, Any] = {
+        "batch_size": 4,
+        "learning_rate": 2e-5,
+        "epochs": 3,
+        "weight_decay": 0.01,
+        "max_length": MAX_LENGTH,
+    }
+
+    # Generation configurations
+    GENERATION_CONFIG: Dict[str, Any] = {
+        "max_length": 100,
+        "temperature": 0.7,
+        "top_p": 0.95,
+        "top_k": 50,
+        "do_sample": True
+    }
+
+    # Gradio configurations
+    GRADIO_CONFIG: Dict[str, Any] = {
+        "title": "Customer Support Chatbot",
+        "description": "Ask your questions to the customer support bot!",
+        "examples": [
+            "How do I reset my password?",
+            "What are your shipping policies?",
+            "I want to return a product."
+        ],
+        "share": False
+    }
+
+    # MLflow configurations
+    MLFLOW_TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://localhost:5000")
+    EXPERIMENT_NAME = "customer-support-chatbot"
+
+    # AWS/SageMaker configurations
+    AWS_REGION = os.getenv("AWS_REGION", "us-east-1")
+    S3_BUCKET = os.getenv("S3_BUCKET", "customer-support-chatbot")
+    SAGEMAKER_ROLE = os.getenv("SAGEMAKER_ROLE")
+
+    # DVC configurations
+    DVC_REMOTE_NAME = "s3-storage"
+    DVC_REMOTE_URL = f"s3://{S3_BUCKET}/dvc"
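Note: nothing in this commit imports Config yet; app.py and deploy_sagemaker.py still hard-code the model name, generation settings, and bucket. A minimal sketch of how the class could be consumed (hypothetical wiring, not part of this commit):

from src.config import Config
from transformers import AutoTokenizer

# Pull shared settings from Config instead of repeating literals.
tokenizer = AutoTokenizer.from_pretrained(Config.MODEL_NAME)
gen_kwargs = dict(Config.GENERATION_CONFIG)  # max_length=100, temperature=0.7, ...
print(Config.DVC_REMOTE_URL)                 # s3://customer-support-chatbot/dvc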
src/deploy_sagemaker.py ADDED
@@ -0,0 +1,91 @@
+import boto3
+from pathlib import Path
+import sagemaker
+from sagemaker.huggingface import HuggingFaceModel
+import logging
+import tarfile
+import os
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+def create_model_tar():
+    model_path = Path("models/customer_support_gpt")
+    tar_path = "model.tar.gz"
+
+    with tarfile.open(tar_path, "w:gz") as tar:
+        for file_path in model_path.glob("*"):
+            if file_path.is_file():
+                logger.info(f"Adding {file_path} to tar archive")
+                tar.add(file_path, arcname=file_path.name)
+
+    return tar_path
+
+tar_path = None
+try:
+    # Initialize S3 client
+    s3 = boto3.client("s3")
+    bucket_name = 'customer-support-gpt'
+
+    # Create and upload model.tar.gz
+    tar_path = create_model_tar()
+    s3_key = "models/model.tar.gz"
+    logger.info(f"Uploading model.tar.gz to s3://{bucket_name}/{s3_key}")
+    s3.upload_file(tar_path, bucket_name, s3_key)
+
+    # Initialize SageMaker session
+    sagemaker_session = sagemaker.Session()
+    role = 'arn:aws:iam::841162707028:role/service-role/AmazonSageMaker-ExecutionRole-20241109T160615'
+
+    # Verify that the IAM role exists before deploying
+    iam = boto3.client('iam')
+    try:
+        iam.get_role(RoleName=role.split('/')[-1])
+        logger.info(f"Successfully verified IAM role: {role}")
+    except iam.exceptions.NoSuchEntityException:
+        logger.error(f"IAM role not found: {role}")
+        raise
+
+    # Point the model at the uploaded tar.gz
+    model_artifacts = f's3://{bucket_name}/{s3_key}'
+    logger.info(f'Model artifacts: {model_artifacts}')
+
+    env = {
+        "model_path": "/opt/ml/model",
+        "max_length": "256",
+        "generation_config": '{"max_length":100,"temperature":0.7,"top_p":0.95,"top_k":50,"do_sample":true}'
+    }
+
+    try:
+        # Container versions must match the stack the model was trained with
+        huggingface_model = HuggingFaceModel(
+            model_data=model_artifacts,
+            role=role,
+            transformers_version="4.37.0",
+            pytorch_version="2.1.0",
+            py_version="py310",
+            env=env,
+            name="customer-support-gpt"
+        )
+
+        logger.info("Starting model deployment...")
+        predictor = huggingface_model.deploy(
+            initial_instance_count=1,
+            instance_type="ml.m5.xlarge",
+            wait=True
+        )
+        logger.info("Model deployed successfully!")
+
+    except Exception as e:
+        logger.error(f"Error during model deployment: {str(e)}")
+        raise
+
+except Exception as e:
+    logger.error(f"Deployment failed: {str(e)}")
+    raise
+
+finally:
+    # Clean up the local tar file (guard against create_model_tar failing)
+    if tar_path and os.path.exists(tar_path):
+        os.remove(tar_path)
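Note: deploy(wait=True) leaves an ml.m5.xlarge endpoint running and billing until it is deleted. A teardown sketch using boto3; the endpoint and config names are assumptions (SageMaker may have appended a timestamp, so check the console for the exact names):

import boto3

# Delete the endpoint, its config, and the model to stop all charges.
sm = boto3.client('sagemaker')
endpoint_name = 'customer-support-gpt-2024-11-10-00-30-03-555'
sm.delete_endpoint(EndpointName=endpoint_name)
sm.delete_endpoint_config(EndpointConfigName=endpoint_name)
sm.delete_model(ModelName='customer-support-gpt')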