Spaces:
Runtime error
Runtime error
VenkateshRoshan
committed on
Commit
·
e1d0160
1
Parent(s):
aff35cb
deployment and app files updated
Browse files- app.py +43 -28
- requirements.txt +4 -4
- src/config.py +59 -0
- src/deploy_sagemaker.py +91 -0
app.py
CHANGED
@@ -1,38 +1,55 @@
|
|
|
|
1 |
import psutil
|
2 |
import torch
|
3 |
-
|
|
|
4 |
import gradio as gr
|
5 |
import os
|
6 |
from typing import List, Tuple
|
7 |
|
8 |
class CustomerSupportBot:
|
9 |
-
def __init__(self,
|
10 |
self.process = psutil.Process(os.getpid())
|
11 |
-
|
12 |
-
self.
|
13 |
-
self.
|
14 |
-
self.
|
15 |
-
|
|
|
16 |
def generate_response(self, message: str) -> str:
|
17 |
try:
|
18 |
input_text = f"Instruction: {message}\nResponse:"
|
19 |
-
inputs = self.tokenizer(input_text, return_tensors="pt").to(self.device)
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
except Exception as e:
|
37 |
return f"An error occurred: {str(e)}"
|
38 |
|
@@ -41,8 +58,6 @@ class CustomerSupportBot:
|
|
41 |
"CPU (%)": self.process.cpu_percent(interval=1),
|
42 |
"RAM (GB)": self.process.memory_info().rss / (1024 ** 3)
|
43 |
}
|
44 |
-
if torch.cuda.is_available():
|
45 |
-
usage["GPU (GB)"] = torch.cuda.memory_allocated(0) / (1024 ** 3)
|
46 |
return usage
|
47 |
|
48 |
def create_chat_interface():
|
@@ -77,7 +92,7 @@ def create_chat_interface():
|
|
77 |
|
78 |
chatbot = gr.Chatbot(
|
79 |
label="Chat History",
|
80 |
-
height=
|
81 |
elem_classes="message-box"
|
82 |
)
|
83 |
|
@@ -128,8 +143,8 @@ def create_chat_interface():
|
|
128 |
if __name__ == "__main__":
|
129 |
demo = create_chat_interface()
|
130 |
demo.launch(
|
131 |
-
share=
|
132 |
server_name="0.0.0.0", # Makes the server accessible from other machines
|
133 |
server_port=7860, # Specify the port
|
134 |
debug=True
|
135 |
-
)
|
|
|
1 |
+
import json # Add this import
|
2 |
import psutil
|
3 |
import torch
|
4 |
+
import boto3
|
5 |
+
from transformers import AutoTokenizer
|
6 |
import gradio as gr
|
7 |
import os
|
8 |
from typing import List, Tuple
|
9 |
|
10 |
class CustomerSupportBot:
|
11 |
+
def __init__(self, endpoint_name="customer-support-gpt-2024-11-10-00-30-03-555"):
    """Set up process monitoring, the tokenizer and the SageMaker runtime client.

    Args:
        endpoint_name: Name of the SageMaker endpoint that inference
            requests are sent to.
    """
    # psutil handle on the current process.
    current_pid = os.getpid()
    self.process = psutil.Process(current_pid)

    # Tokenizer for the GPT-Neo 125M checkpoint.
    tokenizer_checkpoint = "EleutherAI/gpt-neo-125M"
    self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)

    # Remote inference target and the boto3 client used to reach it.
    self.endpoint_name = endpoint_name
    self.sagemaker_runtime = boto3.client('runtime.sagemaker')
|
18 |
+
|
19 |
def generate_response(self, message: str) -> str:
    """Send *message* to the SageMaker endpoint and return the model's reply.

    The message is wrapped in an ``Instruction: ... Response:`` prompt,
    serialized as JSON and posted to ``self.endpoint_name`` through
    ``self.sagemaker_runtime``. The reply is read from the first element's
    ``generated_text`` field of the JSON response.

    Args:
        message: The user's message.

    Returns:
        The reply text with surrounding whitespace stripped, or a string of
        the form ``"An error occurred: <details>"`` if anything fails.
    """
    try:
        input_text = f"Instruction: {message}\nResponse:"

        # Prepare payload for SageMaker endpoint
        payload = {'inputs': input_text}
        print(f'Payload: {payload}')

        # The endpoint expects a JSON string, not a Python dict.
        json_payload = json.dumps(payload)
        print(f'JSON Payload: {json_payload}')

        # Call the SageMaker endpoint for inference
        endpoint_response = self.sagemaker_runtime.invoke_endpoint(
            EndpointName=self.endpoint_name,
            ContentType='application/json',
            Body=json_payload,
        )
        print(f'Response: {endpoint_response}')

        # Process the response
        result = endpoint_response['Body'].read().decode('utf-8')
        parsed_result = json.loads(result)

        # Extract the generated text from the first element in the list
        generated_text = parsed_result[0]['generated_text']

        marker = 'Response:'
        if generated_text.startswith(input_text):
            # The prompt was echoed back: drop it as a whole so a marker
            # inside the user's own message cannot be mistaken for the start
            # of the reply, then cut at any further marker the model emits.
            answer = generated_text[len(input_text):].split(marker)[0]
        else:
            # No exact echo: take the text after the first marker when there
            # is one, otherwise treat the whole output as the reply.
            parts = generated_text.split(marker)
            answer = parts[1] if len(parts) > 1 else generated_text

        return answer.strip()
    except Exception as e:
        # UI boundary: report the failure as chat text instead of raising.
        return f"An error occurred: {str(e)}"
|
55 |
|
|
|
58 |
"CPU (%)": self.process.cpu_percent(interval=1),
|
59 |
"RAM (GB)": self.process.memory_info().rss / (1024 ** 3)
|
60 |
}
|
|
|
|
|
61 |
return usage
|
62 |
|
63 |
def create_chat_interface():
|
|
|
92 |
|
93 |
chatbot = gr.Chatbot(
|
94 |
label="Chat History",
|
95 |
+
height=500,
|
96 |
elem_classes="message-box"
|
97 |
)
|
98 |
|
|
|
143 |
if __name__ == "__main__":
    # Keyword arguments forwarded to launch().
    launch_kwargs = {
        "share": True,
        "server_name": "0.0.0.0",  # Makes the server accessible from other machines
        "server_port": 7860,  # Specify the port
        "debug": True,
    }

    demo = create_chat_interface()
    demo.launch(**launch_kwargs)
|
requirements.txt
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
-
transformers
|
2 |
torch
|
3 |
-
fastapi
|
4 |
-
uvicorn
|
5 |
mlflow
|
6 |
boto3
|
7 |
pytest
|
8 |
pydantic
|
9 |
datasets
|
10 |
-
psutil
|
|
|
|
|
|
1 |
+
transformers==4.37
|
2 |
torch
|
|
|
|
|
3 |
mlflow
|
4 |
boto3
|
5 |
pytest
|
6 |
pydantic
|
7 |
datasets
|
8 |
+
psutil
|
9 |
+
sagemaker
|
10 |
+
gradio
|
src/config.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
import os
|
3 |
+
from typing import Dict, Any
|
4 |
+
|
5 |
+
class Config:
    """Class-level settings: paths, model, training, generation, UI and cloud."""

    # ---- Project structure (all paths derive from the directory two levels
    # above this file) ----
    ROOT_DIR = Path(__file__).parent.parent
    DATA_DIR = ROOT_DIR.joinpath("data")
    RAW_DATA_DIR = DATA_DIR.joinpath("raw")
    PROCESSED_DATA_DIR = DATA_DIR.joinpath("processed")
    MODELS_DIR = ROOT_DIR.joinpath("models")
    MODEL_PATH = MODELS_DIR.joinpath("customer_support_gpt")

    # ---- Model ----
    MODEL_NAME = "EleutherAI/gpt-neo-125M"
    MAX_LENGTH = 256

    # ---- Training ----
    TRAIN_CONFIG: Dict[str, Any] = dict(
        batch_size=4,
        learning_rate=2e-5,
        epochs=3,
        weight_decay=0.01,
        max_length=MAX_LENGTH,
    )

    # ---- Generation ----
    GENERATION_CONFIG: Dict[str, Any] = dict(
        max_length=100,
        temperature=0.7,
        top_p=0.95,
        top_k=50,
        do_sample=True,
    )

    # ---- Gradio ----
    GRADIO_CONFIG: Dict[str, Any] = dict(
        title="Customer Support Chatbot",
        description="Ask your questions to the customer support bot!",
        examples=[
            "How do I reset my password?",
            "What are your shipping policies?",
            "I want to return a product.",
        ],
        share=False,
    )

    # ---- MLflow (tracking URI can be overridden through the environment) ----
    MLFLOW_TRACKING_URI = os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5000")
    EXPERIMENT_NAME = "customer-support-chatbot"

    # ---- AWS / SageMaker (read from the environment; SAGEMAKER_ROLE is None
    # when the variable is unset) ----
    AWS_REGION = os.environ.get("AWS_REGION", "us-east-1")
    S3_BUCKET = os.environ.get("S3_BUCKET", "customer-support-chatbot")
    SAGEMAKER_ROLE = os.environ.get("SAGEMAKER_ROLE")

    # ---- DVC (remote lives under the configured S3 bucket) ----
    DVC_REMOTE_NAME = "s3-storage"
    DVC_REMOTE_URL = f"s3://{S3_BUCKET}/dvc"
|
src/deploy_sagemaker.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import boto3
|
2 |
+
from pathlib import Path
|
3 |
+
import sagemaker
|
4 |
+
from sagemaker.huggingface import HuggingFaceModel
|
5 |
+
import transformers
|
6 |
+
import torch
|
7 |
+
import logging
|
8 |
+
import tarfile
|
9 |
+
import os
|
10 |
+
|
11 |
+
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def create_model_tar(model_path="models/customer_support_gpt", tar_path="model.tar.gz"):
    """Pack the files of a model directory into a gzip-compressed tar archive.

    Only regular files directly inside *model_path* are archived, and each
    is stored under its bare file name; sub-directories are skipped.

    Args:
        model_path: Directory holding the model files.
        tar_path: Where to write the archive.

    Returns:
        *tar_path*, unchanged.

    Raises:
        FileNotFoundError: If *model_path* is not a directory or holds no
            files. Raised before the archive is opened, so an empty archive
            is never written.
    """
    model_dir = Path(model_path)
    if not model_dir.is_dir():
        raise FileNotFoundError(f"Model directory not found: {model_dir}")

    # Sorted so the archive layout does not depend on directory listing order.
    model_files = sorted(p for p in model_dir.glob("*") if p.is_file())
    if not model_files:
        raise FileNotFoundError(f"No model files found in: {model_dir}")

    with tarfile.open(tar_path, "w:gz") as tar:
        for file_path in model_files:
            logger.info("Adding %s to tar archive", file_path)
            tar.add(file_path, arcname=file_path.name)

    return tar_path
|
26 |
+
|
27 |
+
# Deployment script: archive the local model, upload the archive to S3, then
# deploy it to a SageMaker endpoint through the Hugging Face model wrapper.

# Bound before the ``try`` so the ``finally`` clause can always reference it.
# Without this, a failure before the archive exists would make the cleanup
# raise NameError and hide the real error.
tar_path = None

try:
    # Initialize s3 client
    s3 = boto3.client("s3")
    bucket_name = 'customer-support-gpt'

    # Create and upload tar.gz
    tar_path = create_model_tar()
    s3_key = "models/model.tar.gz"
    logger.info(f"Uploading model.tar.gz to s3://{bucket_name}/{s3_key}")
    s3.upload_file(tar_path, bucket_name, s3_key)

    # Initialize SageMaker session
    sagemaker_session = sagemaker.Session()
    role = 'arn:aws:iam::841162707028:role/service-role/AmazonSageMaker-ExecutionRole-20241109T160615'

    # Verify IAM role; get_role takes the role name, i.e. the last ARN segment.
    iam = boto3.client('iam')
    try:
        iam.get_role(RoleName=role.split('/')[-1])
        logger.info(f"Successfully verified IAM role: {role}")
    except iam.exceptions.NoSuchEntityException:
        logger.error(f"IAM role not found: {role}")
        raise

    # Point to the tar.gz file
    model_artifacts = f's3://{bucket_name}/{s3_key}'
    print(f'Model artifacts: {model_artifacts}')

    # Environment variables handed to the model container.
    env = {
        "model_path": "/opt/ml/model",
        "max_length": "256",
        "generation_config": '{"max_length":100,"temperature":0.7,"top_p":0.95,"top_k":50,"do_sample":true}'
    }

    try:
        huggingface_model = HuggingFaceModel(
            model_data=model_artifacts,
            role=role,
            transformers_version="4.37.0",
            pytorch_version="2.1.0",
            py_version="py310",
            env=env,
            name="customer-support-gpt",
            # Reuse the session created above instead of leaving it unused.
            sagemaker_session=sagemaker_session,
        )

        logger.info("Starting model deployment...")
        predictor = huggingface_model.deploy(
            initial_instance_count=1,
            instance_type="ml.m5.xlarge",
            wait=True
        )
        logger.info("Model deployed successfully!")

    except Exception as e:
        logger.error(f"Error during model deployment: {str(e)}")
        raise

except Exception as e:
    logger.error(f"Deployment failed: {str(e)}")
    raise

finally:
    # Clean up the local tar file, if one was created.
    if tar_path is not None and os.path.exists(tar_path):
        os.remove(tar_path)
|