File size: 2,998 Bytes
18caca1 f363f0a 18caca1 f363f0a 18caca1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import json
import logging
import datetime
from transformers import AutoModelForCausalLM, AutoTokenizer
# Set up logging. INFO is only the bootstrap level used while the
# configuration is being read.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Fallback settings: used as a whole when the configuration file is missing,
# and key by key for anything the file does not define.
DEFAULT_CONFIG = {
    "model_name": "acecalisto3/InstructiPhi",  # Default model name
    "max_length": 16788,  # Default max length
    "logging_level": "INFO",  # Default logging level
}


# Load configuration settings from a separate file (config.json).
# The keys read by this module are "model_name", "max_length" and
# "logging_level". Example configuration file:
# {
#     "model_name": "acecalisto3/InstructiPhi",
#     "max_length": 16788,
#     "logging_level": "INFO"
# }
def load_config(path='config.json'):
    """Read settings from a JSON file, filling in defaults for missing keys.

    A file that only carries other keys (for example a model architecture
    description with "hidden_size", "eos_token_id", ...) is accepted: those
    keys are kept untouched and the defaults cover the ones this module needs.

    Args:
        path: Location of the JSON configuration file.

    Returns:
        A new dict containing every key of DEFAULT_CONFIG, overridden by the
        values found in the file.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
        TypeError: If the top-level JSON value is not an object.
    """
    try:
        with open(path, encoding='utf-8') as f:
            loaded = json.load(f)
    except FileNotFoundError:
        logger.error("Configuration file '%s' not found. Using default settings.", path)
        return dict(DEFAULT_CONFIG)

    if not isinstance(loaded, dict):
        raise TypeError(
            f"Configuration file '{path}' must contain a JSON object, "
            f"got {type(loaded).__name__}"
        )

    missing = [key for key in DEFAULT_CONFIG if key not in loaded]
    if missing:
        logger.warning(
            "Configuration file '%s' does not define %s. Using default values for them.",
            path,
            ", ".join(missing),
        )
    # Values from the file win over the defaults.
    return {**DEFAULT_CONFIG, **loaded}


config = load_config()
# Load model and tokenizer
model_name = config["model_name"]
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Set logging level from configuration.
# logging.basicConfig() does nothing once the root logger has a handler, and
# it has already been called at the top of this module, so the configured
# level is applied to the root logger directly instead.
_configured_level = config.get("logging_level", "INFO")
try:
    logging.getLogger().setLevel(_configured_level)
except (TypeError, ValueError):
    # An unknown level name must not prevent the handler from starting.
    logger.warning(
        "Invalid logging level %r in configuration. Keeping the current level.",
        _configured_level,
    )
def handle_request(event, context):
    """Handles incoming requests to the deployed model.

    Args:
        event: The event data from the deployment platform. The value stored
            under its 'body' key is used as the input text.
        context: The context data from the deployment platform (not used).

    Returns:
        A dictionary containing the response status code and body. The body
        is a JSON string: on success (200) it holds 'response', 'model' and
        'timestamp'; on a rejected input (400) or an unexpected failure (500)
        it holds a single 'error' message.
    """
    max_input_chars = 1000  # Set a reasonable limit

    # Bound before the try block so the error handler can always log it, even
    # when reading the event itself is what failed (e.g. event is None).
    input_text = None
    try:
        # Extract input text from the event
        input_text = event.get('body')
        if not input_text:
            return {
                'statusCode': 400,
                'body': json.dumps({'error': 'Missing input text'})
            }

        # Input validation: Check length
        if len(input_text) > max_input_chars:
            return {
                'statusCode': 400,
                'body': json.dumps({'error': 'Input text is too long'})
            }

        # Tokenize the input text
        input_ids = tokenizer(input_text, return_tensors="pt").input_ids

        # Generate the response using the model
        output = model.generate(input_ids, max_length=config["max_length"])

        # Decode the generated response
        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

        # Return a successful response with structured output
        return {
            'statusCode': 200,
            'body': json.dumps({
                'response': generated_text,
                'model': model_name,  # Include model name in the output
                # NOTE(review): naive local time, no UTC offset in the string.
                'timestamp': datetime.datetime.now().isoformat()
            })
        }
    except Exception as e:
        # Log the error with more context; logger.exception also records the
        # traceback, which the plain error message would drop.
        logger.exception("Error processing request: %s, input: %s", e, input_text)
        return {
            'statusCode': 500,
            'body': json.dumps({'error': 'Internal server error'})
        }