# SkazuHD's picture
# init space
# d660b02
from pathlib import Path
from huggingface_hub import HfApi
from loguru import logger
try:
from sagemaker.huggingface import HuggingFaceProcessor
except ModuleNotFoundError:
logger.warning("Couldn't load SageMaker imports. Run 'poetry install --with aws' to support AWS.")
from llm_engineering import settings
# Absolute path of the directory that contains this module; it is passed to
# the processing job as its source directory.
evaluation_dir = Path(__file__).resolve().parent
# requirements.txt located next to this module; its presence is verified
# before a job is launched.
evaluation_requirements_path = evaluation_dir.joinpath("requirements.txt")
def run_evaluation_on_sagemaker(is_dummy: bool = True) -> None:
    """Launch ``evaluate.py`` as a SageMaker processing job.

    The function validates the required settings and local files, resolves the
    current Hugging Face user from the configured access token, and then runs
    ``evaluate.py`` from this module's directory through a
    ``HuggingFaceProcessor`` on a single ``ml.g5.2xlarge`` instance.

    The resolved Hugging Face user name is passed to the job as both the
    dataset workspace and the model workspace.

    Args:
        is_dummy: When true, ``IS_DUMMY="True"`` is added to the job's
            environment. How the evaluation script interprets the flag is not
            visible from this module.

    Raises:
        AssertionError: If the Hugging Face access token, the OpenAI API key
            or the AWS ARN role setting is empty.
        FileNotFoundError: If the evaluation directory or its
            ``requirements.txt`` does not exist.
        ModuleNotFoundError: If the optional SageMaker dependency could not be
            imported when this module was loaded.
    """
    # Validate explicitly instead of using `assert`: assert statements are
    # removed when Python runs with -O, which would silently skip these checks.
    # AssertionError and the messages are kept for backward compatibility.
    if not settings.HUGGINGFACE_ACCESS_TOKEN:
        raise AssertionError("Hugging Face access token is required.")
    if not settings.OPENAI_API_KEY:
        raise AssertionError("OpenAI API key is required.")
    if not settings.AWS_ARN_ROLE:
        raise AssertionError("AWS ARN role is required.")

    if not evaluation_dir.exists():
        raise FileNotFoundError(f"The directory {evaluation_dir} does not exist.")
    if not evaluation_requirements_path.exists():
        raise FileNotFoundError(f"The file {evaluation_requirements_path} does not exist.")

    # The SageMaker import at module level is optional. Fail here with an
    # actionable message, before any network call, rather than with a bare
    # NameError once the processor is constructed.
    if "HuggingFaceProcessor" not in globals():
        raise ModuleNotFoundError(
            "Couldn't load SageMaker imports. Run 'poetry install --with aws' to support AWS."
        )

    # Resolve the account that owns the configured token.
    api = HfApi()
    user_info = api.whoami(token=settings.HUGGINGFACE_ACCESS_TOKEN)
    huggingface_user = user_info["name"]
    logger.info(f"Current Hugging Face user: {huggingface_user}")

    # Environment variables forwarded to the processing job.
    env = {
        "HUGGING_FACE_HUB_TOKEN": settings.HUGGINGFACE_ACCESS_TOKEN,
        "OPENAI_API_KEY": settings.OPENAI_API_KEY,
        "DATASET_HUGGINGFACE_WORKSPACE": huggingface_user,
        "MODEL_HUGGINGFACE_WORKSPACE": huggingface_user,
    }
    if is_dummy:
        env["IS_DUMMY"] = "True"

    # Initialize the HuggingFaceProcessor
    hfp = HuggingFaceProcessor(
        role=settings.AWS_ARN_ROLE,
        instance_count=1,
        instance_type="ml.g5.2xlarge",
        transformers_version="4.36",
        pytorch_version="2.1",
        py_version="py310",
        base_job_name="evaluate-llm-twin",
        env=env,
    )

    # Run the processing job
    hfp.run(
        code="evaluate.py",
        source_dir=str(evaluation_dir),
    )
if __name__ == "__main__":
    # Script entry point: launch the evaluation job with the default
    # configuration, i.e. with the dummy flag enabled.
    run_evaluation_on_sagemaker(is_dummy=True)