```python model_id = "mgoin/starcoderbase-1b-pruned50-quant" # Load model with SparseAutoModel from sparseml.transformers.utils import SparseAutoModel from transformers import AutoConfig config = AutoConfig.from_pretrained(model_id) # Why does SparseAutoModel need config? model = SparseAutoModel.text_generation_from_pretrained(model_id, config=config) # Apply recipe to model # Note: Really annoying we can't grab the recipe.yaml present in the uploaded model # and you need this separate apply_recipe_structure_to_model function from sparseml.pytorch.model_load.helpers import apply_recipe_structure_to_model from huggingface_hub import hf_hub_download import os recipe_path = hf_hub_download(repo_id=model_id, filename="recipe.yaml") apply_recipe_structure_to_model( model=model, recipe_path=recipe_path, model_path=os.path.dirname(recipe_path) ) # Regular HF inference from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained(model_id) inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt") outputs = model.generate(inputs) print(tokenizer.decode(outputs[0])) """ def print_hello_world(): print("Hello World!") print_hello_world """ ```