# Hugging Face Space file view (Space status at capture time: Runtime error)
# File size: 457 Bytes | revision f988d53
# Compress the fine-tuned legal Llama-2 checkpoint: load it, prune its Linear
# layers, apply dynamic int8 quantization, then export the result.
from transformers import AutoModelForCausalLM
import torch
import torch.nn.utils.prune as prune

# Load model
model = AutoModelForCausalLM.from_pretrained("PyaeSoneK/LlamaV2LegalFineTuned")

# Compress model...
# Pruning
# torch's pruning utilities operate on one (module, parameter-name) pair at a
# time, so walk the model and prune each Linear weight individually.
# The output head is skipped: structured pruning along dim=0 would zero whole
# vocabulary rows there.
output_head = model.get_output_embeddings()
for module in model.modules():
    if isinstance(module, torch.nn.Linear) and module is not output_head:
        # Zero the 30% of output channels (rows) with the smallest L2 norm.
        # NOTE(review): the original call gave only amount=0.3; n=2 / dim=0
        # are assumed defaults -- confirm they match the intended scheme.
        prune.ln_structured(module, name="weight", amount=0.3, n=2, dim=0)
        # Fold the mask into the weight and drop the re-parametrization so the
        # module is a plain nn.Linear again before quantization.
        prune.remove(module, "weight")
# Pruning is applied in place; it zeroes weights but does not shrink tensor
# shapes, so any size reduction comes from the quantization step below.
pruned_model = model

# Quantization
# Dynamic quantization swaps nn.Linear for int8-weight equivalents (CPU only).
quantized_model = torch.quantization.quantize_dynamic(
    pruned_model, {torch.nn.Linear}, dtype=torch.qint8
)

# Export smaller model
# NOTE(review): confirm that save_pretrained can serialize the packed int8
# Linear parameters with the installed transformers version; if it cannot,
# fall back to torch.save(quantized_model.state_dict(), ...).
quantized_model.save_pretrained("/path/to/smaller_model")