# Hugging Face Space status at capture time: Runtime error
"""Compress a fine-tuned causal LM: prune its Linear layers, quantize, and save.

Running this script downloads the checkpoint, zeroes 30% of the output
channels of every ``nn.Linear`` layer, applies dynamic int8 quantization to
those layers, and writes the result to ``/path/to/smaller_model``.
"""
import os

import torch
from torch import nn
from torch.nn.utils import prune
from transformers import AutoModelForCausalLM

# Load model
model = AutoModelForCausalLM.from_pretrained("PyaeSoneK/LlamaV2LegalFineTuned")
model.eval()  # compression below is post-training; no gradients are needed

# Compress model...
# Pruning
# torch's ln_structured works on one module/parameter at a time, so walk the
# model and prune each Linear layer's weight individually.
# NOTE(review): n=2 (L2 norm) and dim=0 (output channels) are assumptions --
# the original call only specified amount=0.3. This also prunes the output
# head; confirm that is intended.
for module in model.modules():
    if isinstance(module, nn.Linear):
        prune.ln_structured(module, name="weight", amount=0.3, n=2, dim=0)
        # Fold the mask into the weight so the state dict has plain `weight`
        # tensors again (no `weight_orig` / `weight_mask` entries).
        prune.remove(module, "weight")
pruned_model = model  # pruning is applied in place

# Quantization
# Dynamic quantization converts Linear weights to int8 (CPU inference only).
quantized_model = torch.ao.quantization.quantize_dynamic(
    pruned_model, {nn.Linear}, dtype=torch.qint8
)

# Export smaller model
# Dynamically quantized Linear layers hold packed parameters that are not
# plain tensors, so save the config and a torch-serialized state dict rather
# than relying on save_pretrained's tensor-only serialization.
# NOTE(review): to reload, rebuild the model from the config, re-apply
# quantize_dynamic, then load_state_dict from pytorch_model.bin.
output_dir = "/path/to/smaller_model"
quantized_model.config.save_pretrained(output_dir)  # also creates the directory
torch.save(
    quantized_model.state_dict(),
    os.path.join(output_dir, "pytorch_model.bin"),
)