# legalQAcustom/compress.py
from transformers import AutoModelForCausalLM
import torch
# Load the fine-tuned legal QA model
model = AutoModelForCausalLM.from_pretrained("PyaeSoneK/LlamaV2LegalFineTuned")

# Compress the model in two steps: structured pruning, then dynamic quantization
# Pruning: torch.nn.utils.prune works per module, so apply Ln-structured
# pruning to each Linear layer, zeroing 30% of output channels by L2 norm.
# (Structured pruning zeroes channels in place; it does not shrink storage.)
import torch.nn.utils.prune as prune

for module in model.modules():
    if isinstance(module, torch.nn.Linear):
        prune.ln_structured(module, name="weight", amount=0.3, n=2, dim=0)
        prune.remove(module, "weight")  # bake the pruning mask into the weights
# Quantization: dynamic quantization converts the Linear layers' weights
# to int8, shrinking their storage by roughly 4x relative to fp32
quantized_model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)
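# Optional size check (a sketch): estimate serialized size by writing each
# state_dict to an in-memory buffer. The quantized copy should come out at
# roughly a quarter of the pruned fp32 model's size.
import io

def state_dict_size_mb(m: torch.nn.Module) -> float:
    buf = io.BytesIO()
    torch.save(m.state_dict(), buf)
    return buf.getbuffer().nbytes / 1e6

print(f"pruned fp32 model: {state_dict_size_mb(model):.1f} MB")
print(f"quantized model:   {state_dict_size_mb(quantized_model):.1f} MB")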
# Export the smaller model. Note that save_pretrained may not serialize
# dynamically quantized modules cleanly; torch.save(quantized_model.state_dict(), path)
# is a safer fallback.
quantized_model.save_pretrained("/path/to/smaller_model")
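# Optional sanity check (a sketch, assuming the tokenizer is published in the
# same repo): run a short generation to confirm the compressed model still
# produces sensible output. Dynamic quantization runs on CPU only, so expect
# this to be slow for a Llama-2-sized model.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("PyaeSoneK/LlamaV2LegalFineTuned")
inputs = tokenizer("What is consideration in contract law?", return_tensors="pt")
with torch.no_grad():
    output_ids = quantized_model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))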