import torch
import torch.nn.utils.prune as prune
from transformers import AutoModelForCausalLM

# Load the fine-tuned model
model = AutoModelForCausalLM.from_pretrained("PyaeSoneK/LlamaV2LegalFineTuned")

# Compress the model in two steps: pruning, then quantization.

# Pruning: structured L2 pruning that zeroes 30% of the output channels
# of every Linear layer, then makes the pruning permanent.
for module in model.modules():
    if isinstance(module, torch.nn.Linear):
        prune.ln_structured(module, name="weight", amount=0.3, n=2, dim=0)
        prune.remove(module, "weight")

# Quantization: dynamic int8 quantization of the Linear layers.
quantized_model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)

# Export the smaller model
quantized_model.save_pretrained("/path/to/smaller_model")
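
# Optional sanity check (a minimal sketch, not part of the original script):
# serialize the compressed state dict and report its on-disk size, assuming
# the steps above have run. The /tmp path is a placeholder.
import os

torch.save(quantized_model.state_dict(), "/tmp/compressed_state.pt")
size_mb = os.path.getsize("/tmp/compressed_state.pt") / 1e6
print(f"Compressed state dict: {size_mb:.1f} MB")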