I've tried compressing my .pt file using pruning, quantization, and various other methods, but each attempt has doubled the file size: a 20 MB file becomes 40 MB. If anyone has any ideas on how to effectively compress a .pt file, your help would be greatly appreciated.
1. Quantization method
import torch
import os
from torchvision.models import resnet18
from ultralytics import YOLO
from torch.quantization import quantize_dynamic
# Load the trained detector through the Ultralytics wrapper.
model = YOLO("ppe3.pt")
print("Model classes:", model.names)

# Dynamic quantization only swaps layer types that have a dynamic
# implementation (Linear and the recurrent layers). Conv2d is not one of
# them, so listing it had no effect; a conv-heavy detector therefore stays
# almost entirely floating point after this call.
QUANTIZED_LAYERS = {torch.nn.Linear}
model_quantized = quantize_dynamic(model.model, QUANTIZED_LAYERS, dtype=torch.qint8)

# NOTE(review): Ultralytics checkpoints are normally stored in FP16 and cast
# to FP32 when loaded - confirm for the installed version. Saving the FP32
# state dict as-is is what roughly doubles the file size, so cast the
# floating-point tensors back to FP16 first. The dict is updated in place so
# that its `_metadata` (module version info used when loading) is kept.
state = model_quantized.state_dict()
for key, value in list(state.items()):
    if torch.is_tensor(value) and value.is_floating_point():
        state[key] = value.half()
torch.save(state, "model_quantized.pt")

# Rebuild the same quantized architecture before loading: a state dict that
# contains packed quantized parameters does not match the keys of a plain
# floating-point model.
model_quantized_loaded = YOLO("ppe3.pt")
model_quantized_loaded.model = quantize_dynamic(
    model_quantized_loaded.model, QUANTIZED_LAYERS, dtype=torch.qint8
)
# load_state_dict copies values into the existing parameters, so the FP16
# tensors are cast back to the parameters' dtype here.
model_quantized_loaded.model.load_state_dict(torch.load("model_quantized.pt"))
model_quantized_loaded.model.eval()
print("Quantized model loaded and ready for inference.")
2. Pruning method
from ultralytics import YOLO
import torch.nn.utils.prune as prune
import torch
# Load the 'ppe2.pt' checkpoint through the Ultralytics YOLO wrapper; this is
# the object handed to prune_model below.
model = YOLO('ppe2.pt')
def prune_model(model, amount):
    """Apply L1 unstructured pruning to every Conv2d and Linear layer.

    Args:
        model: Wrapper object whose ``model`` attribute is the
            ``torch.nn.Module`` to prune (here, an Ultralytics YOLO object).
        amount: Passed straight to ``prune.l1_unstructured``; a float is the
            fraction of each layer's weights to prune, an int is an
            absolute count.

    Returns:
        The same ``model`` object, with its layers pruned in place.

    Note:
        Pruning is applied through a mask: each pruned layer gains
        ``weight_orig`` and ``weight_mask``. Call
        ``prune.remove(module, 'weight')`` afterwards to make it permanent.
        The weight tensors keep their shape, so this alone does not make a
        saved checkpoint smaller.
    """
    prunable_types = (torch.nn.Conv2d, torch.nn.Linear)
    for module in model.model.modules():
        if isinstance(module, prunable_types):
            prune.l1_unstructured(module, name='weight', amount=amount)
    return model
# Zero out 20% of the weights in every Conv2d / Linear layer.
pruned_model = prune_model(model, amount=0.2)

# Make the pruning permanent: fold each mask into the weight and drop the
# extra `weight_orig` / `weight_mask` tensors so they are not saved too.
for module in pruned_model.model.modules():
    if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
        prune.remove(module, 'weight')

# Unstructured pruning only sets entries to zero; the tensors stay dense and
# the same size, so it does not shrink the file by itself.
# NOTE(review): Ultralytics checkpoints are normally stored in FP16 and cast
# to FP32 when loaded - confirm for the installed version. Saving the FP32
# model is what roughly doubles the file size, so cast back to FP16 first.
# The in-memory model is half precision after this line.
pruned_model.model.half()
pruned_model.save('yolov_pruned.pt')