0

I've tried compressing my .pt file using pruning, quantization, and various other methods, but each attempt has doubled the file size instead: a 20 MB file becomes 40 MB. If anyone has any ideas on how to effectively compress a .pt file, your help would be greatly appreciated.

1. Quantization method

import torch
import os
from torchvision.models import resnet18
from ultralytics import YOLO
from torch.quantization import quantize_dynamic


# Load the source checkpoint and show its class-id -> name mapping.
model = YOLO("ppe3.pt")
print("Model classes:", model.names)

# Dynamic quantization only swaps layer types that have a dynamic kernel
# (nn.Linear and the RNN family). nn.Conv2d is not among them, so listing it
# was silently ignored; only nn.Linear is requested to make that explicit.
quantizable_layer_types = {torch.nn.Linear}
model_quantized = quantize_dynamic(
    model.model, quantizable_layer_types, dtype=torch.qint8
)

# The in-memory weights are FP32. Writing them out as-is produces a file about
# twice the size of an FP16 checkpoint (Ultralytics checkpoints are usually
# stored in FP16 -- TODO confirm for ppe3.pt). Cast floating-point tensors to
# FP16 before saving. The dict is updated in place so the `_metadata`
# attribute that quantized modules need when loading is preserved.
compact_state = model_quantized.state_dict()
for key, value in list(compact_state.items()):
    # Packed quantized params are stored as tuples / qint8 tensors: skip them.
    if torch.is_tensor(value) and value.is_floating_point():
        compact_state[key] = value.half()
torch.save(compact_state, "model_quantized.pt")

# Rebuild the architecture from the original checkpoint, then apply the same
# quantization so the module structure (and therefore the state-dict keys)
# matches what was saved.
model_quantized_loaded = YOLO("ppe3.pt")
quantize_dynamic(
    model_quantized_loaded.model,
    quantizable_layer_types,
    dtype=torch.qint8,
    inplace=True,
)

# load_state_dict copies into the existing parameters, so the FP16 tensors on
# disk are cast back to each parameter's dtype here.
model_quantized_loaded.model.load_state_dict(torch.load("model_quantized.pt"))

model_quantized_loaded.model.eval()

print("Quantized model loaded and ready for inference.")




2. Pruning method

from ultralytics import YOLO
import torch.nn.utils.prune as prune
import torch

# Load the checkpoint that will be pruned below.
model = YOLO("ppe2.pt")
def prune_model(model, amount):
    """Apply L1 unstructured pruning to every Conv2d and Linear layer.

    Args:
        model: Wrapper object whose ``.model`` attribute is the
            ``torch.nn.Module`` to prune.
        amount: Value forwarded unchanged to ``prune.l1_unstructured`` as
            its ``amount`` argument.

    Returns:
        The same ``model`` object that was passed in; its layers are
        modified in place.
    """
    prunable_types = (torch.nn.Conv2d, torch.nn.Linear)
    targets = [
        layer
        for _, layer in model.model.named_modules()
        if isinstance(layer, prunable_types)
    ]
    for layer in targets:
        prune.l1_unstructured(layer, name="weight", amount=amount)
    return model
# Mask the 20% smallest-magnitude weights in each Conv2d / Linear layer.
pruned_model = prune_model(model, amount=0.2)

# Make the pruning permanent: prune.remove folds the mask into `weight` and
# drops the `weight_orig` / `weight_mask` bookkeeping tensors, which would
# otherwise be stored alongside the weights.
for module in pruned_model.model.modules():
    if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
        prune.remove(module, 'weight')

# Save once, after every layer has been finalized. Previously this call sat
# inside the loop and rewrote the checkpoint once per matching layer.
# NOTE: unstructured pruning only zeroes individual weights; the tensors keep
# their shape and are stored densely, so this step alone does not shrink the
# file on disk.
pruned_model.save('yolov_pruned.pt')


0