=============================== Compression Pipeline =============================== !pip install sconce -q .. code:: ipython3 !pip install sconce --quiet .. parsed-literal:: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 83.1/83.1 kB 1.6 MB/s eta 0:00:00 .. code:: ipython3 from collections import defaultdict, OrderedDict import numpy as np import torch from torch import nn from torch.optim import * from torch.optim.lr_scheduler import * from torch.utils.data import DataLoader from torchvision.datasets import * from torchvision.transforms import * import torch.optim as optim assert torch.cuda.is_available(), \ "The current runtime does not have CUDA support." \ "Please go to menu bar (Runtime - Change runtime type) and select GPU" Load the Pre-Trained Model Weights .. code:: ipython3 from google.colab import drive drive.mount('/content/drive') model_path = "drive/MyDrive/Efficientml/Efficientml.ai/vgg.cifar.pretrained.pth" .. parsed-literal:: Mounted at /content/drive .. 
class VGG(nn.Module):
    """VGG-style image classifier: conv-bn-relu stages, max-pools, linear head.

    ``ARCH`` describes the backbone in order: an integer is the number of
    output channels of a 3x3 conv-bn-relu stage, ``'M'`` is a 2x2 max-pool.
    Layers are registered under per-kind counters (``conv0``, ``bn0``,
    ``relu0``, ``conv1``, ..., ``pool0``, ...), so parameter names take the
    form ``backbone.conv0.weight`` / ``classifier.weight``.
    """

    ARCH = [64, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']

    def __init__(self) -> None:
        super().__init__()

        layers = []
        counts = defaultdict(int)

        def add(name: str, layer: nn.Module) -> None:
            # Suffix each layer with a running index for its kind so the
            # names inside the Sequential container are unique and stable.
            layers.append((f"{name}{counts[name]}", layer))
            counts[name] += 1

        in_channels = 3
        for x in self.ARCH:
            if x != 'M':
                # conv-bn-relu
                add("conv", nn.Conv2d(in_channels, x, 3, padding=1, bias=False))
                add("bn", nn.BatchNorm2d(x))
                add("relu", nn.ReLU(inplace=True))
                in_channels = x
            else:
                # maxpool
                add("pool", nn.MaxPool2d(2))

        self.backbone = nn.Sequential(OrderedDict(layers))
        self.classifier = nn.Linear(512, 10)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits for a batch of images."""
        # backbone: [N, 3, 32, 32] => [N, 512, 2, 2]
        x = self.backbone(x)

        # avgpool: [N, 512, 2, 2] => [N, 512]
        x = x.mean([2, 3])

        # classifier: [N, 512] => [N, 10]
        x = self.classifier(x)
        return x


# Load the pretrained model (`model_path` is set in the Drive-mount cell).
model = VGG().cuda()
checkpoint = torch.load(model_path)
model.load_state_dict(checkpoint['state_dict'])

# Setup the Dataset
image_size = 32

# `Compose` is called directly: it is star-imported exactly like RandomCrop
# and ToTensor.  The previous spelling, `transforms.Compose`, looked the
# attribute up on the very name this statement rebinds to a dict, so the cell
# only worked on its first execution and raised AttributeError on a re-run.
transforms = {
    "train": Compose([
        RandomCrop(image_size, padding=4),
        RandomHorizontalFlip(),
        ToTensor(),
    ]),
    "test": ToTensor(),
}

# Build the dataset and its loader for each split in a single pass; only the
# training split is augmented (via its transform) and shuffled.
dataset = {}
dataloader = {}
for split in ("train", "test"):
    dataset[split] = CIFAR10(
        root="data/cifar10",
        train=(split == "train"),
        download=True,
        transform=transforms[split],
    )
    dataloader[split] = DataLoader(
        dataset[split],
        batch_size=512,
        shuffle=(split == "train"),
        num_workers=0,
        pin_memory=True,
    )

# Recorded output of the original run: the CIFAR-10 archive was downloaded
# from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz and extracted
# to data/cifar10 ("Files already downloaded and verified").

# sconce Configurations
code:: ipython3 from sconce import sconce import copy sconces = sconce() sconces.model= copy.deepcopy(model) sconces.criterion = nn.CrossEntropyLoss() # Loss sconces.optimizer= optim.Adam(sconces.model.parameters(), lr=1e-4) sconces.scheduler = optim.lr_scheduler.CosineAnnealingLR(sconces.optimizer, T_max=200) sconces.dataloader = dataloader sconces.epochs = 1 #Number of times we iterate over the data sconces.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") sconces.experiment_name = "vgg-gmp" sconces.prune_mode = "GMP" # Supports Automated Pruning Ratio Detection .. parsed-literal:: Train and Validate the Model on the given dataset .. code:: ipython3 # Train the model sconces.train() # Evaluate the model sconces.evaluate() .. parsed-literal:: Epoch:1 Train Loss: 0.00000 Validation Accuracy: 92.90581 .. parsed-literal:: 92.90581162324649 The magic happens here: compress the model (GMP pruning is set as the prune mode [sconces.prune_mode] above) .. code:: ipython3 sconces.compress() .. parsed-literal:: Original Dense Model Size Model=35.20 MiB .. parsed-literal:: Original Model Validation Accuracy: 92.90581162324649 % Granular-Magnitude Pruning .. parsed-literal:: Sensitivity Scan Time(mins): 2.669245207309723 Sparsity for each Layer: {'backbone.conv0.weight': 0.45000000000000007, 'backbone.conv1.weight': 0.7500000000000002, 'backbone.conv2.weight': 0.7000000000000002, 'backbone.conv3.weight': 0.6500000000000001, 'backbone.conv4.weight': 0.6000000000000002, 'backbone.conv5.weight': 0.7000000000000002, 'backbone.conv6.weight': 0.7000000000000002, 'backbone.conv7.weight': 0.8500000000000002, 'classifier.weight': 0.9500000000000003} Pruning Time Consumed (mins): 6.053447723388672e-05 Total Pruning Time Consumed (mins): 2.669320074717204 .. parsed-literal:: Pruned Model has size=9.77 MiB(non-zeros) = 27.76% of Original model size Pruned Model has Accuracy=84.41 MiB(non-zeros) = -8.50% of Original model Accuracy .. 
parsed-literal:: ==================== Fine-Tuning ==================== Epoch:1 Train Loss: 0.00000 Validation Accuracy: 91.88377 Epoch:2 Train Loss: 0.00000 Validation Accuracy: 91.81363 Epoch:3 Train Loss: 0.00000 Validation Accuracy: 91.90381 Epoch:4 Train Loss: 0.00000 Validation Accuracy: 91.87375 Epoch:5 Train Loss: 0.00000 Validation Accuracy: 91.94389 Fine-Tuned Sparse model has size=9.77 MiB = 27.76% of Original model size Fine-Tuned Pruned Model Validation Accuracy: 91.9438877755511 .. parsed-literal:: ==================== Quantization-Aware Training(QAT) ==================== train: 0%| | 0/98 [00:00