Is setting model.eval() required while using QuantizationSimModel in PyTorch?

edan840216 · July 29, 2021, 2:48am

Dear AIMET Researchers,

In my experiments, when using QuantizationSimModel to evaluate the quantized version of a model, there is a large gap in quantized accuracy depending on whether model.eval() is called. For example, with model.eval() on ResNet18 and the CIFAR-10 dataset, we get 0.9474 with the 31a31w quantization setting. However, without model.eval() on the same model and dataset, we get 0.8588 with the same quantization setting. The original FP32 model accuracy is 0.9463.

My question is: which setting is correct when using QuantizationSimModel? I hope you can share your experience with me. Thank you for your attention!

P.S. The code I wrote is attached below:

from __future__ import division
import numpy as np
import timm
import PIL
import numpy as np
from tqdm import tqdm
import copy
import torchvision
import torch
from torchvision import transforms

# Quantization related import
from aimet_torch.quantsim import QuantizationSimModel

from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD

# Path to the ResNet-18 checkpoint (.pth) that is loaded into the model.
MODEL_PATH = '../model_pth/cifar_resnet.pth'

# Size of the classifier head requested when the model is created.
NUM_FINETUNE_CLASSES = 10

# Prefer the first CUDA device; fall back to the CPU when CUDA is unavailable.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Preprocessing pipeline: bicubic resize to 224x224, tensor conversion, then
# normalisation with the ImageNet mean/std constants imported from timm.
transform = transforms.Compose([
  transforms.Resize((224, 224), interpolation=PIL.Image.BICUBIC),
  transforms.ToTensor(),
  transforms.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
])

# Number of samples per batch for every loader built in this script.
batch_size = 512

# CIFAR-10 test split (train=False); downloaded into ../data if missing.
testset = torchvision.datasets.CIFAR10(
  root='../data',
  train=False,
  download=True,
  transform=transform,
)

# Sequential (unshuffled) loader over the full test split.
testloader = torch.utils.data.DataLoader(
  testset,
  batch_size=batch_size,
  shuffle=False,
  num_workers=32,
)

def evaluate_model(model: torch.nn.Module, eval_iterations: int, use_cuda: bool = True) -> float:
  """
  Compute the top-1 accuracy of ``model`` on a prefix of the test set.

  This is intended to be the user-defined model evaluation function.
  AIMET requires the above signature. So if the user's eval function does not
  match this signature, please create a simple wrapper.

  Note: Honoring the number of iterations is not absolutely necessary.
  However if all evaluations run over an entire epoch of validation data,
  the runtime for AIMET compression will obviously be higher.

  Note: This function does not switch the model between train and eval mode;
  the model is run in whatever mode the caller left it in. Call
  ``model.eval()`` beforehand if inference-mode behaviour is wanted.

  :param model: Model to evaluate
  :param eval_iterations: Number of batches (of ``batch_size`` samples each)
          to use for evaluation, capped at the size of the test set.
          None for entire epoch.
  :param use_cuda: If true, move the model and every batch to the
          module-level ``device`` before running the forward pass
  :return: single float number (accuracy) representing model's performance;
          0.0 when no samples were evaluated
  """
  if eval_iterations is None:
    num_sample_data = len(testset)
  else:
    # Clamp to [0, len(testset)] so a zero/negative request yields an empty subset.
    num_sample_data = max(0, min(eval_iterations * batch_size, len(testset)))

  subdataset = torch.utils.data.Subset(testset, range(num_sample_data))
  subtestloader = torch.utils.data.DataLoader(subdataset, batch_size=batch_size,
                                      shuffle=False, num_workers=32)
  if use_cuda:
    model.to(device)

  correct = 0
  total = 0
  # since we're not training, we don't need to calculate the gradients for our outputs
  with torch.no_grad():
    for images, labels in tqdm(subtestloader, total=len(subtestloader)):
      if use_cuda:
        images, labels = images.to(device), labels.to(device)
      # calculate outputs by running images through the network
      outputs = model(images)
      # the class with the highest energy is what we choose as prediction
      _, predicted = torch.max(outputs, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

  # Avoid ZeroDivisionError when the requested subset was empty.
  if total == 0:
    return 0.0
  return correct / total

def quantize_model(model, output_bw: int, param_bw: int):
  """
  Print the accuracy of ``model`` before and after quantization simulation.

  The FP32 accuracy is measured first on ``model`` itself. Then two
  independent deep copies are wrapped in a QuantizationSimModel, calibrated
  via ``compute_encodings`` (with ``evaluate_model`` as the forward-pass
  callback and 5 as its argument) and evaluated over 20 iterations:

  * first pass: the copy is left in whatever mode ``model`` is in;
  * second pass: ``eval()`` is called on the copy before simulation.

  :param model: Model to quantize; only deep copies are handed to AIMET
  :param output_bw: Bit-width passed as ``default_output_bw``
  :param param_bw: Bit-width passed as ``default_param_bw``
  :return: None, results are printed
  """
  baseline_accuracy = evaluate_model(model, 20, True)
  print ("FP32 model accuracy: %f" % (baseline_accuracy))

  # (call eval() on the copy?, report line for that pass)
  passes = (
    (False, "Without model.eval(), Output bit: %d, params bit: %d, model accuracy: %f"),
    (True, "With model.eval(), Output bit: %d, params bit: %d, model accuracy: %f"),
  )
  for switch_to_eval, report_format in passes:
    candidate = copy.deepcopy(model)
    if switch_to_eval:
      candidate.eval()

    dummy_input = torch.rand(1, 3, 224, 224).to(device)
    sim = QuantizationSimModel(candidate, default_output_bw=output_bw,
                               default_param_bw=param_bw, dummy_input=dummy_input)
    sim.compute_encodings(forward_pass_callback=evaluate_model, forward_pass_callback_args=5)

    quantized_accuracy = evaluate_model(sim.model, 20, True)
    print (report_format % (output_bw, param_bw, quantized_accuracy))

if __name__ == '__main__':
  # Build ResNet-18 with a NUM_FINETUNE_CLASSES-way head and place it on `device`.
  model = timm.create_model('resnet18', pretrained=True, num_classes=NUM_FINETUNE_CLASSES).to(device)
  # map_location remaps the stored tensors onto `device`, so a checkpoint that
  # was saved on a GPU can still be loaded when the script falls back to the CPU.
  model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
  # Simulate quantization with 31-bit outputs (activations) and 31-bit parameters.
  quantize_model(model, 31, 31)

Best regards,
Edan

quic_ssiddego · August 18, 2021, 7:06pm

@edan840216 We do call model.eval() before performing quantization simulation. This puts the model in evaluation (inference) mode, which has a specific effect on certain types of modules, such as BatchNorm.

Topic		Replies	Views
Bitwidth in AIMET PyTorch Quantization	6	1271	March 7, 2021
How to check whether the layers of model is Quantized?	1	855	May 28, 2021
Saving and loading compressed model	3	933	November 20, 2020
Efficient-Net-B0 AIMET quantization	2	1138	July 28, 2021
Exception while exporting Quantized model	4	1758	August 6, 2020

Is setting model.eval() required while using QuantizationSimModel in PyTorch?

Related Topics