| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125 |
- # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import copy
- from paddle.distributed import fleet
- from paddle.nn import Layer
- from .config import QuantConfig
- from .quantize import Quantization
- class PTQ(Quantization):
- """
- Applying post training quantization to the model.
- """
- def __init__(self, config: QuantConfig):
- super().__init__(config)
- def _is_parallel_training(self):
- try:
- if fleet.worker_num() > 2:
- return True
- else:
- return False
- except Exception: # fleet is not initialized
- return False
- def quantize(self, model: Layer, inplace=False):
- r"""
- Create a model for post-training quantization.
- The quantization configuration will be propagated in the model.
- And it will insert observers into the model to collect and compute
- quantization parameters.
- Args:
- model(Layer): The model to be quantized.
- inplace(bool): Whether to modify the model in-place.
- Return: The prepared model for post-training quantization.
- Examples:
- .. code-block:: python
- >>> from paddle.quantization import PTQ, QuantConfig
- >>> from paddle.quantization.observers import AbsmaxObserver
- >>> from paddle.vision.models import LeNet
- >>> observer = AbsmaxObserver()
- >>> q_config = QuantConfig(activation=observer, weight=observer)
- >>> ptq = PTQ(q_config)
- >>> model = LeNet()
- >>> model.eval()
- >>> quant_model = ptq.quantize(model)
- >>> print(quant_model)
- LeNet(
- (features): Sequential(
- (0): QuantedConv2D(
- (weight_quanter): AbsmaxObserverLayer()
- (activation_quanter): AbsmaxObserverLayer()
- )
- (1): ObserveWrapper(
- (_observer): AbsmaxObserverLayer()
- (_observed): ReLU()
- )
- (2): ObserveWrapper(
- (_observer): AbsmaxObserverLayer()
- (_observed): MaxPool2D(kernel_size=2, stride=2, padding=0)
- )
- (3): QuantedConv2D(
- (weight_quanter): AbsmaxObserverLayer()
- (activation_quanter): AbsmaxObserverLayer()
- )
- (4): ObserveWrapper(
- (_observer): AbsmaxObserverLayer()
- (_observed): ReLU()
- )
- (5): ObserveWrapper(
- (_observer): AbsmaxObserverLayer()
- (_observed): MaxPool2D(kernel_size=2, stride=2, padding=0)
- )
- )
- (fc): Sequential(
- (0): QuantedLinear(
- (weight_quanter): AbsmaxObserverLayer()
- (activation_quanter): AbsmaxObserverLayer()
- )
- (1): QuantedLinear(
- (weight_quanter): AbsmaxObserverLayer()
- (activation_quanter): AbsmaxObserverLayer()
- )
- (2): QuantedLinear(
- (weight_quanter): AbsmaxObserverLayer()
- (activation_quanter): AbsmaxObserverLayer()
- )
- )
- )
- """
- _model = model
- if not inplace:
- assert (
- not self._is_parallel_training()
- ), "'inplace' is not compatible with parallel training."
- _model = copy.deepcopy(model)
- _model.eval()
- assert (
- not model.training
- ), "Post-Training Quantization should not work on training models. Please set evaluation mode by model.eval()."
- self._config._specify(_model)
- self._convert_to_quant_layers(_model, self._config)
- self._insert_activation_observers(_model, self._config)
- return _model
|