| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
2771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713 |
- # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """
- This code is adapted from:
- https://github.com/PaddlePaddle/PaddleClas/blob/2f36cab604e439b59d1a854df34ece3b10d888e3/ppcls/arch/backbone/legendary_models/pp_hgnet_v2.py
- """
- from __future__ import absolute_import, division, print_function
- import math
- import numpy as np
- import paddle
- import paddle.nn as nn
- import paddle.nn.functional as F
- from paddle import ParamAttr
- from paddle.nn import Conv2D, BatchNorm, Linear, BatchNorm2D, MaxPool2D, AvgPool2D
- from paddle.nn.initializer import Uniform
- from paddle.regularizer import L2Decay
- from typing import Tuple, List, Dict, Union, Callable, Any
- from ppocr.modeling.backbones.rec_donut_swin import DonutSwinModelOutput
class IdentityBasedConv1x1(nn.Conv2D):
    """1x1 convolution whose effective kernel is ``identity + learned weight``.

    The learnable weight is reset to zeros in the constructor, so the layer
    initially applies only the fixed per-group identity kernel.

    Args:
        channels: Number of input and output channels.
        groups: Number of convolution groups; must divide ``channels``.
    """

    def __init__(self, channels, groups=1):
        super().__init__(
            in_channels=channels,
            out_channels=channels,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=groups,
            bias_attr=False,
        )
        assert channels % groups == 0

        # Per-group identity: output channel i reads input slot (i % per_group).
        per_group = channels // groups
        identity = np.zeros((channels, per_group, 1, 1))
        out_idx = np.arange(channels)
        identity[out_idx, out_idx % per_group, 0, 0] = 1
        # NOTE(review): np.zeros defaults to float64, so this tensor may not
        # share the weight's dtype -- confirm the addition below is intended
        # to rely on Paddle's type promotion.
        self.id_tensor = paddle.to_tensor(identity)

        # Start the learnable part at zero.
        self.weight.set_value(paddle.zeros_like(self.weight))

    def forward(self, input):
        """Convolve ``input`` with the identity-augmented 1x1 kernel."""
        return F.conv2d(
            input,
            self.get_actual_kernel(),
            None,
            stride=1,
            padding=0,
            dilation=self._dilation,
            groups=self._groups,
        )

    def get_actual_kernel(self):
        """Return the effective kernel (learned weight plus identity)."""
        return self.weight + self.id_tensor
class BNAndPad(nn.Layer):
    """Batch normalization followed by constant border padding.

    The border is filled per channel with
    ``bn.bias + bn.weight * (shift / sqrt(bn._variance + bn._epsilon))``
    where ``shift`` is ``-bn._mean`` plus ``last_conv_bias`` when given.

    Args:
        pad_pixels: Border width added on every spatial side (0 disables).
        num_features: Channel count handed to the normalization layer.
        epsilon: Epsilon forwarded to the normalization layer.
        momentum: Momentum forwarded to the normalization layer.
        last_conv_bias: Optional bias of the preceding convolution.
        bn: Normalization layer class to instantiate.
    """

    # NOTE(review): momentum=0.1 looks inherited from a PyTorch implementation;
    # Paddle's BatchNorm2D weights the *old* running statistic by `momentum`
    # (its own default is 0.9) -- confirm that 0.1 is really intended here.
    def __init__(
        self,
        pad_pixels,
        num_features,
        epsilon=1e-5,
        momentum=0.1,
        last_conv_bias=None,
        bn=nn.BatchNorm2D,
    ):
        super().__init__()
        self.bn = bn(num_features, momentum=momentum, epsilon=epsilon)
        self.pad_pixels = pad_pixels
        self.last_conv_bias = last_conv_bias

    def forward(self, input):
        """Normalize ``input`` and surround it with the per-channel pad value."""
        normed = self.bn(input)
        if not self.pad_pixels > 0:
            return normed

        shift = -self.bn._mean
        if self.last_conv_bias is not None:
            shift = shift + self.last_conv_bias
        inv_std_shift = shift / paddle.sqrt(self.bn._variance + self.bn._epsilon)
        pad_values = self.bn.bias + self.bn.weight * inv_std_shift

        # TODO: n,h,w,c format is not supported yet
        batch, _, height, width = normed.shape
        fill = pad_values.reshape([1, -1, 1, 1])

        # Pad along axis 2 first (top/bottom strips)...
        strip_h = fill.expand([batch, -1, self.pad_pixels, width])
        padded = paddle.concat([strip_h, normed, strip_h], axis=2)

        # ...then along axis 3, using the already-extended height.
        padded_height = height + self.pad_pixels * 2
        strip_w = fill.expand([batch, -1, padded_height, self.pad_pixels])
        return paddle.concat([strip_w, padded, strip_w], axis=3)

    # The properties below expose the wrapped layer's attributes so this
    # object can be passed where a plain batch-norm layer is expected
    # (e.g. to transI_fusebn).
    @property
    def weight(self):
        return self.bn.weight

    @property
    def bias(self):
        return self.bn.bias

    @property
    def _mean(self):
        return self.bn._mean

    @property
    def _variance(self):
        return self.bn._variance

    @property
    def _epsilon(self):
        return self.bn._epsilon
def conv_bn(
    in_channels,
    out_channels,
    kernel_size,
    stride=1,
    padding=0,
    dilation=1,
    groups=1,
    padding_mode="zeros",
):
    """Build a bias-free ``Conv2D`` followed by ``BatchNorm2D``.

    Returns:
        An ``nn.Sequential`` whose sublayers are named ``"conv"`` and ``"bn"``.
    """
    return nn.Sequential(
        (
            "conv",
            nn.Conv2D(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
                groups=groups,
                bias_attr=False,
                padding_mode=padding_mode,
            ),
        ),
        ("bn", nn.BatchNorm2D(num_features=out_channels)),
    )
def transI_fusebn(kernel, bn):
    """Fold a batch-norm layer into the preceding convolution kernel.

    Args:
        kernel: Convolution weight; its first axis is scaled per channel.
        bn: Object exposing ``weight``, ``bias``, ``_mean``, ``_variance``
            and ``_epsilon``.

    Returns:
        Tuple ``(fused_kernel, fused_bias)``.
    """
    scale = bn.weight
    stddev = (bn._variance + bn._epsilon).sqrt()
    fused_kernel = kernel * ((scale / stddev).reshape([-1, 1, 1, 1]))
    fused_bias = bn.bias - bn._mean * scale / stddev
    return fused_kernel, fused_bias
def transII_addbranch(kernels, biases):
    """Sum parallel branches: return ``(sum of kernels, sum of biases)``.

    Accumulation starts from the integer ``0``, so plain ``0`` placeholders
    for missing branches are accepted and empty inputs yield ``0``.
    """
    kernel_total = 0
    for branch_kernel in kernels:
        kernel_total = kernel_total + branch_kernel
    bias_total = 0
    for branch_bias in biases:
        bias_total = bias_total + branch_bias
    return kernel_total, bias_total
def transIII_1x1_kxk(k1, b1, k2, b2, groups):
    """Merge a 1x1 convolution (k1, b1) followed by a KxK convolution (k2, b2).

    Args:
        k1, b1: Kernel and bias of the first (1x1) convolution.
        k2, b2: Kernel and bias of the second (KxK) convolution.
        groups: Group count shared by both convolutions.

    Returns:
        Tuple ``(merged_kernel, merged_bias)``.
    """
    if groups == 1:
        merged_kernel = F.conv2d(k2, k1.transpose([1, 0, 2, 3]))
        merged_bias = (k2 * b1.reshape([1, -1, 1, 1])).sum((1, 2, 3))
        return merged_kernel, merged_bias + b2

    # Grouped case: merge each group independently, then stack along axis 0.
    k1_transposed = k1.transpose([1, 0, 2, 3])
    k1_width = k1.shape[0] // groups
    k2_width = k2.shape[0] // groups
    kernel_parts = []
    bias_parts = []
    for group_idx in range(groups):
        k1_span = slice(group_idx * k1_width, (group_idx + 1) * k1_width)
        k2_span = slice(group_idx * k2_width, (group_idx + 1) * k2_width)
        k1_part = k1_transposed[:, k1_span, :, :]
        k2_part = k2[k2_span, :, :, :]
        kernel_parts.append(F.conv2d(k2_part, k1_part))
        bias_parts.append(
            (k2_part * b1[k1_span].reshape([1, -1, 1, 1])).sum((1, 2, 3))
        )
    merged_kernel, merged_bias = transIV_depthconcat(kernel_parts, bias_parts)
    return merged_kernel, merged_bias + b2
def transIV_depthconcat(kernels, biases):
    """Concatenate per-branch kernels and biases along the first axis.

    Uses ``paddle.concat`` (the documented Paddle API, already used elsewhere
    in this file) rather than the torch-style ``paddle.cat`` spelling.

    Args:
        kernels: Sequence of kernel tensors to stack along axis 0.
        biases: Sequence of bias tensors to stack along axis 0.

    Returns:
        Tuple ``(concatenated_kernel, concatenated_bias)``.
    """
    return paddle.concat(kernels, axis=0), paddle.concat(biases, axis=0)
def transV_avg(channels, kernel_size, groups):
    """Build the convolution kernel equivalent to KxK average pooling.

    Returns:
        Tensor of shape ``(channels, channels // groups, kernel_size,
        kernel_size)`` that is ``1 / kernel_size**2`` where output channel
        ``i`` reads input slot ``i % (channels // groups)`` and zero elsewhere.
    """
    input_dim = channels // groups
    out_idx = np.arange(channels)
    in_idx = np.tile(np.arange(input_dim), groups)
    kernel = paddle.zeros((channels, input_dim, kernel_size, kernel_size))
    kernel[out_idx, in_idx, :, :] = 1.0 / kernel_size**2
    return kernel
def transVI_multiscale(kernel, target_kernel_size):
    """Pad ``kernel`` symmetrically towards ``target_kernel_size``.

    The per-side amount is ``(target_kernel_size - current) // 2`` computed
    separately from ``kernel.shape[2]`` and ``kernel.shape[3]``.
    """
    pad_h = (target_kernel_size - kernel.shape[2]) // 2
    pad_w = (target_kernel_size - kernel.shape[3]) // 2
    # NOTE(review): the list is passed in (H, H, W, W) order; confirm against
    # F.pad's expected ordering -- irrelevant when both amounts are equal.
    pad_spec = [pad_h, pad_h, pad_w, pad_w]
    return F.pad(kernel, pad_spec)
class DiverseBranchBlock(nn.Layer):
    """Multi-branch convolution block that can be fused into one ``Conv2D``.

    While not re-parameterized, the output is the sum of up to four branches
    (``dbb_origin``, optional ``dbb_1x1``, ``dbb_avg``, ``dbb_1x1_kxk``)
    followed by the activation. ``re_parameterize()`` folds them into the
    single convolution ``dbb_reparam``.

    Args:
        num_channels: Number of input channels.
        num_filters: Number of output channels.
        filter_size: Kernel size of the main convolution.
        stride: Convolution stride.
        groups: Convolution groups.
        act: ``None`` selects an identity activation; any other value selects
            ``nn.ReLU`` (the value itself is not inspected further).
        is_repped: Build the block directly in its fused single-conv form.
        single_init: If true, call ``single_init()`` after construction.
        **kwargs: Accepted and ignored.
    """

    def __init__(
        self,
        num_channels,
        num_filters,
        filter_size,
        stride=1,
        groups=1,
        act=None,
        is_repped=False,
        single_init=False,
        **kwargs,
    ):
        super().__init__()

        padding = (filter_size - 1) // 2
        dilation = 1

        in_channels = num_channels
        out_channels = num_filters
        kernel_size = filter_size
        internal_channels_1x1_3x3 = None
        nonlinear = act

        self.is_repped = is_repped

        if nonlinear is None:
            self.nonlinear = nn.Identity()
        else:
            self.nonlinear = nn.ReLU()

        self.kernel_size = kernel_size
        self.out_channels = out_channels
        self.groups = groups
        # Only holds for odd kernel sizes ("same" padding).
        assert padding == kernel_size // 2

        if is_repped:
            self.dbb_reparam = nn.Conv2D(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
                groups=groups,
                bias_attr=True,
            )
        else:
            # Branch 1: plain KxK conv + BN.
            self.dbb_origin = conv_bn(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
                groups=groups,
            )

            # Branch 2: (optional 1x1 conv + BN/pad) -> average pool -> BN.
            self.dbb_avg = nn.Sequential()
            if groups < out_channels:
                self.dbb_avg.add_sublayer(
                    "conv",
                    nn.Conv2D(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=1,
                        stride=1,
                        padding=0,
                        groups=groups,
                        bias_attr=False,
                    ),
                )
                self.dbb_avg.add_sublayer(
                    "bn", BNAndPad(pad_pixels=padding, num_features=out_channels)
                )
                self.dbb_avg.add_sublayer(
                    "avg",
                    nn.AvgPool2D(kernel_size=kernel_size, stride=stride, padding=0),
                )
                # Branch 3: 1x1 conv + BN (only built in this case).
                self.dbb_1x1 = conv_bn(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=1,
                    stride=stride,
                    padding=0,
                    groups=groups,
                )
            else:
                self.dbb_avg.add_sublayer(
                    "avg",
                    nn.AvgPool2D(
                        kernel_size=kernel_size, stride=stride, padding=padding
                    ),
                )

            self.dbb_avg.add_sublayer("avgbn", nn.BatchNorm2D(out_channels))

            if internal_channels_1x1_3x3 is None:
                internal_channels_1x1_3x3 = (
                    in_channels if groups < out_channels else 2 * in_channels
                )  # For mobilenet, it is better to have 2X internal channels

            # Branch 4: 1x1 conv -> BN/pad -> KxK conv -> BN.
            self.dbb_1x1_kxk = nn.Sequential()
            if internal_channels_1x1_3x3 == in_channels:
                self.dbb_1x1_kxk.add_sublayer(
                    "idconv1",
                    IdentityBasedConv1x1(channels=in_channels, groups=groups),
                )
            else:
                self.dbb_1x1_kxk.add_sublayer(
                    "conv1",
                    nn.Conv2D(
                        in_channels=in_channels,
                        out_channels=internal_channels_1x1_3x3,
                        kernel_size=1,
                        stride=1,
                        padding=0,
                        groups=groups,
                        bias_attr=False,
                    ),
                )
            self.dbb_1x1_kxk.add_sublayer(
                "bn1",
                BNAndPad(pad_pixels=padding, num_features=internal_channels_1x1_3x3),
            )
            self.dbb_1x1_kxk.add_sublayer(
                "conv2",
                nn.Conv2D(
                    in_channels=internal_channels_1x1_3x3,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=0,
                    groups=groups,
                    bias_attr=False,
                ),
            )
            self.dbb_1x1_kxk.add_sublayer("bn2", nn.BatchNorm2D(out_channels))

        # The experiments reported in the paper used the default initialization
        # of bn.weight (all as 1). But changing the initialization may be
        # useful in some cases.
        if single_init:
            # Initialize the bn.weight of dbb_origin as 1 and others as 0.
            # This is not the default setting.
            self.single_init()

    def forward(self, inputs):
        """Apply the fused conv if re-parameterized, else sum all branches."""
        if self.is_repped:
            return self.nonlinear(self.dbb_reparam(inputs))

        out = self.dbb_origin(inputs)
        if hasattr(self, "dbb_1x1"):
            out += self.dbb_1x1(inputs)
        out += self.dbb_avg(inputs)
        out += self.dbb_1x1_kxk(inputs)
        return self.nonlinear(out)

    def init_gamma(self, gamma_value):
        """Set the final BN scale of every existing branch to ``gamma_value``."""
        # Paddle initializers are callables applied to a parameter; this
        # replaces the torch-style ``paddle.nn.init.constant_`` call.
        fill = nn.initializer.Constant(value=gamma_value)
        if hasattr(self, "dbb_origin"):
            fill(self.dbb_origin.bn.weight)
        if hasattr(self, "dbb_1x1"):
            fill(self.dbb_1x1.bn.weight)
        if hasattr(self, "dbb_avg"):
            fill(self.dbb_avg.avgbn.weight)
        if hasattr(self, "dbb_1x1_kxk"):
            fill(self.dbb_1x1_kxk.bn2.weight)

    def single_init(self):
        """Zero every branch's final BN scale except ``dbb_origin`` (set to 1)."""
        self.init_gamma(0.0)
        if hasattr(self, "dbb_origin"):
            nn.initializer.Constant(value=1.0)(self.dbb_origin.bn.weight)

    def get_equivalent_kernel_bias(self):
        """Return ``(kernel, bias)`` of the single conv equal to all branches.

        Requires the un-fused branches to exist (i.e. not yet re-parameterized).
        """
        k_origin, b_origin = transI_fusebn(
            self.dbb_origin.conv.weight, self.dbb_origin.bn
        )

        if hasattr(self, "dbb_1x1"):
            k_1x1, b_1x1 = transI_fusebn(self.dbb_1x1.conv.weight, self.dbb_1x1.bn)
            k_1x1 = transVI_multiscale(k_1x1, self.kernel_size)
        else:
            # Integer zeros act as neutral elements in transII_addbranch.
            k_1x1, b_1x1 = 0, 0

        if hasattr(self.dbb_1x1_kxk, "idconv1"):
            k_1x1_kxk_first = self.dbb_1x1_kxk.idconv1.get_actual_kernel()
        else:
            k_1x1_kxk_first = self.dbb_1x1_kxk.conv1.weight
        k_1x1_kxk_first, b_1x1_kxk_first = transI_fusebn(
            k_1x1_kxk_first, self.dbb_1x1_kxk.bn1
        )
        k_1x1_kxk_second, b_1x1_kxk_second = transI_fusebn(
            self.dbb_1x1_kxk.conv2.weight, self.dbb_1x1_kxk.bn2
        )
        k_1x1_kxk_merged, b_1x1_kxk_merged = transIII_1x1_kxk(
            k_1x1_kxk_first,
            b_1x1_kxk_first,
            k_1x1_kxk_second,
            b_1x1_kxk_second,
            groups=self.groups,
        )

        k_avg = transV_avg(self.out_channels, self.kernel_size, self.groups)
        k_1x1_avg_second, b_1x1_avg_second = transI_fusebn(k_avg, self.dbb_avg.avgbn)
        if hasattr(self.dbb_avg, "conv"):
            k_1x1_avg_first, b_1x1_avg_first = transI_fusebn(
                self.dbb_avg.conv.weight, self.dbb_avg.bn
            )
            k_1x1_avg_merged, b_1x1_avg_merged = transIII_1x1_kxk(
                k_1x1_avg_first,
                b_1x1_avg_first,
                k_1x1_avg_second,
                b_1x1_avg_second,
                groups=self.groups,
            )
        else:
            k_1x1_avg_merged, b_1x1_avg_merged = k_1x1_avg_second, b_1x1_avg_second

        return transII_addbranch(
            (k_origin, k_1x1, k_1x1_kxk_merged, k_1x1_avg_merged),
            (b_origin, b_1x1, b_1x1_kxk_merged, b_1x1_avg_merged),
        )

    def re_parameterize(self):
        """Fuse all branches into ``dbb_reparam`` and delete them (idempotent)."""
        if self.is_repped:
            return

        kernel, bias = self.get_equivalent_kernel_bias()
        origin_conv = self.dbb_origin.conv
        self.dbb_reparam = nn.Conv2D(
            in_channels=origin_conv._in_channels,
            out_channels=origin_conv._out_channels,
            kernel_size=origin_conv._kernel_size,
            stride=origin_conv._stride,
            padding=origin_conv._padding,
            dilation=origin_conv._dilation,
            groups=origin_conv._groups,
            bias_attr=True,
        )
        self.dbb_reparam.weight.set_value(kernel)
        self.dbb_reparam.bias.set_value(bias)

        # Drop the training-time branches now that they are folded in.
        delattr(self, "dbb_origin")
        delattr(self, "dbb_avg")
        if hasattr(self, "dbb_1x1"):
            delattr(self, "dbb_1x1")
        delattr(self, "dbb_1x1_kxk")
        self.is_repped = True
class Identity(nn.Layer):
    """Pass-through layer: ``forward`` returns its argument unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, inputs):
        return inputs
class TheseusLayer(nn.Layer):
    """Base layer with helpers to replace, freeze, truncate and tap sublayers.

    Sublayers are addressed by string patterns such as
    ``"blocks[11].depthwise_conv.conv"`` (see ``parse_pattern_str``).
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        # Intermediate outputs collected by forward hooks, keyed by pattern.
        self.res_dict = {}
        self.res_name = self.full_name()
        self.pruner = None
        self.quanter = None

        self.init_net(*args, **kwargs)

    def _return_dict_hook(self, layer, input, output):
        """Forward post-hook: wrap ``output`` and collected results in a dict."""
        res_dict = {"logits": output}
        # 'list' is needed to avoid error raised by popping self.res_dict
        for res_key in list(self.res_dict):
            # clear the res_dict because the forward process may change
            # according to input
            res_dict[res_key] = self.res_dict.pop(res_key)
        return res_dict

    @staticmethod
    def _resolve_return_patterns(stages_pattern, return_patterns, return_stages):
        """Turn ``return_patterns`` / ``return_stages`` into a pattern list.

        ``return_stages`` is ignored when ``return_patterns`` is also set.
        ``True`` selects every stage; an int or a non-empty list of ints
        selects stages by index, silently dropping indices outside
        ``[0, len(stages_pattern))``.
        """
        if return_patterns and return_stages:
            # Both given: the explicit patterns win.
            return_stages = None

        if return_stages is True:
            return_patterns = stages_pattern

        # Exact type check on purpose: bools must not be treated as indices.
        if type(return_stages) is int:
            return_stages = [return_stages]
        if isinstance(return_stages, list) and return_stages:
            # Always filter, so an index equal to len(stages_pattern) is
            # dropped instead of raising IndexError.
            return_patterns = [
                stages_pattern[idx]
                for idx in return_stages
                if 0 <= idx < len(stages_pattern)
            ]
        return return_patterns

    def init_net(
        self,
        stages_pattern=None,
        return_patterns=None,
        return_stages=None,
        freeze_befor=None,
        stop_after=None,
        *args,
        **kwargs,
    ):
        """Configure returned intermediate results, freezing and truncation.

        Args:
            stages_pattern: Patterns of the network's stages, in order.
            return_patterns: Patterns whose outputs should be returned.
            return_stages: ``True``, an int, or a list of ints indexing
                ``stages_pattern``.
            freeze_befor: Pattern passed to ``freeze_befor()`` when not None.
            stop_after: Pattern passed to ``stop_after()`` when not None.
        """
        # init the output of net
        if return_patterns or return_stages:
            return_patterns = self._resolve_return_patterns(
                stages_pattern, return_patterns, return_stages
            )
            if return_patterns:
                # call update_res function after the __init__ of the object
                # has completed execution, that is, the constructing of layer
                # or model has been completed.
                def update_res_hook(layer, input):
                    self.update_res(return_patterns)

                self.register_forward_pre_hook(update_res_hook)

        # freeze subnet
        if freeze_befor is not None:
            self.freeze_befor(freeze_befor)

        # set subnet to Identity
        if stop_after is not None:
            self.stop_after(stop_after)

    def init_res(self, stages_pattern, return_patterns=None, return_stages=None):
        """Resolve the requested outputs and register them immediately."""
        return_patterns = self._resolve_return_patterns(
            stages_pattern, return_patterns, return_stages
        )
        if return_patterns:
            self.update_res(return_patterns)

    def replace_sub(self, *args, **kwargs) -> None:
        """Deprecated; always raises ``DeprecationWarning``."""
        msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead."
        raise DeprecationWarning(msg)

    def upgrade_sublayer(
        self,
        layer_name_pattern: Union[str, List[str]],
        handle_func: Callable[[nn.Layer, str], nn.Layer],
    ) -> List[str]:
        """use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'.

        Args:
            layer_name_pattern (Union[str, List[str]]): The name of layer to be modified by 'handle_func'.
            handle_func (Callable[[nn.Layer, str], nn.Layer]): The function to modify target layer specified by 'layer_name_pattern'. The formal params are the layer(nn.Layer) and pattern(str) that is (a member of) layer_name_pattern (when layer_name_pattern is List type). And the return is the layer processed.

        Returns:
            List[str]: The patterns that were resolved and whose layer was replaced by the result of 'handle_func()'. Patterns that cannot be resolved are skipped.

        Examples:

            from paddle import nn
            import paddleclas

            def rep_func(layer: nn.Layer, pattern: str):
                new_layer = nn.Conv2D(
                    in_channels=layer._in_channels,
                    out_channels=layer._out_channels,
                    kernel_size=5,
                    padding=2
                )
                return new_layer

            net = paddleclas.MobileNetV1()
            res = net.upgrade_sublayer(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func)
            print(res)
            # ['blocks[11].depthwise_conv.conv', 'blocks[12].depthwise_conv.conv']
        """
        if not isinstance(layer_name_pattern, list):
            layer_name_pattern = [layer_name_pattern]

        hit_layer_pattern_list = []
        for pattern in layer_name_pattern:
            # parse pattern to find target layer and its parent
            layer_list = parse_pattern_str(pattern=pattern, parent_layer=self)
            if not layer_list:
                continue

            sub_layer_parent = layer_list[-2]["layer"] if len(layer_list) > 1 else self
            sub_layer = layer_list[-1]["layer"]
            sub_layer_name = layer_list[-1]["name"]
            sub_layer_index_list = layer_list[-1]["index_list"]

            new_sub_layer = handle_func(sub_layer, pattern)

            if sub_layer_index_list:
                # Walk down to the innermost container, then replace the
                # element addressed by the last index.
                container = getattr(sub_layer_parent, sub_layer_name)
                for sub_layer_index in sub_layer_index_list[:-1]:
                    container = container[sub_layer_index]
                container[sub_layer_index_list[-1]] = new_sub_layer
            else:
                setattr(sub_layer_parent, sub_layer_name, new_sub_layer)

            hit_layer_pattern_list.append(pattern)
        return hit_layer_pattern_list

    def stop_after(self, stop_layer_name: str) -> bool:
        """stop forward and backward after 'stop_layer_name'.

        Args:
            stop_layer_name (str): The name of layer that stop forward and backward after this layer.

        Returns:
            bool: 'True' if successful, 'False' otherwise.
        """
        layer_list = parse_pattern_str(stop_layer_name, self)
        if not layer_list:
            return False

        parent_layer = self
        for layer_dict in layer_list:
            name, index_list = layer_dict["name"], layer_dict["index_list"]
            if not set_identity(parent_layer, name, index_list):
                # Could not locate `name` under the current parent.
                return False
            parent_layer = layer_dict["layer"]

        return True

    def freeze_befor(self, layer_name: str) -> bool:
        """freeze the layer named layer_name and its previous layer.

        The target layer is wrapped so that its output has
        ``stop_gradient = True``.

        Args:
            layer_name (str): The name of layer that would be freezed.

        Returns:
            bool: 'True' if successful, 'False' otherwise.
        """

        def stop_grad(layer, pattern):
            class StopGradLayer(nn.Layer):
                def __init__(self):
                    super().__init__()
                    self.layer = layer

                def forward(self, x):
                    x = self.layer(x)
                    x.stop_gradient = True
                    return x

            new_layer = StopGradLayer()
            return new_layer

        res = self.upgrade_sublayer(layer_name, stop_grad)
        # An empty hit list means the pattern could not be resolved.
        return len(res) != 0

    def update_res(self, return_patterns: Union[str, List[str]]) -> List[str]:
        """update the result(s) to be returned.

        Args:
            return_patterns (Union[str, List[str]]): The name of layer to return output.

        Returns:
            List[str]: The pattern(s) whose layer has been hooked successfully.
        """
        # clear res_dict that could have been set
        self.res_dict = {}

        class Handler(object):
            def __init__(self, res_dict):
                # res_dict is a reference
                self.res_dict = res_dict

            def __call__(self, layer, pattern):
                layer.res_dict = self.res_dict
                layer.res_name = pattern
                # Replace any hook left over from a previous update_res call.
                if hasattr(layer, "hook_remove_helper"):
                    layer.hook_remove_helper.remove()
                layer.hook_remove_helper = layer.register_forward_post_hook(
                    save_sub_res_hook
                )
                return layer

        handle_func = Handler(self.res_dict)

        hit_layer_pattern_list = self.upgrade_sublayer(
            return_patterns, handle_func=handle_func
        )

        if hasattr(self, "hook_remove_helper"):
            self.hook_remove_helper.remove()
        self.hook_remove_helper = self.register_forward_post_hook(
            self._return_dict_hook
        )

        return hit_layer_pattern_list
def save_sub_res_hook(layer, input, output):
    """Forward post-hook: store ``output`` in ``layer.res_dict[layer.res_name]``."""
    collected = layer.res_dict
    key = layer.res_name
    collected[key] = output
def set_identity(
    parent_layer: nn.Layer,
    layer_name: str,
    layer_index_list: Union[List[str], None] = None,
) -> bool:
    """set the layer specified by layer_name and layer_index_list to Identity.

    Every sublayer registered *after* the target (in ``_sub_layers`` order) is
    replaced by ``Identity``; the target itself is kept. With indices, the same
    is done inside each nested container along the index path.

    Args:
        parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index_list.
        layer_name (str): The name of target layer to be set to Identity.
        layer_index_list (List[str], optional): The index path (as strings, matching ``_sub_layers`` keys) of the target layer inside the container named layer_name. Defaults to None.

    Returns:
        bool: True if successfully, False otherwise.
    """
    stop_after = False
    for sub_layer_name in parent_layer._sub_layers:
        if stop_after:
            parent_layer._sub_layers[sub_layer_name] = Identity()
            continue
        if sub_layer_name == layer_name:
            stop_after = True

    if layer_index_list and stop_after:
        layer_container = parent_layer._sub_layers[layer_name]
        for depth, layer_index in enumerate(layer_index_list):
            if depth > 0:
                # Descend exactly one level per index, so deeper indices are
                # applied to the nested container they actually address.
                layer_container = layer_container[layer_index_list[depth - 1]]
            stop_after = False
            for sub_layer_index in layer_container._sub_layers:
                if stop_after:
                    # Replace inside the current container, not the top one.
                    layer_container[sub_layer_index] = Identity()
                    continue
                if layer_index == sub_layer_index:
                    stop_after = True
            if not stop_after:
                # Index not present at this level; nothing deeper to truncate.
                return False

    return stop_after
def parse_pattern_str(
    pattern: str, parent_layer: nn.Layer
) -> Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]:
    """parse the string type pattern.

    The pattern is split on ``"."``; each segment is an attribute name that may
    be followed by one or more ``[index]`` suffixes, e.g. ``"blocks[11].conv"``.

    Args:
        pattern (str): The pattern to describe layer.
        parent_layer (nn.Layer): The root layer relative to the pattern.

    Returns:
        Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed (unknown attribute, non-integer index or index out of range). If successfully, the members are layers parsed in order:
                                                                [
                                                                    {"layer": first layer, "name": first layer's name parsed, "index_list": first layer's indices (list of str) or None},
                                                                    {"layer": second layer, "name": second layer's name parsed, "index_list": second layer's indices (list of str) or None},
                                                                    ...
                                                                ]
    """
    layer_list = []
    for segment in pattern.split("."):
        if "[" in segment:
            target_layer_name, *raw_indices = segment.split("[")
            target_layer_index_list = [raw.split("]")[0] for raw in raw_indices]
        else:
            target_layer_name = segment
            target_layer_index_list = None

        target_layer = getattr(parent_layer, target_layer_name, None)
        if target_layer is None:
            # No attribute of that name (also covers an empty segment).
            return None

        if target_layer_index_list:
            for target_layer_index in target_layer_index_list:
                try:
                    position = int(target_layer_index)
                except ValueError:
                    # Non-numeric index: report failure like other bad patterns.
                    return None
                if position < 0 or position >= len(target_layer):
                    return None
                # Containers are indexed with the original string key.
                target_layer = target_layer[target_layer_index]

        layer_list.append(
            {
                "layer": target_layer,
                "name": target_layer_name,
                "index_list": target_layer_index_list,
            }
        )
        parent_layer = target_layer

    return layer_list
class AdaptiveAvgPool2D(nn.AdaptiveAvgPool2D):
    """Adaptive average pooling with a mean-based path for NPU global pooling.

    When the current device string starts with ``"npu"`` and the output size
    is ``1`` or the tuple ``(1, 1)``, ``forward`` uses ``paddle.mean`` over
    axes 2 and 3 instead of ``F.adaptive_avg_pool2d``.

    NOTE(review): a later ``from paddle.nn import ... AdaptiveAvgPool2D``
    statement in this file rebinds the module-level name to Paddle's own
    class -- confirm which one downstream code is meant to use.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        on_npu = paddle.device.get_device().startswith("npu")
        self.device = "npu" if on_npu else None

        # "gap" == global average pooling (output collapses to 1x1).
        size = self._output_size
        scalar_one = isinstance(size, int) and size == 1
        pair_of_ones = isinstance(size, tuple) and size[0] == 1 and size[1] == 1
        self._gap = True if (scalar_one or pair_of_ones) else False

    def forward(self, x):
        if not (self.device == "npu" and self._gap):
            return F.adaptive_avg_pool2d(
                x,
                output_size=self._output_size,
                data_format=self._data_format,
                name=self._name,
            )

        # Global Average Pooling
        batch, channels, _, _ = x.shape
        pooled = paddle.mean(x, axis=[2, 3])
        return paddle.reshape(pooled, [batch, channels, 1, 1])
- # copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import paddle
- import paddle.nn as nn
- import paddle.nn.functional as F
- from paddle.nn.initializer import KaimingNormal, Constant
- from paddle.nn import Conv2D, BatchNorm2D, ReLU, AdaptiveAvgPool2D, MaxPool2D
- from paddle.regularizer import L2Decay
- from paddle import ParamAttr
# Pretrained-weight URLs, keyed by model variant name.
MODEL_URLS = {
    "PPHGNetV2_B0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPHGNetV2_B0_ssld_pretrained.pdparams",
    "PPHGNetV2_B1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPHGNetV2_B1_ssld_pretrained.pdparams",
    "PPHGNetV2_B2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPHGNetV2_B2_ssld_pretrained.pdparams",
    "PPHGNetV2_B3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPHGNetV2_B3_ssld_pretrained.pdparams",
    "PPHGNetV2_B4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPHGNetV2_B4_ssld_pretrained.pdparams",
    "PPHGNetV2_B5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPHGNetV2_B5_ssld_pretrained.pdparams",
    "PPHGNetV2_B6": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPHGNetV2_B6_ssld_pretrained.pdparams",
}

# The exported names are exactly the variant names above.
__all__ = list(MODEL_URLS)

# Shared initializer instances (Kaiming-normal, all-zeros, all-ones).
kaiming_normal_ = KaimingNormal()
zeros_, ones_ = Constant(value=0.0), Constant(value=1.0)
class LearnableAffineBlock(TheseusLayer):
    """
    Learnable scalar affine transform: ``scale * x + bias``.

    Args:
        scale_value (float): The initial value of the scale parameter, default is 1.0.
        bias_value (float): The initial value of the bias parameter, default is 0.0.
        lr_mult (float): The learning rate multiplier, default is 1.0.
        lab_lr (float): Factor multiplied with ``lr_mult`` to form the
            parameters' ``learning_rate`` attribute, default is 0.01.
    """

    def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.01):
        super().__init__()
        # Both parameters are single-element tensors created the same way;
        # "scale" is created first, then "bias".
        for param_name, init_value in (("scale", scale_value), ("bias", bias_value)):
            param = self.create_parameter(
                shape=[1],
                default_initializer=Constant(value=init_value),
                attr=ParamAttr(learning_rate=lr_mult * lab_lr),
            )
            setattr(self, param_name, param)
            self.add_parameter(param_name, param)

    def forward(self, x):
        scaled = self.scale * x
        return scaled + self.bias
class ConvBNAct(TheseusLayer):
    """
    Convolution followed by batch normalization, with an optional ReLU and an
    optional learnable affine block (LAB).

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (int): Size of the convolution kernel. Defaults to 3.
        stride (int): Stride of the convolution. Defaults to 1.
        padding (int/str): When a string, it is passed to the convolution as the
            padding type. Any non-string value is ignored and the padding is
            ``(kernel_size - 1) // 2``. Defaults to 1.
        groups (int): Number of groups for the convolution. Defaults to 1.
        use_act (bool): Whether to use the activation function. Defaults to True.
        use_lab (bool): Whether to use the LAB operation. Defaults to False.
        lr_mult (float): Learning rate multiplier for the layer. Defaults to 1.0.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        groups=1,
        use_act=True,
        use_lab=False,
        lr_mult=1.0,
    ):
        super().__init__()
        self.use_act = use_act
        self.use_lab = use_lab

        # Only a string padding mode is honoured; otherwise the padding is
        # derived from the kernel size.
        if isinstance(padding, str):
            conv_padding = padding
        else:
            conv_padding = (kernel_size - 1) // 2

        self.conv = Conv2D(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding=conv_padding,
            groups=groups,
            weight_attr=ParamAttr(learning_rate=lr_mult),
            bias_attr=False,
        )
        # Batch-norm scale and shift are excluded from weight decay.
        self.bn = BatchNorm2D(
            out_channels,
            weight_attr=ParamAttr(regularizer=L2Decay(0.0), learning_rate=lr_mult),
            bias_attr=ParamAttr(regularizer=L2Decay(0.0), learning_rate=lr_mult),
        )
        # NOTE(review): the indentation of this file was lost; the LAB is assumed
        # to be nested under the activation (created and applied only when
        # use_act is set) -- confirm against the upstream implementation.
        if self.use_act:
            self.act = ReLU()
            if self.use_lab:
                self.lab = LearnableAffineBlock(lr_mult=lr_mult)

    def forward(self, x):
        """Apply conv -> bn, then the activation and LAB when enabled."""
        out = self.bn(self.conv(x))
        if not self.use_act:
            return out
        out = self.act(out)
        if self.use_lab:
            out = self.lab(out)
        return out
class LightConvBNAct(TheseusLayer):
    """
    Light convolution unit: a 1x1 ConvBNAct without activation followed by a
    ConvBNAct whose group count equals its channel count.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (int): Kernel size of the second (grouped) convolution.
        use_lab (bool): Whether to use the LAB operation. Defaults to False.
        lr_mult (float): Learning rate multiplier for the layer. Defaults to 1.0.
        **kwargs: Accepted and ignored, so the class can be constructed with the
            same keyword arguments as ConvBNAct (e.g. ``stride``).
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        use_lab=False,
        lr_mult=1.0,
        **kwargs,
    ):
        super().__init__()
        # Settings common to both convolutions.
        common = dict(use_lab=use_lab, lr_mult=lr_mult)

        # Point-wise projection, no activation.
        self.conv1 = ConvBNAct(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            use_act=False,
            **common,
        )
        # One group per channel, with activation.
        self.conv2 = ConvBNAct(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            groups=out_channels,
            use_act=True,
            **common,
        )

    def forward(self, x):
        """Run the 1x1 convolution and then the grouped convolution."""
        return self.conv2(self.conv1(x))
class StemBlock(TheseusLayer):
    """
    StemBlock for PP-HGNetV2.

    The output of ``stem1`` feeds two branches, a max-pool and a pair of 2x2
    convolutions. Their results are concatenated on the channel axis and passed
    through ``stem3`` and ``stem4``.

    Args:
        in_channels (int): Number of input channels.
        mid_channels (int): Number of middle channels.
        out_channels (int): Number of output channels.
        use_lab (bool): Whether to use the LAB operation. Defaults to False.
        lr_mult (float): Learning rate multiplier for the layer. Defaults to 1.0.
        text_rec (bool): When True, ``stem3`` uses stride 1 instead of 2.
            Defaults to False.
    """

    def __init__(
        self,
        in_channels,
        mid_channels,
        out_channels,
        use_lab=False,
        lr_mult=1.0,
        text_rec=False,
    ):
        super().__init__()
        half_channels = mid_channels // 2
        # Settings shared by every convolution of the stem.
        common = dict(use_lab=use_lab, lr_mult=lr_mult)

        self.stem1 = ConvBNAct(
            in_channels=in_channels,
            out_channels=mid_channels,
            kernel_size=3,
            stride=2,
            **common,
        )
        self.stem2a = ConvBNAct(
            in_channels=mid_channels,
            out_channels=half_channels,
            kernel_size=2,
            stride=1,
            padding="SAME",
            **common,
        )
        self.stem2b = ConvBNAct(
            in_channels=half_channels,
            out_channels=mid_channels,
            kernel_size=2,
            stride=1,
            padding="SAME",
            **common,
        )
        # stem3 consumes the concatenation of both branches, hence 2 * mid_channels.
        self.stem3 = ConvBNAct(
            in_channels=mid_channels * 2,
            out_channels=mid_channels,
            kernel_size=3,
            stride=1 if text_rec else 2,
            **common,
        )
        self.stem4 = ConvBNAct(
            in_channels=mid_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            **common,
        )
        self.pool = nn.MaxPool2D(
            kernel_size=2, stride=1, ceil_mode=True, padding="SAME"
        )

    def forward(self, x):
        """Run the stem and return the output of ``stem4``."""
        x = self.stem1(x)
        conv_branch = self.stem2b(self.stem2a(x))
        pool_branch = self.pool(x)
        merged = paddle.concat([pool_branch, conv_branch], 1)
        return self.stem4(self.stem3(merged))
class HGV2_Block(TheseusLayer):
    """
    HGV2_Block, the basic unit that constitutes the HGV2_Stage.

    The input and the output of every inner layer are concatenated on the
    channel axis, then reduced by two 1x1 convolutions (squeeze to
    ``out_channels // 2``, then excitation to ``out_channels``).

    Args:
        in_channels (int): Number of input channels.
        mid_channels (int): Number of output channels of each inner layer.
        out_channels (int): Number of output channels.
        kernel_size (int): Size of the convolution kernel. Defaults to 3.
        layer_num (int): Number of layers in the HGV2 block. Defaults to 6.
        identity (bool): Whether to add the block input to the block output.
            Defaults to False.
        light_block (bool): Use LightConvBNAct for the inner layers when True,
            ConvBNAct otherwise. Defaults to True.
        use_lab (bool): Whether to use the LAB operation. Defaults to False.
        lr_mult (float): Learning rate multiplier for the layer. Defaults to 1.0.
    """

    def __init__(
        self,
        in_channels,
        mid_channels,
        out_channels,
        kernel_size=3,
        layer_num=6,
        identity=False,
        light_block=True,
        use_lab=False,
        lr_mult=1.0,
    ):
        super().__init__()
        self.identity = identity

        self.layers = nn.LayerList()
        # Reference the class directly instead of eval()-ing its name.
        block_cls = LightConvBNAct if light_block else ConvBNAct
        for i in range(layer_num):
            self.layers.append(
                block_cls(
                    in_channels=in_channels if i == 0 else mid_channels,
                    out_channels=mid_channels,
                    # LightConvBNAct swallows `stride` through its **kwargs.
                    stride=1,
                    kernel_size=kernel_size,
                    use_lab=use_lab,
                    lr_mult=lr_mult,
                )
            )

        # feature aggregation: the block input plus one feature map per layer
        total_channels = in_channels + layer_num * mid_channels
        self.aggregation_squeeze_conv = ConvBNAct(
            in_channels=total_channels,
            out_channels=out_channels // 2,
            kernel_size=1,
            stride=1,
            use_lab=use_lab,
            lr_mult=lr_mult,
        )
        self.aggregation_excitation_conv = ConvBNAct(
            in_channels=out_channels // 2,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            use_lab=use_lab,
            lr_mult=lr_mult,
        )

    def forward(self, x):
        """Aggregate the input and all inner-layer outputs into the block output."""
        identity = x
        output = [x]
        for layer in self.layers:
            x = layer(x)
            output.append(x)
        x = paddle.concat(output, axis=1)
        x = self.aggregation_squeeze_conv(x)
        x = self.aggregation_excitation_conv(x)
        if self.identity:
            x += identity
        return x
class HGV2_Stage(TheseusLayer):
    """
    HGV2_Stage, the basic unit that constitutes the PPHGNetV2: an optional
    downsampling convolution followed by ``block_num`` HGV2_Block layers.

    Args:
        in_channels (int): Number of input channels.
        mid_channels (int): Number of middle channels.
        out_channels (int): Number of output channels.
        block_num (int): Number of blocks in the HGV2 stage.
        layer_num (int): Number of layers in each HGV2 block. Defaults to 6.
        is_downsample (bool): Whether to use the downsampling operation.
            Defaults to True.
        light_block (bool): Whether to use light blocks. Defaults to True.
        kernel_size (int): Size of the convolution kernel. Defaults to 3.
        use_lab (bool, optional): Whether to use the LAB operation. Defaults to False.
        stride (int/list): Stride of the downsampling convolution. Defaults to 2.
        lr_mult (float, optional): Learning rate multiplier for the layer. Defaults to 1.0.
    """

    def __init__(
        self,
        in_channels,
        mid_channels,
        out_channels,
        block_num,
        layer_num=6,
        is_downsample=True,
        light_block=True,
        kernel_size=3,
        use_lab=False,
        stride=2,
        lr_mult=1.0,
    ):
        super().__init__()
        self.is_downsample = is_downsample
        if self.is_downsample:
            # One group per channel, channel count unchanged, no activation.
            self.downsample = ConvBNAct(
                in_channels=in_channels,
                out_channels=in_channels,
                kernel_size=3,
                stride=stride,
                groups=in_channels,
                use_act=False,
                use_lab=use_lab,
                lr_mult=lr_mult,
            )

        # Only the first block changes the channel count; every later block
        # maps out_channels -> out_channels and adds its input to its output.
        stage_blocks = [
            HGV2_Block(
                in_channels=in_channels if idx == 0 else out_channels,
                mid_channels=mid_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                layer_num=layer_num,
                identity=idx != 0,
                light_block=light_block,
                use_lab=use_lab,
                lr_mult=lr_mult,
            )
            for idx in range(block_num)
        ]
        self.blocks = nn.Sequential(*stage_blocks)

    def forward(self, x):
        """Downsample when enabled, then run the blocks in sequence."""
        if self.is_downsample:
            x = self.downsample(x)
        return self.blocks(x)
class PPHGNetV2(TheseusLayer):
    """
    PPHGNetV2

    Args:
        stage_config (dict): Config for PPHGNetV2 stages, keyed by stage name. Each
            value is a sequence ``[in_channels, mid_channels, out_channels,
            block_num, is_downsample, light_block, kernel_size, layer_num, stride]``.
            ``stride`` may be omitted, in which case 2 is used. A ``"stage4"``
            entry is read when ``det`` is False.
        stem_channels: (list): ``[in, mid, out]`` channel counts of the stem.
        use_lab (bool): Whether to use the LAB operation. Defaults to False.
        use_last_conv (bool): Whether to build the last 1x1 conv layer. Defaults to True.
        class_expand (int): Number of channels for the last 1x1 convolutional layer.
        dropout_prob (float): Dropout probability after the last 1x1 convolutional
            layer. Defaults to 0.0.
        class_num (int): The number of classes for the classification layer. Defaults to 1000.
        lr_mult_list (list): Learning rate multipliers; index 0 is used by the stem
            and index ``i + 1`` by the i-th stage. Defaults to [1.0, 1.0, 1.0, 1.0, 1.0].
        det (bool): When True, ``forward`` returns the list of stage outputs
            selected by ``out_indices``. Defaults to False.
        text_rec (bool): When True, the stem's ``stem3`` uses stride 1 and
            ``forward`` pools the final feature map. Defaults to False.
        out_indices (list): Indices of the stages whose outputs are collected when
            ``det`` is True. Defaults to [0, 1, 2, 3].
        **kwargs: Accepted and ignored.

    Returns:
        model: nn.Layer. Specific PPHGNetV2 model depends on args.
    """

    # Stride used for a stage whose config omits it (the HGV2_Stage default).
    _DEFAULT_STAGE_STRIDE = 2

    def __init__(
        self,
        stage_config,
        stem_channels=[3, 32, 64],
        use_lab=False,
        use_last_conv=True,
        class_expand=2048,
        dropout_prob=0.0,
        class_num=1000,
        lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
        det=False,
        text_rec=False,
        out_indices=None,
        **kwargs,
    ):
        super().__init__()
        self.det = det
        self.text_rec = text_rec
        self.use_lab = use_lab
        self.use_last_conv = use_last_conv
        self.class_expand = class_expand
        self.class_num = class_num
        self.out_indices = out_indices if out_indices is not None else [0, 1, 2, 3]
        self.out_channels = []

        # stem
        self.stem = StemBlock(
            in_channels=stem_channels[0],
            mid_channels=stem_channels[1],
            out_channels=stem_channels[2],
            use_lab=use_lab,
            lr_mult=lr_mult_list[0],
            text_rec=text_rec,
        )

        # stages
        self.stages = nn.LayerList()
        for i, k in enumerate(stage_config):
            stage_cfg = list(stage_config[k])
            # Several model factories in this file supply 8-item configs without
            # a stride; without this fallback the unpacking below raises ValueError.
            if len(stage_cfg) == 8:
                stage_cfg.append(self._DEFAULT_STAGE_STRIDE)
            (
                in_channels,
                mid_channels,
                out_channels,
                block_num,
                is_downsample,
                light_block,
                kernel_size,
                layer_num,
                stride,
            ) = stage_cfg
            self.stages.append(
                HGV2_Stage(
                    in_channels,
                    mid_channels,
                    out_channels,
                    block_num,
                    layer_num,
                    is_downsample,
                    light_block,
                    kernel_size,
                    use_lab,
                    stride,
                    lr_mult=lr_mult_list[i + 1],
                )
            )
            if i in self.out_indices:
                self.out_channels.append(out_channels)

        # Outside detection mode the list is replaced by a single channel count.
        if not self.det:
            self.out_channels = stage_config["stage4"][2]

        self.avg_pool = AdaptiveAvgPool2D(1)

        # NOTE(review): the indentation of this file was lost; act, lab and
        # dropout are assumed to belong to the use_last_conv branch -- confirm
        # against the upstream implementation. None of them is used by forward().
        if self.use_last_conv:
            self.last_conv = Conv2D(
                in_channels=out_channels,
                out_channels=self.class_expand,
                kernel_size=1,
                stride=1,
                padding=0,
                bias_attr=False,
            )
            self.act = ReLU()
            if self.use_lab:
                self.lab = LearnableAffineBlock()
            self.dropout = nn.Dropout(p=dropout_prob, mode="downscale_in_infer")

        self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
        if not self.det:
            self.fc = nn.Linear(
                self.class_expand if self.use_last_conv else out_channels,
                self.class_num,
            )

        self._init_weights()

    def _init_weights(self):
        """Initialize conv weights, batch-norm scale/shift and linear biases."""
        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2D):
                ones_(m.weight)
                zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                zeros_(m.bias)

    def forward(self, x):
        """
        Run the stem and all stages.

        Returns:
            When ``det`` is True, the list of outputs of the stages listed in
            ``out_indices``. Otherwise the output of the last stage, which is
            additionally pooled when ``text_rec`` is True.
        """
        x = self.stem(x)
        out = []
        for i, stage in enumerate(self.stages):
            x = stage(x)
            if self.det and i in self.out_indices:
                out.append(x)
        if self.det:
            return out

        if self.text_rec:
            # Training pools to a fixed [1, 40] map; inference uses a [3, 2]
            # average-pooling window instead.
            if self.training:
                x = F.adaptive_avg_pool2d(x, [1, 40])
            else:
                x = F.avg_pool2d(x, [3, 2])
        return x
def PPHGNetV2_B0(pretrained=False, use_ssld=False, **kwargs):
    """
    PPHGNetV2_B0

    Args:
        pretrained (bool/str): Accepted for API compatibility. This function does
            not load any weights itself.
        use_ssld (bool): Accepted for API compatibility. Not used by this function.
        **kwargs: Forwarded to the PPHGNetV2 constructor.

    Returns:
        model: nn.Layer. Specific `PPHGNetV2_B0` model depends on args.
    """
    # PPHGNetV2 unpacks nine values per stage, so the stride is given explicitly
    # (2, the HGV2_Stage default).
    stage_config = {
        # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num, stride
        "stage1": [16, 16, 64, 1, False, False, 3, 3, 2],
        "stage2": [64, 32, 256, 1, True, False, 3, 3, 2],
        "stage3": [256, 64, 512, 2, True, True, 5, 3, 2],
        "stage4": [512, 128, 1024, 1, True, True, 5, 3, 2],
    }

    model = PPHGNetV2(
        stem_channels=[3, 16, 16], stage_config=stage_config, use_lab=True, **kwargs
    )
    return model
def PPHGNetV2_B1(pretrained=False, use_ssld=False, **kwargs):
    """
    PPHGNetV2_B1

    Args:
        pretrained (bool/str): Accepted for API compatibility. This function does
            not load any weights itself.
        use_ssld (bool): Accepted for API compatibility. Not used by this function.
        **kwargs: Forwarded to the PPHGNetV2 constructor.

    Returns:
        model: nn.Layer. Specific `PPHGNetV2_B1` model depends on args.
    """
    # PPHGNetV2 unpacks nine values per stage, so the stride is given explicitly
    # (2, the HGV2_Stage default).
    stage_config = {
        # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num, stride
        "stage1": [32, 32, 64, 1, False, False, 3, 3, 2],
        "stage2": [64, 48, 256, 1, True, False, 3, 3, 2],
        "stage3": [256, 96, 512, 2, True, True, 5, 3, 2],
        "stage4": [512, 192, 1024, 1, True, True, 5, 3, 2],
    }

    model = PPHGNetV2(
        stem_channels=[3, 24, 32], stage_config=stage_config, use_lab=True, **kwargs
    )
    return model
def PPHGNetV2_B2(pretrained=False, use_ssld=False, **kwargs):
    """
    PPHGNetV2_B2

    Args:
        pretrained (bool/str): Accepted for API compatibility. This function does
            not load any weights itself.
        use_ssld (bool): Accepted for API compatibility. Not used by this function.
        **kwargs: Forwarded to the PPHGNetV2 constructor.

    Returns:
        model: nn.Layer. Specific `PPHGNetV2_B2` model depends on args.
    """
    # PPHGNetV2 unpacks nine values per stage, so the stride is given explicitly
    # (2, the HGV2_Stage default).
    stage_config = {
        # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num, stride
        "stage1": [32, 32, 96, 1, False, False, 3, 4, 2],
        "stage2": [96, 64, 384, 1, True, False, 3, 4, 2],
        "stage3": [384, 128, 768, 3, True, True, 5, 4, 2],
        "stage4": [768, 256, 1536, 1, True, True, 5, 4, 2],
    }

    model = PPHGNetV2(
        stem_channels=[3, 24, 32], stage_config=stage_config, use_lab=True, **kwargs
    )
    return model
def PPHGNetV2_B3(pretrained=False, use_ssld=False, **kwargs):
    """
    PPHGNetV2_B3

    Args:
        pretrained (bool/str): Accepted for API compatibility. This function does
            not load any weights itself.
        use_ssld (bool): Accepted for API compatibility. Not used by this function.
        **kwargs: Forwarded to the PPHGNetV2 constructor.

    Returns:
        model: nn.Layer. Specific `PPHGNetV2_B3` model depends on args.
    """
    # PPHGNetV2 unpacks nine values per stage, so the stride is given explicitly
    # (2, the HGV2_Stage default).
    stage_config = {
        # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num, stride
        "stage1": [32, 32, 128, 1, False, False, 3, 5, 2],
        "stage2": [128, 64, 512, 1, True, False, 3, 5, 2],
        "stage3": [512, 128, 1024, 3, True, True, 5, 5, 2],
        "stage4": [1024, 256, 2048, 1, True, True, 5, 5, 2],
    }

    model = PPHGNetV2(
        stem_channels=[3, 24, 32], stage_config=stage_config, use_lab=True, **kwargs
    )
    return model
def PPHGNetV2_B4(pretrained=False, use_ssld=False, det=False, text_rec=False, **kwargs):
    """
    PPHGNetV2_B4

    Args:
        pretrained (bool/str): Accepted for API compatibility. This function does
            not load any weights itself.
        use_ssld (bool): Accepted for API compatibility. Not used by this function.
        det (bool): Selects the detection stage config (stride 2 everywhere) and is
            forwarded to PPHGNetV2. Defaults to False.
        text_rec (bool): Forwarded to PPHGNetV2. Defaults to False.
        **kwargs: Forwarded to the PPHGNetV2 constructor.

    Returns:
        model: nn.Layer. Specific `PPHGNetV2_B4` model depends on args.
    """
    # Column layout of every row below:
    # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num, stride
    if det:
        chosen_config = {
            "stage1": [48, 48, 128, 1, False, False, 3, 6, 2],
            "stage2": [128, 96, 512, 1, True, False, 3, 6, 2],
            "stage3": [512, 192, 1024, 3, True, True, 5, 6, 2],
            "stage4": [1024, 384, 2048, 1, True, True, 5, 6, 2],
        }
    else:
        # The non-detection config downsamples in every stage with
        # two-element strides.
        chosen_config = {
            "stage1": [48, 48, 128, 1, True, False, 3, 6, [2, 1]],
            "stage2": [128, 96, 512, 1, True, False, 3, 6, [1, 2]],
            "stage3": [512, 192, 1024, 3, True, True, 5, 6, [2, 1]],
            "stage4": [1024, 384, 2048, 1, True, True, 5, 6, [2, 1]],
        }

    return PPHGNetV2(
        stem_channels=[3, 32, 48],
        stage_config=chosen_config,
        use_lab=False,
        det=det,
        text_rec=text_rec,
        **kwargs,
    )
def PPHGNetV2_B5(pretrained=False, use_ssld=False, **kwargs):
    """
    PPHGNetV2_B5

    Args:
        pretrained (bool/str): Accepted for API compatibility. This function does
            not load any weights itself.
        use_ssld (bool): Accepted for API compatibility. Not used by this function.
        **kwargs: Forwarded to the PPHGNetV2 constructor.

    Returns:
        model: nn.Layer. Specific `PPHGNetV2_B5` model depends on args.
    """
    # PPHGNetV2 unpacks nine values per stage, so the stride is given explicitly
    # (2, the HGV2_Stage default).
    stage_config = {
        # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num, stride
        "stage1": [64, 64, 128, 1, False, False, 3, 6, 2],
        "stage2": [128, 128, 512, 2, True, False, 3, 6, 2],
        "stage3": [512, 256, 1024, 5, True, True, 5, 6, 2],
        "stage4": [1024, 512, 2048, 2, True, True, 5, 6, 2],
    }

    model = PPHGNetV2(
        stem_channels=[3, 32, 64], stage_config=stage_config, use_lab=False, **kwargs
    )
    return model
def PPHGNetV2_B6(pretrained=False, use_ssld=False, **kwargs):
    """
    PPHGNetV2_B6

    Args:
        pretrained (bool/str): Accepted for API compatibility. This function does
            not load any weights itself.
        use_ssld (bool): Accepted for API compatibility. Not used by this function.
        **kwargs: Forwarded to the PPHGNetV2 constructor.

    Returns:
        model: nn.Layer. Specific `PPHGNetV2_B6` model depends on args.
    """
    # PPHGNetV2 unpacks nine values per stage, so the stride is given explicitly
    # (2, the HGV2_Stage default).
    stage_config = {
        # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num, stride
        "stage1": [96, 96, 192, 2, False, False, 3, 6, 2],
        "stage2": [192, 192, 512, 3, True, False, 3, 6, 2],
        "stage3": [512, 384, 1024, 6, True, True, 5, 6, 2],
        "stage4": [1024, 768, 2048, 3, True, True, 5, 6, 2],
    }

    model = PPHGNetV2(
        stem_channels=[3, 48, 96], stage_config=stage_config, use_lab=False, **kwargs
    )
    return model
class PPHGNetV2_B4_Formula(nn.Layer):
    """
    PPHGNetV2_B4_Formula

    Wraps a PPHGNetV2 (B4 layout) and returns its final feature map flattened
    to ``[batch, h * w, channels]`` inside a DonutSwinModelOutput.

    Args:
        in_channels (int): Stored on the instance. The stem is always built for
            3 input channels. Default is 3.
        class_num (int): Number of classes, forwarded to PPHGNetV2. Default is 1000.

    Returns:
        model: nn.Layer. Specific `PPHGNetV2_B4` model with defined architecture.
    """

    def __init__(self, in_channels=3, class_num=1000):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = 2048
        # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num, stride
        b4_stages = {
            "stage1": [48, 48, 128, 1, False, False, 3, 6, 2],
            "stage2": [128, 96, 512, 1, True, False, 3, 6, 2],
            "stage3": [512, 192, 1024, 3, True, True, 5, 6, 2],
            "stage4": [1024, 384, 2048, 1, True, True, 5, 6, 2],
        }
        self.pphgnet_b4 = PPHGNetV2(
            stem_channels=[3, 32, 48],
            stage_config=b4_stages,
            class_num=class_num,
            use_lab=False,
        )

    def forward(self, input_data):
        """
        Args:
            input_data: In training mode, a 3-item sequence
                ``(pixel_values, label, attention_mask)``. Otherwise either the
                pixel tensor itself or a list whose first item is that tensor.

        Returns:
            In training mode ``(output, label, attention_mask)``, otherwise
            ``output``, where ``output`` is a DonutSwinModelOutput.
        """
        if self.training:
            pixel_values, label, attention_mask = input_data
        elif isinstance(input_data, list):
            pixel_values = input_data[0]
        else:
            pixel_values = input_data

        # Single-channel input is repeated along axis 1 to get 3 channels.
        if pixel_values.shape[1] == 1:
            pixel_values = paddle.repeat_interleave(pixel_values, repeats=3, axis=1)

        feature_map = self.pphgnet_b4(pixel_values)
        b, c, h, w = feature_map.shape
        # [b, c, h, w] -> [b, c, h*w] -> [b, h*w, c]
        sequence = feature_map.reshape([b, c, h * w]).transpose([0, 2, 1])
        result = DonutSwinModelOutput(
            last_hidden_state=sequence,
            pooler_output=None,
            hidden_states=None,
            attentions=False,
            reshaped_hidden_states=None,
        )
        if self.training:
            return result, label, attention_mask
        return result
class PPHGNetV2_B6_Formula(nn.Layer):
    """
    PPHGNetV2_B6_Formula

    Wraps a PPHGNetV2 (B6 layout) and returns its final feature map flattened
    to ``[batch, h * w, channels]`` inside a DonutSwinModelOutput.

    Args:
        in_channels (int): Stored on the instance. The stem is always built for
            3 input channels. Default is 3.
        class_num (int): Number of classes, forwarded to PPHGNetV2. Default is 1000.

    Returns:
        model: nn.Layer. Specific `PPHGNetV2_B6` model with defined architecture.
    """

    def __init__(self, in_channels=3, class_num=1000):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = 2048
        # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num, stride
        b6_stages = {
            "stage1": [96, 96, 192, 2, False, False, 3, 6, 2],
            "stage2": [192, 192, 512, 3, True, False, 3, 6, 2],
            "stage3": [512, 384, 1024, 6, True, True, 5, 6, 2],
            "stage4": [1024, 768, 2048, 3, True, True, 5, 6, 2],
        }
        self.pphgnet_b6 = PPHGNetV2(
            stem_channels=[3, 48, 96],
            class_num=class_num,
            stage_config=b6_stages,
            use_lab=False,
        )

    def forward(self, input_data):
        """
        Args:
            input_data: In training mode, a 3-item sequence
                ``(pixel_values, label, attention_mask)``. Otherwise either the
                pixel tensor itself or a list whose first item is that tensor.

        Returns:
            In training mode ``(output, label, attention_mask)``, otherwise
            ``output``, where ``output`` is a DonutSwinModelOutput.
        """
        if self.training:
            pixel_values, label, attention_mask = input_data
        elif isinstance(input_data, list):
            pixel_values = input_data[0]
        else:
            pixel_values = input_data

        # Single-channel input is repeated along axis 1 to get 3 channels.
        if pixel_values.shape[1] == 1:
            pixel_values = paddle.repeat_interleave(pixel_values, repeats=3, axis=1)

        feature_map = self.pphgnet_b6(pixel_values)
        b, c, h, w = feature_map.shape
        # [b, c, h, w] -> [b, c, h*w] -> [b, h*w, c]
        sequence = feature_map.reshape([b, c, h * w]).transpose([0, 2, 1])
        result = DonutSwinModelOutput(
            last_hidden_state=sequence,
            pooler_output=None,
            hidden_states=None,
            attentions=False,
            reshaped_hidden_states=None,
        )
        if self.training:
            return result, label, attention_mask
        return result
|