layers.py 97 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545
  1. # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import collections
  15. import copy
  16. import inspect
  17. import re
  18. import warnings
  19. import weakref
  20. import numpy as np
  21. import paddle
  22. from paddle import nn, profiler
  23. from paddle.autograd.backward_utils import ValueSet
  24. from paddle.base import core, framework, unique_name
  25. from paddle.base.core import VarDesc
  26. from paddle.base.dygraph import no_grad
  27. from paddle.base.dygraph.base import (
  28. _convert_into_variable,
  29. in_declarative_mode, # noqa: F401
  30. in_to_static_mode,
  31. )
  32. from paddle.base.dygraph_utils import _append_activation_in_dygraph
  33. from paddle.base.executor import Executor, global_scope
  34. from paddle.base.framework import (
  35. Parameter,
  36. Program,
  37. _current_expected_place as _get_device,
  38. convert_np_dtype_to_dtype_,
  39. default_main_program,
  40. in_dygraph_mode,
  41. in_pir_mode,
  42. name_struct,
  43. paddle_type_to_proto_type,
  44. )
  45. from paddle.base.layer_helper_base import LayerHelperBase
  46. from paddle.base.param_attr import ParamAttr
  47. from paddle.profiler.utils import in_profiler_mode
  48. from paddle.utils import deprecated
  # Nothing in this module is exported through ``from ... import *``.
  __all__ = []

  # Patterns used by ``_convert_camel_to_snake`` to locate CamelCase word
  # boundaries: the first one splits in front of a capitalised word
  # ("MyLayer" -> "My_Layer"), the second one between a lower-case letter and
  # the capital that follows it.
  _first_cap_re = re.compile(r"(.)([A-Z][a-z]+)")
  _all_cap_re = re.compile(r"([a-z])([A-Z])")
  def record_program_ops_pre_hook(layer, inputs):
      """
      Forward pre-hook that remembers how many ops the current block holds
      right before ``layer.forward`` runs.

      Nothing happens in dygraph mode. Otherwise the current op count is
      stored in ``layer._op_recorder.start`` and the recorder is marked valid.
      If a start index was already stored (``start >= 0``) the recorder is
      marked invalid and a warning is emitted instead.

      Args:
          layer: the layer whose ``_op_recorder`` is updated.
          inputs: the layer inputs; not used by this hook.
      """
      if in_dygraph_mode():
          return

      recorder = layer._op_recorder
      if recorder.start >= 0:
          # A start index already exists, so this layer was entered before.
          recorder.is_valid = False
          warnings.warn(
              f"{layer._full_name} has recorded the op information before. Please check whether you call this layer twice."
          )
          return

      recorder.start = len(default_main_program().current_block().ops)
      recorder.is_valid = True
  def set_op_customized_attrs_post_hook(layer, inputs, outputs):
      """
      Forward post-hook that stamps ``layer._customized_attrs`` onto every
      operator appended to the current block since the recorded start index.

      The hook only acts outside dygraph mode and only when
      ``layer._op_recorder.is_valid`` is set. It stores the end index and the
      collected ops on the recorder, sets each customized attribute on each
      op, and finally removes every hook helper kept in the recorder.

      Args:
          layer: the layer that owns the recorder and the customized attrs.
          inputs: the layer inputs; not used by this hook.
          outputs: the layer outputs; not used by this hook.
      """
      if in_dygraph_mode() or not layer._op_recorder.is_valid:
          return

      first = layer._op_recorder.start
      last = len(default_main_program().current_block().ops)
      assert first >= 0 and last >= first
      generated_ops = default_main_program().current_block().ops[first:last]

      layer._op_recorder.end = last
      layer._op_recorder.ops = generated_ops

      for op in generated_ops:
          for attr_name, attr_value in layer._customized_attrs.items():
              op._set_attr(attr_name, attr_value)

      # The pre-hook and post-hook have done their job: unregister them.
      for helper in layer._op_recorder.hooks:
          helper.remove()
  def _scope_dist2single(dist_scope):
      """
      Translate the name scope of a model-parallel layer to the name scope of
      its single-card counterpart; any other scope is returned unchanged.
      """
      single_card_scope = dict.fromkeys(
          ("row_parallel_linear", "column_parallel_linear"), "linear"
      )
      single_card_scope["vocab_parallel_embedding"] = "embedding"
      # "parallel_cross_entropy": "cross_entropy", while mp_layer has parallel_cross_entropy,
      # but there is no parameters so the mapping of parallel_cross_entropy is not necessary.
      try:
          return single_card_scope[dist_scope]
      except KeyError:
          return dist_scope
  def _convert_camel_to_snake(name):
      """
      Convert a CamelCase identifier to snake_case.

      An underscore is inserted at every boundary matched by the module-level
      patterns ``_first_cap_re`` and ``_all_cap_re`` (applied in that order)
      and the result is lower-cased.
      """
      for boundary in (_first_cap_re, _all_cap_re):
          name = boundary.sub(r"\1_\2", name)
      return name.lower()
  def _addindent(string, indent):
      """
      Indent every line of ``string`` except the first by ``indent`` spaces.

      A string without any newline is returned as is.
      """
      lines = string.split('\n')
      if len(lines) < 2:
          return string
      # Joining with "newline + padding" prefixes each line after the first.
      return ('\n' + indent * ' ').join(lines)
  def _layer_trans_dtype(layer, dtype, excluded_layers):
      """
      Cast ``layer`` itself (not its sublayers) to ``dtype`` unless its exact
      type is listed in ``excluded_layers``.

      Args:
          layer: object providing ``_to_impl``.
          dtype: target data type, forwarded to ``_to_impl``.
          excluded_layers: container of types that must be left untouched.
      """
      # Exact type match on purpose: subclasses of an excluded type are cast.
      if type(layer) not in excluded_layers:
          layer._to_impl(dtype=dtype, floating_only=True, include_sublayers=False)
  class LayerObjectHelper(LayerHelperBase):
      """Helper bound to one layer object: appends ops and normalises inputs.

      The single constructor argument ``name`` is forwarded to
      ``LayerHelperBase`` both as the helper name and as ``layer_type``.
      """

      def __init__(self, name):
          super().__init__(name, layer_type=name)

      def append_op(
          self,
          type=None,
          inputs=None,
          outputs=None,
          attrs=None,
          stop_gradient=None,
      ):
          """Append an operator to the current block of the main program.

          Args:
              type: operator type
              inputs: input variables of the operator
              outputs: output variables of the operator
              attrs: attributes of the operator
              stop_gradient: forwarded unchanged to the block's ``append_op``

          Returns the result of the block's ``append_op`` call.
          """
          return self.main_program.current_block().append_op(
              type=type,
              inputs=inputs,
              outputs=outputs,
              attrs=attrs,
              stop_gradient=stop_gradient,
          )

      def _multiple_input(self, inputs_in):
          """Return a list with ``to_variable`` applied to every input.

          A list or tuple is converted element by element; anything else is
          treated as one single input.
          """
          if isinstance(inputs_in, (list, tuple)):
              return [self.to_variable(inp) for inp in inputs_in]
          return [self.to_variable(inputs_in)]

      # TODO: make it public when we need it
      def _input(self, inputs_in):
          """Return the one and only input as a variable.

          Raises:
              ValueError: if ``inputs_in`` does not hold exactly one input.
          """
          inputs = self._multiple_input(inputs_in)
          if len(inputs) != 1:
              # Raising a bare string is itself a TypeError in Python 3, which
              # hid this message; raise a real exception instead.
              raise ValueError(
                  f"{self.layer_type} layer only takes one input in"
              )
          return inputs[0]

      def _multiple_param_attr(self, length, param_attr_in=None):
          """Expand ``param_attr_in`` to a list of ``length`` attributes.

          A single ``ParamAttr`` is wrapped in a list. A one-element list is
          replicated with deep copies when ``length`` differs from 1.

          Raises:
              ValueError: if the number of attributes is neither 1 nor
                  ``length``.
          """
          # NOTE(review): ``param_attr_in=None`` reaches ``len(None)`` below and
          # fails with TypeError; presumably callers never rely on the default.
          param_attr = param_attr_in
          if isinstance(param_attr, ParamAttr):
              param_attr = [param_attr]

          if len(param_attr) != 1 and len(param_attr) != length:
              raise ValueError(f"parameter number mismatch in {self.name}")
          elif len(param_attr) == 1 and length != 1:
              # Deep copies so that the inputs do not share one attribute.
              param_attr = [
                  copy.deepcopy(param_attr[0]) for _ in range(length)
              ]
          return param_attr

      def iter_inputs_and_params(self, inputs_in, param_attr_in=None):
          """Access all inputs and params one by one

          Args:
              inputs_in: inputs to be iter
              param_attr_in: param_attr to be iter

          Yields (input, param_attr) pairs.

          Raises:
              ValueError: if the converted ``param_attr_in`` is a bool.
          """
          param_attr_in = ParamAttr._to_attr(param_attr_in)
          if isinstance(param_attr_in, bool):
              raise ValueError(f'Param_attr should not be False in {self.name}')
          inputs = inputs_in if (inputs_in is not None) else []
          inputs = self._multiple_input(inputs)
          param_attrs = self._multiple_param_attr(len(inputs), param_attr_in)
          yield from zip(inputs, param_attrs)

      def input_dtype(self, inputs_in):
          """Get input data type

          Args:
              inputs_in: inputs wanted know the data type

          Returns dtype shared by all inputs, or None when there is no input.

          Raises:
              ValueError: if two inputs have different dtypes.
          """
          inputs_in = inputs_in if (inputs_in is not None) else []
          inputs = self._multiple_input(inputs_in)
          dtype = None
          for each in inputs:
              if dtype is None:
                  dtype = each.dtype
              elif dtype != each.dtype:
                  # Format with str() rather than %d: the dtype objects are not
                  # guaranteed to be integers.
                  raise ValueError(
                      f"Data Type mismatch: {dtype} to {each.dtype} in {self.name}"
                  )
          return dtype

      def get_parameter(self, name):
          """Get parameter specifically

          Args:
              name: parameter's name

          Returns target parameter

          Raises:
              ValueError: if the variable found under ``name`` in the global
                  block is not a ``Parameter``.
          """
          param = self.main_program.global_block().var(name)
          if not isinstance(param, Parameter):
              raise ValueError(f"no Parameter name {name} found in {self.name}")
          return param

      # TODO: this should not be called anymore after all activation func move to Layers
      def append_activation(self, input_var, act=None, use_cudnn=None):
          """Append activation

          Args:
              input_var: the input variable. The len(input_var.shape) is
                  larger or equal than 2.
              act: activation type
              use_cudnn: if use cudnn

          Return the Variable of after append activation; ``input_var`` itself
          when ``act`` is None.

          Raises:
              TypeError: if ``act`` is neither None nor a str.
          """
          if act is None:
              return input_var
          if not isinstance(act, str):
              raise TypeError(
                  f"{act} should be unicode or str in {self.name}"
              )

          act_type = act
          # Only a truthy ``use_cudnn`` is recorded as an op attribute.
          attrs = {'use_cudnn': use_cudnn} if use_cudnn else {}

          if in_dygraph_mode():
              return _append_activation_in_dygraph(
                  input_var, act_type, use_cudnn
              )

          tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
          self.append_op(
              type=act_type,
              inputs={"X": [input_var]},
              outputs={"Out": [tmp]},
              attrs=attrs,
          )
          return tmp

      def is_instance(self, param, cls):
          """Check if the input parameter is instance of input class

          Args:
              param: parameter to be check
              cls: class of the parameter

          Returns None when the check passes.

          Raises:
              TypeError: if ``param`` is not an instance of ``cls``.
          """
          if not isinstance(param, cls):
              raise TypeError(
                  "The input {0} parameter of method {1} must be {2}, in layer {3}".format(
                      param,
                      self.layer_type,
                      cls.__name__,
                      self.name,
                  )
              )
  class LayerOpsRecorder:
      """
      Plain record of the operators generated by one nn.Layer.
      """

      def __init__(self, start=-1, end=-1, ops=None, is_valid=False, hooks=None):
          # Boundaries of the recorded op range; -1 is the "not recorded" default.
          self.start, self.end = start, end
          # The recorded operators themselves.
          self.ops = ops
          # Whether the recorded range may be trusted.
          self.is_valid = is_valid
          # Hook handles kept with the record.
          self.hooks = hooks
  class HookRemoveHelper:
      """Handle returned on hook registration; ``remove()`` unregisters the hook."""

      # Class-wide counter that hands out a distinct id to every helper.
      next_hook_id = 0

      def __init__(self, hooks):
          # Keep only a weak reference so the handle does not keep the hook
          # container alive.
          self._hooks_ref = weakref.ref(hooks)
          hook_id = HookRemoveHelper.next_hook_id
          self._hook_id = hook_id
          HookRemoveHelper.next_hook_id = hook_id + 1

      def remove(self):
          """Delete this helper's hook id from the container, if both still exist."""
          container = self._hooks_ref()
          if container is None:
              return
          if self._hook_id in container:
              del container[self._hook_id]
  276. class Layer:
  277. """
  278. Dynamic graph Layer based on OOD, includes the parameters of the layer, the structure of the forward graph and so on.
  279. Parameters:
  280. name_scope (str, optional): prefix name used by the layer to name parameters.
  281. If prefix is "my_layer", parameter name in MyLayer
  282. can be "my_layer_0.w_n", where "w" is the parameter
  283. base name and "n" is an unique suffix auto-generated.
  284. If None, prefix name will be snake cased class name. Default: None.
  285. dtype(str, optional): data type of this parameter.
  286. If set str, it can be "bool", "float16", "float32", "float64",
  287. "int8", "int16", "int32", "int64", "uint8" or "uint16".
  288. Default: "float32"
  289. Returns:
  290. None
  291. Examples:
  292. .. code-block:: python
  293. >>> import paddle
  294. >>> paddle.seed(100)
  295. >>> class MyLayer(paddle.nn.Layer):
  296. ... def __init__(self):
  297. ... super().__init__()
  298. ... self._linear = paddle.nn.Linear(1, 1)
  299. ... self._dropout = paddle.nn.Dropout(p=0.5)
  300. ...
  301. ... def forward(self, input):
  302. ... temp = self._linear(input)
  303. ... temp = self._dropout(temp)
  304. ... return temp
  305. ...
  306. >>> x = paddle.randn([10, 1], 'float32')
  307. >>> mylayer = MyLayer()
  308. >>> mylayer.eval() # set mylayer._dropout to eval mode
  309. >>> out = mylayer(x)
  310. >>> mylayer.train() # set mylayer._dropout to train mode
  311. >>> out = mylayer(x)
  312. >>> print(out)
  313. Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
  314. [[-3.44879317],
  315. [ 0. ],
  316. [ 0. ],
  317. [-0.73825276],
  318. [ 0. ],
  319. [ 0. ],
  320. [ 0.64444798],
  321. [-3.22185946],
  322. [ 0. ],
  323. [-0.68077987]])
  324. """
  def __init__(self, name_scope=None, dtype="float32"):
      """
      Set up the bookkeeping containers of the layer.

      Parameters:
          name_scope (str, optional): prefix used to build the unique full
              name. When None, the snake-cased class name is used (after
              mapping model-parallel names to their single-card form).
          dtype (str, optional): stored as the layer's ``_dtype``.
              Default: "float32".
      """
      OrderedDict = collections.OrderedDict

      self.training = True
      if name_scope is None:
          name_scope = _scope_dist2single(
              _convert_camel_to_snake(self.__class__.__name__)
          )
      self._full_name = unique_name.generate(name_scope)
      self._helper = LayerObjectHelper(self._full_name)
      self._built = False
      self._dtype = dtype
      # Remember the mode the layer was constructed in.
      self._init_in_dynamic_mode = in_dygraph_mode()

      self._parameters = OrderedDict()
      # Variables created in the layer that are not parameters.
      self._buffers = OrderedDict()
      self._non_persistable_buffer_names_set = set()
      self._sub_layers = OrderedDict()
      self._loaddict_holder = OrderedDict()

      # Bookkeeping for the op_descs generated by this layer.
      self._op_recorder = LayerOpsRecorder(ops=[], hooks=[])
      self._customized_attrs = {}

      self._forward_pre_hooks = OrderedDict()
      self._forward_post_hooks = OrderedDict()

      # Consulted by AMP training only.
      self._cast_to_low_precision = True

      self._state_dict_hooks = OrderedDict()
      # Original functions kept after @to_static so they can be rolled back.
      self._original_funcs = OrderedDict()
  def train(self):
      """
      Sets this Layer and all its sublayers to training mode.
      This only affects certain modules like `Dropout` and `BatchNorm`.

      Returns:
          None

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> paddle.seed(100)

              >>> class MyLayer(paddle.nn.Layer):
              ...     def __init__(self):
              ...         super().__init__()
              ...         self._linear = paddle.nn.Linear(1, 1)
              ...         self._dropout = paddle.nn.Dropout(p=0.5)
              ...
              ...     def forward(self, input):
              ...         temp = self._linear(input)
              ...         temp = self._dropout(temp)
              ...         return temp
              ...
              >>> x = paddle.randn([10, 1], 'float32')
              >>> mylayer = MyLayer()
              >>> mylayer.eval()  # set mylayer._dropout to eval mode
              >>> out = mylayer(x)
              >>> mylayer.train()  # set mylayer._dropout to train mode
              >>> out = mylayer(x)
              >>> print(out)
              Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
              [[-3.44879317],
               [ 0.        ],
               [ 0.        ],
               [-0.73825276],
               [ 0.        ],
               [ 0.        ],
               [ 0.64444798],
               [-3.22185946],
               [ 0.        ],
               [-0.68077987]])
      """
      # Global switch, dygraph only.
      # NOTE(chenweihang): nn.Layer also can be used in static mode,
      # but _dygraph_tracer() can not be called in static mode
      if in_dygraph_mode():
          framework._dygraph_tracer().train_mode()

      # Per-layer flag: this layer first, then every sublayer.
      self.training = True
      for sublayer in self.sublayers():
          sublayer.training = True
  def eval(self):
      """
      Sets this Layer and all its sublayers to evaluation mode.
      This only affects certain modules like `Dropout` and `BatchNorm`.

      Returns:
          None

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> paddle.seed(100)
              >>> class MyLayer(paddle.nn.Layer):
              ...     def __init__(self):
              ...         super().__init__()
              ...         self._linear = paddle.nn.Linear(1, 1)
              ...         self._dropout = paddle.nn.Dropout(p=0.5)
              ...
              ...     def forward(self, input):
              ...         temp = self._linear(input)
              ...         temp = self._dropout(temp)
              ...         return temp
              ...
              >>> x = paddle.randn([10, 1], 'float32')
              >>> mylayer = MyLayer()
              >>> mylayer.eval()  # set mylayer._dropout to eval mode
              >>> out = mylayer(x)
              >>> print(out)
              Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
              [[-1.72439659],
               [ 0.31532824],
               [ 0.01192369],
               [-0.36912638],
               [-1.63426113],
               [-0.93169814],
               [ 0.32222399],
               [-1.61092973],
               [ 0.77209264],
               [-0.34038994]])
      """
      # Global switch, dygraph only.
      # NOTE(chenweihang): nn.Layer also can be used in static mode,
      # but _dygraph_tracer() can not be called in static mode
      if in_dygraph_mode():
          framework._dygraph_tracer().eval_mode()

      # Per-layer flag: this layer first, then every sublayer.
      self.training = False
      for sublayer in self.sublayers():
          sublayer.training = False
  def apply(self, fn):
      """
      Applies ``fn`` recursively to every sublayer (as returned by ``.sublayers()``)
      as well as self. Typical use includes initializing the parameters of a model.

      Children are visited before the layer itself, so ``fn`` sees the
      innermost layers first.

      Parameters:
          fn (function): a function to be applied to each sublayer

      Returns:
          Layer, self

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> import paddle.nn as nn
              >>> paddle.seed(2023)

              >>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))

              >>> def init_weights(layer):
              ...     if type(layer) == nn.Linear:
              ...         print('before init weight:', layer.weight.numpy())
              ...         new_weight = paddle.full(shape=layer.weight.shape, dtype=layer.weight.dtype, fill_value=0.9)
              ...         layer.weight.set_value(new_weight)
              ...         print('after init weight:', layer.weight.numpy())
              ...
              >>> net.apply(init_weights)

              >>> print(net.state_dict())
              before init weight: [[ 0.89611185  0.04935038]
               [-0.5888344   0.99266374]]
              after init weight: [[0.9 0.9]
               [0.9 0.9]]
              before init weight: [[-0.18615901 -0.22924072]
               [ 1.1517721   0.59859073]]
              after init weight: [[0.9 0.9]
               [0.9 0.9]]
              OrderedDict([('0.weight', Parameter containing:
              Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
              [[0.89999998, 0.89999998],
               [0.89999998, 0.89999998]])), ('0.bias', Parameter containing:
              Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
              [0., 0.])), ('1.weight', Parameter containing:
              Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
              [[0.89999998, 0.89999998],
               [0.89999998, 0.89999998]])), ('1.bias', Parameter containing:
              Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
              [0., 0.]))])
      """
      # Depth-first: descend into each direct child, then handle this layer.
      for child in self.children():
          child.apply(fn)
      fn(self)
      return self
  def full_name(self):
      """
      Return the full name stored on this layer at construction time.

      Returns:
          str, full name of this layer.

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> class LinearNet(paddle.nn.Layer):
              ...     def __init__(self):
              ...         super().__init__(name_scope = "demo_linear_net")
              ...         self._linear = paddle.nn.Linear(1, 1)
              ...
              ...     def forward(self, x):
              ...         return self._linear(x)
              ...
              >>> linear_net = LinearNet()
              >>> print(linear_net.full_name())
              demo_linear_net_0
      """
      return self._full_name
  def register_forward_post_hook(self, hook):
      """
      Register a forward post-hook for Layer. The hook will be called after `forward` function has been computed.

      It should have the following form, `input` and `output` of the `hook` is `input` and `output` of the `Layer` respectively.
      User can use forward post-hook to change the output of the Layer or perform information statistics tasks on the Layer.

      hook(Layer, input, output) -> None or modified output

      Parameters:
          hook(function): a function registered as a forward post-hook

      Returns:
          HookRemoveHelper, a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> import numpy as np

              >>> # the forward_post_hook change the output of the layer: output = output * 2
              >>> def forward_post_hook(layer, input, output):
              ...     # user can use layer, input and output for information statistics tasks
              ...
              ...     # change the output
              ...     return output * 2
              ...
              >>> linear = paddle.nn.Linear(13, 5)

              >>> # register the hook
              >>> forward_post_hook_handle = linear.register_forward_post_hook(forward_post_hook)

              >>> value1 = np.arange(26).reshape(2, 13).astype("float32")
              >>> in1 = paddle.to_tensor(value1)

              >>> out0 = linear(in1)

              >>> # remove the hook
              >>> forward_post_hook_handle.remove()

              >>> out1 = linear(in1)

              >>> # hook change the linear's output to output * 2, so out0 is equal to out1 * 2.
              >>> assert (out0.numpy() == (out1.numpy()) * 2).any()
      """
      # The handle's id is the key under which the hook is stored, so the
      # handle can later delete exactly this entry.
      handle = HookRemoveHelper(self._forward_post_hooks)
      self._forward_post_hooks[handle._hook_id] = hook
      return handle
  def register_forward_pre_hook(self, hook):
      """
      Register a forward pre-hook for Layer. The hook will be called before `forward` function has been computed.

      It should have the following form, `input` of the `hook` is `input` of the `Layer`,
      hook can either return a tuple or a single modified value in the hook. We will wrap the value into a tuple if
      a single value is returned(unless that value is already a tuple).
      User can use forward pre-hook to change the input of the Layer or perform information statistics tasks on the Layer.

      hook(Layer, input) -> None or modified input

      Parameters:
          hook(function): a function registered as a forward pre-hook

      Returns:
          HookRemoveHelper, a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> import numpy as np

              >>> # the forward_pre_hook change the input of the layer: input = input * 2
              >>> def forward_pre_hook(layer, input):
              ...     # user can use layer and input for information statistics tasks
              ...
              ...     # change the input
              ...     input_return = (input[0] * 2)
              ...     return input_return
              ...
              >>> linear = paddle.nn.Linear(13, 5)

              >>> # register the hook
              >>> forward_pre_hook_handle = linear.register_forward_pre_hook(forward_pre_hook)

              >>> value0 = np.arange(26).reshape(2, 13).astype("float32")
              >>> in0 = paddle.to_tensor(value0)
              >>> out0 = linear(in0)

              >>> # remove the hook
              >>> forward_pre_hook_handle.remove()

              >>> value1 = value0 * 2
              >>> in1 = paddle.to_tensor(value1)
              >>> out1 = linear(in1)

              >>> # hook change the linear's input to input * 2, so out0 is equal to out1.
              >>> assert (out0.numpy() == out1.numpy()).any()
      """
      # The handle's id is the key under which the hook is stored, so the
      # handle can later delete exactly this entry.
      handle = HookRemoveHelper(self._forward_pre_hooks)
      self._forward_pre_hooks[handle._hook_id] = hook
      return handle
  def create_parameter(
      self,
      shape,
      attr=None,
      dtype=None,
      is_bias=False,
      default_initializer=None,
  ):
      """Create a parameter for this layer.

      The call is forwarded to this layer's helper. `attr` is deep-copied first, so the
      caller's object is never handed to the helper, and an empty-string `attr` is treated
      the same as `None`.

      Parameters:
          shape(list): Shape of the parameter. The data type in the list must be int.
          attr(ParamAttr, optional): Parameter attribute of weight. Please refer to :ref:`api_paddle_ParamAttr`. Default: None.
          dtype(str, optional): Data type of this parameter.
              If set str, it can be "bool", "float16", "float32", "float64",
              "int8", "int16", "int32", "int64", "uint8" or "uint16".
              Default: None, which is documented to mean "float32".
          is_bias(bool, optional): if this is a bias parameter. Default: False.
          default_initializer(Initializer, optional): the default initializer for this parameter.
              If set None, default initializer will be set to paddle.nn.initializer.Xavier and paddle.nn.initializer.Constant
              for non-bias and bias parameter, respectively. Default: None.

      Returns:
          Tensor, created parameter.

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> paddle.seed(2023)

              >>> class MyLayer(paddle.nn.Layer):
              ...     def __init__(self):
              ...         super().__init__()
              ...         self._linear = paddle.nn.Linear(1, 1)
              ...         w_tmp = self.create_parameter([1,1])
              ...         self.add_parameter("w_tmp", w_tmp)
              ...
              ...     def forward(self, input):
              ...         return self._linear(input)
              ...
              >>> mylayer = MyLayer()
              >>> for name, param in mylayer.named_parameters():
              ...     print(name, param)      # will print w_tmp,_linear.weight,_linear.bias
              w_tmp Parameter containing:
              Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
                     [[0.06979191]])
              _linear.weight Parameter containing:
              Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
                     [[1.26729357]])
              _linear.bias Parameter containing:
              Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False,
                     [0.])
      """
      attr_copy = copy.deepcopy(attr)
      # An empty string carries no attribute information; pass None instead.
      is_blank_name = isinstance(attr_copy, str) and attr_copy == ""
      effective_attr = None if is_blank_name else attr_copy
      return self._helper.create_parameter(
          effective_attr, shape, dtype, is_bias, default_initializer
      )
  @deprecated(
      since="2.0.0",
      update_to="paddle.nn.Layer.create_tensor",
      reason="New api in create_tensor, easier to use.",
  )
  def create_variable(self, name=None, persistable=None, dtype=None):
      """
      Create Tensor for this layer (deprecated in favour of `create_tensor`).

      The tensor is created in the current block of the helper's main program. Its name is
      this layer's full name joined with `name` by a dot; when `name` is None a unique
      name based on "<full name>._generated_var" is generated instead.

      Parameters:
          name(str, optional): name of the tensor. Please refer to :ref:`api_guide_Name` . Default: None
          persistable(bool, optional): if set this tensor persistable. Default: None
          dtype(str, optional): data type of this parameter. If set str, it can be "bool", "float16", "float32", "float64","int8", "int16", "int32", "int64", "uint8" or "uint16". If set None, it will be "float32". Default: None

      Returns:
          Tensor, created Tensor.

      Examples:
          .. code-block:: python

              >>> import paddle

              >>> class MyLinear(paddle.nn.Layer):
              ...     def __init__(self,
              ...                 in_features,
              ...                 out_features):
              ...         super().__init__()
              ...         self.linear = paddle.nn.Linear( 10, 10)
              ...
              ...         self.back_var = self.create_variable(name = "linear_tmp_0", dtype=self._dtype)
              ...
              ...     def forward(self, input):
              ...         out = self.linear(input)
              ...         paddle.assign( out, self.back_var)
              ...
              ...         return out
      """
      if name is None:
          # No explicit name: derive a unique one under this layer's full name.
          var_name = unique_name.generate(
              ".".join([self._full_name, "_generated_var"])
          )
      else:
          var_name = ".".join([self._full_name, name])

      target_block = self._helper.main_program.current_block()
      return target_block.create_var(
          name=var_name,
          persistable=persistable,
          dtype=dtype,
          type=core.VarDesc.VarType.LOD_TENSOR,
      )
  # TODO: Add more parameter list when we need them
  def create_tensor(self, name=None, persistable=None, dtype=None):
      """
      Create Tensor for this layer.

      The tensor is created in the current block of the helper's main program. When `name`
      is given, the tensor is named "<layer full name>.<name>"; otherwise a unique name
      based on "<layer full name>._generated_var" is generated.

      Parameters:
          name(str, optional): name of the tensor. Please refer to :ref:`api_guide_Name` . Default: None.
          persistable(bool, optional): if set this tensor persistable. Default: None.
          dtype(str, optional): data type of this parameter.
              If set str, it can be "bool", "float16", "float32", "float64",
              "int8", "int16", "int32", "int64", "uint8" or "uint16".
              If set None, it will be "float32". Default: None.

      Returns:
          Tensor, created Tensor.

      Examples:
          .. code-block:: python

              >>> import paddle

              >>> class MyLinear(paddle.nn.Layer):
              ...     def __init__(self,
              ...                 in_features,
              ...                 out_features):
              ...         super().__init__()
              ...         self.linear = paddle.nn.Linear(10, 10)
              ...
              ...         self.back_var = self.create_tensor(name = "linear_tmp_0", dtype=self._dtype)
              ...
              ...     def forward(self, input):
              ...         out = self.linear(input)
              ...         paddle.assign(out, self.back_var)
              ...
              ...         return out
      """
      if name is None:
          generated_stem = ".".join([self._full_name, "_generated_var"])
          tensor_name = unique_name.generate(generated_stem)
      else:
          tensor_name = ".".join([self._full_name, name])

      current_block = self._helper.main_program.current_block()
      return current_block.create_var(
          name=tensor_name,
          persistable=persistable,
          dtype=dtype,
          type=core.VarDesc.VarType.LOD_TENSOR,
      )
  def parameters(self, include_sublayers=True):
      """
      Return all Parameters of this layer, and optionally of its sub-layers, as a list.

      This is the value-only view of `named_parameters`: the names are dropped and the
      iteration order is kept.

      Parameters:
          include_sublayers (bool, optional): Whether to return the parameters of the sublayer.
              If True, the returned list contains the parameters of the sublayer.
              Default: True.

      Returns:
          list of Tensor, a list of Parameters.

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> paddle.seed(100)

              >>> linear = paddle.nn.Linear(1, 1)
              >>> print(linear.parameters())
              [Parameter containing:
              Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
                     [[0.18551230]]), Parameter containing:
              Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False,
                     [0.])]
      """
      named_pairs = self.named_parameters(include_sublayers=include_sublayers)
      return [tensor for _name, tensor in named_pairs]
  def astype(self, dtype=None):
      """
      Casts all parameters and buffers to dtype and then return the Layer.

      The new dtype is also recorded on this layer and on every layer returned by
      `sublayers()`.

      Parameters:
          dtype(str|paddle.dtype|numpy.dtype): target data type of layer.
              If set str, it can be "bool", "bfloat16", "float16", "float32", "float64",
              "int8", "int16", "int32", "int64", "uint8", "complex64", "complex128".
              Default: None

      Returns:
          Layer, self

      Raises:
          ValueError: if `dtype` is neither a paddle.dtype, a numpy.dtype, nor one of the
              supported dtype names (this includes the default, None).

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> import paddle.nn as nn
              >>> weight_attr = paddle.ParamAttr(name="weight",initializer=paddle.nn.initializer.Constant(value=1.5))
              >>> bias_attr = paddle.ParamAttr(name="bias",initializer=paddle.nn.initializer.Constant(value=2.5))

              >>> linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr).to(device="cpu",dtype="float32")
              >>> print(linear)
              Linear(in_features=2, out_features=2, dtype=float32)
              >>> print(linear.parameters())
              [Parameter containing:
              Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
                     [[1.50000000, 1.50000000],
                      [1.50000000, 1.50000000]]), Parameter containing:
              Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
                     [2.50000000, 2.50000000])]

              >>> linear=linear.astype("int8")
              >>> print(linear)
              Linear(in_features=2, out_features=2, dtype=paddle.int8)
              >>> print(linear.parameters())
              [Parameter containing:
              Tensor(shape=[2, 2], dtype=int8, place=Place(cpu), stop_gradient=False,
                     [[1, 1],
                      [1, 1]]), Parameter containing:
              Tensor(shape=[2], dtype=int8, place=Place(cpu), stop_gradient=False,
                     [2, 2])]
      """
      supported_names = (
          "bfloat16",
          "float16",
          "float32",
          "float64",
          "int8",
          "int16",
          "int32",
          "int64",
          "uint8",
          "complex64",
          "complex128",
          "bool",
      )
      given_as_object = isinstance(dtype, (paddle.dtype, np.dtype))
      # Exact `str` only: string subclasses are not accepted as dtype names.
      given_as_name = type(dtype) is str and dtype in supported_names
      if not (given_as_object or given_as_name):
          raise ValueError(
              "dtype value error, must be 'bfloat16', 'float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'complex64', 'complex128', 'bool', or paddle.dtype, numpy.dtype, but receive "
              + str(dtype)
          )

      # Names and numpy dtypes go through the framework converter first;
      # a paddle.dtype is used as given.
      if isinstance(dtype, (str, np.dtype)):
          dtype = framework.convert_np_dtype_to_dtype_(dtype)

      self._dtype = dtype
      for sub in self.sublayers():
          sub._dtype = dtype
      for _, weight in self.named_parameters(include_sublayers=True):
          weight._to(None, dtype)
      for _, buf in self.named_buffers(include_sublayers=True):
          buf.to(None, dtype)
      return self
  def children(self):
      """
      Returns an iterator over immediate children layers.

      The layers are those produced by `named_children`, with the names dropped.

      Yields:
          Layer: a child layer

      Examples:
          .. code-block:: python

              >>> import paddle

              >>> linear1 = paddle.nn.Linear(10, 3)
              >>> linear2 = paddle.nn.Linear(3, 10, bias_attr=False)

              >>> model = paddle.nn.Sequential(linear1, linear2)

              >>> layer_list = list(model.children())

              >>> print(layer_list)
              [Linear(in_features=10, out_features=3, dtype=float32), Linear(in_features=3, out_features=10, dtype=float32)]
      """
      for _child_name, child in self.named_children():
          yield child
  def named_children(self):
      """Returns an iterator over immediate children layers, yielding both
      the name of the layer as well as the layer itself.

      Entries whose layer is None are skipped, and a layer registered under several
      names is yielded only once, under the first name encountered.

      Yields:
          (string, Layer): Tuple containing a name and child layer

      Examples:
          .. code-block:: python

              >>> import paddle

              >>> linear1 = paddle.nn.Linear(10, 3)
              >>> linear2 = paddle.nn.Linear(3, 10, bias_attr=False)
              >>> model = paddle.nn.Sequential(linear1, linear2)
              >>> for prefix, layer in model.named_children():
              ...     print(prefix, layer)
              0 Linear(in_features=10, out_features=3, dtype=float32)
              1 Linear(in_features=3, out_features=10, dtype=float32)
      """
      already_yielded = set()
      for child_name, child in self._sub_layers.items():
          if child is None or child in already_yielded:
              continue
          already_yielded.add(child)
          yield child_name, child
  def sublayers(self, include_self=False):
      """
      Returns a list of sub layers.

      This is the value-only view of `named_sublayers`: the names are dropped and the
      iteration order is kept.

      Parameters:
          include_self(bool, optional): Whether return self as sublayers. Default: False.

      Returns:
          list of Layer, a list of sub layers.

      Examples:
          .. code-block:: python

              >>> import paddle

              >>> class MyLayer(paddle.nn.Layer):
              ...     def __init__(self):
              ...         super().__init__()
              ...         self._linear = paddle.nn.Linear(1, 1)
              ...         self._dropout = paddle.nn.Dropout(p=0.5)
              ...
              ...     def forward(self, input):
              ...         temp = self._linear(input)
              ...         temp = self._dropout(temp)
              ...         return temp
              ...
              >>> mylayer = MyLayer()
              >>> print(mylayer.sublayers())
              [Linear(in_features=1, out_features=1, dtype=float32), Dropout(p=0.5, axis=None, mode=upscale_in_train)]
      """
      named_pairs = self.named_sublayers(include_self=include_self)
      return [sub for _name, sub in named_pairs]
  def named_parameters(self, prefix='', include_sublayers=True):
      """
      Returns an iterator over all parameters in the Layer, yielding tuple of name and parameter.

      Each name is the owning layer's prefix joined to the parameter key with a dot.
      Entries that are None are skipped and a parameter reachable through several layers
      is yielded only once.

      Parameters:
          prefix(str, optional): Prefix to prepend to all parameter names. Default: ''.
          include_sublayers(bool, optional): Whether include the parameters of sublayers.
              If True, also include the named parameters from sublayers. Default: True.

      Yields:
          (string, Parameter): Tuple of name and Parameter

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> paddle.seed(100)

              >>> fc1 = paddle.nn.Linear(10, 3)
              >>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
              >>> model = paddle.nn.Sequential(fc1, fc2)
              >>> for name, param in model.named_parameters():
              ...     print(name, param)
              0.weight Parameter containing:
              Tensor(shape=[10, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
                     [[ 0.07276392, -0.39791510, -0.66356444],
                      [ 0.02143478, -0.18519843, -0.32485050],
                      [-0.42249614, 0.08450919, -0.66838276],
                      [ 0.38208580, -0.24303678, 0.55127048],
                      [ 0.47745085, 0.62117910, -0.08336520],
                      [-0.28653207, 0.47237599, -0.05868882],
                      [-0.14385653, 0.29945642, 0.12832761],
                      [-0.21237159, 0.38539791, -0.62760031],
                      [ 0.02637231, 0.20621127, 0.43255770],
                      [-0.19984481, -0.26259184, -0.29696006]])
              0.bias Parameter containing:
              Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=False,
                     [0., 0., 0.])
              1.weight Parameter containing:
              Tensor(shape=[3, 10], dtype=float32, place=Place(cpu), stop_gradient=False,
                     [[ 0.01985580, -0.40268910, 0.41172385, -0.47249708, -0.09002256,
                        -0.00533628, -0.52048630, 0.62360322, 0.20848787, -0.02033746],
                      [ 0.58281910, 0.12841827, 0.12907702, 0.02325618, -0.07746267,
                        0.31950659, -0.37924835, -0.59209681, -0.11732036, -0.58378261],
                      [-0.62100595, 0.22293305, 0.28229684, -0.03687060, -0.59323978,
                        0.08411229, 0.53275704, 0.40431368, 0.03171402, -0.17922515]])
      """
      # A ValueSet is used for de-duplication in PIR mode outside to_static
      # conversion; a plain set otherwise.
      if in_pir_mode() and not in_to_static_mode():
          seen_params = ValueSet()
      else:
          seen_params = set()

      if include_sublayers:
          owners = self.named_sublayers(prefix=prefix, include_self=True)
      else:
          owners = zip([prefix], [self])

      for owner_prefix, owner in owners:
          for key, param in owner._parameters.items():
              if param is None or param in seen_params:
                  continue
              seen_params.add(param)
              separator = '.' if owner_prefix else ''
              yield owner_prefix + separator + key, param
  def named_sublayers(self, prefix='', include_self=False, layers_set=None):
      """
      Returns an iterator over all sublayers in the Layer, yielding tuple of name and sublayer.
      The duplicate sublayer will only be yielded once.

      The traversal is depth-first: each child is yielded under "<prefix>.<key>" (or just
      "<key>" when the prefix is empty), followed by that child's own sublayers. Children
      that are None are skipped.

      Parameters:
          prefix(str, optional): Prefix to prepend to all parameter names. Default: ''.
          include_self(bool, optional): Whether include the Layer itself. Default: False.
          layers_set(set, optional): The set to record duplicate sublayers. Default: None.

      Yields:
          (string, Layer): Tuple of name and Layer

      Examples:
          .. code-block:: python

              >>> import paddle

              >>> fc1 = paddle.nn.Linear(10, 3)
              >>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
              >>> model = paddle.nn.Sequential(fc1, fc2)
              >>> for prefix, layer in model.named_sublayers():
              ...     print(prefix, layer)
              0 Linear(in_features=10, out_features=3, dtype=float32)
              1 Linear(in_features=3, out_features=10, dtype=float32)
      """
      # One set is shared by the whole recursion so a layer reachable through
      # several paths is reported only the first time.
      visited = set() if layers_set is None else layers_set

      if include_self and self not in visited:
          visited.add(self)
          yield prefix, self

      for child_key, child in self._sub_layers.items():
          if child is None:
              continue
          separator = '.' if prefix else ''
          child_prefix = prefix + separator + child_key
          yield from child.named_sublayers(
              prefix=child_prefix, include_self=True, layers_set=visited
          )
  def register_buffer(self, name, tensor, persistable=True):
      """
      Registers a tensor as buffer into the layer.

      `buffer` is a non-trainable tensor and will not be updated by optimizer,
      but is necessary for evaluation and inference. For example, the mean and variance in BatchNorm layers.
      The registered buffer is persistable by default, and will be saved into
      `state_dict` alongside parameters. If set persistable=False, it registers
      a non-persistable buffer, so that it will not be a part of `state_dict` .

      Buffers can be accessed as attributes using given names.

      Parameters:
          name (string): name of the buffer. The buffer can be accessed
              from this layer using the given name
          tensor (Tensor): the tensor to be registered as buffer. None is also accepted.
          persistable (bool): whether the buffer is part of this layer's
              state_dict.

      Returns:
          None

      Raises:
          ValueError: if the layer has no `_buffers` attribute yet, i.e.
              `super().__init__()` has not been called.
          TypeError: if `name` is not a string, or `tensor` is neither None nor exactly
              of type `core.eager.Tensor`.
          KeyError: if `name` contains a dot, is empty, or names an existing attribute
              that is not already a buffer.

      Examples:
          .. code-block:: python

              >>> import numpy as np
              >>> import paddle

              >>> linear = paddle.nn.Linear(10, 3)
              >>> value = np.array([0]).astype("float32")
              >>> buffer = paddle.to_tensor(value)
              >>> linear.register_buffer("buf_name", buffer, persistable=True)

              >>> # get the buffer by attribute.
              >>> print(linear.buf_name)
              Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
                     [0.])
      """
      if '_buffers' not in self.__dict__:
          raise ValueError("super().__init__() should be called first")
      if not isinstance(name, str):
          raise TypeError(
              f"The name of buffer should be a string, but received {type(name).__name__}."
          )
      if '.' in name:
          raise KeyError(
              "The name of buffer can not contain `.`, "
              "because when you access the newly added buffer in the "
              "form of `self.**.**`, it will cause AttributeError."
          )
      if name == '':
          raise KeyError("The name of buffer can not be empty.")
      # Re-registering an existing buffer is allowed; shadowing any other
      # attribute is not.
      if hasattr(self, name) and name not in self._buffers:
          raise KeyError(f"attribute '{name}' already exists.")
      # Exact type comparison: subclasses of core.eager.Tensor are rejected.
      if tensor is not None and type(tensor) != core.eager.Tensor:
          raise TypeError(
              f"The registered buffer should be a Paddle.Tensor, but received {type(tensor).__name__}."
          )

      self._buffers[name] = tensor
      non_persistable = self._non_persistable_buffer_names_set
      if persistable:
          non_persistable.discard(name)
      else:
          non_persistable.add(name)
  def buffers(self, include_sublayers=True):
      """
      Returns a list of all buffers from current layer and its sub-layers.

      This is the value-only view of `named_buffers`: the names are dropped and the
      iteration order is kept.

      Parameters:
          include_sublayers(bool, optional): Whether include the buffers of sublayers. If True, also include the buffers from sublayers. Default: True.

      Returns:
          list of Tensor, a list of buffers.

      Examples:
          .. code-block:: python

              >>> import numpy as np
              >>> import paddle

              >>> linear = paddle.nn.Linear(10, 3)
              >>> value = np.array([0]).astype("float32")
              >>> buffer = paddle.to_tensor(value)
              >>> linear.register_buffer("buf_name", buffer, persistable=True)

              >>> print(linear.buffers())
              [Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
                     [0.])]
      """
      named_pairs = self.named_buffers(include_sublayers=include_sublayers)
      return [tensor for _name, tensor in named_pairs]
  def named_buffers(self, prefix='', include_sublayers=True):
      """
      Returns an iterator over all buffers in the Layer, yielding tuple of name and Tensor.

      Each name is the owning layer's prefix joined to the buffer key with a dot.
      Entries that are None are skipped and a buffer reachable through several layers is
      yielded only once.

      Parameters:
          prefix(str, optional): Prefix to prepend to all buffer names. Default: ''.
          include_sublayers(bool, optional): Whether include the buffers of sublayers.
              If True, also include the named buffers from sublayers. Default: True.

      Yields:
          (string, Tensor): Tuple of name and tensor

      Examples:
          .. code-block:: python

              >>> import numpy as np
              >>> import paddle

              >>> fc1 = paddle.nn.Linear(10, 3)
              >>> buffer1 = paddle.to_tensor(np.array([0]).astype("float32"))
              >>> # register a tensor as buffer by specific `persistable`
              >>> fc1.register_buffer("buf_name_1", buffer1, persistable=True)

              >>> fc2 = paddle.nn.Linear(3, 10)
              >>> buffer2 = paddle.to_tensor(np.array([1]).astype("float32"))
              >>> # register a buffer by assigning an attribute with Tensor.
              >>> # The `persistable` can only be False by this way.
              >>> fc2.buf_name_2 = buffer2

              >>> model = paddle.nn.Sequential(fc1, fc2)

              >>> # get all named buffers
              >>> for name, buffer in model.named_buffers():
              ...     print(name, buffer)
              0.buf_name_1 Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
                     [0.])
              1.buf_name_2 Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
                     [1.])
      """
      seen_buffers = set()

      if include_sublayers:
          owners = self.named_sublayers(prefix=prefix, include_self=True)
      else:
          owners = zip([prefix], [self])

      for owner_prefix, owner in owners:
          for key, buf in owner._buffers.items():
              if buf is None or buf in seen_buffers:
                  continue
              seen_buffers.add(buf)
              separator = '.' if owner_prefix else ''
              yield owner_prefix + separator + key, buf
  def clear_gradients(self, set_to_zero=True):
      """
      Clear the gradients of all parameters for this layer.

      Only parameters whose `trainable` flag is set are touched; `set_to_zero` is passed
      on to each parameter's `clear_gradient`.

      Args:
          set_to_zero (bool, optional): Whether to set the trainable parameters'
              gradients to zero or None. Default is True.

      Returns:
          None

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> import numpy as np

              >>> value = np.arange(26).reshape(2, 13).astype("float32")
              >>> a = paddle.to_tensor(value)
              >>> linear = paddle.nn.Linear(13, 5)
              >>> adam = paddle.optimizer.Adam(learning_rate=0.01,
              ...                              parameters=linear.parameters())
              >>> out = linear(a)
              >>> out.backward()
              >>> adam.step()
              >>> linear.clear_gradients()
      """
      trainable_params = (
          param for param in self.parameters() if param.trainable
      )
      for param in trainable_params:
          param.clear_gradient(set_to_zero)
  1143. def _build_once(self, *args, **kwargs):
  1144. pass
  def _dygraph_call_func(self, *inputs, **kwargs):
      """Run `forward` together with the registered hooks.

      Steps, in order:

      1. Every forward pre-hook is called with `(self, inputs)`. A non-None result
         replaces the positional inputs and is wrapped into a tuple if it is not one.
      2. If the layer is not marked as built, `_build_once` is called and the layer is
         marked as built.
      3. `forward` runs inside a profiler record event when profiler mode is on, and
         inside a `name_struct` scope named after the class otherwise.
      4. Every forward post-hook is called with `(self, inputs, outputs)`. A non-None
         result replaces the outputs.

      Returns:
          The outputs of `forward`, possibly replaced by a post-hook.
      """
      for pre_hook in self._forward_pre_hooks.values():
          replacement = pre_hook(self, inputs)
          if replacement is None:
              continue
          if not isinstance(replacement, tuple):
              replacement = (replacement,)
          inputs = replacement

      if not self._built:
          self._build_once(*inputs, **kwargs)
          self._built = True

      if in_profiler_mode():
          scope = profiler.RecordEvent(
              self.__class__.__name__, profiler.TracerEventType.Forward
          )
      else:
          scope = name_struct(self.__class__.__name__)
      with scope:
          outputs = self.forward(*inputs, **kwargs)

      for post_hook in self._forward_post_hooks.values():
          replacement = post_hook(self, inputs, outputs)
          if replacement is not None:
              outputs = replacement

      return outputs
  def __call__(self, *inputs, **kwargs):
      """Call the layer on the given inputs.

      A direct path (`_build_once` followed by `forward`) is taken only when all of the
      following hold: not in to_static mode, no forward pre- or post-hooks registered,
      the layer not marked as built, dygraph mode active, and profiler mode off. Every
      other call goes through `_dygraph_call_func`.
      """
      take_direct_path = (
          not in_to_static_mode()
          and not self._forward_pre_hooks
          and not self._forward_post_hooks
          and not self._built
          and in_dygraph_mode()
          and not in_profiler_mode()
      )
      if not take_direct_path:
          return self._dygraph_call_func(*inputs, **kwargs)

      self._build_once(*inputs, **kwargs)
      return self.forward(*inputs, **kwargs)
  def forward(self, *inputs, **kwargs):
      """
      Defines the computation performed at every call.
      Should be overridden by all subclasses.

      This base implementation always raises NotImplementedError.

      Parameters:
          *inputs(tuple): unpacked tuple arguments
          **kwargs(dict): unpacked dict arguments

      Raises:
          NotImplementedError: always, unless overridden.
      """
      raise NotImplementedError
  def backward(self, *inputs):
      """Reject calls: this method always raises ValueError."""
      raise ValueError("Layer shouldn't implement backward")
  def add_sublayer(self, name, sublayer):
      """
      Adds a sub Layer instance.

      Added sublayer can be accessed by self.name

      `sublayer` must be a Layer instance or None; this is checked with an `assert`, so
      a violation surfaces as AssertionError.

      Parameters:
          name(str): name of this sublayer.
          sublayer(Layer): an instance of Layer.

      Returns:
          Layer, the sublayer passed in.

      Examples:
          .. code-block:: python

              >>> import paddle

              >>> class MySequential(paddle.nn.Layer):
              ...     def __init__(self, *layers):
              ...         super().__init__()
              ...         if len(layers) > 0 and isinstance(layers[0], tuple):
              ...             for name, layer in layers:
              ...                 self.add_sublayer(name, layer)
              ...         else:
              ...             for idx, layer in enumerate(layers):
              ...                 self.add_sublayer(str(idx), layer)
              ...
              ...     def forward(self, input):
              ...         for layer in self._sub_layers.values():
              ...             input = layer(input)
              ...         return input
              ...
              >>> fc1 = paddle.nn.Linear(10, 3)
              >>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
              >>> model = MySequential(fc1, fc2)
              >>> for prefix, layer in model.named_sublayers():
              ...     print(prefix, layer)
              0 Linear(in_features=10, out_features=3, dtype=float32)
              1 Linear(in_features=3, out_features=10, dtype=float32)
      """
      assert isinstance(sublayer, Layer) or sublayer is None

      registry = self._sub_layers
      registry[name] = sublayer
      return sublayer
  def add_parameter(self, name, parameter):
      """Adds a Parameter instance.

      Added parameter can be accessed by self.name

      Passing `parameter=None` registers the name with a None value. When the layer's
      `_loaddict_holder` is non-empty, a non-None parameter must be present in it by
      `parameter.name` and has its value set from there before being registered.

      Parameters:
          name(str): name of this parameter.
          parameter(Parameter): an instance of Parameter, or None.

      Returns:
          Parameter, the parameter passed in.

      Raises:
          RuntimeError: if the layer has no `_parameters` attribute yet, i.e.
              `super().__init__()` has not been called.
          TypeError: if `name` is not a string, or `parameter` is neither None, a
              `framework.Parameter` nor a `paddle.pir.Value`.
          KeyError: if `name` contains a dot, is empty, or names an existing attribute
              that is not already a parameter.
          AssertionError: if `_loaddict_holder` is non-empty and does not contain
              `parameter.name`.

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> paddle.seed(100)

              >>> class MyLayer(paddle.nn.Layer):
              ...     def __init__(self):
              ...         super().__init__()
              ...         self._linear = paddle.nn.Linear(1, 1)
              ...         w_tmp = self.create_parameter([1,1])
              ...         self.add_parameter("w_tmp", w_tmp)
              ...
              ...     def forward(self, input):
              ...         return self._linear(input)
              ...
              >>> mylayer = MyLayer()
              >>> for name, param in mylayer.named_parameters():
              ...     print(name, param)
              w_tmp Parameter containing:
              Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
                     [[-1.01448846]])
              _linear.weight Parameter containing:
              Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
                     [[0.18551230]])
              _linear.bias Parameter containing:
              Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False,
                     [0.])
      """
      if '_parameters' not in self.__dict__:
          raise RuntimeError("super().__init__() should be called firstly.")
      if not isinstance(name, str):
          raise TypeError(
              f"The name of parameter should be a string, but received {type(name).__name__}."
          )
      if '.' in name:
          raise KeyError(
              "The name of parameter can not contain `.`, "
              "because when you access the newly added parameter in the "
              "form of `self.**.**`, it will cause AttributeError."
          )
      if name == '':
          raise KeyError("The name of parameter can not be empty.")
      # Re-registering an existing parameter is allowed; shadowing any other
      # attribute is not.
      if hasattr(self, name) and name not in self._parameters:
          raise KeyError(f"The parameter '{name}' already exists.")
      if parameter is not None and not isinstance(
          parameter, (framework.Parameter, paddle.pir.Value)
      ):
          raise TypeError(
              f"The parameter to be added should be a Parameter, but received {type(parameter).__name__}."
          )

      # Only a real parameter can take its value from `_loaddict_holder`. A
      # None placeholder has no `.name`, so it must skip this step instead of
      # failing with AttributeError.
      if parameter is not None and len(self._loaddict_holder) > 0:
          assert (
              parameter.name in self._loaddict_holder
          ), f"Parameter not found, Can't not find [ {parameter.name} ] in state_dict"
          parameter.set_value(self._loaddict_holder[parameter.name])

      self._parameters[name] = parameter
      return parameter
  def _set_op_attrs(self, attrs):
      """
      Add customized attribute while append_op. In case of quantization, we want to save
      some attributes into op_desc while exporting inference model by @to_static.

      The attributes are merged into `self._customized_attrs`, and a recording pre-hook
      plus an attribute-setting post-hook are registered unless they are detected as
      already registered. The helpers of both hooks are kept in `self._op_recorder.hooks`.

      Arguments:
          attrs(dict): customized attributes that will be added into op_descs.

      Raises:
          TypeError: if `attrs` is not a dict.

      NOTE: The interface is only exposed to developers.
      """

      def _last_hook_is(hooks, expected):
          # True when `hooks` is non-empty and its last entry equals `expected`.
          if not hooks:
              return False
          newest_key = next(reversed(hooks))
          return hooks[newest_key] == expected

      if not isinstance(attrs, dict):
          raise TypeError(
              f"attrs should be type(dict), but received {type(attrs).__name__}"
          )

      # NOTE: Overwrite behavior for same key.
      self._customized_attrs.update(attrs)

      pre_hook_registered = _last_hook_is(
          self._forward_pre_hooks, record_program_ops_pre_hook
      )
      if not pre_hook_registered:
          pre_hook_helper = self.register_forward_pre_hook(
              record_program_ops_pre_hook
          )
          assert len(self._op_recorder.hooks) == 0
          self._op_recorder.hooks = [pre_hook_helper]

      # manually register post_hook to ensure it is inserted into the head.
      # NOTE(review): the check below looks at the LAST post-hook, while the
      # hook is moved to the HEAD once registered. With more than one post-hook
      # present an earlier registration may go undetected - confirm intent.
      post_hook_registered = _last_hook_is(
          self._forward_post_hooks, set_op_customized_attrs_post_hook
      )
      if not post_hook_registered:
          post_hook_helper = self.register_forward_post_hook(
              set_op_customized_attrs_post_hook
          )
          if len(self._forward_post_hooks) > 1:
              self._forward_post_hooks.move_to_end(
                  post_hook_helper._hook_id, last=False
              )

          assert len(self._op_recorder.hooks) == 1

          # hooks that need to be removed once we finish executing them.
          self._op_recorder.hooks.append(post_hook_helper)
  1345. def __getstate__(self):
  1346. return self.__dict__
  1347. def __setstate__(self, state):
  1348. self.__dict__.update(state)
  1349. def __getattr__(self, name):
  1350. if '_parameters' in self.__dict__:
  1351. _parameters = self.__dict__['_parameters']
  1352. if name in self._parameters:
  1353. if in_to_static_mode():
  1354. return _convert_into_variable(self._parameters[name])
  1355. return self._parameters[name]
  1356. if '_sub_layers' in self.__dict__:
  1357. _sub_layers = self.__dict__['_sub_layers']
  1358. if name in self._sub_layers:
  1359. return self._sub_layers[name]
  1360. if '_buffers' in self.__dict__:
  1361. _buffers = self.__dict__['_buffers']
  1362. if name in _buffers:
  1363. if in_to_static_mode():
  1364. return _convert_into_variable(_buffers[name])
  1365. return _buffers[name]
  1366. return object.__getattribute__(self, name)
  def __setattr__(self, name, value):
      """
      Route an attribute assignment into the parameter, sub-layer or buffer table.

      The first matching rule wins:

      1. ``value`` is a ``framework.Parameter``, or a ``paddle.pir.Value`` whose
         defining op is ``builtin.parameter``: stored in ``_parameters``.
      2. ``name`` is already a parameter: only ``None`` may be assigned.
      3. ``value`` is a ``Layer``: stored in ``_sub_layers``.
      4. ``name`` is already a sub-layer: only ``None`` may be assigned.
      5. ``value`` is a ``core.eager.Tensor``: stored in ``_buffers``; a name that
         was not a buffer before is recorded as non-persistable.
      6. ``name`` is already a buffer: a ``framework.Variable`` /
         ``paddle.pir.Value`` is written through ``paddle.assign``; ``None``
         clears the slot.
      7. Anything else: plain ``object.__setattr__``.

      Before storing into one table, ``name`` is removed from ``__dict__`` and
      from the other two tables.

      Raises:
          ValueError: a Parameter, Layer or Tensor is assigned before the
              corresponding table exists (``super().__init__()`` not called).
          TypeError: an existing parameter, sub-layer or buffer name receives a
              value of an unsupported type.
          RuntimeError: in to-static mode, a Variable is assigned to a buffer
              whose current value is ``None``.
          AssertionError: ``_loaddict_holder`` is non-empty but lacks the
              parameter's ``name``.
      """

      def _drop_name_from(*tables):
          # Evict ``name`` from every given mapping that currently holds it.
          for table in tables:
              if name in table:
                  del table[name]

      if isinstance(getattr(type(self), name, None), property):
          object.__setattr__(self, name, value)
      # NOTE(review): there is no return after the property branch, so the
      # dispatch below still runs for property names -- confirm this is intended.

      # ---- parameters -------------------------------------------------------
      params = self.__dict__.get('_parameters', None)
      if isinstance(value, framework.Parameter):
          if params is None:
              raise ValueError("super().__init__() should be called first")
          if len(self._loaddict_holder) > 0:
              assert (
                  value.name in self._loaddict_holder
              ), f"Parameter not found, Can't not find [ {value.name} ] in state_dict"
              value.set_value(self._loaddict_holder[value.name])
          _drop_name_from(self.__dict__, self._buffers, self._sub_layers)
          params[name] = value
          return

      if (
          isinstance(value, paddle.pir.Value)
          and value.get_defining_op().name() == 'builtin.parameter'
      ):
          if params is None:
              raise ValueError("super().__init__() should be called first")
          _drop_name_from(self.__dict__, self._buffers, self._sub_layers)
          params[name] = value
          return

      if params is not None and name in params:
          if value is not None:
              raise TypeError(
                  f"assignment to parameter '{name}' should be of type Parameter or None, but got '{type(value).__name__}'"
              )
          params[name] = None
          return

      # ---- sub-layers -------------------------------------------------------
      layers = self.__dict__.get('_sub_layers', None)
      if isinstance(value, Layer):
          if layers is None:
              raise ValueError("super().__init__() should be called first")
          _drop_name_from(self.__dict__, self._parameters, self._buffers)
          layers[name] = value
          return

      if layers is not None and name in layers:
          if value is not None:
              raise TypeError(
                  f"assignment to sublayer '{name}' should be of type Layer or None, but got '{type(value).__name__}'"
              )
          layers[name] = None
          return

      # ---- buffers ----------------------------------------------------------
      _buffers = self.__dict__.get('_buffers', None)
      if isinstance(value, core.eager.Tensor):
          if _buffers is None:
              raise ValueError("super().__init__() should be called first")
          _drop_name_from(self.__dict__, self._parameters, self._sub_layers)
          # Set persistable=False by default. Only `register_buffer` can
          # add a persistable buffer.
          if name not in self._buffers:
              self._non_persistable_buffer_names_set.add(name)
          if not value.name:
              value.name = unique_name.generate('_buffers_' + name)
          _buffers[name] = value
          return

      if _buffers is None or name not in _buffers:
          # Not a managed name at all: ordinary attribute.
          object.__setattr__(self, name, value)
          return

      # ``name`` is an existing buffer slot.
      # Note(Aurelius84): In Dy2stat, the value of the Buffer may be modified in
      # decorated function, such as `self.buffer = new_tensor`. So we update its
      # value via `assign`.
      if type(value) == framework.Variable or isinstance(
          value, paddle.pir.Value
      ):
          from paddle import assign

          # Note(zhhsplendid): the condition below happens in PaddleGan model,
          # but should all non-Variable _buffers[name] be re-assign? We
          # should consider it in the future. I current wrote this as
          # conservative code.
          if in_to_static_mode() and _buffers[name] is None:
              raise RuntimeError(
                  f'In Dy2stat, self.{name} is a buffer and self.{name} is '
                  f'not allowed to be set to Variable when self.{name} is None.'
              )
          elif (
              _buffers[name] is None
              or type(getattr(self, name)) == core.eager.Tensor
          ):
              _buffers[name] = assign(value)
          else:
              assign(value, getattr(self, name))
      elif value is not None:
          raise TypeError(
              f"assignment to buffers '{name}' should be of type core.Tensor or None, but got '{type(value).__name__}'"
          )
      else:
          # Assigning None will remove the buffer, but if re-assign a new varBase to it,
          # it will be remarked as a buffer with same `persistable` attribute.
          _buffers[name] = None
  1465. def __delattr__(self, name):
  1466. if name in self._parameters:
  1467. del self._parameters[name]
  1468. elif name in self._sub_layers:
  1469. del self._sub_layers[name]
  1470. elif name in self._buffers:
  1471. del self._buffers[name]
  1472. self._non_persistable_buffer_names_set.discard(name)
  1473. else:
  1474. object.__delattr__(self, name)
  1475. def __dir__(self):
  1476. """
  1477. Return a list. Get all parameters, buffers(non-parameter tensors), sublayers, method and attr of Layer.
  1478. Examples:
  1479. .. code-block:: python
  1480. >>> import paddle
  1481. >>> import numpy as np
  1482. >>> class Mylayer(paddle.nn.Layer):
  1483. ... def __init__(self):
  1484. ... super().__init__()
  1485. ... self.linear1 = paddle.nn.Linear(10, 10)
  1486. ... self.linear2 = paddle.nn.Linear(5, 5)
  1487. ... self.conv2d = paddle.nn.Conv2D(3, 2, 3)
  1488. ... self.embedding = paddle.nn.Embedding(128, 16)
  1489. ... self.h_0 = paddle.to_tensor(np.zeros([10, 10]).astype('float32'))
  1490. ...
  1491. >>> mylayer = Mylayer()
  1492. >>> print(dir(mylayer))
  1493. ['__call__', '__class__', '__delattr__', '__dict__', ..., 'training']
  1494. """
  1495. method = dir(self.__class__)
  1496. attrs = list(self.__dict__.keys())
  1497. parameters = list(self._parameters.keys())
  1498. sublayers = list(self._sub_layers.keys())
  1499. buffers = list(self._buffers.keys())
  1500. keys = method + attrs + parameters + sublayers + buffers
  1501. return keys
  def extra_repr(self):
      """
      Hook for subclasses to add layer-specific details to ``repr()``.

      ``__repr__`` places the returned text inside the parentheses that follow
      the class name. The base implementation contributes nothing.

      Returns:
          str: an empty string.
      """
      return ''
  1508. def __repr__(self):
  1509. extra_lines = []
  1510. extra_repr = self.extra_repr()
  1511. extra_lines = extra_repr.split('\n')
  1512. sublayer_lines = []
  1513. for name, layer in self._sub_layers.items():
  1514. sublayer_str = repr(layer)
  1515. sublayer_str = _addindent(sublayer_str, 2)
  1516. sublayer_lines.append('(' + name + '): ' + sublayer_str)
  1517. final_str = self.__class__.__name__ + '('
  1518. if extra_lines:
  1519. if len(extra_lines) > 1:
  1520. final_str += '\n ' + '\n '.join(extra_lines) + '\n'
  1521. elif len(extra_lines) == 1:
  1522. final_str += extra_lines[0]
  1523. if sublayer_lines:
  1524. final_str += '\n ' + '\n '.join(sublayer_lines) + '\n'
  1525. final_str += ')'
  1526. return final_str
  def register_state_dict_hook(self, hook):
      """
      Register ``hook`` in ``self._state_dict_hooks``.

      ``_state_dict_impl`` calls every registered hook with the assembled state
      dict when ``use_hook`` is true; a non-``None`` return value replaces the
      dict.

      Parameters:
          hook(callable): stored under the ``_hook_id`` of a newly created
              ``HookRemoveHelper``.

      Returns:
          HookRemoveHelper: the helper created for this hook (presumably used to
          unregister it later -- confirm against ``HookRemoveHelper``).
      """
      remover = HookRemoveHelper(self._state_dict_hooks)
      self._state_dict_hooks[remover._hook_id] = hook
      return remover
  1531. def _obtain_parameters_buffers(
  1532. self,
  1533. destination=None,
  1534. include_sublayers=True,
  1535. structured_name_prefix="",
  1536. ):
  1537. """
  1538. The difference from state_dict() is that state_dict_hook will not be called,
  1539. but the original types of parameters and buffers will be maintained.
  1540. """
  1541. if destination is None:
  1542. destination = collections.OrderedDict()
  1543. for name, data in self._parameters.items():
  1544. if data is not None:
  1545. destination[structured_name_prefix + name] = data
  1546. for name, buffer in self._buffers.items():
  1547. if (
  1548. buffer is not None
  1549. and name not in self._non_persistable_buffer_names_set
  1550. ):
  1551. destination[structured_name_prefix + name] = buffer
  1552. if include_sublayers:
  1553. for layer_name, layer_item in self._sub_layers.items():
  1554. if layer_item is not None:
  1555. destination_temp = destination.copy()
  1556. destination_temp.update(
  1557. layer_item._obtain_parameters_buffers(
  1558. destination_temp,
  1559. include_sublayers,
  1560. structured_name_prefix + layer_name + ".",
  1561. )
  1562. )
  1563. destination = destination_temp
  1564. return destination
  1565. def _state_dict_impl(
  1566. self,
  1567. destination=None,
  1568. include_sublayers=True,
  1569. structured_name_prefix="",
  1570. include_non_persistable_buffer=False,
  1571. use_hook=True,
  1572. keep_vars=True,
  1573. ):
  1574. """
  1575. Get all parameters and persistable buffers of current layer and its sub-layers. And set them into a dict
  1576. Parameters:
  1577. destination(dict, optional) : If provide, all the parameters and persistable buffers will be set to this dict . Default: None.
  1578. include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True.
  1579. include_non_persistable_buffer(bool, optional): If true, include non persistable buffers of current layer and its sub-layers, it is used in pure fp16 and jit.save. Default: False.
  1580. use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True.
  1581. keep_vars(bool, optional) : If false, the returned tensors in the state dict are detached from autograd. Default: True.
  1582. """
  1583. if destination is None:
  1584. destination = collections.OrderedDict()
  1585. for name, data in self._parameters.items():
  1586. if data is not None:
  1587. destination[structured_name_prefix + name] = (
  1588. data if keep_vars else data.detach()
  1589. )
  1590. for name, buffer in self._buffers.items():
  1591. if not include_non_persistable_buffer:
  1592. if (
  1593. buffer is not None
  1594. and name not in self._non_persistable_buffer_names_set
  1595. ):
  1596. destination[structured_name_prefix + name] = (
  1597. buffer if keep_vars else buffer.detach()
  1598. )
  1599. else:
  1600. if buffer is not None:
  1601. destination[structured_name_prefix + name] = (
  1602. buffer if keep_vars else buffer.detach()
  1603. )
  1604. if include_sublayers:
  1605. for layer_name, layer_item in self._sub_layers.items():
  1606. if layer_item is not None:
  1607. destination_temp = destination.copy()
  1608. destination_temp.update(
  1609. layer_item._state_dict_impl(
  1610. destination_temp,
  1611. include_sublayers,
  1612. structured_name_prefix + layer_name + ".",
  1613. include_non_persistable_buffer,
  1614. use_hook,
  1615. keep_vars,
  1616. )
  1617. )
  1618. destination = destination_temp
  1619. if use_hook:
  1620. for state_dict_hook in self._state_dict_hooks.values():
  1621. hook_result = state_dict_hook(destination)
  1622. if hook_result is not None:
  1623. destination = hook_result
  1624. return destination
  def to_static_state_dict(
      self,
      destination=None,
      include_sublayers=True,
      structured_name_prefix="",
      use_hook=True,
      keep_vars=True,
  ):
      '''
      Get all parameters and buffers of current layer and its sub-layers. And set them into a dict

      Delegates to ``_state_dict_impl`` with ``include_non_persistable_buffer=True``,
      so non-persistable buffers are part of the result as well.

      Parameters:
          destination(dict, optional) : If provide, all the parameters and persistable buffers will be set to this dict . Default: None.
          include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True.
          structured_name_prefix(str, optional) : Prefix prepended to every key. Default: "".
          use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True.
          keep_vars(bool, optional) : If false, the returned tensors in the state dict are detached from autograd. Default: True.

      Returns:
          dict, a dict contains all the parameters and persistable buffers.

      Examples:
          .. code-block:: python

              >>> import paddle

              >>> emb = paddle.nn.Embedding(10, 10)

              >>> state_dict = emb.to_static_state_dict()
              >>> paddle.save( state_dict, "paddle_dy.pdparams")

      '''
      forwarded = {
          "destination": destination,
          "include_sublayers": include_sublayers,
          "structured_name_prefix": structured_name_prefix,
          "include_non_persistable_buffer": True,
          "use_hook": use_hook,
          "keep_vars": keep_vars,
      }
      return self._state_dict_impl(**forwarded)
  def state_dict(
      self,
      destination=None,
      include_sublayers=True,
      structured_name_prefix="",
      use_hook=True,
      keep_vars=True,
  ):
      '''
      Get all parameters and persistable buffers of current layer and its sub-layers. And set them into a dict

      Delegates to ``_state_dict_impl`` with ``include_non_persistable_buffer=False``,
      so buffers listed as non-persistable are left out.

      Parameters:
          destination(dict, optional) : If provide, all the parameters and persistable buffers will be set to this dict . Default: None.
          include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True.
          structured_name_prefix(str, optional) : Prefix prepended to every key. Default: "".
          use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True.
          keep_vars(bool, optional) : If false, the returned tensors in the state dict are detached from autograd. Default: True.

      Returns:
          dict: a dict contains all the parameters and persistable buffers.

      Examples:
          .. code-block:: python

              >>> import paddle

              >>> emb = paddle.nn.Embedding(10, 10)

              >>> state_dict = emb.state_dict()
              >>> paddle.save( state_dict, "paddle_dy.pdparams")

      '''
      forwarded = {
          "destination": destination,
          "include_sublayers": include_sublayers,
          "structured_name_prefix": structured_name_prefix,
          "include_non_persistable_buffer": False,
          "use_hook": use_hook,
          "keep_vars": keep_vars,
      }
      return self._state_dict_impl(**forwarded)
  @framework.deprecate_stat_dict
  def set_state_dict(self, state_dict, use_structured_name=True):
      '''
      Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict

      Every entry of ``self._state_dict_impl(use_hook=False)`` is looked up in
      ``state_dict``. An entry that is absent, or whose length / shape differs,
      is not loaded: a warning is issued and the key is added to
      ``missing_keys``. ``paddle.Tensor`` entries that are not initialized are
      skipped without a lookup. Keys of ``state_dict`` that were not matched
      (including those rejected for a length / shape mismatch) are reported in
      ``unexpected_keys``.

      Parameters:
          state_dict(dict) : Dict contains all the parameters and persistable buffers.
          use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
                                                Default: True.

      Returns:
          missing_keys(list):A list of str containing the missing keys
          unexpected_keys(list):A list of str containing the unexpected keys

      Raises:
          ValueError: outside dygraph mode, when restoring the values into the
              global scope raises ``ValueError``.

      Examples:
          .. code-block:: python

              >>> import paddle

              >>> emb = paddle.nn.Embedding(10, 10)

              >>> state_dict = emb.state_dict()
              >>> paddle.save(state_dict, "paddle_dy.pdparams")
              >>> para_state_dict = paddle.load("paddle_dy.pdparams")
              >>> emb.set_state_dict(para_state_dict)

      '''
      missing_keys = []
      match_keys = set()

      def _check_match(key, param):
          # Return (param, state) when state_dict[key] fits param; otherwise
          # record the key as missing and raise ValueError.
          state = state_dict.get(key, None)
          if state is None:
              missing_keys.append(key)
              raise ValueError(f"{key} is not found in the provided dict.")

          if isinstance(state, (dict, list)):
              # Containers are compared by length only.
              if len(state) != len(param):
                  missing_keys.append(key)
                  raise ValueError(
                      f"{key} receives the length of {len(state)}, "
                      f"but the expected shape is {len(param)}"
                  )
              match_keys.add(key)
              return param, state

          # ``shape`` may be exposed either as a method or as an attribute.
          if inspect.ismethod(state.shape):
              state_shape = state.shape()
          else:
              state_shape = state.shape
          if list(state_shape) != list(param.shape):
              missing_keys.append(key)
              raise ValueError(
                  f"{key} receives a shape {list(state_shape)}, but the expected shape is {list(param.shape)}."
              )
          match_keys.add(key)
          return param, state

      def _set_var(var, ndarray):
          # Write ``ndarray`` into the scope tensor of ``var`` on the place that
          # tensor currently reports.
          t = global_scope().find_var(var.name).get_tensor()
          p = t._place()
          if p.is_cpu_place():
              place = core.CPUPlace()
          elif p.is_cuda_pinned_place():
              place = core.CUDAPinnedPlace()
          elif p.is_xpu_place():
              p = core.Place()
              p.set_place(t._place())
              place = core.XPUPlace(p.xpu_device_id())
          elif p.is_custom_place():
              p = core.Place()
              p.set_place(t._place())
              place = core.CustomPlace(
                  paddle.device.get_device().split(':')[0],
                  p.custom_device_id(),
              )
          else:
              p = core.Place()
              p.set_place(t._place())
              place = core.CUDAPlace(p.gpu_device_id())
          t.set(ndarray, place)

      matched_param_state = []
      for key, param in self._state_dict_impl(use_hook=False).items():
          if isinstance(param, paddle.Tensor) and not param._is_initialized():
              continue
          key_name = key if use_structured_name else param.name
          try:
              pair = _check_match(key_name, param)
          except ValueError as err:
              warnings.warn(f"Skip loading for {key}. " + str(err))
          else:
              matched_param_state.append(pair)

      unexpected_keys = [
          key for key in state_dict.keys() if key not in match_keys
      ]

      if in_dygraph_mode():
          for param, state in matched_param_state:
              param.set_value(state)
          return missing_keys, unexpected_keys

      try:
          executor = Executor(_get_device())._default_executor
          # restore parameter states
          core._create_loaded_parameter(
              [param for param, state in matched_param_state],
              global_scope(),
              executor,
          )
          for param, state in matched_param_state:
              _set_var(param, state)
      except ValueError:
          raise ValueError(
              "This error might happens in dy2static, while calling 'set_state_dict' dynamically in 'forward', which is not supported. If you only need call 'set_state_dict' once, move it to '__init__'."
          )

      return missing_keys, unexpected_keys
  def to(self, device=None, dtype=None, blocking=None):
      '''
      Cast the parameters and buffers of Layer by the give device, dtype and blocking.

      This is ``_to_impl`` with ``include_sublayers=True`` and
      ``floating_only=False``.

      Parameters:
          device(str|paddle.CPUPlace()|paddle.CUDAPlace()|paddle.CUDAPinnedPlace()|paddle.XPUPlace()|None, optional): The device of the Layer which want to be stored.
              If None, the device is the same with the original Tensor. If device is string, it can be ``cpu``, ``gpu:x`` and ``xpu:x``, where ``x`` is the
              index of the GPUs or XPUs. Default: None.

          dtype(str|numpy.dtype|paddle.dtype|None, optional): The type of the data. If None, the dtype is the same with the original Tensor. Default: None.

          blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
              asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.

      Returns:
          self

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> paddle.seed(2023)

              >>> linear=paddle.nn.Linear(2, 2)
              >>> linear.weight
              >>> print(linear.weight)
              Parameter containing:
              Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
                  [[ 0.89611185, 0.04935038],
                   [-0.58883440, 0.99266374]])

              >>> linear.to(dtype='float64')
              >>> linear.weight
              >>> print(linear.weight)
              Parameter containing:
              Tensor(shape=[2, 2], dtype=float64, place=Place(gpu:0), stop_gradient=False,
                  [[ 0.89611185, 0.04935038],
                   [-0.58883440, 0.99266374]])

              >>> linear.to(device='cpu')
              >>> linear.weight
              >>> print(linear.weight)
              Parameter containing:
              Tensor(shape=[2, 2], dtype=float64, place=Place(cpu), stop_gradient=False,
                  [[ 0.89611185, 0.04935038],
                   [-0.58883440, 0.99266374]])

              >>> # doctest: +REQUIRES(env:GPU)
              >>> linear.to(device=paddle.CUDAPinnedPlace(), blocking=False)
              >>> linear.weight
              >>> print(linear.weight)
              Tensor(shape=[2, 2], dtype=float64, place=Place(gpu_pinned), stop_gradient=False,
                  [[ 0.89611185, 0.04935038],
                   [-0.58883440, 0.99266374]])

      '''
      forwarded = {
          "device": device,
          "dtype": dtype,
          "blocking": blocking,
          "include_sublayers": True,
          "floating_only": False,
      }
      return self._to_impl(**forwarded)
  1847. def _apply(self, func, device, dtype, blocking, include_sublayers=True):
  1848. if include_sublayers:
  1849. for layer in self.children():
  1850. layer._apply(func, device, dtype, blocking, include_sublayers)
  1851. for key, param in self._parameters.items():
  1852. if param is not None:
  1853. with no_grad():
  1854. param_applied = func(param, device, dtype, blocking)
  1855. if param.grad is not None:
  1856. with no_grad():
  1857. grad_applied = func(
  1858. param._grad_ivar(), device, dtype, blocking
  1859. )
  1860. for key, buf in self._buffers.items():
  1861. if buf is not None:
  1862. self._buffers[key] = func(buf, device, dtype, blocking)
  1863. self._dtype = dtype
  def _transform(self, t, device, dtype, blocking):
      """
      Cast / move tensor ``t`` and make ``t`` share the resulting data.

      ``device`` and ``dtype`` default to ``t.place`` and ``t.dtype``. A dtype
      that is neither ``VarDesc.VarType`` nor ``core.DataType`` is converted
      with ``convert_np_dtype_to_dtype_``.

      Steps:
          1. If ``t`` is on GPU and the available GPU memory is below the
             estimated requirement, copy ``t`` to CPU and clear its GPU storage.
          2. Cast to ``dtype`` when it differs from the current dtype.
          3. Copy to ``device`` when the place differs.
          4. Share the resulting storage back into ``t``.

      Returns:
          The same object ``t``, now backed by the transformed data.
      """
      if device is None:
          device = t.place
      if dtype is None:
          dtype = t.dtype
      if not isinstance(dtype, (VarDesc.VarType, core.DataType)):
          dtype = convert_np_dtype_to_dtype_(dtype)

      # 1. gpu place need to determine whether the memory is sufficient for allocation:
      staged = t
      if t.place.is_gpu_place():
          # for gpu, minimum memory allocation unit is 256 bytes.
          if isinstance(dtype, core.DataType):
              proto_dtype = paddle_type_to_proto_type[dtype]
          else:
              proto_dtype = dtype
          size_dtype = core.size_of_dtype(proto_dtype)
          # Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will compute 't' occupied memory space.
          # Coefficient 1.2 is used to avoid OOM that may occur in this critical state when the memory is just enough.
          waiting_alloc_memory = (
              ((np.prod(t.shape) * size_dtype) / 256 + 1) * 256 * 1.2
          )
          gpu_memory_available = core.gpu_memory_available()
          if gpu_memory_available < waiting_alloc_memory:
              # Copy param / Tensor to cpu
              staged = t._copy_to(
                  paddle.CPUPlace(), blocking
              )  # k-v type will error
              # Release mem of t
              t.value().get_tensor()._clear()

      # 2. cast param / Tensor to dtype
      if dtype is not None and dtype != staged.dtype:
          with paddle.base.framework._dygraph_place_guard(place=staged.place):
              casted = staged.cast(dtype=dtype)
      else:
          casted = staged

      # 3. Copy casted cpu param / Tensor to device
      if device is not None and not casted.place._equals(device):
          moved = casted._copy_to(device, blocking)
      else:
          moved = casted

      # 4. share Tensor to origin param / Tensor
      dst_tensor = t.value().get_tensor()
      src_tensor = moved.value().get_tensor()
      if t._is_initialized():
          dst_tensor._share_data_with(src_tensor)
      else:
          # If the tensor is not initialized, we can't check the memory size.
          dst_tensor._share_data_nocheck_with(src_tensor)

      return t
  1917. def _to_impl(
  1918. self,
  1919. device=None,
  1920. dtype=None,
  1921. blocking=None,
  1922. include_sublayers=True,
  1923. floating_only=False,
  1924. ):
  1925. '''
  1926. Cast the parameters and buffers of Layer by the give device, dtype and blocking.
  1927. Parameters:
  1928. device(str|paddle.CPUPlace()|paddle.CUDAPlace()|paddle.CUDAPinnedPlace()|paddle.XPUPlace()|None, optional): The device of the Layer which want to be stored.
  1929. If None, the device is the same with the original Tensor. If device is string, it can be ``cpu``, ``gpu:x`` and ``xpu:x``, where ``x`` is the
  1930. index of the GPUs or XPUs. Default: None.
  1931. dtype(str|numpy.dtype|paddle.dtype|None, optional): The type of the data. If None, the dtype is the same with the original Tensor. Default: None.
  1932. blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
  1933. asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.
  1934. include_sublayers(bool|True, optional): If True, deal with self and all sublayers parameters and buffers, if not only deal with self parameters and buffers. Default: True.
  1935. floating_only(bool|False, optional): If True, only cast all floating point parameters and buffers of Layer by the give device, dtype and blocking.
  1936. Returns:
  1937. self
  1938. '''
  1939. if device is None and dtype is None and blocking is None:
  1940. return self
  1941. if device is not None:
  1942. if isinstance(device, str):
  1943. device = paddle.device._convert_to_place(device)
  1944. elif isinstance(
  1945. device,
  1946. (
  1947. core.CPUPlace,
  1948. core.CUDAPlace,
  1949. core.CUDAPinnedPlace,
  1950. core.XPUPlace,
  1951. ),
  1952. ):
  1953. pass
  1954. else:
  1955. raise ValueError(
  1956. "device value error, must be str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace() or paddle.XPUPlace(), but the type of device is "
  1957. + type(device).__name__
  1958. )
  1959. if blocking is None:
  1960. blocking = True
  1961. else:
  1962. assert isinstance(
  1963. blocking, bool
  1964. ), "blocking value error, must be the True, False or None"
  1965. def transform(t, device, dtype, blocking):
  1966. if floating_only and (not paddle.is_floating_point(t)):
  1967. return t
  1968. return self._transform(t, device, dtype, blocking)
  1969. with warnings.catch_warnings():
  1970. warnings.filterwarnings("ignore", category=UserWarning)
  1971. self._apply(transform, device, dtype, blocking, include_sublayers)
  1972. self._dtype = dtype
  1973. return self
  def _startup_program(self):
      """
      Return startup program containing initialization operations of all parameters.

      A new ``Program`` is created and every parameter from ``self.parameters()``
      adds its init op to that program's global block via ``_create_init_op``.

      NOTE(dev): This is a very low level API and only for inner developer.
      """
      init_program = Program()
      for parameter in self.parameters():
          parameter._create_init_op(init_program.global_block())
      return init_program
  # [aliases] Old method names kept for compatibility; both are bound to the
  # same function object as ``set_state_dict``.
  set_dict = load_dict = set_state_dict
  def float(self, excluded_layers=None):
      '''
      Casts all floating point parameters and buffers to ``float`` data type.

      Parameters:
          excluded_layers(nn.Layer|list|tuple|None, optional): Specify the layers that need to be kept original data type. if excluded_layers is None, casts all floating point parameters and buffers. Default: None.

      Returns:
          Layer: self

      Raises:
          TypeError: ``excluded_layers`` is not None, a class, a list or a tuple.

      Examples:
          .. code-block:: python

              >>> import paddle

              >>> class Model(paddle.nn.Layer):
              ...     def __init__(self):
              ...         super().__init__()
              ...         self.linear = paddle.nn.Linear(1, 1)
              ...         self.dropout = paddle.nn.Dropout(p=0.5)
              ...
              ...     def forward(self, input):
              ...         out = self.linear(input)
              ...         out = self.dropout(out)
              ...         return out
              ...
              >>> model = Model()
              >>> model.float()
              Model(
                  (linear): Linear(in_features=1, out_features=1, dtype=paddle.float32)
                  (dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train)
              )
      '''
      # Normalize ``excluded_layers`` to a list: None -> [], a single class -> [cls].
      excluded_layers = [] if excluded_layers is None else excluded_layers
      if isinstance(excluded_layers, type):
          excluded_layers = [excluded_layers]
      elif isinstance(excluded_layers, (list, tuple)):
          excluded_layers = list(excluded_layers)
      else:
          # Exceptions do not interpolate "%s" arguments the way logging does,
          # so the message has to be formatted before it is raised.
          raise TypeError(
              f"excluded_layers should be type nn.Layer or list, but got {type(excluded_layers).__name__}."
          )

      def layer_trans(layer):
          _layer_trans_dtype(layer, paddle.float32, excluded_layers)

      return self.apply(layer_trans)
  def float16(self, excluded_layers=None):
      '''
      Casts all floating point parameters and buffers to ``float16`` data type.


      .. note::
          ``nn.BatchNorm`` does not support ``float16`` weights, so it would not be converted by default.


      Parameters:
          excluded_layers(nn.Layer|list|tuple|None, optional): Specify the layers that need to be kept original data type. if excluded_layers is None, casts all floating point parameters and buffers except ``nn.BatchNorm``. Default: None.

      Returns:
          Layer: self

      Raises:
          TypeError: ``excluded_layers`` is not None, a class, a list or a tuple.

      Examples:
          .. code-block:: python

              >>> # doctest: +SKIP('Paddle compiled by the user does not support float16, so keep original data type.')
              >>> import paddle

              >>> class Model(paddle.nn.Layer):
              ...     def __init__(self):
              ...         super().__init__()
              ...         self.linear = paddle.nn.Linear(1, 1)
              ...         self.dropout = paddle.nn.Dropout(p=0.5)
              ...
              ...     def forward(self, input):
              ...         out = self.linear(input)
              ...         out = self.dropout(out)
              ...         return out
              ...
              >>> model = Model()
              >>> model.float16()
              Model(
                  (linear): Linear(in_features=1, out_features=1, dtype=float32)
                  (dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train)
              )
      '''
      # Without float16 support nothing is converted; warn and return unchanged.
      if paddle.amp.is_float16_supported() is False:
          warnings.warn(
              "Paddle compiled by the user does not support float16, so keep original data type."
          )
          return self

      # Normalize ``excluded_layers`` to a list; the default excludes nn.BatchNorm.
      excluded_layers = (
          [nn.BatchNorm] if excluded_layers is None else excluded_layers
      )
      if isinstance(excluded_layers, type):
          excluded_layers = [excluded_layers]
      elif isinstance(excluded_layers, (list, tuple)):
          excluded_layers = list(excluded_layers)
      else:
          # Exceptions do not interpolate "%s" arguments the way logging does,
          # so the message has to be formatted before it is raised.
          raise TypeError(
              f"excluded_layers should be type nn.Layer or list, but got {type(excluded_layers).__name__}."
          )

      def layer_trans(layer):
          _layer_trans_dtype(layer, paddle.float16, excluded_layers)

      return self.apply(layer_trans)
  def bfloat16(self, excluded_layers=None):
      '''
      Casts all floating point parameters and buffers to ``bfloat16`` data type.


      .. note::
          ``nn.BatchNorm`` does not support ``bfloat16`` weights, so it would not be converted by default.


      Parameters:
          excluded_layers(nn.Layer|list|tuple|None, optional): Specify the layers that need to be kept original data type. if excluded_layers is None, casts all floating point parameters and buffers except ``nn.BatchNorm``. Default: None.

      Returns:
          Layer: self

      Raises:
          TypeError: ``excluded_layers`` is not None, a class, a list or a tuple.

      Examples:
          .. code-block:: python

              >>> # doctest: +SKIP('bfloat need V100 compile')
              >>> import paddle

              >>> class Model(paddle.nn.Layer):
              ...     def __init__(self):
              ...         super().__init__()
              ...         self.linear = paddle.nn.Linear(1, 1)
              ...         self.dropout = paddle.nn.Dropout(p=0.5)
              ...
              ...     def forward(self, input):
              ...         out = self.linear(input)
              ...         out = self.dropout(out)
              ...         return out
              ...
              >>> model = Model()
              >>> model.bfloat16()
              >>> #UserWarning: Paddle compiled by the user does not support bfloat16, so keep original data type.
              Model(
                  (linear): Linear(in_features=1, out_features=1, dtype=float32)
                  (dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train)
              )
      '''
      # Without bfloat16 support nothing is converted; warn and return unchanged.
      if paddle.amp.is_bfloat16_supported() is False:
          warnings.warn(
              "Paddle compiled by the user does not support bfloat16, so keep original data type."
          )
          return self

      # Normalize ``excluded_layers`` to a list; the default excludes nn.BatchNorm.
      excluded_layers = (
          [nn.BatchNorm] if excluded_layers is None else excluded_layers
      )
      if isinstance(excluded_layers, type):
          excluded_layers = [excluded_layers]
      elif isinstance(excluded_layers, (list, tuple)):
          excluded_layers = list(excluded_layers)
      else:
          # Exceptions do not interpolate "%s" arguments the way logging does,
          # so the message has to be formatted before it is raised.
          raise TypeError(
              f"excluded_layers should be type nn.Layer or list, but got {type(excluded_layers).__name__}."
          )

      def layer_trans(layer):
          _layer_trans_dtype(layer, paddle.bfloat16, excluded_layers)

      return self.apply(layer_trans)