optimizer.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. import sys
  2. import math
  3. from paddle.optimizer.lr import LinearWarmup
  4. from paddle.optimizer.lr import PiecewiseDecay
  5. from paddle.optimizer.lr import CosineAnnealingDecay
  6. from paddle.optimizer.lr import ExponentialDecay
  7. import paddle
  8. import paddle.regularizer as regularizer
  9. from copy import deepcopy
  10. class Cosine(CosineAnnealingDecay):
  11. """
  12. Cosine learning rate decay
  13. lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)
  14. Args:
  15. lr(float): initial learning rate
  16. step_each_epoch(int): steps each epoch
  17. epochs(int): total training epochs
  18. """
  19. def __init__(self, lr, step_each_epoch, epochs, **kwargs):
  20. super(Cosine, self).__init__(
  21. learning_rate=lr,
  22. T_max=step_each_epoch * epochs,
  23. )
  24. self.update_specified = False
  25. class Piecewise(PiecewiseDecay):
  26. """
  27. Piecewise learning rate decay
  28. Args:
  29. lr(float): initial learning rate
  30. step_each_epoch(int): steps each epoch
  31. decay_epochs(list): piecewise decay epochs
  32. gamma(float): decay factor
  33. """
  34. def __init__(self, lr, step_each_epoch, decay_epochs, gamma=0.1, **kwargs):
  35. boundaries = [step_each_epoch * e for e in decay_epochs]
  36. lr_values = [lr * (gamma**i) for i in range(len(boundaries) + 1)]
  37. super(Piecewise, self).__init__(boundaries=boundaries, values=lr_values)
  38. self.update_specified = False
  39. class CosineWarmup(LinearWarmup):
  40. """
  41. Cosine learning rate decay with warmup
  42. [0, warmup_epoch): linear warmup
  43. [warmup_epoch, epochs): cosine decay
  44. Args:
  45. lr(float): initial learning rate
  46. step_each_epoch(int): steps each epoch
  47. epochs(int): total training epochs
  48. warmup_epoch(int): epoch num of warmup
  49. """
  50. def __init__(self, lr, step_each_epoch, epochs, warmup_epoch=5, **kwargs):
  51. assert (
  52. epochs > warmup_epoch
  53. ), "total epoch({}) should be larger than warmup_epoch({}) in CosineWarmup.".format(
  54. epochs, warmup_epoch
  55. )
  56. warmup_step = warmup_epoch * step_each_epoch
  57. start_lr = 0.0
  58. end_lr = lr
  59. lr_sch = Cosine(lr, step_each_epoch, epochs - warmup_epoch)
  60. super(CosineWarmup, self).__init__(
  61. learning_rate=lr_sch,
  62. warmup_steps=warmup_step,
  63. start_lr=start_lr,
  64. end_lr=end_lr,
  65. )
  66. self.update_specified = False
  67. class ExponentialWarmup(LinearWarmup):
  68. """
  69. Exponential learning rate decay with warmup
  70. [0, warmup_epoch): linear warmup
  71. [warmup_epoch, epochs): Exponential decay
  72. Args:
  73. lr(float): initial learning rate
  74. step_each_epoch(int): steps each epoch
  75. decay_epochs(float): decay epochs
  76. decay_rate(float): decay rate
  77. warmup_epoch(int): epoch num of warmup
  78. """
  79. def __init__(
  80. self,
  81. lr,
  82. step_each_epoch,
  83. decay_epochs=2.4,
  84. decay_rate=0.97,
  85. warmup_epoch=5,
  86. **kwargs,
  87. ):
  88. warmup_step = warmup_epoch * step_each_epoch
  89. start_lr = 0.0
  90. end_lr = lr
  91. lr_sch = ExponentialDecay(lr, decay_rate)
  92. super(ExponentialWarmup, self).__init__(
  93. learning_rate=lr_sch,
  94. warmup_steps=warmup_step,
  95. start_lr=start_lr,
  96. end_lr=end_lr,
  97. )
  98. # NOTE: hac method to update exponential lr scheduler
  99. self.update_specified = True
  100. self.update_start_step = warmup_step
  101. self.update_step_interval = int(decay_epochs * step_each_epoch)
  102. self.step_each_epoch = step_each_epoch
  103. class LearningRateBuilder:
  104. """
  105. Build learning rate variable
  106. https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/layers_cn.html
  107. Args:
  108. function(str): class name of learning rate
  109. params(dict): parameters used for init the class
  110. """
  111. def __init__(
  112. self, function="Linear", params={"lr": 0.1, "steps": 100, "end_lr": 0.0}
  113. ):
  114. self.function = function
  115. self.params = params
  116. def __call__(self):
  117. mod = sys.modules[__name__]
  118. lr = getattr(mod, self.function)(**self.params)
  119. return lr
  120. class L1Decay(object):
  121. """
  122. L1 Weight Decay Regularization, which encourages the weights to be sparse.
  123. Args:
  124. factor(float): regularization coeff. Default:0.0.
  125. """
  126. def __init__(self, factor=0.0):
  127. super(L1Decay, self).__init__()
  128. self.factor = factor
  129. def __call__(self):
  130. reg = regularizer.L1Decay(self.factor)
  131. return reg
  132. class L2Decay(object):
  133. """
  134. L2 Weight Decay Regularization, which encourages the weights to be sparse.
  135. Args:
  136. factor(float): regularization coeff. Default:0.0.
  137. """
  138. def __init__(self, factor=0.0):
  139. super(L2Decay, self).__init__()
  140. self.factor = factor
  141. def __call__(self):
  142. reg = regularizer.L2Decay(self.factor)
  143. return reg
  144. class Momentum(object):
  145. """
  146. Simple Momentum optimizer with velocity state.
  147. Args:
  148. learning_rate (float|Variable) - The learning rate used to update parameters.
  149. Can be a float value or a Variable with one float value as data element.
  150. momentum (float) - Momentum factor.
  151. regularization (WeightDecayRegularizer, optional) - The strategy of regularization.
  152. """
  153. def __init__(
  154. self, learning_rate, momentum, parameter_list=None, regularization=None, **args
  155. ):
  156. super(Momentum, self).__init__()
  157. self.learning_rate = learning_rate
  158. self.momentum = momentum
  159. self.parameter_list = parameter_list
  160. self.regularization = regularization
  161. def __call__(self):
  162. opt = paddle.optimizer.Momentum(
  163. learning_rate=self.learning_rate,
  164. momentum=self.momentum,
  165. parameters=self.parameter_list,
  166. weight_decay=self.regularization,
  167. )
  168. return opt
  169. class RMSProp(object):
  170. """
  171. Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method.
  172. Args:
  173. learning_rate (float|Variable) - The learning rate used to update parameters.
  174. Can be a float value or a Variable with one float value as data element.
  175. momentum (float) - Momentum factor.
  176. rho (float) - rho value in equation.
  177. epsilon (float) - avoid division by zero, default is 1e-6.
  178. regularization (WeightDecayRegularizer, optional) - The strategy of regularization.
  179. """
  180. def __init__(
  181. self,
  182. learning_rate,
  183. momentum,
  184. rho=0.95,
  185. epsilon=1e-6,
  186. parameter_list=None,
  187. regularization=None,
  188. **args,
  189. ):
  190. super(RMSProp, self).__init__()
  191. self.learning_rate = learning_rate
  192. self.momentum = momentum
  193. self.rho = rho
  194. self.epsilon = epsilon
  195. self.parameter_list = parameter_list
  196. self.regularization = regularization
  197. def __call__(self):
  198. opt = paddle.optimizer.RMSProp(
  199. learning_rate=self.learning_rate,
  200. momentum=self.momentum,
  201. rho=self.rho,
  202. epsilon=self.epsilon,
  203. parameters=self.parameter_list,
  204. weight_decay=self.regularization,
  205. )
  206. return opt
  207. class OptimizerBuilder(object):
  208. """
  209. Build optimizer
  210. Args:
  211. function(str): optimizer name of learning rate
  212. params(dict): parameters used for init the class
  213. regularizer (dict): parameters used for create regularization
  214. """
  215. def __init__(self, function="Momentum", params={"momentum": 0.9}, regularizer=None):
  216. self.function = function
  217. self.params = params
  218. # create regularizer
  219. if regularizer is not None:
  220. mod = sys.modules[__name__]
  221. reg_func = regularizer["function"] + "Decay"
  222. del regularizer["function"]
  223. reg = getattr(mod, reg_func)(**regularizer)()
  224. self.params["regularization"] = reg
  225. def __call__(self, learning_rate, parameter_list=None):
  226. mod = sys.modules[__name__]
  227. opt = getattr(mod, self.function)
  228. return opt(
  229. learning_rate=learning_rate, parameter_list=parameter_list, **self.params
  230. )()
  231. def create_optimizer(config, parameter_list=None):
  232. """
  233. Create an optimizer using config, usually including
  234. learning rate and regularization.
  235. Args:
  236. config(dict): such as
  237. {
  238. 'LEARNING_RATE':
  239. {'function': 'Cosine',
  240. 'params': {'lr': 0.1}
  241. },
  242. 'OPTIMIZER':
  243. {'function': 'Momentum',
  244. 'params':{'momentum': 0.9},
  245. 'regularizer':
  246. {'function': 'L2', 'factor': 0.0001}
  247. }
  248. }
  249. Returns:
  250. an optimizer instance
  251. """
  252. # create learning_rate instance
  253. lr_config = config["LEARNING_RATE"]
  254. lr_config["params"].update(
  255. {
  256. "epochs": config["epoch"],
  257. "step_each_epoch": config["total_images"] // config["TRAIN"]["batch_size"],
  258. }
  259. )
  260. lr = LearningRateBuilder(**lr_config)()
  261. # create optimizer instance
  262. opt_config = deepcopy(config["OPTIMIZER"])
  263. opt = OptimizerBuilder(**opt_config)
  264. return opt(lr, parameter_list), lr
  265. def create_multi_optimizer(config, parameter_list=None):
  266. """ """
  267. # create learning_rate instance
  268. lr_config = config["LEARNING_RATE"]
  269. lr_config["params"].update(
  270. {
  271. "epochs": config["epoch"],
  272. "step_each_epoch": config["total_images"] // config["TRAIN"]["batch_size"],
  273. }
  274. )
  275. lr = LearningRateBuilder(**lr_config)()
  276. # create optimizer instance
  277. opt_config = deepcopy.copy(config["OPTIMIZER"])
  278. opt = OptimizerBuilder(**opt_config)
  279. return opt(lr, parameter_list), lr