learning_rate.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. from __future__ import unicode_literals
  18. from paddle.optimizer import lr
  19. from .lr_scheduler import CyclicalCosineDecay, OneCycleDecay, TwoStepCosineDecay
  20. class Linear(object):
  21. """
  22. Linear learning rate decay
  23. Args:
  24. lr (float): The initial learning rate. It is a python float number.
  25. epochs(int): The decay step size. It determines the decay cycle.
  26. end_lr(float, optional): The minimum final learning rate. Default: 0.0001.
  27. power(float, optional): Power of polynomial. Default: 1.0.
  28. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
  29. """
  30. def __init__(
  31. self,
  32. learning_rate,
  33. epochs,
  34. step_each_epoch,
  35. end_lr=0.0,
  36. power=1.0,
  37. warmup_epoch=0,
  38. last_epoch=-1,
  39. **kwargs,
  40. ):
  41. super(Linear, self).__init__()
  42. self.learning_rate = learning_rate
  43. self.epochs = epochs * step_each_epoch
  44. self.end_lr = end_lr
  45. self.power = power
  46. self.last_epoch = last_epoch
  47. self.warmup_epoch = round(warmup_epoch * step_each_epoch)
  48. def __call__(self):
  49. learning_rate = lr.PolynomialDecay(
  50. learning_rate=self.learning_rate,
  51. decay_steps=self.epochs,
  52. end_lr=self.end_lr,
  53. power=self.power,
  54. last_epoch=self.last_epoch,
  55. )
  56. if self.warmup_epoch > 0:
  57. learning_rate = lr.LinearWarmup(
  58. learning_rate=learning_rate,
  59. warmup_steps=self.warmup_epoch,
  60. start_lr=0.0,
  61. end_lr=self.learning_rate,
  62. last_epoch=self.last_epoch,
  63. )
  64. return learning_rate
  65. class Cosine(object):
  66. """
  67. Cosine learning rate decay
  68. lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)
  69. Args:
  70. lr(float): initial learning rate
  71. step_each_epoch(int): steps each epoch
  72. epochs(int): total training epochs
  73. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
  74. """
  75. def __init__(
  76. self,
  77. learning_rate,
  78. step_each_epoch,
  79. epochs,
  80. warmup_epoch=0,
  81. last_epoch=-1,
  82. **kwargs,
  83. ):
  84. super(Cosine, self).__init__()
  85. self.learning_rate = learning_rate
  86. self.T_max = step_each_epoch * epochs
  87. self.last_epoch = last_epoch
  88. self.warmup_epoch = round(warmup_epoch * step_each_epoch)
  89. def __call__(self):
  90. learning_rate = lr.CosineAnnealingDecay(
  91. learning_rate=self.learning_rate,
  92. T_max=self.T_max,
  93. last_epoch=self.last_epoch,
  94. )
  95. if self.warmup_epoch > 0:
  96. learning_rate = lr.LinearWarmup(
  97. learning_rate=learning_rate,
  98. warmup_steps=self.warmup_epoch,
  99. start_lr=0.0,
  100. end_lr=self.learning_rate,
  101. last_epoch=self.last_epoch,
  102. )
  103. return learning_rate
  104. class LinearWarmupCosine(object):
  105. """
  106. LinearWarmupCosine learning rate decay
  107. Args:
  108. learning_rate(float): initial learning rate
  109. step_each_epoch(int): steps each epoch
  110. epochs(int): total training epochs
  111. start_lr (float): Initial learning rate of warm up.
  112. min_lr (float): Minimum learning rate in CosineAnnealingDecay.
  113. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
  114. """
  115. def __init__(
  116. self,
  117. learning_rate,
  118. step_each_epoch,
  119. epochs,
  120. warmup_steps=5000,
  121. start_lr=1e-5,
  122. min_lr=1e-8,
  123. last_epoch=-1,
  124. **kwargs,
  125. ):
  126. super(LinearWarmupCosine, self).__init__()
  127. self.learning_rate = float(learning_rate)
  128. self.T_max = step_each_epoch * epochs
  129. self.last_epoch = last_epoch
  130. self.warmup_steps = warmup_steps
  131. self.start_lr = float(start_lr)
  132. self.min_lr = float(min_lr)
  133. def __call__(self):
  134. learning_rate = lr.CosineAnnealingDecay(
  135. learning_rate=self.learning_rate,
  136. T_max=self.T_max,
  137. eta_min=self.min_lr,
  138. last_epoch=self.last_epoch,
  139. )
  140. if self.warmup_steps > 0:
  141. learning_rate = lr.LinearWarmup(
  142. learning_rate=learning_rate,
  143. warmup_steps=self.warmup_steps,
  144. start_lr=self.start_lr,
  145. end_lr=self.learning_rate,
  146. last_epoch=self.last_epoch,
  147. )
  148. return learning_rate
  149. class Step(object):
  150. """
  151. Piecewise learning rate decay
  152. Args:
  153. step_each_epoch(int): steps each epoch
  154. learning_rate (float): The initial learning rate. It is a python float number.
  155. step_size (int): the interval to update.
  156. gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` .
  157. It should be less than 1.0. Default: 0.1.
  158. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
  159. """
  160. def __init__(
  161. self,
  162. learning_rate,
  163. step_size,
  164. step_each_epoch,
  165. gamma,
  166. warmup_epoch=0,
  167. last_epoch=-1,
  168. **kwargs,
  169. ):
  170. super(Step, self).__init__()
  171. self.step_size = step_each_epoch * step_size
  172. self.learning_rate = learning_rate
  173. self.gamma = gamma
  174. self.last_epoch = last_epoch
  175. self.warmup_epoch = round(warmup_epoch * step_each_epoch)
  176. def __call__(self):
  177. learning_rate = lr.StepDecay(
  178. learning_rate=self.learning_rate,
  179. step_size=self.step_size,
  180. gamma=self.gamma,
  181. last_epoch=self.last_epoch,
  182. )
  183. if self.warmup_epoch > 0:
  184. learning_rate = lr.LinearWarmup(
  185. learning_rate=learning_rate,
  186. warmup_steps=self.warmup_epoch,
  187. start_lr=0.0,
  188. end_lr=self.learning_rate,
  189. last_epoch=self.last_epoch,
  190. )
  191. return learning_rate
  192. class Piecewise(object):
  193. """
  194. Piecewise learning rate decay
  195. Args:
  196. boundaries(list): A list of steps numbers. The type of element in the list is python int.
  197. values(list): A list of learning rate values that will be picked during different epoch boundaries.
  198. The type of element in the list is python float.
  199. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
  200. """
  201. def __init__(
  202. self,
  203. step_each_epoch,
  204. decay_epochs,
  205. values,
  206. warmup_epoch=0,
  207. last_epoch=-1,
  208. **kwargs,
  209. ):
  210. super(Piecewise, self).__init__()
  211. self.boundaries = [step_each_epoch * e for e in decay_epochs]
  212. self.values = values
  213. self.last_epoch = last_epoch
  214. self.warmup_epoch = round(warmup_epoch * step_each_epoch)
  215. def __call__(self):
  216. learning_rate = lr.PiecewiseDecay(
  217. boundaries=self.boundaries, values=self.values, last_epoch=self.last_epoch
  218. )
  219. if self.warmup_epoch > 0:
  220. learning_rate = lr.LinearWarmup(
  221. learning_rate=learning_rate,
  222. warmup_steps=self.warmup_epoch,
  223. start_lr=0.0,
  224. end_lr=self.values[0],
  225. last_epoch=self.last_epoch,
  226. )
  227. return learning_rate
  228. class CyclicalCosine(object):
  229. """
  230. Cyclical cosine learning rate decay
  231. Args:
  232. learning_rate(float): initial learning rate
  233. step_each_epoch(int): steps each epoch
  234. epochs(int): total training epochs
  235. cycle(int): period of the cosine learning rate
  236. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
  237. """
  238. def __init__(
  239. self,
  240. learning_rate,
  241. step_each_epoch,
  242. epochs,
  243. cycle,
  244. warmup_epoch=0,
  245. last_epoch=-1,
  246. **kwargs,
  247. ):
  248. super(CyclicalCosine, self).__init__()
  249. self.learning_rate = learning_rate
  250. self.T_max = step_each_epoch * epochs
  251. self.last_epoch = last_epoch
  252. self.warmup_epoch = round(warmup_epoch * step_each_epoch)
  253. self.cycle = round(cycle * step_each_epoch)
  254. def __call__(self):
  255. learning_rate = CyclicalCosineDecay(
  256. learning_rate=self.learning_rate,
  257. T_max=self.T_max,
  258. cycle=self.cycle,
  259. last_epoch=self.last_epoch,
  260. )
  261. if self.warmup_epoch > 0:
  262. learning_rate = lr.LinearWarmup(
  263. learning_rate=learning_rate,
  264. warmup_steps=self.warmup_epoch,
  265. start_lr=0.0,
  266. end_lr=self.learning_rate,
  267. last_epoch=self.last_epoch,
  268. )
  269. return learning_rate
  270. class OneCycle(object):
  271. """
  272. One Cycle learning rate decay
  273. Args:
  274. max_lr(float): Upper learning rate boundaries
  275. epochs(int): total training epochs
  276. step_each_epoch(int): steps each epoch
  277. anneal_strategy(str): {‘cos’, ‘linear’} Specifies the annealing strategy: “cos” for cosine annealing, “linear” for linear annealing.
  278. Default: ‘cos’
  279. three_phase(bool): If True, use a third phase of the schedule to annihilate the learning rate according to ‘final_div_factor’
  280. instead of modifying the second phase (the first two phases will be symmetrical about the step indicated by ‘pct_start’).
  281. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
  282. """
  283. def __init__(
  284. self,
  285. max_lr,
  286. epochs,
  287. step_each_epoch,
  288. anneal_strategy="cos",
  289. three_phase=False,
  290. warmup_epoch=0,
  291. last_epoch=-1,
  292. **kwargs,
  293. ):
  294. super(OneCycle, self).__init__()
  295. self.max_lr = max_lr
  296. self.epochs = epochs
  297. self.steps_per_epoch = step_each_epoch
  298. self.anneal_strategy = anneal_strategy
  299. self.three_phase = three_phase
  300. self.last_epoch = last_epoch
  301. self.warmup_epoch = round(warmup_epoch * step_each_epoch)
  302. def __call__(self):
  303. learning_rate = OneCycleDecay(
  304. max_lr=self.max_lr,
  305. epochs=self.epochs,
  306. steps_per_epoch=self.steps_per_epoch,
  307. anneal_strategy=self.anneal_strategy,
  308. three_phase=self.three_phase,
  309. last_epoch=self.last_epoch,
  310. )
  311. if self.warmup_epoch > 0:
  312. learning_rate = lr.LinearWarmup(
  313. learning_rate=learning_rate,
  314. warmup_steps=self.warmup_epoch,
  315. start_lr=0.0,
  316. end_lr=self.max_lr,
  317. last_epoch=self.last_epoch,
  318. )
  319. return learning_rate
  320. class Const(object):
  321. """
  322. Const learning rate decay
  323. Args:
  324. learning_rate(float): initial learning rate
  325. step_each_epoch(int): steps each epoch
  326. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
  327. """
  328. def __init__(
  329. self, learning_rate, step_each_epoch, warmup_epoch=0, last_epoch=-1, **kwargs
  330. ):
  331. super(Const, self).__init__()
  332. self.learning_rate = learning_rate
  333. self.last_epoch = last_epoch
  334. self.warmup_epoch = round(warmup_epoch * step_each_epoch)
  335. def __call__(self):
  336. learning_rate = self.learning_rate
  337. if self.warmup_epoch > 0:
  338. learning_rate = lr.LinearWarmup(
  339. learning_rate=learning_rate,
  340. warmup_steps=self.warmup_epoch,
  341. start_lr=0.0,
  342. end_lr=self.learning_rate,
  343. last_epoch=self.last_epoch,
  344. )
  345. return learning_rate
  346. class DecayLearningRate(object):
  347. """
  348. DecayLearningRate learning rate decay
  349. new_lr = (lr - end_lr) * (1 - epoch/decay_steps)**power + end_lr
  350. Args:
  351. learning_rate(float): initial learning rate
  352. step_each_epoch(int): steps each epoch
  353. epochs(int): total training epochs
  354. factor(float): Power of polynomial, should greater than 0.0 to get learning rate decay. Default: 0.9
  355. end_lr(float): The minimum final learning rate. Default: 0.0.
  356. """
  357. def __init__(
  358. self, learning_rate, step_each_epoch, epochs, factor=0.9, end_lr=0, **kwargs
  359. ):
  360. super(DecayLearningRate, self).__init__()
  361. self.learning_rate = learning_rate
  362. self.epochs = epochs + 1
  363. self.factor = factor
  364. self.end_lr = 0
  365. self.decay_steps = step_each_epoch * epochs
  366. def __call__(self):
  367. learning_rate = lr.PolynomialDecay(
  368. learning_rate=self.learning_rate,
  369. decay_steps=self.decay_steps,
  370. power=self.factor,
  371. end_lr=self.end_lr,
  372. )
  373. return learning_rate
  374. class MultiStepDecay(object):
  375. """
  376. Piecewise learning rate decay
  377. Args:
  378. step_each_epoch(int): steps each epoch
  379. learning_rate (float): The initial learning rate. It is a python float number.
  380. step_size (int): the interval to update.
  381. gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` .
  382. It should be less than 1.0. Default: 0.1.
  383. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
  384. """
  385. def __init__(
  386. self,
  387. learning_rate,
  388. milestones,
  389. step_each_epoch,
  390. gamma,
  391. warmup_epoch=0,
  392. last_epoch=-1,
  393. **kwargs,
  394. ):
  395. super(MultiStepDecay, self).__init__()
  396. self.milestones = [step_each_epoch * e for e in milestones]
  397. self.learning_rate = learning_rate
  398. self.gamma = gamma
  399. self.last_epoch = last_epoch
  400. self.warmup_epoch = round(warmup_epoch * step_each_epoch)
  401. def __call__(self):
  402. learning_rate = lr.MultiStepDecay(
  403. learning_rate=self.learning_rate,
  404. milestones=self.milestones,
  405. gamma=self.gamma,
  406. last_epoch=self.last_epoch,
  407. )
  408. if self.warmup_epoch > 0:
  409. learning_rate = lr.LinearWarmup(
  410. learning_rate=learning_rate,
  411. warmup_steps=self.warmup_epoch,
  412. start_lr=0.0,
  413. end_lr=self.learning_rate,
  414. last_epoch=self.last_epoch,
  415. )
  416. return learning_rate
  417. class TwoStepCosine(object):
  418. """
  419. Cosine learning rate decay
  420. lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)
  421. Args:
  422. lr(float): initial learning rate
  423. step_each_epoch(int): steps each epoch
  424. epochs(int): total training epochs
  425. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
  426. """
  427. def __init__(
  428. self,
  429. learning_rate,
  430. step_each_epoch,
  431. epochs,
  432. warmup_epoch=0,
  433. last_epoch=-1,
  434. **kwargs,
  435. ):
  436. super(TwoStepCosine, self).__init__()
  437. self.learning_rate = learning_rate
  438. self.T_max1 = step_each_epoch * 200
  439. self.T_max2 = step_each_epoch * epochs
  440. self.last_epoch = last_epoch
  441. self.warmup_epoch = round(warmup_epoch * step_each_epoch)
  442. def __call__(self):
  443. learning_rate = TwoStepCosineDecay(
  444. learning_rate=self.learning_rate,
  445. T_max1=self.T_max1,
  446. T_max2=self.T_max2,
  447. last_epoch=self.last_epoch,
  448. )
  449. if self.warmup_epoch > 0:
  450. learning_rate = lr.LinearWarmup(
  451. learning_rate=learning_rate,
  452. warmup_steps=self.warmup_epoch,
  453. start_lr=0.0,
  454. end_lr=self.learning_rate,
  455. last_epoch=self.last_epoch,
  456. )
  457. return learning_rate