generation_utils.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. # Copyright 2020 The HuggingFace Inc. team
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from abc import ABC, abstractmethod
  15. from collections import UserDict
  16. from typing import Iterable, List, Optional, Tuple
  17. import torch
  18. PROCESS_INPUTS_DOCSTRING = r"""
  19. Args:
  20. input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size * num_beams, sequence_length)`):
  21. Indices of input sequence tokens in the vocabulary.
  22. Indices can be obtained using any class inheriting from :class:`~transformers.PretrainedTokenizer`. See
  23. :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` for
  24. details.
  25. `What are input IDs? <../glossary.html#input-ids>`__
  26. next_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, 2 * num_beams)`):
  27. Current scores of the top :obj:`2 * num_beams` non-finished beam hypotheses.
  28. next_tokens (:obj:`torch.LongTensor` of shape :obj:`(batch_size, 2 * num_beams)`):
  29. :obj:`input_ids` of the tokens corresponding to the top :obj:`2 * num_beams` non-finished beam hypotheses.
  30. next_indices (:obj:`torch.LongTensor` of shape :obj:`(batch_size, 2 * num_beams)`):
  31. Beam indices indicating to which beam hypothesis the :obj:`next_tokens` correspond.
  32. pad_token_id (:obj:`int`, `optional`):
  33. The id of the `padding` token.
  34. eos_token_id (:obj:`int`, `optional`):
  35. The id of the `end-of-sequence` token.
  36. Return:
  37. :obj:`UserDict`: A dictionary composed of the fields as defined above:
  38. - **next_beam_scores** (:obj:`torch.FloatTensor` of shape :obj:`(batch_size * num_beams)`) -- Updated
  39. scores of all non-finished beams.
  40. - **next_beam_tokens** (:obj:`torch.FloatTensor` of shape :obj:`(batch_size * num_beams)`) -- Next tokens
  41. to be added to the non-finished beam_hypotheses.
  42. - **next_beam_indices** (:obj:`torch.FloatTensor` of shape :obj:`(batch_size * num_beams)`) -- Beam indices
  43. indicating to which beam the next tokens shall be added.
  44. """
  45. FINALIZE_INPUTS_DOCSTRING = r"""
  46. Args:
  47. input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size * num_beams, sequence_length)`):
  48. Indices of input sequence tokens in the vocabulary.
  49. Indices can be obtained using any class inheriting from :class:`~transformers.PretrainedTokenizer`. See
  50. :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` for
  51. details.
  52. `What are input IDs? <../glossary.html#input-ids>`__
  53. final_beam_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size * num_beams)`):
  54. The final scores of all non-finished beams.
  55. final_beam_tokens (:obj:`torch.FloatTensor` of shape :obj:`(batch_size * num_beams)`):
  56. The last tokens to be added to the non-finished beam_hypotheses.
  57. final_beam_indices (:obj:`torch.FloatTensor` of shape :obj:`(batch_size * num_beams)`):
  58. The beam indices indicating to which beam the :obj:`final_beam_tokens` shall be added.
  59. pad_token_id (:obj:`int`, `optional`):
  60. The id of the `padding` token.
  61. eos_token_id (:obj:`int`, `optional`):
  62. The id of the `end-of-sequence` token.
  63. Return:
  64. :obj:`torch.LongTensor` of shape :obj:`(batch_size * num_return_sequences, sequence_length)`: The generated
  65. sequences. The second dimension (sequence_length) is either equal to :obj:`max_length` or shorter if all
  66. batches finished early due to the :obj:`eos_token_id`.
  67. """
  68. class BeamScorer(ABC):
  69. """
  70. Abstract base class for all beam scorers that are used for :meth:`~transformers.PretrainedModel.beam_search` and
  71. :meth:`~transformers.PretrainedModel.beam_sample`.
  72. """
  73. @abstractmethod
  74. def process(self, input_ids: torch.LongTensor,
  75. next_scores: torch.FloatTensor, next_tokens: torch.LongTensor,
  76. next_indices: torch.LongTensor,
  77. **kwargs) -> Tuple[torch.Tensor]:
  78. raise NotImplementedError('This is an abstract method.')
  79. @abstractmethod
  80. def finalize(self, input_ids: torch.LongTensor,
  81. next_scores: torch.FloatTensor, next_tokens: torch.LongTensor,
  82. next_indices: torch.LongTensor, **kwargs) -> torch.LongTensor:
  83. raise NotImplementedError('This is an abstract method.')
  84. class BeamSearchScorer(BeamScorer):
  85. r"""
  86. :class:`transformers.BeamScorer` implementing standard beam search decoding.
  87. Adapted in part from `Facebook's XLM beam search code
  88. <https://github.com/facebookresearch/XLM/blob/9e6f6814d17be4fe5b15f2e6c43eb2b2d76daeb4/src/model/transformer.py#L529>`__.
  89. Args:
  90. batch_size (:obj:`int`):
  91. Batch Size of :obj:`input_ids` for which beam search decoding is run in parallel.
  92. max_length (:obj:`int`):
  93. The maximum length of the sequence to be generated.
  94. num_beams (:obj:`int`):
  95. Number of beams for beam search.
  96. device (:obj:`torch.device`):
  97. Defines the device type (*e.g.*, :obj:`"cpu"` or :obj:`"cuda"`) on which this instance of
  98. :obj:`BeamSearchScorer` will be allocated.
  99. length_penalty (:obj:`float`, `optional`, defaults to 1.0):
  100. Exponential penalty to the length. 1.0 means no penalty. Set to values < 1.0 in order to encourage the
  101. model to generate shorter sequences, to a value > 1.0 in order to encourage the model to produce longer
  102. sequences.
  103. do_early_stopping (:obj:`bool`, `optional`, defaults to :obj:`False`):
  104. Whether to stop the beam search when at least ``num_beams`` sentences are finished per batch or not.
  105. num_beam_hyps_to_keep (:obj:`int`, `optional`, defaults to 1):
  106. The number of beam hypotheses that shall be returned upon calling
  107. :meth:`~transformer.BeamSearchScorer.finalize`.
  108. """
  109. def __init__(
  110. self,
  111. batch_size: int,
  112. max_length: int,
  113. num_beams: int,
  114. device: torch.device,
  115. length_penalty: Optional[float] = 1.0,
  116. do_early_stopping: Optional[bool] = False,
  117. num_beam_hyps_to_keep: Optional[int] = 1,
  118. ):
  119. self.max_length = max_length
  120. self.num_beams = num_beams
  121. self.device = device
  122. self.length_penalty = length_penalty
  123. self.do_early_stopping = do_early_stopping
  124. self.num_beam_hyps_to_keep = num_beam_hyps_to_keep
  125. self._is_init = False
  126. self._beam_hyps = [
  127. BeamHypotheses(
  128. num_beams=self.num_beams,
  129. max_length=self.max_length,
  130. length_penalty=self.length_penalty,
  131. early_stopping=self.do_early_stopping,
  132. ) for _ in range(batch_size)
  133. ]
  134. self._done = torch.tensor([False for _ in range(batch_size)],
  135. dtype=torch.bool,
  136. device=self.device)
  137. # if not isinstance(num_beams, int) or num_beams <= 1:
  138. # raise ValueError(
  139. # )
  140. @property
  141. def is_done(self) -> bool:
  142. return self._done.all()
  143. def process(self,
  144. input_ids: torch.LongTensor,
  145. next_scores: torch.FloatTensor,
  146. next_tokens: torch.LongTensor,
  147. next_indices: torch.LongTensor,
  148. pad_token_id: Optional[int] = None,
  149. eos_token_id: Optional[int] = None,
  150. mems=None) -> Tuple[torch.Tensor]:
  151. cur_len = input_ids.shape[-1]
  152. batch_size = len(self._beam_hyps)
  153. assert batch_size == (input_ids.shape[0] // self.num_beams)
  154. if isinstance(eos_token_id, int):
  155. eos_token_id = [eos_token_id]
  156. device = next_scores.device
  157. next_beam_scores = torch.zeros((batch_size, self.num_beams),
  158. dtype=next_scores.dtype,
  159. device=device)
  160. next_beam_tokens = torch.zeros((batch_size, self.num_beams),
  161. dtype=next_tokens.dtype,
  162. device=device)
  163. next_beam_indices = torch.zeros((batch_size, self.num_beams),
  164. dtype=next_indices.dtype,
  165. device=device)
  166. for batch_idx, beam_hyp in enumerate(self._beam_hyps):
  167. if self._done[batch_idx]:
  168. assert (
  169. len(beam_hyp) >= self.num_beams
  170. ), 'Batch can only be done if at least {} beams have been generated'.format(
  171. self.num_beams)
  172. assert (
  173. eos_token_id is not None and pad_token_id is not None
  174. ), 'generated beams >= num_beams -> eos_token_id and pad_token have to be defined'
  175. # pad the batch
  176. next_beam_scores[batch_idx, :] = 0
  177. next_beam_tokens[batch_idx, :] = pad_token_id
  178. next_beam_indices[batch_idx, :] = 0
  179. continue
  180. # next tokens for this sentence
  181. beam_idx = 0
  182. for beam_token_rank, (next_token, next_score,
  183. next_index) in enumerate(
  184. zip(next_tokens[batch_idx],
  185. next_scores[batch_idx],
  186. next_indices[batch_idx])):
  187. batch_beam_idx = batch_idx * self.num_beams + next_index
  188. # add to generated hypotheses if end of sentence
  189. if (eos_token_id is not None) and (next_token.item()
  190. in eos_token_id):
  191. # if beam_token does not belong to top num_beams tokens, it should not be added
  192. is_beam_token_worse_than_top_num_beams = beam_token_rank >= self.num_beams
  193. if is_beam_token_worse_than_top_num_beams:
  194. continue
  195. beam_hyp.add(
  196. input_ids[batch_beam_idx].clone(),
  197. next_score.item(),
  198. mems=[mem[[next_index.item()]]
  199. for mem in mems] if mems else None)
  200. else:
  201. # add next predicted token since it is not eos_token
  202. next_beam_scores[batch_idx, beam_idx] = next_score
  203. next_beam_tokens[batch_idx, beam_idx] = next_token
  204. next_beam_indices[batch_idx, beam_idx] = batch_beam_idx
  205. beam_idx += 1
  206. # once the beam for next step is full, don't add more tokens to it.
  207. if beam_idx == self.num_beams:
  208. break
  209. if beam_idx < self.num_beams:
  210. raise ValueError(
  211. f'At most {self.num_beams} tokens in {next_tokens[batch_idx]} can be equal to `eos_token_id: {eos_token_id}`. Make sure {next_tokens[batch_idx]} are corrected.' # noqa
  212. ) # noqa
  213. # Check if we are done so that we can save a pad step if all(done)
  214. self._done[batch_idx] = self._done[batch_idx] or beam_hyp.is_done(
  215. next_scores[batch_idx].max().item(), cur_len)
  216. return UserDict({
  217. 'next_beam_scores': next_beam_scores.view(-1),
  218. 'next_beam_tokens': next_beam_tokens.view(-1),
  219. 'next_beam_indices': next_beam_indices.view(-1),
  220. })
  221. def finalize(self,
  222. input_ids: torch.LongTensor,
  223. final_beam_scores: torch.FloatTensor,
  224. final_beam_tokens: torch.LongTensor,
  225. final_beam_indices: torch.LongTensor,
  226. pad_token_id: Optional[int] = None,
  227. eos_token_id: Optional[int] = None,
  228. mems=None) -> Tuple[torch.LongTensor, List[torch.Tensor]]:
  229. batch_size = len(self._beam_hyps)
  230. # finalize all open beam hypotheses and add to generated hypotheses
  231. for batch_idx, beam_hyp in enumerate(self._beam_hyps):
  232. if self._done[batch_idx]:
  233. continue
  234. # need to add best num_beams hypotheses to generated hyps
  235. for beam_id in range(self.num_beams):
  236. batch_beam_idx = batch_idx * self.num_beams + beam_id
  237. final_score = final_beam_scores[batch_beam_idx].item()
  238. final_tokens = input_ids[batch_beam_idx]
  239. beam_hyp.add(
  240. final_tokens,
  241. final_score,
  242. mems=[mem[[batch_beam_idx]]
  243. for mem in mems] if mems else None)
  244. # select the best hypotheses
  245. sent_lengths = input_ids.new(batch_size * self.num_beam_hyps_to_keep)
  246. best = []
  247. # retrieve best hypotheses
  248. for i, beam_hyp in enumerate(self._beam_hyps):
  249. sorted_hyps = sorted(beam_hyp.beams, key=lambda x: x[0])
  250. for j in range(self.num_beam_hyps_to_keep):
  251. best_hyp, mems = sorted_hyps.pop()[1:]
  252. sent_lengths[self.num_beam_hyps_to_keep * i
  253. + j] = len(best_hyp)
  254. best.append((best_hyp, mems))
  255. # prepare for adding eos
  256. sent_max_len = min(sent_lengths.max().item(), self.max_length)
  257. decoded: torch.LongTensor = input_ids.new(
  258. batch_size * self.num_beam_hyps_to_keep, sent_max_len)
  259. # shorter batches are padded if needed
  260. if sent_lengths.min().item() != sent_lengths.max().item():
  261. assert pad_token_id is not None, '`pad_token_id` has to be defined'
  262. decoded.fill_(pad_token_id)
  263. # fill with hypotheses and eos_token_id if the latter fits in
  264. mems = []
  265. for i, (hypo, mem) in enumerate(best):
  266. decoded[i, :sent_lengths[i]] = hypo
  267. if sent_lengths[i] < sent_max_len:
  268. decoded[i, sent_lengths[i]] = eos_token_id
  269. mems.append(mem)
  270. mems = [
  271. torch.cat([mem[i] for mem in mems], dim=0)
  272. for i in range(len(mems[0]))
  273. ] if mems and mems[0] else None
  274. return decoded, mems
  275. class BeamHypotheses:
  276. def __init__(self, num_beams: int, max_length: int, length_penalty: float,
  277. early_stopping: bool):
  278. """
  279. Initialize n-best list of hypotheses.
  280. """
  281. self.max_length = max_length - 1 # ignoring bos_token
  282. self.length_penalty = length_penalty
  283. self.early_stopping = early_stopping
  284. self.num_beams = num_beams
  285. self.beams = []
  286. self.worst_score = 1e9
  287. def __len__(self):
  288. """
  289. Number of hypotheses in the list.
  290. """
  291. return len(self.beams)
  292. def add(self, hyp: torch.LongTensor, sum_logprobs: float, mems=None):
  293. """
  294. Add a new hypothesis to the list.
  295. """
  296. score = sum_logprobs / (max(hyp.shape[-1], 1)**self.length_penalty)
  297. if len(self) < self.num_beams or score > self.worst_score:
  298. self.beams.append((score, hyp, mems))
  299. if len(self) > self.num_beams:
  300. sorted_next_scores = sorted([
  301. (s, idx) for idx, (s, _, _) in enumerate(self.beams)
  302. ])
  303. del self.beams[sorted_next_scores[0][1]]
  304. self.worst_score = sorted_next_scores[1][0]
  305. else:
  306. self.worst_score = min(score, self.worst_score)
  307. def is_done(self, best_sum_logprobs: float, cur_len: int) -> bool:
  308. """
  309. If there are enough hypotheses and that none of the hypotheses being generated can become better than the worst
  310. one in the heap, then we are done with this sentence.
  311. """
  312. if len(self) < self.num_beams:
  313. return False
  314. elif self.early_stopping:
  315. return True
  316. else:
  317. cur_score = best_sum_logprobs / cur_len**self.length_penalty
  318. ret = self.worst_score >= cur_score
  319. return ret
  320. class LogitsProcessor(ABC):
  321. """Abstract base class for all logit processors that can be applied during generation."""
  322. def __call__(self, input_ids: torch.LongTensor,
  323. scores: torch.FloatTensor) -> torch.FloatTensor:
  324. """Torch method for processing logits."""
  325. raise NotImplementedError(
  326. f'{self.__class__} is an abstract class. Only classes inheriting this class can be called.'
  327. )
  328. class LogitsProcessorList(list):
  329. """
  330. This class can be used to create a list of :class:`~transformers.LogitsProcessor` or
  331. :class:`~transformers.LogitsWarper` to subsequently process a :obj:`scores` input tensor. This class inherits from
  332. list and adds a specific `__call__` method to apply each :class:`~transformers.LogitsProcessor` or
  333. :class:`~transformers.LogitsProcessor` to the inputs.
  334. """
  335. def __call__(self, input_ids: torch.LongTensor,
  336. scores: torch.FloatTensor) -> torch.FloatTensor:
  337. for processor in self:
  338. scores = processor(input_ids, scores)
  339. return scores
  340. class MinLengthLogitsProcessor(LogitsProcessor):
  341. r"""
  342. :class:`transformers.LogitsProcessor` enforcing a min-length by setting EOS probability to 0.
  343. Args:
  344. min_length (:obj:`int`):
  345. The minimum length below which the score of :obj:`eos_token_id` is set to :obj:`-float("Inf")`.
  346. eos_token_id (:obj:`int`):
  347. The id of the `end-of-sequence` token.
  348. """
  349. def __init__(self, min_length: int, eos_token_id: int):
  350. if not isinstance(min_length, int) or min_length < 0:
  351. raise ValueError(
  352. f'`min_length` has to be a positive integer, but is {min_length}'
  353. )
  354. if not isinstance(eos_token_id, int) or eos_token_id < 0:
  355. raise ValueError(
  356. f'`eos_token_id` has to be a positive integer, but is {eos_token_id}'
  357. )
  358. self.min_length = min_length
  359. self.eos_token_id = eos_token_id
  360. def __call__(self, input_ids: torch.LongTensor,
  361. scores: torch.FloatTensor) -> torch.FloatTensor:
  362. cur_len = input_ids.shape[-1]
  363. if cur_len < self.min_length:
  364. scores[:, self.eos_token_id] = -float('inf')
  365. return scores
  366. class NoRepeatNGramLogitsProcessor(LogitsProcessor):
  367. r"""
  368. :class:`transformers.LogitsProcessor` that enforces no repetition of n-grams. See `Fairseq
  369. <https://github.com/pytorch/fairseq/blob/a07cb6f40480928c9e0548b737aadd36ee66ac76/fairseq/sequence_generator.py#L345>`__.
  370. Args:
  371. ngram_size (:obj:`int`):
  372. All ngrams of size :obj:`ngram_size` can only occur once.
  373. """
  374. def __init__(self, ngram_size: int):
  375. if not isinstance(ngram_size, int) or ngram_size <= 0:
  376. raise ValueError(
  377. f'`ngram_size` has to be a strictly positive integer, but is {ngram_size}'
  378. )
  379. self.ngram_size = ngram_size
  380. def __call__(self, input_ids: torch.LongTensor,
  381. scores: torch.FloatTensor) -> torch.FloatTensor:
  382. num_batch_hypotheses = scores.shape[0]
  383. cur_len = input_ids.shape[-1]
  384. banned_batch_tokens = self._calc_banned_ngram_tokens(
  385. input_ids, num_batch_hypotheses, cur_len)
  386. for i, banned_tokens in enumerate(banned_batch_tokens):
  387. scores[i, banned_tokens] = -float('inf')
  388. return scores
  389. def _calc_banned_ngram_tokens(self, prev_input_ids: torch.Tensor,
  390. num_hypos: int,
  391. cur_len: int) -> List[Iterable[int]]:
  392. """Copied from fairseq for no_repeat_ngram in beam_search"""
  393. if cur_len + 1 < self.ngram_size:
  394. # return no banned tokens if we haven't generated no_repeat_ngram_size tokens yet
  395. return [[] for _ in range(num_hypos)]
  396. generated_ngrams = [{} for _ in range(num_hypos)]
  397. for idx in range(num_hypos):
  398. gen_tokens = prev_input_ids[idx].tolist()
  399. generated_ngram = generated_ngrams[idx]
  400. for ngram in zip(*[gen_tokens[i:]
  401. for i in range(self.ngram_size)]):
  402. prev_ngram_tuple = tuple(ngram[:-1])
  403. generated_ngram[prev_ngram_tuple] = generated_ngram.get(
  404. prev_ngram_tuple, []) + [ngram[-1]]
  405. def _get_generated_ngrams(hypo_idx):
  406. # Before decoding the next token, prevent decoding of ngrams that have already appeared
  407. start_idx = cur_len + 1 - self.ngram_size
  408. ngram_idx = tuple(prev_input_ids[hypo_idx,
  409. start_idx:cur_len].tolist())
  410. return generated_ngrams[hypo_idx].get(ngram_idx, [])
  411. banned_tokens = [
  412. _get_generated_ngrams(hypo_idx) for hypo_idx in range(num_hypos)
  413. ]
  414. return banned_tokens