# __init__.pyi (7.2 KB)
# Generated content DO NOT EDIT
  2. class DecodeStream:
  3. """
  4. Class needed for streaming decode
  5. """
  6. def __init__(self, ids=None, skip_special_tokens=False):
  7. pass
  8. class Decoder:
  9. """
  10. Base class for all decoders
  11. This class is not supposed to be instantiated directly. Instead, any implementation of
  12. a Decoder will return an instance of this class when instantiated.
  13. """
  14. def decode(self, tokens):
  15. """
  16. Decode the given list of tokens to a final string
  17. Args:
  18. tokens (:obj:`List[str]`):
  19. The list of tokens to decode
  20. Returns:
  21. :obj:`str`: The decoded string
  22. """
  23. pass
  24. class BPEDecoder(Decoder):
  25. """
  26. BPEDecoder Decoder
  27. Args:
  28. suffix (:obj:`str`, `optional`, defaults to :obj:`</w>`):
  29. The suffix that was used to characterize an end-of-word. This suffix will
  30. be replaced by whitespaces during the decoding
  31. """
  32. def __init__(self, suffix="</w>"):
  33. pass
  34. def decode(self, tokens):
  35. """
  36. Decode the given list of tokens to a final string
  37. Args:
  38. tokens (:obj:`List[str]`):
  39. The list of tokens to decode
  40. Returns:
  41. :obj:`str`: The decoded string
  42. """
  43. pass
  44. class ByteFallback(Decoder):
  45. """
  46. ByteFallback Decoder
  47. ByteFallback is a simple trick which converts tokens looking like `<0x61>`
  48. to pure bytes, and attempts to make them into a string. If the tokens
  49. cannot be decoded you will get � instead for each inconvertible byte token
  50. """
  51. def __init__(self):
  52. pass
  53. def decode(self, tokens):
  54. """
  55. Decode the given list of tokens to a final string
  56. Args:
  57. tokens (:obj:`List[str]`):
  58. The list of tokens to decode
  59. Returns:
  60. :obj:`str`: The decoded string
  61. """
  62. pass
  63. class ByteLevel(Decoder):
  64. """
  65. ByteLevel Decoder
  66. This decoder is to be used in tandem with the :class:`~tokenizers.pre_tokenizers.ByteLevel`
  67. :class:`~tokenizers.pre_tokenizers.PreTokenizer`.
  68. """
  69. def __init__(self):
  70. pass
  71. def decode(self, tokens):
  72. """
  73. Decode the given list of tokens to a final string
  74. Args:
  75. tokens (:obj:`List[str]`):
  76. The list of tokens to decode
  77. Returns:
  78. :obj:`str`: The decoded string
  79. """
  80. pass
  81. class CTC(Decoder):
  82. """
  83. CTC Decoder
  84. Args:
  85. pad_token (:obj:`str`, `optional`, defaults to :obj:`<pad>`):
  86. The pad token used by CTC to delimit a new token.
  87. word_delimiter_token (:obj:`str`, `optional`, defaults to :obj:`|`):
  88. The word delimiter token. It will be replaced by a <space>
  89. cleanup (:obj:`bool`, `optional`, defaults to :obj:`True`):
  90. Whether to cleanup some tokenization artifacts.
  91. Mainly spaces before punctuation, and some abbreviated english forms.
  92. """
  93. def __init__(self, pad_token="<pad>", word_delimiter_token="|", cleanup=True):
  94. pass
  95. def decode(self, tokens):
  96. """
  97. Decode the given list of tokens to a final string
  98. Args:
  99. tokens (:obj:`List[str]`):
  100. The list of tokens to decode
  101. Returns:
  102. :obj:`str`: The decoded string
  103. """
  104. pass
  105. class Fuse(Decoder):
  106. """
  107. Fuse Decoder
  108. Fuse simply fuses every token into a single string.
  109. This is the last step of decoding, this decoder exists only if
  110. there is need to add other decoders *after* the fusion
  111. """
  112. def __init__(self):
  113. pass
  114. def decode(self, tokens):
  115. """
  116. Decode the given list of tokens to a final string
  117. Args:
  118. tokens (:obj:`List[str]`):
  119. The list of tokens to decode
  120. Returns:
  121. :obj:`str`: The decoded string
  122. """
  123. pass
  124. class Metaspace(Decoder):
  125. """
  126. Metaspace Decoder
  127. Args:
  128. replacement (:obj:`str`, `optional`, defaults to :obj:`▁`):
  129. The replacement character. Must be exactly one character. By default we
  130. use the `▁` (U+2581) meta symbol (Same as in SentencePiece).
  131. prepend_scheme (:obj:`str`, `optional`, defaults to :obj:`"always"`):
  132. Whether to add a space to the first word if there isn't already one. This
  133. lets us treat `hello` exactly like `say hello`.
  134. Choices: "always", "never", "first". First means the space is only added on the first
  135. token (relevant when special tokens are used or other pre_tokenizer are used).
  136. """
  137. def __init__(self, replacement="▁", prepend_scheme="always", split=True):
  138. pass
  139. def decode(self, tokens):
  140. """
  141. Decode the given list of tokens to a final string
  142. Args:
  143. tokens (:obj:`List[str]`):
  144. The list of tokens to decode
  145. Returns:
  146. :obj:`str`: The decoded string
  147. """
  148. pass
  149. class Replace(Decoder):
  150. """
  151. Replace Decoder
  152. This decoder is to be used in tandem with the :class:`~tokenizers.pre_tokenizers.Replace`
  153. :class:`~tokenizers.pre_tokenizers.PreTokenizer`.
  154. """
  155. def __init__(self, pattern, content):
  156. pass
  157. def decode(self, tokens):
  158. """
  159. Decode the given list of tokens to a final string
  160. Args:
  161. tokens (:obj:`List[str]`):
  162. The list of tokens to decode
  163. Returns:
  164. :obj:`str`: The decoded string
  165. """
  166. pass
  167. class Sequence(Decoder):
  168. """
  169. Sequence Decoder
  170. Args:
  171. decoders (:obj:`List[Decoder]`)
  172. The decoders that need to be chained
  173. """
  174. def __init__(self, decoders):
  175. pass
  176. def decode(self, tokens):
  177. """
  178. Decode the given list of tokens to a final string
  179. Args:
  180. tokens (:obj:`List[str]`):
  181. The list of tokens to decode
  182. Returns:
  183. :obj:`str`: The decoded string
  184. """
  185. pass
  186. class Strip(Decoder):
  187. """
  188. Strip normalizer
  189. Strips n left characters of each token, or n right characters of each token
  190. """
  191. def __init__(self, content, left=0, right=0):
  192. pass
  193. def decode(self, tokens):
  194. """
  195. Decode the given list of tokens to a final string
  196. Args:
  197. tokens (:obj:`List[str]`):
  198. The list of tokens to decode
  199. Returns:
  200. :obj:`str`: The decoded string
  201. """
  202. pass
  203. class WordPiece(Decoder):
  204. """
  205. WordPiece Decoder
  206. Args:
  207. prefix (:obj:`str`, `optional`, defaults to :obj:`##`):
  208. The prefix to use for subwords that are not a beginning-of-word
  209. cleanup (:obj:`bool`, `optional`, defaults to :obj:`True`):
  210. Whether to cleanup some tokenization artifacts. Mainly spaces before punctuation,
  211. and some abbreviated english forms.
  212. """
  213. def __init__(self, prefix="##", cleanup=True):
  214. pass
  215. def decode(self, tokens):
  216. """
  217. Decode the given list of tokens to a final string
  218. Args:
  219. tokens (:obj:`List[str]`):
  220. The list of tokens to decode
  221. Returns:
  222. :obj:`str`: The decoded string
  223. """
  224. pass