# Generated content DO NOT EDIT
class Normalizer:
    """
    Base class for all normalizers

    This class is not supposed to be instantiated directly. Instead, any implementation of a
    Normalizer will return an instance of this class when instantiated.
    """

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class BertNormalizer(Normalizer):
    """
    BertNormalizer

    Takes care of normalizing raw text before giving it to a Bert model.
    This includes cleaning the text, handling accents, chinese chars and lowercasing

    Args:
        clean_text (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Whether to clean the text, by removing any control characters
            and replacing all whitespaces by the classic one.

        handle_chinese_chars (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Whether to handle chinese chars by putting spaces around them.

        strip_accents (:obj:`bool`, `optional`):
            Whether to strip all accents. If this option is not specified (ie == None),
            then it will be determined by the value for `lowercase` (as in the original Bert).

        lowercase (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Whether to lowercase.
    """

    def __init__(self, clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True):
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @property
    def clean_text(self):
        """Get the ``clean_text`` option (see the class ``Args``)."""
        pass

    @clean_text.setter
    def clean_text(self, value):
        """Set the ``clean_text`` option to ``value``."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    @property
    def handle_chinese_chars(self):
        """Get the ``handle_chinese_chars`` option (see the class ``Args``)."""
        pass

    @handle_chinese_chars.setter
    def handle_chinese_chars(self, value):
        """Set the ``handle_chinese_chars`` option to ``value``."""
        pass

    @property
    def lowercase(self):
        """Get the ``lowercase`` option (see the class ``Args``)."""
        pass

    @lowercase.setter
    def lowercase(self, value):
        """Set the ``lowercase`` option to ``value``."""
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass

    @property
    def strip_accents(self):
        """Get the ``strip_accents`` option (see the class ``Args``)."""
        pass

    @strip_accents.setter
    def strip_accents(self, value):
        """Set the ``strip_accents`` option to ``value``."""
        pass
class ByteLevel(Normalizer):
    """
    Bytelevel Normalizer
    """

    def __init__(self):
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class Lowercase(Normalizer):
    """
    Lowercase Normalizer
    """

    def __init__(self):
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class NFC(Normalizer):
    """
    NFC Unicode Normalizer
    """

    def __init__(self):
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class NFD(Normalizer):
    """
    NFD Unicode Normalizer
    """

    def __init__(self):
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class NFKC(Normalizer):
    """
    NFKC Unicode Normalizer
    """

    def __init__(self):
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class NFKD(Normalizer):
    """
    NFKD Unicode Normalizer
    """

    def __init__(self):
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class Nmt(Normalizer):
    """
    Nmt normalizer
    """

    def __init__(self):
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class Precompiled(Normalizer):
    """
    Precompiled normalizer

    Don't use manually it is used for compatibility for SentencePiece.

    Args:
        precompiled_charsmap:
            Not documented in the generated stub.
            NOTE(review): presumably the serialized SentencePiece character map -- confirm.
    """

    def __init__(self, precompiled_charsmap):
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class Prepend(Normalizer):
    """
    Prepend normalizer

    Args:
        prepend:
            Not documented in the generated stub.
            NOTE(review): presumably the string to prepend to the input -- confirm.
    """

    def __init__(self, prepend):
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass

    @property
    def prepend(self):
        """Get the ``prepend`` attribute."""
        pass

    @prepend.setter
    def prepend(self, value):
        """Set the ``prepend`` attribute to ``value``."""
        pass
class Replace(Normalizer):
    """
    Replace normalizer

    Args:
        pattern:
            Not documented in the generated stub.
            NOTE(review): presumably the pattern to search for -- confirm.

        content:
            Not documented in the generated stub.
            NOTE(review): presumably the replacement text -- confirm.
    """

    def __init__(self, pattern, content):
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @property
    def content(self):
        """Get the ``content`` attribute."""
        pass

    @content.setter
    def content(self, value):
        """Set the ``content`` attribute to ``value``."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass

    @property
    def pattern(self):
        """Get the ``pattern`` attribute."""
        pass

    @pattern.setter
    def pattern(self, value):
        """Set the ``pattern`` attribute to ``value``."""
        pass
class Sequence(Normalizer):
    """
    Allows concatenating multiple other Normalizer as a Sequence.
    All the normalizers run in sequence in the given order

    Args:
        normalizers (:obj:`List[Normalizer]`):
            A list of Normalizer to be run as a sequence
    """

    def __init__(self, normalizers):
        pass

    def __getitem__(self, key):
        """
        Return self[key].
        """
        pass

    def __getnewargs__(self):
        """Return the arguments passed to ``__new__`` on unpickling (pickle protocol hook)."""
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setitem__(self, key, value):
        """
        Set self[key] to value.
        """
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class Strip(Normalizer):
    """
    Strip normalizer

    Args:
        left (defaults to :obj:`True`):
            Not documented in the generated stub.
            NOTE(review): presumably whether to strip the left side -- confirm.

        right (defaults to :obj:`True`):
            Not documented in the generated stub.
            NOTE(review): presumably whether to strip the right side -- confirm.
    """

    def __init__(self, left=True, right=True):
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    @property
    def left(self):
        """Get the ``left`` option."""
        pass

    @left.setter
    def left(self, value):
        """Set the ``left`` option to ``value``."""
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass

    @property
    def right(self):
        """Get the ``right`` option."""
        pass

    @right.setter
    def right(self, value):
        """Set the ``right`` option to ``value``."""
        pass
class StripAccents(Normalizer):
    """
    StripAccents normalizer
    """

    def __init__(self):
        pass

    def __getstate__(self):
        """Return the state used to pickle this object (pickle protocol hook)."""
        pass

    def __setstate__(self, state):
        """Restore this object from ``state`` (pickle protocol hook)."""
        pass

    @staticmethod
    def custom(normalizer):
        """
        Undocumented in the generated stub.

        NOTE(review): presumably wraps a user-provided ``normalizer`` object --
        confirm against the implementation.
        """
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
from typing import Dict

# Mapping from string keys to Normalizer values; this stub only declares the
# annotation, the value is assigned elsewhere.
NORMALIZERS: Dict[str, Normalizer]


def unicode_normalizer_from_str(normalizer: str) -> Normalizer:
    """
    Return a :class:`Normalizer` for the given string identifier.

    NOTE(review): presumably resolves ``normalizer`` through ``NORMALIZERS`` --
    the implementation is not visible in this stub, confirm before relying on it.

    Args:
        normalizer (:obj:`str`):
            The identifier of the normalizer to return

    Returns:
        :class:`Normalizer`: The normalizer matching ``normalizer``
    """
    ...