| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813 |
- """
- This module contains a set of functions for vectorized string
- operations.
- """
- import functools
- import sys
- import numpy as np
- from numpy import (
- add,
- equal,
- greater,
- greater_equal,
- less,
- less_equal,
- multiply as _multiply_ufunc,
- not_equal,
- )
- from numpy._core.multiarray import _vec_string
- from numpy._core.overrides import array_function_dispatch, set_module
- from numpy._core.umath import (
- _center,
- _expandtabs,
- _expandtabs_length,
- _ljust,
- _lstrip_chars,
- _lstrip_whitespace,
- _partition,
- _partition_index,
- _replace,
- _rjust,
- _rpartition,
- _rpartition_index,
- _rstrip_chars,
- _rstrip_whitespace,
- _slice,
- _strip_chars,
- _strip_whitespace,
- _zfill,
- count as _count_ufunc,
- endswith as _endswith_ufunc,
- find as _find_ufunc,
- index as _index_ufunc,
- isalnum,
- isalpha,
- isdecimal,
- isdigit,
- islower,
- isnumeric,
- isspace,
- istitle,
- isupper,
- rfind as _rfind_ufunc,
- rindex as _rindex_ufunc,
- startswith as _startswith_ufunc,
- str_len,
- )
- def _override___module__():
- for ufunc in [
- isalnum, isalpha, isdecimal, isdigit, islower, isnumeric, isspace,
- istitle, isupper, str_len,
- ]:
- ufunc.__module__ = "numpy.strings"
- ufunc.__qualname__ = ufunc.__name__
- _override___module__()
# Public names of the ``numpy.strings`` namespace.
__all__ = [
    # Implemented as ufuncs: comparisons and concatenation/repetition
    "equal", "not_equal", "less", "less_equal", "greater", "greater_equal",
    "add", "multiply",
    # Implemented as ufuncs: character-class predicates and length
    "isalpha", "isdigit", "isspace", "isalnum", "islower",
    "isupper", "istitle", "isdecimal", "isnumeric", "str_len",
    # Implemented as ufuncs: searching
    "find", "rfind", "index", "rindex", "count", "startswith", "endswith",
    # Implemented as ufuncs: stripping, replacing, padding, splitting
    "lstrip", "rstrip", "strip", "replace", "expandtabs",
    "center", "ljust", "rjust", "zfill",
    "partition", "rpartition", "slice",
    # Built on _vec_string - will gradually become ufuncs as well
    "upper", "lower", "swapcase", "capitalize", "title",
    # Built on _vec_string - will probably not become ufuncs
    "mod", "decode", "encode", "translate",
    # Kept out of the namespace until their behavior has been settled:
    # "join", "split", "rsplit", "splitlines",
]
- MAX = np.iinfo(np.int64).max
- array_function_dispatch = functools.partial(
- array_function_dispatch, module='numpy.strings')
def _get_num_chars(a):
    """
    Return the number of characters each element of ``a`` can hold.

    For ``str_`` arrays the item size is divided by 4; for any other
    dtype the item size is returned unchanged.
    """
    is_unicode = issubclass(a.dtype.type, np.str_)
    return a.itemsize // 4 if is_unicode else a.itemsize
- def _to_bytes_or_str_array(result, output_dtype_like):
- """
- Helper function to cast a result back into an array
- with the appropriate dtype if an object array must be used
- as an intermediary.
- """
- output_dtype_like = np.asarray(output_dtype_like)
- if result.size == 0:
- # Calling asarray & tolist in an empty array would result
- # in losing shape information
- return result.astype(output_dtype_like.dtype)
- ret = np.asarray(result.tolist())
- if isinstance(output_dtype_like.dtype, np.dtypes.StringDType):
- return ret.astype(type(output_dtype_like.dtype))
- return ret.astype(type(output_dtype_like.dtype)(_get_num_chars(ret)))
def _clean_args(*args):
    """
    Truncate an argument list at its first ``None``.

    Many Python string methods with optional arguments do not accept
    ``None`` as "use the default", so the first ``None`` and everything
    after it is dropped before delegating to them.

    Returns
    -------
    list
        The arguments that precede the first ``None``, or all of them
        when none is ``None``.
    """
    for cutoff, value in enumerate(args):
        if value is None:
            return list(args[:cutoff])
    return list(args)
def _multiply_dispatcher(a, i):
    """Dispatcher for `multiply`; only ``a`` is reported for dispatch."""
    relevant_args = (a,)
    return relevant_args
@set_module("numpy.strings")
@array_function_dispatch(_multiply_dispatcher)
def multiply(a, i):
    """
    Repeat each string of ``a`` ``i`` times, element-wise.

    This is the array counterpart of ``a * i`` for Python strings.
    Entries of ``i`` below 0 are treated as 0 and therefore produce an
    empty string.

    Parameters
    ----------
    a : array_like, with ``StringDType``, ``bytes_`` or ``str_`` dtype

    i : array_like, with any integer dtype

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    Raises
    ------
    TypeError
        If ``i`` does not have an integer dtype.
    OverflowError
        If a resulting string length would exceed ``sys.maxsize``
        (checked here for ``bytes_`` and ``str_`` input).

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array(["a", "b", "c"])
    >>> np.strings.multiply(a, 3)
    array(['aaa', 'bbb', 'ccc'], dtype='<U3')
    >>> i = np.array([1, 2, 3])
    >>> np.strings.multiply(a, i)
    array(['a', 'bb', 'ccc'], dtype='<U3')
    >>> np.strings.multiply(np.array(['a']), i)
    array(['a', 'aa', 'aaa'], dtype='<U3')

    """
    strings = np.asanyarray(a)
    repeats = np.asanyarray(i)
    if not np.issubdtype(repeats.dtype, np.integer):
        raise TypeError(f"unsupported type {repeats.dtype} for operand 'i'")
    repeats = np.maximum(repeats, 0)

    # The StringDType loops perform their own overflow checking.
    if strings.dtype.char == "T":
        return strings * repeats

    lengths = str_len(strings)

    # Make sure ``lengths * repeats`` cannot overflow before computing it.
    if np.any(lengths > sys.maxsize / np.maximum(repeats, 1)):
        raise OverflowError("Overflow encountered in string multiply")

    result_sizes = lengths * repeats
    result_dtype = f"{strings.dtype.char}{result_sizes.max()}"
    result = np.empty_like(
        strings, shape=result_sizes.shape, dtype=result_dtype)
    return _multiply_ufunc(strings, repeats, out=result)
def _mod_dispatcher(a, values):
    """Dispatcher for `mod`; both ``a`` and ``values`` are reported."""
    relevant_args = (a, values)
    return relevant_args
@set_module("numpy.strings")
@array_function_dispatch(_mod_dispatcher)
def mod(a, values):
    """
    Apply ``%``-style string formatting (interpolation) element-wise.

    Each element of ``a`` is used as a format string and combined with
    ``values`` through its ``__mod__`` method.

    Parameters
    ----------
    a : array_like, with `np.bytes_` or `np.str_` dtype

    values : array_like of values
        These values will be element-wise interpolated into the string.

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array(["NumPy is a %s library"])
    >>> np.strings.mod(a, values=["Python"])
    array(['NumPy is a Python library'], dtype='<U25')

    >>> a = np.array([b'%d bytes', b'%d bits'])
    >>> values = np.array([8, 64])
    >>> np.strings.mod(a, values)
    array([b'8 bytes', b'64 bits'], dtype='|S7')

    """
    # Format through an object array, then cast back to the dtype family
    # of ``a``.
    formatted = _vec_string(a, np.object_, '__mod__', (values,))
    return _to_bytes_or_str_array(formatted, a)
- @set_module("numpy.strings")
- def find(a, sub, start=0, end=None):
- """
- For each element, return the lowest index in the string where
- substring ``sub`` is found, such that ``sub`` is contained in the
- range [``start``, ``end``).
- Parameters
- ----------
- a : array_like, with ``StringDType``, ``bytes_`` or ``str_`` dtype
- sub : array_like, with `np.bytes_` or `np.str_` dtype
- The substring to search for.
- start, end : array_like, with any integer dtype
- The range to look in, interpreted as in slice notation.
- Returns
- -------
- y : ndarray
- Output array of ints
- See Also
- --------
- str.find
- Examples
- --------
- >>> import numpy as np
- >>> a = np.array(["NumPy is a Python library"])
- >>> np.strings.find(a, "Python")
- array([11])
- """
- end = end if end is not None else MAX
- return _find_ufunc(a, sub, start, end)
- @set_module("numpy.strings")
- def rfind(a, sub, start=0, end=None):
- """
- For each element, return the highest index in the string where
- substring ``sub`` is found, such that ``sub`` is contained in the
- range [``start``, ``end``).
- Parameters
- ----------
- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
- sub : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
- The substring to search for.
- start, end : array_like, with any integer dtype
- The range to look in, interpreted as in slice notation.
- Returns
- -------
- y : ndarray
- Output array of ints
- See Also
- --------
- str.rfind
- Examples
- --------
- >>> import numpy as np
- >>> a = np.array(["Computer Science"])
- >>> np.strings.rfind(a, "Science", start=0, end=None)
- array([9])
- >>> np.strings.rfind(a, "Science", start=0, end=8)
- array([-1])
- >>> b = np.array(["Computer Science", "Science"])
- >>> np.strings.rfind(b, "Science", start=0, end=None)
- array([9, 0])
- """
- end = end if end is not None else MAX
- return _rfind_ufunc(a, sub, start, end)
- @set_module("numpy.strings")
- def index(a, sub, start=0, end=None):
- """
- Like `find`, but raises :exc:`ValueError` when the substring is not found.
- Parameters
- ----------
- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
- sub : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
- start, end : array_like, with any integer dtype, optional
- Returns
- -------
- out : ndarray
- Output array of ints.
- See Also
- --------
- find, str.index
- Examples
- --------
- >>> import numpy as np
- >>> a = np.array(["Computer Science"])
- >>> np.strings.index(a, "Science", start=0, end=None)
- array([9])
- """
- end = end if end is not None else MAX
- return _index_ufunc(a, sub, start, end)
- @set_module("numpy.strings")
- def rindex(a, sub, start=0, end=None):
- """
- Like `rfind`, but raises :exc:`ValueError` when the substring `sub` is
- not found.
- Parameters
- ----------
- a : array-like, with `np.bytes_` or `np.str_` dtype
- sub : array-like, with `np.bytes_` or `np.str_` dtype
- start, end : array-like, with any integer dtype, optional
- Returns
- -------
- out : ndarray
- Output array of ints.
- See Also
- --------
- rfind, str.rindex
- Examples
- --------
- >>> a = np.array(["Computer Science"])
- >>> np.strings.rindex(a, "Science", start=0, end=None)
- array([9])
- """
- end = end if end is not None else MAX
- return _rindex_ufunc(a, sub, start, end)
- @set_module("numpy.strings")
- def count(a, sub, start=0, end=None):
- """
- Returns an array with the number of non-overlapping occurrences of
- substring ``sub`` in the range [``start``, ``end``).
- Parameters
- ----------
- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
- sub : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
- The substring to search for.
- start, end : array_like, with any integer dtype
- The range to look in, interpreted as in slice notation.
- Returns
- -------
- y : ndarray
- Output array of ints
- See Also
- --------
- str.count
- Examples
- --------
- >>> import numpy as np
- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
- >>> c
- array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
- >>> np.strings.count(c, 'A')
- array([3, 1, 1])
- >>> np.strings.count(c, 'aA')
- array([3, 1, 0])
- >>> np.strings.count(c, 'A', start=1, end=4)
- array([2, 1, 1])
- >>> np.strings.count(c, 'A', start=1, end=3)
- array([1, 0, 0])
- """
- end = end if end is not None else MAX
- return _count_ufunc(a, sub, start, end)
- @set_module("numpy.strings")
- def startswith(a, prefix, start=0, end=None):
- """
- Returns a boolean array which is `True` where the string element
- in ``a`` starts with ``prefix``, otherwise `False`.
- Parameters
- ----------
- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
- prefix : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
- start, end : array_like, with any integer dtype
- With ``start``, test beginning at that position. With ``end``,
- stop comparing at that position.
- Returns
- -------
- out : ndarray
- Output array of bools
- See Also
- --------
- str.startswith
- Examples
- --------
- >>> import numpy as np
- >>> s = np.array(['foo', 'bar'])
- >>> s
- array(['foo', 'bar'], dtype='<U3')
- >>> np.strings.startswith(s, 'fo')
- array([True, False])
- >>> np.strings.startswith(s, 'o', start=1, end=2)
- array([True, False])
- """
- end = end if end is not None else MAX
- return _startswith_ufunc(a, prefix, start, end)
- @set_module("numpy.strings")
- def endswith(a, suffix, start=0, end=None):
- """
- Returns a boolean array which is `True` where the string element
- in ``a`` ends with ``suffix``, otherwise `False`.
- Parameters
- ----------
- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
- suffix : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
- start, end : array_like, with any integer dtype
- With ``start``, test beginning at that position. With ``end``,
- stop comparing at that position.
- Returns
- -------
- out : ndarray
- Output array of bools
- See Also
- --------
- str.endswith
- Examples
- --------
- >>> import numpy as np
- >>> s = np.array(['foo', 'bar'])
- >>> s
- array(['foo', 'bar'], dtype='<U3')
- >>> np.strings.endswith(s, 'ar')
- array([False, True])
- >>> np.strings.endswith(s, 'a', start=1, end=2)
- array([False, True])
- """
- end = end if end is not None else MAX
- return _endswith_ufunc(a, suffix, start, end)
def _code_dispatcher(a, encoding=None, errors=None):
    """Dispatcher for `decode` and `encode`; only ``a`` is reported."""
    relevant_args = (a,)
    return relevant_args
@set_module("numpy.strings")
@array_function_dispatch(_code_dispatcher)
def decode(a, encoding=None, errors=None):
    r"""
    Calls :meth:`bytes.decode` element-wise.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime.  For more information, see the
    :mod:`codecs` module.

    Parameters
    ----------
    a : array_like, with ``bytes_`` dtype

    encoding : str, optional
       The name of an encoding

    errors : str, optional
       Specifies how to handle encoding errors

    Returns
    -------
    out : ndarray

    See Also
    --------
    :py:meth:`bytes.decode`

    Notes
    -----
    The type of the result will depend on the encoding specified.
    Arguments left as ``None`` (and any that follow them) are not
    forwarded to :meth:`bytes.decode`.

    Examples
    --------
    >>> import numpy as np
    >>> c = np.array([b'abc', b'de'])
    >>> np.strings.decode(c, encoding='utf-8')
    array(['abc', 'de'], dtype='<U3')

    """
    codec_args = _clean_args(encoding, errors)
    decoded = _vec_string(a, np.object_, 'decode', codec_args)
    return _to_bytes_or_str_array(decoded, np.str_(''))
@set_module("numpy.strings")
@array_function_dispatch(_code_dispatcher)
def encode(a, encoding=None, errors=None):
    """
    Calls :meth:`str.encode` element-wise.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime.  For more information, see the
    :mod:`codecs` module.

    Parameters
    ----------
    a : array_like, with ``StringDType`` or ``str_`` dtype

    encoding : str, optional
       The name of an encoding

    errors : str, optional
       Specifies how to handle encoding errors

    Returns
    -------
    out : ndarray

    See Also
    --------
    str.encode

    Notes
    -----
    The type of the result will depend on the encoding specified.
    Arguments left as ``None`` (and any that follow them) are not
    forwarded to :meth:`str.encode`.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array(['abc', 'de'])
    >>> np.strings.encode(a, encoding='ascii')
    array([b'abc', b'de'], dtype='|S3')

    """
    codec_args = _clean_args(encoding, errors)
    encoded = _vec_string(a, np.object_, 'encode', codec_args)
    return _to_bytes_or_str_array(encoded, np.bytes_(b''))
def _expandtabs_dispatcher(a, tabsize=None):
    """Dispatcher for `expandtabs`; only ``a`` is reported."""
    relevant_args = (a,)
    return relevant_args
@set_module("numpy.strings")
@array_function_dispatch(_expandtabs_dispatcher)
def expandtabs(a, tabsize=8):
    r"""
    Return a copy of each string element where all tab characters are
    replaced by one or more spaces.

    Calls :meth:`str.expandtabs` element-wise.

    The number of spaces depends on the current column and the given
    `tabsize`. The column number is reset to zero after each newline
    occurring in the string. This doesn't understand other non-printing
    characters or escape sequences.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Input array

    tabsize : int, optional
        Replace tabs with `tabsize` number of spaces.  If not given defaults
        to 8 spaces.

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input type

    See Also
    --------
    str.expandtabs

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array(['\tHello\tworld'])
    >>> np.strings.expandtabs(a, tabsize=4)
    array(['    Hello   world'], dtype='<U17')

    """
    strings = np.asanyarray(a)
    tab_width = np.asanyarray(tabsize)

    # StringDType output needs no pre-sized buffer.
    if strings.dtype.char == "T":
        return _expandtabs(strings, tab_width)

    # Fixed-width dtypes: compute the expanded lengths first so the output
    # can be allocated wide enough for the longest result.
    expanded_lengths = _expandtabs_length(strings, tab_width)
    result_dtype = f"{strings.dtype.char}{expanded_lengths.max()}"
    result = np.empty_like(
        strings, shape=expanded_lengths.shape, dtype=result_dtype)
    return _expandtabs(strings, tab_width, out=result)
def _just_dispatcher(a, width, fillchar=None):
    """Dispatcher for `center`, `ljust` and `rjust`; only ``a`` is reported."""
    relevant_args = (a,)
    return relevant_args
@set_module("numpy.strings")
@array_function_dispatch(_just_dispatcher)
def center(a, width, fillchar=' '):
    """
    Return a copy of `a` with its elements centered in a string of
    length `width`.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype

    width : array_like, with any integer dtype
        The length of the resulting strings, unless ``width < str_len(a)``.
    fillchar : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Optional padding character to use (default is space).

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    Raises
    ------
    TypeError
        If ``width`` does not have an integer dtype, or if any element
        of ``fillchar`` is not exactly one character long.

    See Also
    --------
    str.center

    Notes
    -----
    While it is possible for ``a`` and ``fillchar`` to have different dtypes,
    passing a non-ASCII character in ``fillchar`` when ``a`` is of dtype "S"
    is not allowed, and a ``ValueError`` is raised.

    Examples
    --------
    >>> import numpy as np
    >>> c = np.array(['a1b2', '1b2a', 'b2a1', '2a1b'])
    >>> np.strings.center(c, width=9, fillchar='*')
    array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='<U9')
    >>> np.strings.center(c, width=1)
    array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4')

    """
    target_width = np.asanyarray(width)
    if not np.issubdtype(target_width.dtype, np.integer):
        raise TypeError(
            f"unsupported type {target_width.dtype} for operand 'width'")

    strings = np.asanyarray(a)
    pad = np.asanyarray(fillchar)

    wrong_pad_length = str_len(pad) != 1
    if np.any(wrong_pad_length):
        raise TypeError(
            "The fill character must be exactly one character long")

    # StringDType output needs no pre-sized buffer.
    if np.result_type(strings, pad).char == "T":
        return _center(strings, target_width, pad)

    # Fixed-width dtypes: strings longer than the requested width are kept
    # whole, so the output must fit the larger of the two lengths.
    pad = pad.astype(strings.dtype, copy=False)
    target_width = np.maximum(str_len(strings), target_width)
    result_dtype = f"{strings.dtype.char}{target_width.max()}"
    result_shape = np.broadcast_shapes(
        strings.shape, target_width.shape, pad.shape)
    result = np.empty_like(strings, shape=result_shape, dtype=result_dtype)
    return _center(strings, target_width, pad, out=result)
@set_module("numpy.strings")
@array_function_dispatch(_just_dispatcher)
def ljust(a, width, fillchar=' '):
    """
    Return an array with the elements of `a` left-justified in a
    string of length `width`.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype

    width : array_like, with any integer dtype
        The length of the resulting strings, unless ``width < str_len(a)``.
    fillchar : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Optional character to use for padding (default is space).

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    Raises
    ------
    TypeError
        If ``width`` does not have an integer dtype, or if any element
        of ``fillchar`` is not exactly one character long.

    See Also
    --------
    str.ljust

    Notes
    -----
    While it is possible for ``a`` and ``fillchar`` to have different dtypes,
    passing a non-ASCII character in ``fillchar`` when ``a`` is of dtype "S"
    is not allowed, and a ``ValueError`` is raised.

    Examples
    --------
    >>> import numpy as np
    >>> c = np.array(['a', 'bb', 'cccc'])
    >>> np.strings.ljust(c, width=3, fillchar='*')
    array(['a**', 'bb*', 'cccc'], dtype='<U4')
    >>> np.strings.ljust(c, width=1)
    array(['a', 'bb', 'cccc'], dtype='<U4')

    """
    target_width = np.asanyarray(width)
    if not np.issubdtype(target_width.dtype, np.integer):
        raise TypeError(
            f"unsupported type {target_width.dtype} for operand 'width'")

    strings = np.asanyarray(a)
    pad = np.asanyarray(fillchar)

    wrong_pad_length = str_len(pad) != 1
    if np.any(wrong_pad_length):
        raise TypeError(
            "The fill character must be exactly one character long")

    # StringDType output needs no pre-sized buffer.
    if np.result_type(strings, pad).char == "T":
        return _ljust(strings, target_width, pad)

    # Fixed-width dtypes: strings longer than the requested width are kept
    # whole, so the output must fit the larger of the two lengths.
    pad = pad.astype(strings.dtype, copy=False)
    target_width = np.maximum(str_len(strings), target_width)
    result_shape = np.broadcast_shapes(
        strings.shape, target_width.shape, pad.shape)
    result_dtype = f"{strings.dtype.char}{target_width.max()}"
    result = np.empty_like(strings, shape=result_shape, dtype=result_dtype)
    return _ljust(strings, target_width, pad, out=result)
@set_module("numpy.strings")
@array_function_dispatch(_just_dispatcher)
def rjust(a, width, fillchar=' '):
    """
    Return an array with the elements of `a` right-justified in a
    string of length `width`.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype

    width : array_like, with any integer dtype
        The length of the resulting strings, unless ``width < str_len(a)``.
    fillchar : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Optional padding character to use (default is space).

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    Raises
    ------
    TypeError
        If ``width`` does not have an integer dtype, or if any element
        of ``fillchar`` is not exactly one character long.

    See Also
    --------
    str.rjust

    Notes
    -----
    While it is possible for ``a`` and ``fillchar`` to have different dtypes,
    passing a non-ASCII character in ``fillchar`` when ``a`` is of dtype "S"
    is not allowed, and a ``ValueError`` is raised.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array(['a', 'bb', 'cccc'])
    >>> np.strings.rjust(a, width=3, fillchar='*')
    array(['**a', '*bb', 'cccc'], dtype='<U4')
    >>> np.strings.rjust(a, width=1)
    array(['a', 'bb', 'cccc'], dtype='<U4')

    """
    target_width = np.asanyarray(width)
    if not np.issubdtype(target_width.dtype, np.integer):
        raise TypeError(
            f"unsupported type {target_width.dtype} for operand 'width'")

    strings = np.asanyarray(a)
    pad = np.asanyarray(fillchar)

    wrong_pad_length = str_len(pad) != 1
    if np.any(wrong_pad_length):
        raise TypeError(
            "The fill character must be exactly one character long")

    # StringDType output needs no pre-sized buffer.
    if np.result_type(strings, pad).char == "T":
        return _rjust(strings, target_width, pad)

    # Fixed-width dtypes: strings longer than the requested width are kept
    # whole, so the output must fit the larger of the two lengths.
    pad = pad.astype(strings.dtype, copy=False)
    target_width = np.maximum(str_len(strings), target_width)
    result_shape = np.broadcast_shapes(
        strings.shape, target_width.shape, pad.shape)
    result_dtype = f"{strings.dtype.char}{target_width.max()}"
    result = np.empty_like(strings, shape=result_shape, dtype=result_dtype)
    return _rjust(strings, target_width, pad, out=result)
def _zfill_dispatcher(a, width):
    """Dispatcher for `zfill`; only ``a`` is reported."""
    relevant_args = (a,)
    return relevant_args
@set_module("numpy.strings")
@array_function_dispatch(_zfill_dispatcher)
def zfill(a, width):
    """
    Return the numeric string left-filled with zeros. A leading
    sign prefix (``+``/``-``) is handled by inserting the padding
    after the sign character rather than before.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype

    width : array_like, with any integer dtype
        Width of string to left-fill elements in `a`.

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input type

    Raises
    ------
    TypeError
        If ``width`` does not have an integer dtype.

    See Also
    --------
    str.zfill

    Examples
    --------
    >>> import numpy as np
    >>> np.strings.zfill(['1', '-1', '+1'], 3)
    array(['001', '-01', '+01'], dtype='<U3')

    """
    target_width = np.asanyarray(width)
    if not np.issubdtype(target_width.dtype, np.integer):
        raise TypeError(
            f"unsupported type {target_width.dtype} for operand 'width'")

    strings = np.asanyarray(a)

    # StringDType output needs no pre-sized buffer.
    if strings.dtype.char == "T":
        return _zfill(strings, target_width)

    # Fixed-width dtypes: strings longer than the requested width are kept
    # whole, so the output must fit the larger of the two lengths.
    target_width = np.maximum(str_len(strings), target_width)
    result_shape = np.broadcast_shapes(strings.shape, target_width.shape)
    result_dtype = f"{strings.dtype.char}{target_width.max()}"
    result = np.empty_like(strings, shape=result_shape, dtype=result_dtype)
    return _zfill(strings, target_width, out=result)
- @set_module("numpy.strings")
- def lstrip(a, chars=None):
- """
- For each element in `a`, return a copy with the leading characters
- removed.
- Parameters
- ----------
- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
- chars : scalar with the same dtype as ``a``, optional
- The ``chars`` argument is a string specifying the set of
- characters to be removed. If ``None``, the ``chars``
- argument defaults to removing whitespace. The ``chars`` argument
- is not a prefix or suffix; rather, all combinations of its
- values are stripped.
- Returns
- -------
- out : ndarray
- Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
- depending on input types
- See Also
- --------
- str.lstrip
- Examples
- --------
- >>> import numpy as np
- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
- >>> c
- array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
- # The 'a' variable is unstripped from c[1] because of leading whitespace.
- >>> np.strings.lstrip(c, 'a')
- array(['AaAaA', ' aA ', 'bBABba'], dtype='<U7')
- >>> np.strings.lstrip(c, 'A') # leaves c unchanged
- array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
- >>> (np.strings.lstrip(c, ' ') == np.strings.lstrip(c, '')).all()
- np.False_
- >>> (np.strings.lstrip(c, ' ') == np.strings.lstrip(c)).all()
- np.True_
- """
- if chars is None:
- return _lstrip_whitespace(a)
- return _lstrip_chars(a, chars)
@set_module("numpy.strings")
def rstrip(a, chars=None):
    """
    Strip trailing characters from every element of `a`.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
    chars : scalar with the same dtype as ``a``, optional
        Set of characters to remove. It is treated as a set, not as a
        suffix: every trailing character that belongs to ``chars`` is
        dropped. When ``None`` (the default), trailing whitespace is
        removed instead.

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    See Also
    --------
    str.rstrip

    Examples
    --------
    >>> import numpy as np
    >>> c = np.array(['aAaAaA', 'abBABba'])
    >>> c
    array(['aAaAaA', 'abBABba'], dtype='<U7')
    >>> np.strings.rstrip(c, 'a')
    array(['aAaAaA', 'abBABb'], dtype='<U7')
    >>> np.strings.rstrip(c, 'A')
    array(['aAaAa', 'abBABba'], dtype='<U7')

    """
    # An explicit character set selects the "chars" helper; otherwise fall
    # back to the whitespace-stripping helper.
    if chars is not None:
        return _rstrip_chars(a, chars)
    return _rstrip_whitespace(a)
@set_module("numpy.strings")
def strip(a, chars=None):
    """
    Strip leading and trailing characters from every element of `a`.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
    chars : scalar with the same dtype as ``a``, optional
        Set of characters to remove. It is treated as a set, not as a
        prefix or suffix: every leading/trailing character that belongs
        to ``chars`` is dropped. When ``None`` (the default), whitespace
        is removed instead.

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    See Also
    --------
    str.strip

    Examples
    --------
    >>> import numpy as np
    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
    >>> np.strings.strip(c)
    array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
    # 'a' unstripped from c[1] because of leading whitespace.
    >>> np.strings.strip(c, 'a')
    array(['AaAaA', ' aA ', 'bBABb'], dtype='<U7')
    # 'A' unstripped from c[1] because of trailing whitespace.
    >>> np.strings.strip(c, 'A')
    array(['aAaAa', ' aA ', 'abBABba'], dtype='<U7')

    """
    # An explicit character set selects the "chars" helper; otherwise fall
    # back to the whitespace-stripping helper.
    if chars is not None:
        return _strip_chars(a, chars)
    return _strip_whitespace(a)
def _unary_op_dispatcher(a):
    """Dispatcher handed to ``array_function_dispatch``; yields ``(a,)``."""
    relevant_args = (a,)
    return relevant_args
@set_module("numpy.strings")
@array_function_dispatch(_unary_op_dispatcher)
def upper(a):
    """
    Convert every element of `a` to uppercase.

    Calls :meth:`str.upper` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Input array.

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    See Also
    --------
    str.upper

    Examples
    --------
    >>> import numpy as np
    >>> c = np.array(['a1b c', '1bca', 'bca1']); c
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')
    >>> np.strings.upper(c)
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')

    """
    arr = np.asarray(a)
    # The result is requested with the same dtype as the converted input.
    result_dtype = arr.dtype
    return _vec_string(arr, result_dtype, 'upper')
@set_module("numpy.strings")
@array_function_dispatch(_unary_op_dispatcher)
def lower(a):
    """
    Convert every element of `a` to lowercase.

    Calls :meth:`str.lower` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Input array.

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    See Also
    --------
    str.lower

    Examples
    --------
    >>> import numpy as np
    >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
    >>> np.strings.lower(c)
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')

    """
    arr = np.asarray(a)
    # The result is requested with the same dtype as the converted input.
    result_dtype = arr.dtype
    return _vec_string(arr, result_dtype, 'lower')
@set_module("numpy.strings")
@array_function_dispatch(_unary_op_dispatcher)
def swapcase(a):
    """
    Swap the case of every element of `a`: uppercase characters become
    lowercase and vice versa.

    Calls :meth:`str.swapcase` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Input array.

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    See Also
    --------
    str.swapcase

    Examples
    --------
    >>> import numpy as np
    >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
    array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
        dtype='|S5')
    >>> np.strings.swapcase(c)
    array(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
        dtype='|S5')

    """
    arr = np.asarray(a)
    # The result is requested with the same dtype as the converted input.
    result_dtype = arr.dtype
    return _vec_string(arr, result_dtype, 'swapcase')
@set_module("numpy.strings")
@array_function_dispatch(_unary_op_dispatcher)
def capitalize(a):
    """
    Capitalize only the first character of every element of ``a``.

    Calls :meth:`str.capitalize` element-wise.

    For byte strings, this method is locale-dependent.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Input array of strings to capitalize.

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    See Also
    --------
    str.capitalize

    Examples
    --------
    >>> import numpy as np
    >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
    array(['a1b2', '1b2a', 'b2a1', '2a1b'],
        dtype='|S4')
    >>> np.strings.capitalize(c)
    array(['A1b2', '1b2a', 'B2a1', '2a1b'],
        dtype='|S4')

    """
    arr = np.asarray(a)
    # The result is requested with the same dtype as the converted input.
    result_dtype = arr.dtype
    return _vec_string(arr, result_dtype, 'capitalize')
@set_module("numpy.strings")
@array_function_dispatch(_unary_op_dispatcher)
def title(a):
    """
    Convert every element of `a` to title case.

    In title case, words start with an uppercase character and all
    remaining cased characters are lowercase.

    Calls :meth:`str.title` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Input array.

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    See Also
    --------
    str.title

    Examples
    --------
    >>> import numpy as np
    >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
    array(['a1b c', '1b ca', 'b ca1', 'ca1b'],
        dtype='|S5')
    >>> np.strings.title(c)
    array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
        dtype='|S5')

    """
    arr = np.asarray(a)
    # The result is requested with the same dtype as the converted input.
    result_dtype = arr.dtype
    return _vec_string(arr, result_dtype, 'title')
def _replace_dispatcher(a, old, new, count=None):
    """Dispatcher handed to ``array_function_dispatch``; yields ``(a,)``."""
    # Only ``a`` is reported; ``old``, ``new`` and ``count`` are ignored here.
    relevant_args = (a,)
    return relevant_args
@set_module("numpy.strings")
@array_function_dispatch(_replace_dispatcher)
def replace(a, old, new, count=-1):
    """
    For each element in ``a``, return a copy of the string with
    occurrences of substring ``old`` replaced by ``new``.

    Parameters
    ----------
    a : array_like, with ``StringDType``, ``bytes_``, or ``str_`` dtype

    old, new : array_like, with ``StringDType``, ``bytes_``, or ``str_`` dtype

    count : array_like, with ``int_`` dtype
        If the optional argument ``count`` is given, only the first
        ``count`` occurrences are replaced. Negative values (the default
        is ``-1``) replace every occurrence.

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    Raises
    ------
    TypeError
        If ``count`` does not have an integer dtype.

    See Also
    --------
    str.replace

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array(["That is a mango", "Monkeys eat mangos"])
    >>> np.strings.replace(a, 'mango', 'banana')
    array(['That is a banana', 'Monkeys eat bananas'], dtype='<U19')

    >>> a = np.array(["The dish is fresh", "This is it"])
    >>> np.strings.replace(a, 'is', 'was')
    array(['The dwash was fresh', 'Thwas was it'], dtype='<U19')

    """
    count = np.asanyarray(count)
    if not np.issubdtype(count.dtype, np.integer):
        raise TypeError(f"unsupported type {count.dtype} for operand 'count'")

    arr = np.asanyarray(a)
    # Remember whether ``old``/``new`` carried their own dtype before they
    # are converted to arrays.
    old_dtype = getattr(old, 'dtype', None)
    old = np.asanyarray(old)
    new_dtype = getattr(new, 'dtype', None)
    new = np.asanyarray(new)

    # Variable-width strings ("T") need no pre-sized output buffer.
    if np.result_type(arr, old, new).char == "T":
        return _replace(arr, old, new, count)

    a_dt = arr.dtype
    # NOTE(review): ``x or a_dt`` relies on the truthiness of a dtype object;
    # confirm that a non-None ``old_dtype``/``new_dtype`` is actually kept
    # here, otherwise switch to an explicit ``is not None`` test.
    old = old.astype(old_dtype or a_dt, copy=False)
    new = new.astype(new_dtype or a_dt, copy=False)

    # Count how many replacements will really happen per element so the
    # fixed-width output can be sized exactly.
    max_int64 = np.iinfo(np.int64).max
    counts = _count_ufunc(arr, old, 0, max_int64)
    counts = np.where(count < 0, counts, np.minimum(counts, count))
    buffersizes = str_len(arr) + counts * (str_len(new) - str_len(old))

    # ``initial=0`` keeps the reduction defined for zero-size input, where a
    # bare ``max()`` raises ValueError; sizes are never negative, so results
    # for non-empty input are unchanged.
    out_dtype = f"{a_dt.char}{buffersizes.max(initial=0)}"
    out = np.empty_like(arr, shape=buffersizes.shape, dtype=out_dtype)

    return _replace(arr, old, new, counts, out=out)
def _join_dispatcher(sep, seq):
    """Dispatcher handed to ``array_function_dispatch``; yields ``(sep, seq)``."""
    relevant_args = (sep, seq)
    return relevant_args
@array_function_dispatch(_join_dispatcher)
def _join(sep, seq):
    """
    Concatenate the strings in the sequence `seq`, using `sep` as the
    separator.

    Calls :meth:`str.join` element-wise.

    Parameters
    ----------
    sep : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
    seq : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input types

    See Also
    --------
    str.join

    Examples
    --------
    >>> import numpy as np
    >>> np.strings.join('-', 'osd')  # doctest: +SKIP
    array('o-s-d', dtype='<U5')  # doctest: +SKIP

    >>> np.strings.join(['-', '.'], ['ghc', 'osd'])  # doctest: +SKIP
    array(['g-h-c', 'o.s.d'], dtype='<U5')  # doctest: +SKIP

    """
    # Join element-wise into an object array first, then convert the result
    # with ``seq`` as the reference input.
    joined = _vec_string(sep, np.object_, 'join', (seq,))
    return _to_bytes_or_str_array(joined, seq)
def _split_dispatcher(a, sep=None, maxsplit=None):
    """Dispatcher handed to ``array_function_dispatch``; yields ``(a,)``."""
    # Only ``a`` is reported; ``sep`` and ``maxsplit`` are ignored here.
    relevant_args = (a,)
    return relevant_args
@array_function_dispatch(_split_dispatcher)
def _split(a, sep=None, maxsplit=None):
    """
    Split every element of `a` into a list of words, using `sep` as the
    delimiter string.

    Calls :meth:`str.split` element-wise.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype

    sep : str or unicode, optional
       If `sep` is not specified or None, any whitespace string is a
       separator.

    maxsplit : int, optional
        If `maxsplit` is given, at most `maxsplit` splits are done.

    Returns
    -------
    out : ndarray
        Array of list objects

    Examples
    --------
    >>> import numpy as np
    >>> x = np.array("Numpy is nice!")
    >>> np.strings.split(x, " ")  # doctest: +SKIP
    array(list(['Numpy', 'is', 'nice!']), dtype=object)  # doctest: +SKIP

    >>> np.strings.split(x, " ", 1)  # doctest: +SKIP
    array(list(['Numpy', 'is nice!']), dtype=object)  # doctest: +SKIP

    See Also
    --------
    str.split, rsplit

    """
    # The per-element lists can differ in length, so the result stays an
    # object array.
    call_args = [sep] + _clean_args(maxsplit)
    return _vec_string(a, np.object_, 'split', call_args)
@array_function_dispatch(_split_dispatcher)
def _rsplit(a, sep=None, maxsplit=None):
    """
    Split every element of `a` into a list of words, using `sep` as the
    delimiter string and working from the right.

    Calls :meth:`str.rsplit` element-wise.

    Except for splitting from the right, `rsplit`
    behaves like `split`.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype

    sep : str or unicode, optional
        If `sep` is not specified or None, any whitespace string
        is a separator.
    maxsplit : int, optional
        If `maxsplit` is given, at most `maxsplit` splits are done,
        the rightmost ones.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.rsplit, split

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array(['aAaAaA', 'abBABba'])
    >>> np.strings.rsplit(a, 'A')  # doctest: +SKIP
    array([list(['a', 'a', 'a', '']),  # doctest: +SKIP
           list(['abB', 'Bba'])], dtype=object)  # doctest: +SKIP

    """
    # The per-element lists can differ in length, so the result stays an
    # object array.
    call_args = [sep] + _clean_args(maxsplit)
    return _vec_string(a, np.object_, 'rsplit', call_args)
def _splitlines_dispatcher(a, keepends=None):
    """Dispatcher handed to ``array_function_dispatch``; yields ``(a,)``."""
    # Only ``a`` is reported; ``keepends`` is ignored here.
    relevant_args = (a,)
    return relevant_args
@array_function_dispatch(_splitlines_dispatcher)
def _splitlines(a, keepends=None):
    """
    Split every element of `a` into a list of its lines, breaking at
    line boundaries.

    Calls :meth:`str.splitlines` element-wise.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype

    keepends : bool, optional
        Line breaks are not included in the resulting list unless
        keepends is given and true.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.splitlines

    Examples
    --------
    >>> np.char.splitlines("first line\\nsecond line")
    array(list(['first line', 'second line']), dtype=object)
    >>> a = np.array(["first\\nsecond", "third\\nfourth"])
    >>> np.char.splitlines(a)
    array([list(['first', 'second']), list(['third', 'fourth'])], dtype=object)

    """
    # The per-element lists can differ in length, so the result is an
    # object array.
    call_args = _clean_args(keepends)
    return _vec_string(a, np.object_, 'splitlines', call_args)
def _partition_dispatcher(a, sep):
    """Dispatcher handed to ``array_function_dispatch``; yields ``(a,)``."""
    # Only ``a`` is reported; ``sep`` is ignored here.
    relevant_args = (a,)
    return relevant_args
@set_module("numpy.strings")
@array_function_dispatch(_partition_dispatcher)
def partition(a, sep):
    """
    Partition each element in ``a`` around ``sep``.

    For each element in ``a``, split the element at the first
    occurrence of ``sep``, and return a 3-tuple containing the part
    before the separator, the separator itself, and the part after
    the separator. If the separator is not found, the first item of
    the tuple will contain the whole string, and the second and third
    ones will be the empty string.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Input array
    sep : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Separator to split each string element in ``a``.

    Returns
    -------
    out : 3-tuple:
        - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the
          part before the separator
        - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the
          separator
        - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the
          part after the separator

    See Also
    --------
    str.partition

    Examples
    --------
    >>> import numpy as np
    >>> x = np.array(["Numpy is nice!"])
    >>> np.strings.partition(x, " ")
    (array(['Numpy'], dtype='<U5'),
     array([' '], dtype='<U1'),
     array(['is nice!'], dtype='<U8'))

    """
    a = np.asanyarray(a)
    sep = np.asanyarray(sep)

    # Variable-width strings ("T") need no pre-sized output buffers.
    if np.result_type(a, sep).char == "T":
        return _partition(a, sep)

    sep = sep.astype(a.dtype, copy=False)
    pos = _find_ufunc(a, sep, 0, MAX)
    a_len = str_len(a)
    sep_len = str_len(sep)

    # Lengths of the "before" and "after" parts; when the separator is
    # missing the whole string goes into the first part.
    not_found = pos < 0
    buffersizes1 = np.where(not_found, a_len, pos)
    buffersizes3 = np.where(not_found, 0, a_len - pos - sep_len)

    # ``initial=0`` keeps the reductions defined for zero-size input, where
    # a bare ``max()`` raises ValueError; the sizes are never negative, so
    # results for non-empty input are unchanged.
    out_dtype = ",".join([f"{a.dtype.char}{n}" for n in (
        buffersizes1.max(initial=0),
        1 if np.all(not_found) else sep_len.max(),
        buffersizes3.max(initial=0),
    )])
    shape = np.broadcast_shapes(a.shape, sep.shape)
    # One structured buffer whose three fields receive the three outputs.
    out = np.empty_like(a, shape=shape, dtype=out_dtype)
    return _partition_index(a, sep, pos, out=(out["f0"], out["f1"], out["f2"]))
@set_module("numpy.strings")
@array_function_dispatch(_partition_dispatcher)
def rpartition(a, sep):
    """
    Partition (split) each element around the right-most separator.

    For each element in ``a``, split the element at the last
    occurrence of ``sep``, and return a 3-tuple containing the part
    before the separator, the separator itself, and the part after
    the separator. If the separator is not found, the third item of
    the tuple will contain the whole string, and the first and second
    ones will be the empty string.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Input array
    sep : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Separator to split each string element in ``a``.

    Returns
    -------
    out : 3-tuple:
        - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the
          part before the separator
        - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the
          separator
        - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the
          part after the separator

    See Also
    --------
    str.rpartition

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> np.strings.rpartition(a, 'A')
    (array(['aAaAa', ' a', 'abB'], dtype='<U5'),
     array(['A', 'A', 'A'], dtype='<U1'),
     array(['', ' ', 'Bba'], dtype='<U3'))

    """
    a = np.asanyarray(a)
    sep = np.asanyarray(sep)

    # Variable-width strings ("T") need no pre-sized output buffers.
    if np.result_type(a, sep).char == "T":
        return _rpartition(a, sep)

    sep = sep.astype(a.dtype, copy=False)
    pos = _rfind_ufunc(a, sep, 0, MAX)
    a_len = str_len(a)
    sep_len = str_len(sep)

    # Lengths of the "before" and "after" parts; when the separator is
    # missing the whole string goes into the third part.
    not_found = pos < 0
    buffersizes1 = np.where(not_found, 0, pos)
    buffersizes3 = np.where(not_found, a_len, a_len - pos - sep_len)

    # ``initial=0`` keeps the reductions defined for zero-size input, where
    # a bare ``max()`` raises ValueError; the sizes are never negative, so
    # results for non-empty input are unchanged.
    out_dtype = ",".join([f"{a.dtype.char}{n}" for n in (
        buffersizes1.max(initial=0),
        1 if np.all(not_found) else sep_len.max(),
        buffersizes3.max(initial=0),
    )])
    shape = np.broadcast_shapes(a.shape, sep.shape)
    # One structured buffer whose three fields receive the three outputs.
    out = np.empty_like(a, shape=shape, dtype=out_dtype)
    return _rpartition_index(
        a, sep, pos, out=(out["f0"], out["f1"], out["f2"]))
def _translate_dispatcher(a, table, deletechars=None):
    """Dispatcher handed to ``array_function_dispatch``; yields ``(a,)``."""
    # Only ``a`` is reported; ``table`` and ``deletechars`` are ignored here.
    relevant_args = (a,)
    return relevant_args
@set_module("numpy.strings")
@array_function_dispatch(_translate_dispatcher)
def translate(a, table, deletechars=None):
    """
    Map the characters of every element of `a` through a translation
    table, optionally deleting characters first.

    For each element in `a`, return a copy of the string where all
    characters occurring in the optional argument `deletechars` are
    removed, and the remaining characters have been mapped through the
    given translation table.

    Calls :meth:`str.translate` element-wise.

    Parameters
    ----------
    a : array-like, with `np.bytes_` or `np.str_` dtype

    table : str of length 256

    deletechars : str
        Only forwarded when `a` does not have a ``str_`` dtype.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.translate

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array(['a1b c', '1bca', 'bca1'])
    >>> table = a[0].maketrans('abc', '123')
    >>> deletechars = ' '
    >>> np.strings.translate(a, table, deletechars)
    array(['112 3', '1231', '2311'], dtype='<U5')

    """
    arr = np.asarray(a)
    if not issubclass(arr.dtype.type, np.str_):
        # Non-``str_`` input: forward ``deletechars`` after cleaning it.
        call_args = [table] + _clean_args(deletechars)
        return _vec_string(arr, arr.dtype, 'translate', call_args)
    # ``str_`` input: only the table is passed on.
    return _vec_string(arr, arr.dtype, 'translate', (table,))
@set_module("numpy.strings")
def slice(a, start=None, stop=np._NoValue, step=None, /):
    """
    Slice the strings in `a` by slices specified by `start`, `stop`, `step`.
    Like in the regular Python `slice` object, if only `start` is
    specified then it is interpreted as the `stop`.

    Parameters
    ----------
    a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
        Input array

    start : None, an integer or an array of integers
        The start of the slice, broadcasted to `a`'s shape

    stop : None, an integer or an array of integers
        The end of the slice, broadcasted to `a`'s shape

    step : None, an integer or an array of integers
        The step for the slice, broadcasted to `a`'s shape

    Returns
    -------
    out : ndarray
        Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
        depending on input type

    Raises
    ------
    TypeError
        If `step` does not have an integer dtype.
    ValueError
        If any `step` is zero.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array(['hello', 'world'])
    >>> np.strings.slice(a, 2)
    array(['he', 'wo'], dtype='<U5')

    >>> np.strings.slice(a, 2, None)
    array(['llo', 'rld'], dtype='<U5')

    >>> np.strings.slice(a, 1, 5, 2)
    array(['el', 'ol'], dtype='<U5')

    One can specify different start/stop/step for different array entries:

    >>> np.strings.slice(a, np.array([1, 2]), np.array([4, 5]))
    array(['ell', 'rld'], dtype='<U5')

    Negative slices have the same meaning as in regular Python:

    >>> b = np.array(['hello world', 'γεια σου κόσμε', '你好世界', '👋 🌍'],
    ...              dtype=np.dtypes.StringDType())
    >>> np.strings.slice(b, -2)
    array(['hello wor', 'γεια σου κόσ', '你好', '👋'], dtype=StringDType())

    >>> np.strings.slice(b, -2, None)
    array(['ld', 'με', '世界', ' 🌍'], dtype=StringDType())

    >>> np.strings.slice(b, [3, -10, 2, -3], [-1, -2, -1, 3])
    array(['lo worl', ' σου κόσ', '世', '👋 🌍'], dtype=StringDType())

    >>> np.strings.slice(b, None, None, -1)
    array(['dlrow olleh', 'εμσόκ υοσ αιεγ', '界世好你', '🌍 👋'],
          dtype=StringDType())

    """
    # Mirror the construction of a regular slice object: a lone positional
    # argument is the stop, not the start (see logic in slice_new).
    if stop is np._NoValue:
        start, stop = None, start

    # Normalise start, stop, step to integers, see logic in PySlice_Unpack.
    step = np.asanyarray(1 if step is None else step)
    if not np.issubdtype(step.dtype, np.integer):
        raise TypeError(f"unsupported type {step.dtype} for operand 'step'")
    if np.any(step == 0):
        raise ValueError("slice step cannot be zero")

    if start is None or stop is None:
        # Missing bounds default to the extremes of intp, chosen per element
        # according to the sign of the step.
        intp_info = np.iinfo(np.intp)
        backwards = step < 0
        if start is None:
            start = np.where(backwards, intp_info.max, 0)
        if stop is None:
            stop = np.where(backwards, intp_info.min, intp_info.max)

    return _slice(a, start, stop, step)
|