defchararray.py 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414
  1. """
  2. This module contains a set of functions for vectorized string
  3. operations and methods.
  4. .. note::
  5. The `chararray` class exists for backwards compatibility with
  6. Numarray, it is not recommended for new development. Starting from numpy
  7. 1.4, if one needs arrays of strings, it is recommended to use arrays of
  8. `dtype` `object_`, `bytes_` or `str_`, and use the free functions
  9. in the `numpy.char` module for fast vectorized string operations.
  10. Some methods will only be available if the corresponding string method is
  11. available in your version of Python.
  12. The preferred alias for `defchararray` is `numpy.char`.
  13. """
  14. import functools
  15. import numpy as np
  16. from numpy._core import overrides
  17. from numpy._core.multiarray import compare_chararrays
  18. from numpy._core.strings import (
  19. _join as join,
  20. _rsplit as rsplit,
  21. _split as split,
  22. _splitlines as splitlines,
  23. )
  24. from numpy._utils import set_module
  25. from numpy.strings import *
  26. from numpy.strings import (
  27. multiply as strings_multiply,
  28. partition as strings_partition,
  29. rpartition as strings_rpartition,
  30. )
  31. from .numeric import array as narray, asarray as asnarray, ndarray
  32. from .numerictypes import bytes_, character, str_
# Names exported by ``from <this module> import *``: the comparison and
# string-operation free functions, the `array`/`asarray` constructors,
# `compare_chararrays`, and the legacy `chararray` class.
__all__ = [
    'equal', 'not_equal', 'greater_equal', 'less_equal',
    'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize',
    'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
    'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
    'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition',
    'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit',
    'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase',
    'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
    'array', 'asarray', 'compare_chararrays', 'chararray'
    ]
  44. array_function_dispatch = functools.partial(
  45. overrides.array_function_dispatch, module='numpy.char')
  46. def _binary_op_dispatcher(x1, x2):
  47. return (x1, x2)
  48. @array_function_dispatch(_binary_op_dispatcher)
  49. def equal(x1, x2):
  50. """
  51. Return (x1 == x2) element-wise.
  52. Unlike `numpy.equal`, this comparison is performed by first
  53. stripping whitespace characters from the end of the string. This
  54. behavior is provided for backward-compatibility with numarray.
  55. Parameters
  56. ----------
  57. x1, x2 : array_like of str or unicode
  58. Input arrays of the same shape.
  59. Returns
  60. -------
  61. out : ndarray
  62. Output array of bools.
  63. Examples
  64. --------
  65. >>> import numpy as np
  66. >>> y = "aa "
  67. >>> x = "aa"
  68. >>> np.char.equal(x, y)
  69. array(True)
  70. See Also
  71. --------
  72. not_equal, greater_equal, less_equal, greater, less
  73. """
  74. return compare_chararrays(x1, x2, '==', True)
  75. @array_function_dispatch(_binary_op_dispatcher)
  76. def not_equal(x1, x2):
  77. """
  78. Return (x1 != x2) element-wise.
  79. Unlike `numpy.not_equal`, this comparison is performed by first
  80. stripping whitespace characters from the end of the string. This
  81. behavior is provided for backward-compatibility with numarray.
  82. Parameters
  83. ----------
  84. x1, x2 : array_like of str or unicode
  85. Input arrays of the same shape.
  86. Returns
  87. -------
  88. out : ndarray
  89. Output array of bools.
  90. See Also
  91. --------
  92. equal, greater_equal, less_equal, greater, less
  93. Examples
  94. --------
  95. >>> import numpy as np
  96. >>> x1 = np.array(['a', 'b', 'c'])
  97. >>> np.char.not_equal(x1, 'b')
  98. array([ True, False, True])
  99. """
  100. return compare_chararrays(x1, x2, '!=', True)
  101. @array_function_dispatch(_binary_op_dispatcher)
  102. def greater_equal(x1, x2):
  103. """
  104. Return (x1 >= x2) element-wise.
  105. Unlike `numpy.greater_equal`, this comparison is performed by
  106. first stripping whitespace characters from the end of the string.
  107. This behavior is provided for backward-compatibility with
  108. numarray.
  109. Parameters
  110. ----------
  111. x1, x2 : array_like of str or unicode
  112. Input arrays of the same shape.
  113. Returns
  114. -------
  115. out : ndarray
  116. Output array of bools.
  117. See Also
  118. --------
  119. equal, not_equal, less_equal, greater, less
  120. Examples
  121. --------
  122. >>> import numpy as np
  123. >>> x1 = np.array(['a', 'b', 'c'])
  124. >>> np.char.greater_equal(x1, 'b')
  125. array([False, True, True])
  126. """
  127. return compare_chararrays(x1, x2, '>=', True)
  128. @array_function_dispatch(_binary_op_dispatcher)
  129. def less_equal(x1, x2):
  130. """
  131. Return (x1 <= x2) element-wise.
  132. Unlike `numpy.less_equal`, this comparison is performed by first
  133. stripping whitespace characters from the end of the string. This
  134. behavior is provided for backward-compatibility with numarray.
  135. Parameters
  136. ----------
  137. x1, x2 : array_like of str or unicode
  138. Input arrays of the same shape.
  139. Returns
  140. -------
  141. out : ndarray
  142. Output array of bools.
  143. See Also
  144. --------
  145. equal, not_equal, greater_equal, greater, less
  146. Examples
  147. --------
  148. >>> import numpy as np
  149. >>> x1 = np.array(['a', 'b', 'c'])
  150. >>> np.char.less_equal(x1, 'b')
  151. array([ True, True, False])
  152. """
  153. return compare_chararrays(x1, x2, '<=', True)
  154. @array_function_dispatch(_binary_op_dispatcher)
  155. def greater(x1, x2):
  156. """
  157. Return (x1 > x2) element-wise.
  158. Unlike `numpy.greater`, this comparison is performed by first
  159. stripping whitespace characters from the end of the string. This
  160. behavior is provided for backward-compatibility with numarray.
  161. Parameters
  162. ----------
  163. x1, x2 : array_like of str or unicode
  164. Input arrays of the same shape.
  165. Returns
  166. -------
  167. out : ndarray
  168. Output array of bools.
  169. See Also
  170. --------
  171. equal, not_equal, greater_equal, less_equal, less
  172. Examples
  173. --------
  174. >>> import numpy as np
  175. >>> x1 = np.array(['a', 'b', 'c'])
  176. >>> np.char.greater(x1, 'b')
  177. array([False, False, True])
  178. """
  179. return compare_chararrays(x1, x2, '>', True)
  180. @array_function_dispatch(_binary_op_dispatcher)
  181. def less(x1, x2):
  182. """
  183. Return (x1 < x2) element-wise.
  184. Unlike `numpy.greater`, this comparison is performed by first
  185. stripping whitespace characters from the end of the string. This
  186. behavior is provided for backward-compatibility with numarray.
  187. Parameters
  188. ----------
  189. x1, x2 : array_like of str or unicode
  190. Input arrays of the same shape.
  191. Returns
  192. -------
  193. out : ndarray
  194. Output array of bools.
  195. See Also
  196. --------
  197. equal, not_equal, greater_equal, less_equal, greater
  198. Examples
  199. --------
  200. >>> import numpy as np
  201. >>> x1 = np.array(['a', 'b', 'c'])
  202. >>> np.char.less(x1, 'b')
  203. array([True, False, False])
  204. """
  205. return compare_chararrays(x1, x2, '<', True)
  206. @set_module("numpy.char")
  207. def multiply(a, i):
  208. """
  209. Return (a * i), that is string multiple concatenation,
  210. element-wise.
  211. Values in ``i`` of less than 0 are treated as 0 (which yields an
  212. empty string).
  213. Parameters
  214. ----------
  215. a : array_like, with `np.bytes_` or `np.str_` dtype
  216. i : array_like, with any integer dtype
  217. Returns
  218. -------
  219. out : ndarray
  220. Output array of str or unicode, depending on input types
  221. Notes
  222. -----
  223. This is a thin wrapper around np.strings.multiply that raises
  224. `ValueError` when ``i`` is not an integer. It only
  225. exists for backwards-compatibility.
  226. Examples
  227. --------
  228. >>> import numpy as np
  229. >>> a = np.array(["a", "b", "c"])
  230. >>> np.strings.multiply(a, 3)
  231. array(['aaa', 'bbb', 'ccc'], dtype='<U3')
  232. >>> i = np.array([1, 2, 3])
  233. >>> np.strings.multiply(a, i)
  234. array(['a', 'bb', 'ccc'], dtype='<U3')
  235. >>> np.strings.multiply(np.array(['a']), i)
  236. array(['a', 'aa', 'aaa'], dtype='<U3')
  237. >>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3))
  238. >>> np.strings.multiply(a, 3)
  239. array([['aaa', 'bbb', 'ccc'],
  240. ['ddd', 'eee', 'fff']], dtype='<U3')
  241. >>> np.strings.multiply(a, i)
  242. array([['a', 'bb', 'ccc'],
  243. ['d', 'ee', 'fff']], dtype='<U3')
  244. """
  245. try:
  246. return strings_multiply(a, i)
  247. except TypeError:
  248. raise ValueError("Can only multiply by integers")
  249. @set_module("numpy.char")
  250. def partition(a, sep):
  251. """
  252. Partition each element in `a` around `sep`.
  253. Calls :meth:`str.partition` element-wise.
  254. For each element in `a`, split the element as the first
  255. occurrence of `sep`, and return 3 strings containing the part
  256. before the separator, the separator itself, and the part after
  257. the separator. If the separator is not found, return 3 strings
  258. containing the string itself, followed by two empty strings.
  259. Parameters
  260. ----------
  261. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  262. Input array
  263. sep : {str, unicode}
  264. Separator to split each string element in `a`.
  265. Returns
  266. -------
  267. out : ndarray
  268. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  269. depending on input types. The output array will have an extra
  270. dimension with 3 elements per input element.
  271. Examples
  272. --------
  273. >>> import numpy as np
  274. >>> x = np.array(["Numpy is nice!"])
  275. >>> np.char.partition(x, " ")
  276. array([['Numpy', ' ', 'is nice!']], dtype='<U8')
  277. See Also
  278. --------
  279. str.partition
  280. """
  281. return np.stack(strings_partition(a, sep), axis=-1)
  282. @set_module("numpy.char")
  283. def rpartition(a, sep):
  284. """
  285. Partition (split) each element around the right-most separator.
  286. Calls :meth:`str.rpartition` element-wise.
  287. For each element in `a`, split the element as the last
  288. occurrence of `sep`, and return 3 strings containing the part
  289. before the separator, the separator itself, and the part after
  290. the separator. If the separator is not found, return 3 strings
  291. containing the string itself, followed by two empty strings.
  292. Parameters
  293. ----------
  294. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  295. Input array
  296. sep : str or unicode
  297. Right-most separator to split each element in array.
  298. Returns
  299. -------
  300. out : ndarray
  301. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  302. depending on input types. The output array will have an extra
  303. dimension with 3 elements per input element.
  304. See Also
  305. --------
  306. str.rpartition
  307. Examples
  308. --------
  309. >>> import numpy as np
  310. >>> a = np.array(['aAaAaA', ' aA ', 'abBABba'])
  311. >>> np.char.rpartition(a, 'A')
  312. array([['aAaAa', 'A', ''],
  313. [' a', 'A', ' '],
  314. ['abB', 'A', 'Bba']], dtype='<U5')
  315. """
  316. return np.stack(strings_rpartition(a, sep), axis=-1)
  317. @set_module("numpy.char")
  318. class chararray(ndarray):
  319. """
  320. chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
  321. strides=None, order=None)
  322. Provides a convenient view on arrays of string and unicode values.
  323. .. note::
  324. The `chararray` class exists for backwards compatibility with
  325. Numarray, it is not recommended for new development. Starting from numpy
  326. 1.4, if one needs arrays of strings, it is recommended to use arrays of
  327. `dtype` `~numpy.object_`, `~numpy.bytes_` or `~numpy.str_`, and use
  328. the free functions in the `numpy.char` module for fast vectorized
  329. string operations.
  330. Versus a NumPy array of dtype `~numpy.bytes_` or `~numpy.str_`, this
  331. class adds the following functionality:
  332. 1) values automatically have whitespace removed from the end
  333. when indexed
  334. 2) comparison operators automatically remove whitespace from the
  335. end when comparing values
  336. 3) vectorized string operations are provided as methods
  337. (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)
  338. chararrays should be created using `numpy.char.array` or
  339. `numpy.char.asarray`, rather than this constructor directly.
  340. This constructor creates the array, using `buffer` (with `offset`
  341. and `strides`) if it is not ``None``. If `buffer` is ``None``, then
  342. constructs a new array with `strides` in "C order", unless both
  343. ``len(shape) >= 2`` and ``order='F'``, in which case `strides`
  344. is in "Fortran order".
  345. Methods
  346. -------
  347. astype
  348. argsort
  349. copy
  350. count
  351. decode
  352. dump
  353. dumps
  354. encode
  355. endswith
  356. expandtabs
  357. fill
  358. find
  359. flatten
  360. getfield
  361. index
  362. isalnum
  363. isalpha
  364. isdecimal
  365. isdigit
  366. islower
  367. isnumeric
  368. isspace
  369. istitle
  370. isupper
  371. item
  372. join
  373. ljust
  374. lower
  375. lstrip
  376. nonzero
  377. put
  378. ravel
  379. repeat
  380. replace
  381. reshape
  382. resize
  383. rfind
  384. rindex
  385. rjust
  386. rsplit
  387. rstrip
  388. searchsorted
  389. setfield
  390. setflags
  391. sort
  392. split
  393. splitlines
  394. squeeze
  395. startswith
  396. strip
  397. swapaxes
  398. swapcase
  399. take
  400. title
  401. tofile
  402. tolist
  403. translate
  404. transpose
  405. upper
  406. view
  407. zfill
  408. Parameters
  409. ----------
  410. shape : tuple
  411. Shape of the array.
  412. itemsize : int, optional
  413. Length of each array element, in number of characters. Default is 1.
  414. unicode : bool, optional
  415. Are the array elements of type unicode (True) or string (False).
  416. Default is False.
  417. buffer : object exposing the buffer interface or str, optional
  418. Memory address of the start of the array data. Default is None,
  419. in which case a new array is created.
  420. offset : int, optional
  421. Fixed stride displacement from the beginning of an axis?
  422. Default is 0. Needs to be >=0.
  423. strides : array_like of ints, optional
  424. Strides for the array (see `~numpy.ndarray.strides` for
  425. full description). Default is None.
  426. order : {'C', 'F'}, optional
  427. The order in which the array data is stored in memory: 'C' ->
  428. "row major" order (the default), 'F' -> "column major"
  429. (Fortran) order.
  430. Examples
  431. --------
  432. >>> import numpy as np
  433. >>> charar = np.char.chararray((3, 3))
  434. >>> charar[:] = 'a'
  435. >>> charar
  436. chararray([[b'a', b'a', b'a'],
  437. [b'a', b'a', b'a'],
  438. [b'a', b'a', b'a']], dtype='|S1')
  439. >>> charar = np.char.chararray(charar.shape, itemsize=5)
  440. >>> charar[:] = 'abc'
  441. >>> charar
  442. chararray([[b'abc', b'abc', b'abc'],
  443. [b'abc', b'abc', b'abc'],
  444. [b'abc', b'abc', b'abc']], dtype='|S5')
  445. """
  446. def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
  447. offset=0, strides=None, order='C'):
  448. if unicode:
  449. dtype = str_
  450. else:
  451. dtype = bytes_
  452. # force itemsize to be a Python int, since using NumPy integer
  453. # types results in itemsize.itemsize being used as the size of
  454. # strings in the new array.
  455. itemsize = int(itemsize)
  456. if isinstance(buffer, str):
  457. # unicode objects do not have the buffer interface
  458. filler = buffer
  459. buffer = None
  460. else:
  461. filler = None
  462. if buffer is None:
  463. self = ndarray.__new__(subtype, shape, (dtype, itemsize),
  464. order=order)
  465. else:
  466. self = ndarray.__new__(subtype, shape, (dtype, itemsize),
  467. buffer=buffer,
  468. offset=offset, strides=strides,
  469. order=order)
  470. if filler is not None:
  471. self[...] = filler
  472. return self
  473. def __array_wrap__(self, arr, context=None, return_scalar=False):
  474. # When calling a ufunc (and some other functions), we return a
  475. # chararray if the ufunc output is a string-like array,
  476. # or an ndarray otherwise
  477. if arr.dtype.char in "SUbc":
  478. return arr.view(type(self))
  479. return arr
  480. def __array_finalize__(self, obj):
  481. # The b is a special case because it is used for reconstructing.
  482. if self.dtype.char not in 'VSUbc':
  483. raise ValueError("Can only create a chararray from string data.")
  484. def __getitem__(self, obj):
  485. val = ndarray.__getitem__(self, obj)
  486. if isinstance(val, character):
  487. return val.rstrip()
  488. return val
  489. # IMPLEMENTATION NOTE: Most of the methods of this class are
  490. # direct delegations to the free functions in this module.
  491. # However, those that return an array of strings should instead
  492. # return a chararray, so some extra wrapping is required.
  493. def __eq__(self, other):
  494. """
  495. Return (self == other) element-wise.
  496. See Also
  497. --------
  498. equal
  499. """
  500. return equal(self, other)
  501. def __ne__(self, other):
  502. """
  503. Return (self != other) element-wise.
  504. See Also
  505. --------
  506. not_equal
  507. """
  508. return not_equal(self, other)
  509. def __ge__(self, other):
  510. """
  511. Return (self >= other) element-wise.
  512. See Also
  513. --------
  514. greater_equal
  515. """
  516. return greater_equal(self, other)
  517. def __le__(self, other):
  518. """
  519. Return (self <= other) element-wise.
  520. See Also
  521. --------
  522. less_equal
  523. """
  524. return less_equal(self, other)
  525. def __gt__(self, other):
  526. """
  527. Return (self > other) element-wise.
  528. See Also
  529. --------
  530. greater
  531. """
  532. return greater(self, other)
  533. def __lt__(self, other):
  534. """
  535. Return (self < other) element-wise.
  536. See Also
  537. --------
  538. less
  539. """
  540. return less(self, other)
  541. def __add__(self, other):
  542. """
  543. Return (self + other), that is string concatenation,
  544. element-wise for a pair of array_likes of str or unicode.
  545. See Also
  546. --------
  547. add
  548. """
  549. return add(self, other)
  550. def __radd__(self, other):
  551. """
  552. Return (other + self), that is string concatenation,
  553. element-wise for a pair of array_likes of `bytes_` or `str_`.
  554. See Also
  555. --------
  556. add
  557. """
  558. return add(other, self)
  559. def __mul__(self, i):
  560. """
  561. Return (self * i), that is string multiple concatenation,
  562. element-wise.
  563. See Also
  564. --------
  565. multiply
  566. """
  567. return asarray(multiply(self, i))
  568. def __rmul__(self, i):
  569. """
  570. Return (self * i), that is string multiple concatenation,
  571. element-wise.
  572. See Also
  573. --------
  574. multiply
  575. """
  576. return asarray(multiply(self, i))
  577. def __mod__(self, i):
  578. """
  579. Return (self % i), that is pre-Python 2.6 string formatting
  580. (interpolation), element-wise for a pair of array_likes of `bytes_`
  581. or `str_`.
  582. See Also
  583. --------
  584. mod
  585. """
  586. return asarray(mod(self, i))
  587. def __rmod__(self, other):
  588. return NotImplemented
  589. def argsort(self, axis=-1, kind=None, order=None, *, stable=None):
  590. """
  591. Return the indices that sort the array lexicographically.
  592. For full documentation see `numpy.argsort`, for which this method is
  593. in fact merely a "thin wrapper."
  594. Examples
  595. --------
  596. >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
  597. >>> c = c.view(np.char.chararray); c
  598. chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
  599. dtype='|S5')
  600. >>> c[c.argsort()]
  601. chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
  602. dtype='|S5')
  603. """
  604. return self.__array__().argsort(axis, kind, order, stable=stable)
  605. argsort.__doc__ = ndarray.argsort.__doc__
  606. def capitalize(self):
  607. """
  608. Return a copy of `self` with only the first character of each element
  609. capitalized.
  610. See Also
  611. --------
  612. char.capitalize
  613. """
  614. return asarray(capitalize(self))
  615. def center(self, width, fillchar=' '):
  616. """
  617. Return a copy of `self` with its elements centered in a
  618. string of length `width`.
  619. See Also
  620. --------
  621. center
  622. """
  623. return asarray(center(self, width, fillchar))
  624. def count(self, sub, start=0, end=None):
  625. """
  626. Returns an array with the number of non-overlapping occurrences of
  627. substring `sub` in the range [`start`, `end`].
  628. See Also
  629. --------
  630. char.count
  631. """
  632. return count(self, sub, start, end)
  633. def decode(self, encoding=None, errors=None):
  634. """
  635. Calls ``bytes.decode`` element-wise.
  636. See Also
  637. --------
  638. char.decode
  639. """
  640. return decode(self, encoding, errors)
  641. def encode(self, encoding=None, errors=None):
  642. """
  643. Calls :meth:`str.encode` element-wise.
  644. See Also
  645. --------
  646. char.encode
  647. """
  648. return encode(self, encoding, errors)
  649. def endswith(self, suffix, start=0, end=None):
  650. """
  651. Returns a boolean array which is `True` where the string element
  652. in `self` ends with `suffix`, otherwise `False`.
  653. See Also
  654. --------
  655. char.endswith
  656. """
  657. return endswith(self, suffix, start, end)
  658. def expandtabs(self, tabsize=8):
  659. """
  660. Return a copy of each string element where all tab characters are
  661. replaced by one or more spaces.
  662. See Also
  663. --------
  664. char.expandtabs
  665. """
  666. return asarray(expandtabs(self, tabsize))
  667. def find(self, sub, start=0, end=None):
  668. """
  669. For each element, return the lowest index in the string where
  670. substring `sub` is found.
  671. See Also
  672. --------
  673. char.find
  674. """
  675. return find(self, sub, start, end)
  676. def index(self, sub, start=0, end=None):
  677. """
  678. Like `find`, but raises :exc:`ValueError` when the substring is not
  679. found.
  680. See Also
  681. --------
  682. char.index
  683. """
  684. return index(self, sub, start, end)
  685. def isalnum(self):
  686. """
  687. Returns true for each element if all characters in the string
  688. are alphanumeric and there is at least one character, false
  689. otherwise.
  690. See Also
  691. --------
  692. char.isalnum
  693. """
  694. return isalnum(self)
  695. def isalpha(self):
  696. """
  697. Returns true for each element if all characters in the string
  698. are alphabetic and there is at least one character, false
  699. otherwise.
  700. See Also
  701. --------
  702. char.isalpha
  703. """
  704. return isalpha(self)
  705. def isdigit(self):
  706. """
  707. Returns true for each element if all characters in the string are
  708. digits and there is at least one character, false otherwise.
  709. See Also
  710. --------
  711. char.isdigit
  712. """
  713. return isdigit(self)
  714. def islower(self):
  715. """
  716. Returns true for each element if all cased characters in the
  717. string are lowercase and there is at least one cased character,
  718. false otherwise.
  719. See Also
  720. --------
  721. char.islower
  722. """
  723. return islower(self)
  724. def isspace(self):
  725. """
  726. Returns true for each element if there are only whitespace
  727. characters in the string and there is at least one character,
  728. false otherwise.
  729. See Also
  730. --------
  731. char.isspace
  732. """
  733. return isspace(self)
  734. def istitle(self):
  735. """
  736. Returns true for each element if the element is a titlecased
  737. string and there is at least one character, false otherwise.
  738. See Also
  739. --------
  740. char.istitle
  741. """
  742. return istitle(self)
  743. def isupper(self):
  744. """
  745. Returns true for each element if all cased characters in the
  746. string are uppercase and there is at least one character, false
  747. otherwise.
  748. See Also
  749. --------
  750. char.isupper
  751. """
  752. return isupper(self)
  753. def join(self, seq):
  754. """
  755. Return a string which is the concatenation of the strings in the
  756. sequence `seq`.
  757. See Also
  758. --------
  759. char.join
  760. """
  761. return join(self, seq)
  762. def ljust(self, width, fillchar=' '):
  763. """
  764. Return an array with the elements of `self` left-justified in a
  765. string of length `width`.
  766. See Also
  767. --------
  768. char.ljust
  769. """
  770. return asarray(ljust(self, width, fillchar))
  771. def lower(self):
  772. """
  773. Return an array with the elements of `self` converted to
  774. lowercase.
  775. See Also
  776. --------
  777. char.lower
  778. """
  779. return asarray(lower(self))
  780. def lstrip(self, chars=None):
  781. """
  782. For each element in `self`, return a copy with the leading characters
  783. removed.
  784. See Also
  785. --------
  786. char.lstrip
  787. """
  788. return lstrip(self, chars)
  789. def partition(self, sep):
  790. """
  791. Partition each element in `self` around `sep`.
  792. See Also
  793. --------
  794. partition
  795. """
  796. return asarray(partition(self, sep))
  797. def replace(self, old, new, count=None):
  798. """
  799. For each element in `self`, return a copy of the string with all
  800. occurrences of substring `old` replaced by `new`.
  801. See Also
  802. --------
  803. char.replace
  804. """
  805. return replace(self, old, new, count if count is not None else -1)
  806. def rfind(self, sub, start=0, end=None):
  807. """
  808. For each element in `self`, return the highest index in the string
  809. where substring `sub` is found, such that `sub` is contained
  810. within [`start`, `end`].
  811. See Also
  812. --------
  813. char.rfind
  814. """
  815. return rfind(self, sub, start, end)
  816. def rindex(self, sub, start=0, end=None):
  817. """
  818. Like `rfind`, but raises :exc:`ValueError` when the substring `sub` is
  819. not found.
  820. See Also
  821. --------
  822. char.rindex
  823. """
  824. return rindex(self, sub, start, end)
  825. def rjust(self, width, fillchar=' '):
  826. """
  827. Return an array with the elements of `self`
  828. right-justified in a string of length `width`.
  829. See Also
  830. --------
  831. char.rjust
  832. """
  833. return asarray(rjust(self, width, fillchar))
  834. def rpartition(self, sep):
  835. """
  836. Partition each element in `self` around `sep`.
  837. See Also
  838. --------
  839. rpartition
  840. """
  841. return asarray(rpartition(self, sep))
  def rsplit(self, sep=None, maxsplit=None):
      """
      Break every element of `self` into a list of words, with `sep`
      used as the delimiter string.

      See Also
      --------
      char.rsplit

      """
      words = rsplit(self, sep, maxsplit)
      return words
  def rstrip(self, chars=None):
      """
      Strip trailing characters from every element of `self`.

      Each element is replaced by a copy with its trailing characters
      removed. `chars` is handed unchanged to the free function.

      See Also
      --------
      char.rstrip

      """
      stripped = rstrip(self, chars)
      return stripped
  def split(self, sep=None, maxsplit=None):
      """
      Break every element of `self` into a list of words, with `sep`
      used as the delimiter string.

      See Also
      --------
      char.split

      """
      words = split(self, sep, maxsplit)
      return words
  def splitlines(self, keepends=None):
      """
      Break every element of `self` into a list of its lines, splitting
      at line boundaries.

      See Also
      --------
      char.splitlines

      """
      lines = splitlines(self, keepends)
      return lines
  def startswith(self, prefix, start=0, end=None):
      """
      Test whether every element of `self` begins with `prefix`.

      The result is a boolean array that is `True` where the string
      element starts with `prefix` and `False` otherwise.

      See Also
      --------
      char.startswith

      """
      matches = startswith(self, prefix, start, end)
      return matches
  def strip(self, chars=None):
      """
      Strip leading and trailing characters from every element of `self`.

      Each element is replaced by a copy with its leading and trailing
      characters removed. `chars` is handed unchanged to the free
      function.

      See Also
      --------
      char.strip

      """
      stripped = strip(self, chars)
      return stripped
  def swapcase(self):
      """
      Swap the case of every element of `self`.

      Each element is replaced by a copy in which uppercase characters
      are converted to lowercase and vice versa. The output of the free
      function is passed through `asarray` before it is returned.

      See Also
      --------
      char.swapcase

      """
      swapped = swapcase(self)
      return asarray(swapped)
  def title(self):
      """
      Titlecase every element of `self`.

      In the titlecased version of a string, words start with uppercase
      characters and all remaining cased characters are lowercase. The
      output of the free function is passed through `asarray` before it
      is returned.

      See Also
      --------
      char.title

      """
      titled = title(self)
      return asarray(titled)
  def translate(self, table, deletechars=None):
      """
      Translate every element of `self` through `table`.

      Each element is replaced by a copy of the string in which all
      characters occurring in the optional argument `deletechars` are
      removed and the remaining characters have been mapped through the
      given translation table. The output of the free function is passed
      through `asarray` before it is returned.

      See Also
      --------
      char.translate

      """
      translated = translate(self, table, deletechars)
      return asarray(translated)
  def upper(self):
      """
      Convert the elements of `self` to uppercase.

      The output of the free function is passed through `asarray`
      before it is returned.

      See Also
      --------
      char.upper

      """
      uppercased = upper(self)
      return asarray(uppercased)
  def zfill(self, width):
      """
      Left-fill the numeric string with zeros in a string of length
      `width`.

      The output of the free function is passed through `asarray`
      before it is returned.

      See Also
      --------
      char.zfill

      """
      padded = zfill(self, width)
      return asarray(padded)
  def isnumeric(self):
      """
      Test every element of `self` for containing only numeric
      characters.

      True is returned for an element if there are only numeric
      characters in it.

      See Also
      --------
      char.isnumeric

      """
      flags = isnumeric(self)
      return flags
  def isdecimal(self):
      """
      Test every element of `self` for containing only decimal
      characters.

      True is returned for an element if there are only decimal
      characters in it.

      See Also
      --------
      char.isdecimal

      """
      flags = isdecimal(self)
      return flags
  962. @set_module("numpy.char")
  963. def array(obj, itemsize=None, copy=True, unicode=None, order=None):
  964. """
  965. Create a `~numpy.char.chararray`.
  966. .. note::
  967. This class is provided for numarray backward-compatibility.
  968. New code (not concerned with numarray compatibility) should use
  969. arrays of type `bytes_` or `str_` and use the free functions
  970. in :mod:`numpy.char` for fast vectorized string operations instead.
  971. Versus a NumPy array of dtype `bytes_` or `str_`, this
  972. class adds the following functionality:
  973. 1) values automatically have whitespace removed from the end
  974. when indexed
  975. 2) comparison operators automatically remove whitespace from the
  976. end when comparing values
  977. 3) vectorized string operations are provided as methods
  978. (e.g. `chararray.endswith <numpy.char.chararray.endswith>`)
  979. and infix operators (e.g. ``+, *, %``)
  980. Parameters
  981. ----------
  982. obj : array of str or unicode-like
  983. itemsize : int, optional
  984. `itemsize` is the number of characters per scalar in the
  985. resulting array. If `itemsize` is None, and `obj` is an
  986. object array or a Python list, the `itemsize` will be
  987. automatically determined. If `itemsize` is provided and `obj`
  988. is of type str or unicode, then the `obj` string will be
  989. chunked into `itemsize` pieces.
  990. copy : bool, optional
  991. If true (default), then the object is copied. Otherwise, a copy
  992. will only be made if ``__array__`` returns a copy, if obj is a
  993. nested sequence, or if a copy is needed to satisfy any of the other
  994. requirements (`itemsize`, unicode, `order`, etc.).
  995. unicode : bool, optional
  996. When true, the resulting `~numpy.char.chararray` can contain Unicode
  997. characters, when false only 8-bit characters. If unicode is
  998. None and `obj` is one of the following:
  999. - a `~numpy.char.chararray`,
  1000. - an ndarray of type :class:`str_` or :class:`bytes_`
  1001. - a Python :class:`str` or :class:`bytes` object,
  1002. then the unicode setting of the output array will be
  1003. automatically determined.
  1004. order : {'C', 'F', 'A'}, optional
  1005. Specify the order of the array. If order is 'C' (default), then the
  1006. array will be in C-contiguous order (last-index varies the
  1007. fastest). If order is 'F', then the returned array
  1008. will be in Fortran-contiguous order (first-index varies the
  1009. fastest). If order is 'A', then the returned array may
  1010. be in any order (either C-, Fortran-contiguous, or even
  1011. discontiguous).
  1012. Examples
  1013. --------
  1014. >>> import numpy as np
  1015. >>> char_array = np.char.array(['hello', 'world', 'numpy','array'])
  1016. >>> char_array
  1017. chararray(['hello', 'world', 'numpy', 'array'], dtype='<U5')
  1018. """
  1019. if isinstance(obj, (bytes, str)):
  1020. if unicode is None:
  1021. if isinstance(obj, str):
  1022. unicode = True
  1023. else:
  1024. unicode = False
  1025. if itemsize is None:
  1026. itemsize = len(obj)
  1027. shape = len(obj) // itemsize
  1028. return chararray(shape, itemsize=itemsize, unicode=unicode,
  1029. buffer=obj, order=order)
  1030. if isinstance(obj, (list, tuple)):
  1031. obj = asnarray(obj)
  1032. if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
  1033. # If we just have a vanilla chararray, create a chararray
  1034. # view around it.
  1035. if not isinstance(obj, chararray):
  1036. obj = obj.view(chararray)
  1037. if itemsize is None:
  1038. itemsize = obj.itemsize
  1039. # itemsize is in 8-bit chars, so for Unicode, we need
  1040. # to divide by the size of a single Unicode character,
  1041. # which for NumPy is always 4
  1042. if issubclass(obj.dtype.type, str_):
  1043. itemsize //= 4
  1044. if unicode is None:
  1045. if issubclass(obj.dtype.type, str_):
  1046. unicode = True
  1047. else:
  1048. unicode = False
  1049. if unicode:
  1050. dtype = str_
  1051. else:
  1052. dtype = bytes_
  1053. if order is not None:
  1054. obj = asnarray(obj, order=order)
  1055. if (copy or
  1056. (itemsize != obj.itemsize) or
  1057. (not unicode and isinstance(obj, str_)) or
  1058. (unicode and isinstance(obj, bytes_))):
  1059. obj = obj.astype((dtype, int(itemsize)))
  1060. return obj
  1061. if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
  1062. if itemsize is None:
  1063. # Since no itemsize was specified, convert the input array to
  1064. # a list so the ndarray constructor will automatically
  1065. # determine the itemsize for us.
  1066. obj = obj.tolist()
  1067. # Fall through to the default case
  1068. if unicode:
  1069. dtype = str_
  1070. else:
  1071. dtype = bytes_
  1072. if itemsize is None:
  1073. val = narray(obj, dtype=dtype, order=order, subok=True)
  1074. else:
  1075. val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
  1076. return val.view(chararray)
  1077. @set_module("numpy.char")
  1078. def asarray(obj, itemsize=None, unicode=None, order=None):
  1079. """
  1080. Convert the input to a `~numpy.char.chararray`, copying the data only if
  1081. necessary.
  1082. Versus a NumPy array of dtype `bytes_` or `str_`, this
  1083. class adds the following functionality:
  1084. 1) values automatically have whitespace removed from the end
  1085. when indexed
  1086. 2) comparison operators automatically remove whitespace from the
  1087. end when comparing values
  1088. 3) vectorized string operations are provided as methods
  1089. (e.g. `chararray.endswith <numpy.char.chararray.endswith>`)
  1090. and infix operators (e.g. ``+``, ``*``, ``%``)
  1091. Parameters
  1092. ----------
  1093. obj : array of str or unicode-like
  1094. itemsize : int, optional
  1095. `itemsize` is the number of characters per scalar in the
  1096. resulting array. If `itemsize` is None, and `obj` is an
  1097. object array or a Python list, the `itemsize` will be
  1098. automatically determined. If `itemsize` is provided and `obj`
  1099. is of type str or unicode, then the `obj` string will be
  1100. chunked into `itemsize` pieces.
  1101. unicode : bool, optional
  1102. When true, the resulting `~numpy.char.chararray` can contain Unicode
  1103. characters, when false only 8-bit characters. If unicode is
  1104. None and `obj` is one of the following:
  1105. - a `~numpy.char.chararray`,
  1106. - an ndarray of type `str_` or `unicode_`
  1107. - a Python str or unicode object,
  1108. then the unicode setting of the output array will be
  1109. automatically determined.
  1110. order : {'C', 'F'}, optional
  1111. Specify the order of the array. If order is 'C' (default), then the
  1112. array will be in C-contiguous order (last-index varies the
  1113. fastest). If order is 'F', then the returned array
  1114. will be in Fortran-contiguous order (first-index varies the
  1115. fastest).
  1116. Examples
  1117. --------
  1118. >>> import numpy as np
  1119. >>> np.char.asarray(['hello', 'world'])
  1120. chararray(['hello', 'world'], dtype='<U5')
  1121. """
  1122. return array(obj, itemsize, copy=False,
  1123. unicode=unicode, order=order)