_coo.py 73 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881
  1. """ A sparse matrix in COOrdinate or 'triplet' format"""
  2. __docformat__ = "restructuredtext en"
  3. __all__ = ['coo_array', 'coo_matrix', 'isspmatrix_coo']
  4. import math
  5. from warnings import warn
  6. import numpy as np
  7. from .._lib._util import copy_if_needed
  8. from ._matrix import spmatrix
  9. from ._sparsetools import (coo_tocsr, coo_todense, coo_todense_nd,
  10. coo_matvec, coo_matvec_nd, coo_matmat_dense,
  11. coo_matmat_dense_nd)
  12. from ._base import issparse, SparseEfficiencyWarning, _spbase, sparray
  13. from ._data import _data_matrix, _minmax_mixin
  14. from ._sputils import (upcast_char, to_native, isshape, getdtype,
  15. getdata, downcast_intp_index, get_index_dtype,
  16. check_shape, check_reshape_kwargs, isscalarlike,
  17. isintlike, isdense)
  18. from ._index import _validate_indices
  19. import operator
  20. class _coo_base(_data_matrix, _minmax_mixin):
  21. _format = 'coo'
  22. _allow_nd = range(1, 65)
  23. def __init__(self, arg1, shape=None, dtype=None, copy=False, *, maxprint=None):
  24. _data_matrix.__init__(self, arg1, maxprint=maxprint)
  25. if not copy:
  26. copy = copy_if_needed
  27. if isinstance(arg1, tuple):
  28. if isshape(arg1, allow_nd=self._allow_nd):
  29. self._shape = check_shape(arg1, allow_nd=self._allow_nd)
  30. idx_dtype = self._get_index_dtype(maxval=max(self._shape))
  31. data_dtype = getdtype(dtype, default=float)
  32. self.coords = tuple(np.array([], dtype=idx_dtype)
  33. for _ in range(len(self._shape)))
  34. self.data = np.array([], dtype=data_dtype)
  35. self.has_canonical_format = True
  36. else:
  37. try:
  38. obj, coords = arg1
  39. except (TypeError, ValueError) as e:
  40. raise TypeError('invalid input format') from e
  41. if shape is None:
  42. if any(len(idx) == 0 for idx in coords):
  43. raise ValueError('cannot infer dimensions from zero '
  44. 'sized index arrays')
  45. shape = tuple(operator.index(np.max(idx)) + 1
  46. for idx in coords)
  47. self._shape = check_shape(shape, allow_nd=self._allow_nd)
  48. idx_dtype = self._get_index_dtype(coords,
  49. maxval=max(self.shape),
  50. check_contents=True)
  51. self.coords = tuple(np.array(idx, copy=copy, dtype=idx_dtype)
  52. for idx in coords)
  53. self.data = getdata(obj, copy=copy, dtype=dtype)
  54. self.has_canonical_format = False
  55. else:
  56. if issparse(arg1):
  57. if arg1.format == self.format and copy:
  58. self.coords = tuple(idx.copy() for idx in arg1.coords)
  59. self.data = arg1.data.astype(getdtype(dtype, arg1)) # copy=True
  60. self._shape = check_shape(arg1.shape, allow_nd=self._allow_nd)
  61. self.has_canonical_format = arg1.has_canonical_format
  62. else:
  63. coo = arg1.tocoo(copy=copy)
  64. self.coords = tuple(coo.coords)
  65. self.data = coo.data.astype(getdtype(dtype, coo), copy=False)
  66. self._shape = check_shape(coo.shape, allow_nd=self._allow_nd)
  67. self.has_canonical_format = False
  68. else:
  69. # dense argument
  70. M = np.asarray(arg1)
  71. if not isinstance(self, sparray):
  72. M = np.atleast_2d(M)
  73. if M.ndim != 2:
  74. raise TypeError(f'expected 2D array or matrix, not {M.ndim}D')
  75. self._shape = check_shape(M.shape, allow_nd=self._allow_nd)
  76. if shape is not None:
  77. if check_shape(shape, allow_nd=self._allow_nd) != self._shape:
  78. message = f'inconsistent shapes: {shape} != {self._shape}'
  79. raise ValueError(message)
  80. index_dtype = self._get_index_dtype(maxval=max(self._shape))
  81. coords = M.nonzero()
  82. self.coords = tuple(idx.astype(index_dtype, copy=False)
  83. for idx in coords)
  84. self.data = getdata(M[coords], copy=copy, dtype=dtype)
  85. self.has_canonical_format = True
  86. if len(self._shape) > 2:
  87. self.coords = tuple(idx.astype(np.int64, copy=False) for idx in self.coords)
  88. self._check()
  89. @property
  90. def row(self):
  91. if self.ndim > 1:
  92. return self.coords[-2]
  93. result = np.zeros_like(self.col)
  94. result.setflags(write=False)
  95. return result
  96. @row.setter
  97. def row(self, new_row):
  98. if self.ndim < 2:
  99. raise ValueError('cannot set row attribute of a 1-dimensional sparse array')
  100. new_row = np.asarray(new_row, dtype=self.coords[-2].dtype)
  101. self.coords = self.coords[:-2] + (new_row,) + self.coords[-1:]
  102. @property
  103. def col(self):
  104. return self.coords[-1]
  105. @col.setter
  106. def col(self, new_col):
  107. new_col = np.asarray(new_col, dtype=self.coords[-1].dtype)
  108. self.coords = self.coords[:-1] + (new_col,)
  109. def reshape(self, *args, **kwargs):
  110. shape = check_shape(args, self.shape, allow_nd=self._allow_nd)
  111. order, copy = check_reshape_kwargs(kwargs)
  112. # Return early if reshape is not required
  113. if shape == self.shape:
  114. if copy:
  115. return self.copy()
  116. else:
  117. return self
  118. # When reducing the number of dimensions, we need to be careful about
  119. # index overflow. This is why we can't simply call
  120. # `np.ravel_multi_index()` followed by `np.unravel_index()` here.
  121. flat_coords = _ravel_coords(self.coords, self.shape, order=order)
  122. if len(shape) == 2:
  123. if order == 'C':
  124. new_coords = divmod(flat_coords, shape[1])
  125. else:
  126. new_coords = divmod(flat_coords, shape[0])[::-1]
  127. else:
  128. new_coords = np.unravel_index(flat_coords, shape, order=order)
  129. idx_dtype = self._get_index_dtype(self.coords, maxval=max(shape))
  130. new_coords = tuple(np.asarray(co, dtype=idx_dtype) for co in new_coords)
  131. # Handle copy here rather than passing on to the constructor so that no
  132. # copy will be made of `new_coords` regardless.
  133. if copy:
  134. new_data = self.data.copy()
  135. else:
  136. new_data = self.data
  137. return self.__class__((new_data, new_coords), shape=shape, copy=False)
  138. reshape.__doc__ = _spbase.reshape.__doc__
  139. def _getnnz(self, axis=None):
  140. if axis is None or (axis == 0 and self.ndim == 1):
  141. nnz = len(self.data)
  142. if any(len(idx) != nnz for idx in self.coords):
  143. raise ValueError('all index and data arrays must have the '
  144. 'same length')
  145. if self.data.ndim != 1 or any(idx.ndim != 1 for idx in self.coords):
  146. raise ValueError('coordinates and data arrays must be 1-D')
  147. return int(nnz)
  148. if axis < 0:
  149. axis += self.ndim
  150. if axis >= self.ndim:
  151. raise ValueError('axis out of bounds')
  152. return np.bincount(downcast_intp_index(self.coords[1 - axis]),
  153. minlength=self.shape[1 - axis])
  154. _getnnz.__doc__ = _spbase._getnnz.__doc__
  155. def count_nonzero(self, axis=None):
  156. self.sum_duplicates()
  157. if axis is None:
  158. return np.count_nonzero(self.data)
  159. if axis < 0:
  160. axis += self.ndim
  161. if axis < 0 or axis >= self.ndim:
  162. raise ValueError('axis out of bounds')
  163. mask = self.data != 0
  164. coord = self.coords[1 - axis][mask]
  165. return np.bincount(downcast_intp_index(coord), minlength=self.shape[1 - axis])
  166. count_nonzero.__doc__ = _spbase.count_nonzero.__doc__
  167. def _check(self):
  168. """ Checks data structure for consistency """
  169. if self.ndim != len(self.coords):
  170. raise ValueError('mismatching number of index arrays for shape; '
  171. f'got {len(self.coords)}, expected {self.ndim}')
  172. # index arrays should have integer data types
  173. for i, idx in enumerate(self.coords):
  174. if idx.dtype.kind != 'i':
  175. warn(f'index array {i} has non-integer dtype ({idx.dtype.name})',
  176. stacklevel=3)
  177. idx_dtype = self._get_index_dtype(self.coords, maxval=max(self.shape))
  178. self.coords = tuple(np.asarray(idx, dtype=idx_dtype)
  179. for idx in self.coords)
  180. self.data = to_native(self.data)
  181. if self.nnz > 0:
  182. for i, idx in enumerate(self.coords):
  183. if idx.max() >= self.shape[i]:
  184. raise ValueError(f'axis {i} index {idx.max()} exceeds '
  185. f'matrix dimension {self.shape[i]}')
  186. if idx.min() < 0:
  187. raise ValueError(f'negative axis {i} index: {idx.min()}')
  188. def transpose(self, axes=None, copy=False):
  189. if axes is None:
  190. axes = range(self.ndim)[::-1]
  191. elif isinstance(self, sparray):
  192. if not hasattr(axes, "__len__") or len(axes) != self.ndim:
  193. raise ValueError("axes don't match matrix dimensions")
  194. if len(set(axes)) != self.ndim:
  195. raise ValueError("repeated axis in transpose")
  196. elif axes != (1, 0):
  197. raise ValueError("Sparse matrices do not support an 'axes' "
  198. "parameter because swapping dimensions is the "
  199. "only logical permutation.")
  200. permuted_shape = tuple(self._shape[i] for i in axes)
  201. permuted_coords = tuple(self.coords[i] for i in axes)
  202. return self.__class__((self.data, permuted_coords),
  203. shape=permuted_shape, copy=copy)
  204. transpose.__doc__ = _spbase.transpose.__doc__
  205. def resize(self, *shape) -> None:
  206. shape = check_shape(shape, allow_nd=self._allow_nd)
  207. if self.ndim > 2:
  208. raise ValueError("only 1-D or 2-D input accepted")
  209. if len(shape) > 2:
  210. raise ValueError("shape argument must be 1-D or 2-D")
  211. # Check for added dimensions.
  212. if len(shape) > self.ndim:
  213. flat_coords = _ravel_coords(self.coords, self.shape)
  214. max_size = math.prod(shape)
  215. self.coords = np.unravel_index(flat_coords[:max_size], shape)
  216. self.data = self.data[:max_size]
  217. self._shape = shape
  218. return
  219. # Check for removed dimensions.
  220. if len(shape) < self.ndim:
  221. tmp_shape = (
  222. self._shape[:len(shape) - 1] # Original shape without last axis
  223. + (-1,) # Last axis is used to flatten the array
  224. + (1,) * (self.ndim - len(shape)) # Pad with ones
  225. )
  226. tmp = self.reshape(tmp_shape)
  227. self.coords = tmp.coords[:len(shape)]
  228. self._shape = tmp.shape[:len(shape)]
  229. # Handle truncation of existing dimensions.
  230. is_truncating = any(old > new for old, new in zip(self.shape, shape))
  231. if is_truncating:
  232. mask = np.logical_and.reduce([
  233. idx < size for idx, size in zip(self.coords, shape)
  234. ])
  235. if not mask.all():
  236. self.coords = tuple(idx[mask] for idx in self.coords)
  237. self.data = self.data[mask]
  238. self._shape = shape
  239. resize.__doc__ = _spbase.resize.__doc__
  240. def toarray(self, order=None, out=None):
  241. B = self._process_toarray_args(order, out)
  242. fortran = int(B.flags.f_contiguous)
  243. if not fortran and not B.flags.c_contiguous:
  244. raise ValueError("Output array must be C or F contiguous")
  245. # This handles both 0D and 1D cases correctly regardless of the
  246. # original shape.
  247. if self.ndim == 1:
  248. coo_todense_nd(np.array([1]), self.nnz, self.ndim,
  249. self.coords[0], self.data, B.ravel('A'), fortran)
  250. elif self.ndim == 2:
  251. M, N = self.shape
  252. coo_todense(M, N, self.nnz, self.row, self.col, self.data,
  253. B.ravel('A'), fortran)
  254. else: # dim>2
  255. if fortran:
  256. strides = np.append(1, np.cumprod(self.shape[:-1]))
  257. else:
  258. strides = np.append(np.cumprod(self.shape[1:][::-1])[::-1], 1)
  259. coords = np.concatenate(self.coords)
  260. coo_todense_nd(strides, self.nnz, self.ndim,
  261. coords, self.data, B.ravel('A'), fortran)
  262. # Note: reshape() doesn't copy here, but does return a new array (view).
  263. return B.reshape(self.shape)
  264. toarray.__doc__ = _spbase.toarray.__doc__
  265. def tocsc(self, copy=False):
  266. """Convert this array/matrix to Compressed Sparse Column format
  267. Duplicate entries will be summed together.
  268. Examples
  269. --------
  270. >>> from numpy import array
  271. >>> from scipy.sparse import coo_array
  272. >>> row = array([0, 0, 1, 3, 1, 0, 0])
  273. >>> col = array([0, 2, 1, 3, 1, 0, 0])
  274. >>> data = array([1, 1, 1, 1, 1, 1, 1])
  275. >>> A = coo_array((data, (row, col)), shape=(4, 4)).tocsc()
  276. >>> A.toarray()
  277. array([[3, 0, 1, 0],
  278. [0, 2, 0, 0],
  279. [0, 0, 0, 0],
  280. [0, 0, 0, 1]])
  281. """
  282. if self.ndim != 2:
  283. raise ValueError(f'Cannot convert. CSC format must be 2D. Got {self.ndim}D')
  284. if self.nnz == 0:
  285. return self._csc_container(self.shape, dtype=self.dtype)
  286. else:
  287. from ._csc import csc_array
  288. indptr, indices, data, shape = self._coo_to_compressed(csc_array._swap)
  289. x = self._csc_container((data, indices, indptr), shape=shape)
  290. if not self.has_canonical_format:
  291. x.sum_duplicates()
  292. return x
  293. def tocsr(self, copy=False):
  294. """Convert this array/matrix to Compressed Sparse Row format
  295. Duplicate entries will be summed together.
  296. Examples
  297. --------
  298. >>> from numpy import array
  299. >>> from scipy.sparse import coo_array
  300. >>> row = array([0, 0, 1, 3, 1, 0, 0])
  301. >>> col = array([0, 2, 1, 3, 1, 0, 0])
  302. >>> data = array([1, 1, 1, 1, 1, 1, 1])
  303. >>> A = coo_array((data, (row, col)), shape=(4, 4)).tocsr()
  304. >>> A.toarray()
  305. array([[3, 0, 1, 0],
  306. [0, 2, 0, 0],
  307. [0, 0, 0, 0],
  308. [0, 0, 0, 1]])
  309. """
  310. if self.ndim > 2:
  311. raise ValueError(f'Cannot convert. CSR must be 1D or 2D. Got {self.ndim}D')
  312. if self.nnz == 0:
  313. return self._csr_container(self.shape, dtype=self.dtype)
  314. else:
  315. from ._csr import csr_array
  316. arrays = self._coo_to_compressed(csr_array._swap, copy=copy)
  317. indptr, indices, data, shape = arrays
  318. x = self._csr_container((data, indices, indptr), shape=self.shape)
  319. if not self.has_canonical_format:
  320. x.sum_duplicates()
  321. return x
  322. def _coo_to_compressed(self, swap, copy=False):
  323. """convert (shape, coords, data) to (indptr, indices, data, shape)"""
  324. M, N = swap(self._shape_as_2d)
  325. # convert idx_dtype intc to int32 for pythran.
  326. # tested in scipy/optimize/tests/test__numdiff.py::test_group_columns
  327. idx_dtype = self._get_index_dtype(self.coords, maxval=max(self.nnz, N))
  328. if self.ndim == 1:
  329. indices = self.coords[0].copy() if copy else self.coords[0]
  330. nnz = len(indices)
  331. indptr = np.array([0, nnz], dtype=idx_dtype)
  332. data = self.data.copy() if copy else self.data
  333. return indptr, indices, data, self.shape
  334. # ndim == 2
  335. major, minor = swap(self.coords)
  336. nnz = len(major)
  337. major = major.astype(idx_dtype, copy=False)
  338. minor = minor.astype(idx_dtype, copy=False)
  339. indptr = np.empty(M + 1, dtype=idx_dtype)
  340. indices = np.empty_like(minor, dtype=idx_dtype)
  341. data = np.empty_like(self.data, dtype=self.dtype)
  342. coo_tocsr(M, N, nnz, major, minor, self.data, indptr, indices, data)
  343. return indptr, indices, data, self.shape
  344. def tocoo(self, copy=False):
  345. if copy:
  346. return self.copy()
  347. else:
  348. return self
  349. tocoo.__doc__ = _spbase.tocoo.__doc__
  350. def todia(self, copy=False):
  351. if self.ndim != 2:
  352. raise ValueError(f'Cannot convert. DIA format must be 2D. Got {self.ndim}D')
  353. self.sum_duplicates()
  354. ks = self.col - self.row # the diagonal for each nonzero
  355. diags, diag_idx = np.unique(ks, return_inverse=True)
  356. if len(diags) > 100:
  357. # probably undesired, should todia() have a maxdiags parameter?
  358. warn(f"Constructing a DIA matrix with {len(diags)} diagonals "
  359. "is inefficient",
  360. SparseEfficiencyWarning, stacklevel=2)
  361. #initialize and fill in data array
  362. if self.data.size == 0:
  363. data = np.zeros((0, 0), dtype=self.dtype)
  364. else:
  365. data = np.zeros((len(diags), self.col.max()+1), dtype=self.dtype)
  366. data[diag_idx, self.col] = self.data
  367. return self._dia_container((data, diags), shape=self.shape)
  368. todia.__doc__ = _spbase.todia.__doc__
  369. def todok(self, copy=False):
  370. if self.ndim > 2:
  371. raise ValueError(f'Cannot convert. DOK must be 1D or 2D. Got {self.ndim}D')
  372. self.sum_duplicates()
  373. dok = self._dok_container(self.shape, dtype=self.dtype)
  374. # ensure that 1d coordinates are not tuples
  375. if self.ndim == 1:
  376. coords = self.coords[0]
  377. else:
  378. coords = zip(*self.coords)
  379. dok._dict = dict(zip(coords, self.data))
  380. return dok
  381. todok.__doc__ = _spbase.todok.__doc__
  382. def diagonal(self, k=0):
  383. if self.ndim != 2:
  384. raise ValueError("diagonal requires two dimensions")
  385. rows, cols = self.shape
  386. if k <= -rows or k >= cols:
  387. return np.empty(0, dtype=self.data.dtype)
  388. diag = np.zeros(min(rows + min(k, 0), cols - max(k, 0)),
  389. dtype=self.dtype)
  390. diag_mask = (self.row + k) == self.col
  391. if self.has_canonical_format:
  392. row = self.row[diag_mask]
  393. data = self.data[diag_mask]
  394. else:
  395. inds = tuple(idx[diag_mask] for idx in self.coords)
  396. (row, _), data = self._sum_duplicates(inds, self.data[diag_mask])
  397. diag[row + min(k, 0)] = data
  398. return diag
  399. diagonal.__doc__ = _data_matrix.diagonal.__doc__
  400. def _setdiag(self, values, k):
  401. if self.ndim != 2:
  402. raise ValueError("setting a diagonal requires two dimensions")
  403. M, N = self.shape
  404. if values.ndim and not len(values):
  405. return
  406. idx_dtype = self.row.dtype
  407. # Determine which triples to keep and where to put the new ones.
  408. full_keep = self.col - self.row != k
  409. if k < 0:
  410. max_index = min(M+k, N)
  411. if values.ndim:
  412. max_index = min(max_index, len(values))
  413. keep = np.logical_or(full_keep, self.col >= max_index)
  414. new_row = np.arange(-k, -k + max_index, dtype=idx_dtype)
  415. new_col = np.arange(max_index, dtype=idx_dtype)
  416. else:
  417. max_index = min(M, N-k)
  418. if values.ndim:
  419. max_index = min(max_index, len(values))
  420. keep = np.logical_or(full_keep, self.row >= max_index)
  421. new_row = np.arange(max_index, dtype=idx_dtype)
  422. new_col = np.arange(k, k + max_index, dtype=idx_dtype)
  423. # Define the array of data consisting of the entries to be added.
  424. if values.ndim:
  425. new_data = values[:max_index]
  426. else:
  427. new_data = np.empty(max_index, dtype=self.dtype)
  428. new_data[:] = values
  429. # Update the internal structure.
  430. self.coords = (np.concatenate((self.row[keep], new_row)),
  431. np.concatenate((self.col[keep], new_col)))
  432. self.data = np.concatenate((self.data[keep], new_data))
  433. self.has_canonical_format = False
  434. # needed by _data_matrix
  435. def _with_data(self, data, copy=True):
  436. """Returns a matrix with the same sparsity structure as self,
  437. but with different data. By default the index arrays are copied.
  438. """
  439. if copy:
  440. coords = tuple(idx.copy() for idx in self.coords)
  441. else:
  442. coords = self.coords
  443. return self.__class__((data, coords), shape=self.shape, dtype=data.dtype)
  444. def __getitem__(self, key):
  445. index, new_shape, arr_int_pos, none_pos = _validate_indices(
  446. key, self.shape, self.format
  447. )
  448. # handle int, slice and int-array indices
  449. index_mask = np.ones(len(self.data), dtype=np.bool_)
  450. slice_coords = []
  451. arr_coords = []
  452. arr_indices = []
  453. for i, (idx, co) in enumerate(zip(index, self.coords)):
  454. if isinstance(idx, int):
  455. index_mask &= (co == idx)
  456. elif isinstance(idx, slice):
  457. if idx == slice(None):
  458. slice_coords.append(co)
  459. else:
  460. start, stop, step = idx.indices(self.shape[i])
  461. if step != 1:
  462. if step < 0:
  463. in_range = (co <= start) & (co > stop)
  464. else:
  465. in_range = (co >= start) & (co < stop)
  466. new_ix, m = np.divmod(co - start, step)
  467. index_mask &= (m == 0) & in_range
  468. else:
  469. in_range = (co >= start) & (co < stop)
  470. new_ix = co - start
  471. index_mask &= in_range
  472. slice_coords.append(new_ix)
  473. else: # array
  474. arr_coords.append(co)
  475. arr_indices.append(idx)
  476. # shortcut for scalar output
  477. if new_shape == ():
  478. return self.data[index_mask].sum().astype(self.dtype, copy=False)
  479. new_coords = [co[index_mask] for co in slice_coords]
  480. new_data = self.data[index_mask]
  481. # handle array indices
  482. if arr_indices:
  483. arr_shape = arr_indices[0].shape # already broadcast in validate_indices
  484. # There are three dimensions required to check array indices against coords
  485. # Their lengths are described as:
  486. # a) number of indices that are arrays - arr_dim
  487. # b) number of coords to check - masked_nnz (already masked by slices)
  488. # c) size of the index arrays - arr_size
  489. # Note for this, integer indices are treated like slices, not like arrays.
  490. #
  491. # Goal:
  492. # Find new_coords and index positions that match across all arr_dim axes.
  493. # Approach: Track matches using bool array. Size: masked_nnz by arr_size.
  494. # True means all arr_indices match at that coord and index position.
  495. # Equate with broadcasting and check for all equal across (arr_dim) axis 0.
  496. # 1st array is "keyarr" (arr_dim by 1 by arr_size) from arr_indices.
  497. # 2nd array is "arr_coords" (arr_dim by masked_nnz by 1) from arr_coords.
  498. keyarr = np.array(arr_indices).reshape(len(arr_indices), 1, -1)
  499. arr_coords = np.array([co[index_mask] for co in arr_coords])[:, :, None]
  500. found = (keyarr == arr_coords).all(axis=0)
  501. arr_co, arr_ix = found.nonzero()
  502. new_data = new_data[arr_co]
  503. new_coords = [co[arr_co] for co in new_coords]
  504. new_arr_coords = list(np.unravel_index(arr_ix, shape=arr_shape))
  505. # check for contiguous positions of array and int indices
  506. if len(arr_int_pos) == arr_int_pos[-1] - arr_int_pos[0] + 1:
  507. # Contiguous. Put all array index shape at pos of array indices
  508. pos = arr_int_pos[0]
  509. new_coords = new_coords[:pos] + new_arr_coords + new_coords[pos:]
  510. else:
  511. # Not contiguous. Put all array coords at front
  512. new_coords = new_arr_coords + new_coords
  513. if none_pos:
  514. if new_coords:
  515. coord_like = np.zeros_like(new_coords[0])
  516. else:
  517. coord_like = np.zeros(len(new_data), dtype=self.coords[0].dtype)
  518. new_coords.insert(none_pos[0], coord_like)
  519. for i in none_pos[1:]:
  520. new_coords.insert(i, coord_like.copy())
  521. return coo_array((new_data, new_coords), shape=new_shape, dtype=self.dtype)
  522. def __setitem__(self, key, x):
  523. # enact self[key] = x
  524. index, new_shape, arr_int_pos, none_pos = _validate_indices(
  525. key, self.shape, self.format
  526. )
  527. # remove None's at beginning of index. Should not impact indexing coords
  528. # and will mistakenly align with x_coord columns if not removed.
  529. if none_pos:
  530. new_shape = list(new_shape)
  531. for j in none_pos[::-1]:
  532. new_shape.pop(j)
  533. new_shape = tuple(new_shape)
  534. # get coords and data from x
  535. if issparse(x):
  536. if 0 in x.shape:
  537. return # Nothing to set.
  538. x_data, x_coords = _get_sparse_data_and_coords(x, new_shape, self.dtype)
  539. else:
  540. x = np.asarray(x, dtype=self.dtype)
  541. if x.size == 0:
  542. return # Nothing to set.
  543. x_data, x_coords = _get_dense_data_and_coords(x, new_shape)
  544. # Approach:
  545. # Set indexed values to zero (drop from `self.coords` and `self.data`)
  546. # create new coords and data arrays for setting nonzeros
  547. # concatenate old (undropped) values with new coords and data
  548. old_data, old_coords = self._zero_many(index)
  549. if len(x_coords) == 1 and len(x_coords[0]) == 0:
  550. self.data, self.coords = old_data, old_coords
  551. # leave self.has_canonical_format unchanged
  552. return
  553. # To process array indices, need the x_coords for those axes
  554. # and need to ravel the array part of x_coords to build new_coords
  555. # Along the way, Find pos and shape of array-index portion of key.
  556. # arr_shape is None and pos = -1 when no arrays are used as indices.
  557. arr_shape = None
  558. pos = -1
  559. if arr_int_pos:
  560. # Get arr_shape if any arrays are in the index.
  561. # Also ravel the corresponding x_coords.
  562. for idx in index:
  563. if not isinstance(idx, slice) and not isintlike(idx):
  564. arr_shape = idx.shape
  565. # Find x_coord pos of integer and array portion of index.
  566. # If contiguous put int and array axes at pos of those indices.
  567. # If not contiguous, put all int and array axes at pos=0.
  568. if len(arr_int_pos) == (arr_int_pos[-1] - arr_int_pos[0] + 1):
  569. pos = arr_int_pos[0]
  570. else:
  571. pos = 0
  572. # compute the raveled coords of the array part of x_coords.
  573. # Used to build the new coords from the index arrays.
  574. x_arr_coo = x_coords[pos:pos + len(arr_shape)]
  575. # could use np.ravel_multi_index but _ravel_coords avoids overflow
  576. x_arr_coo_ravel = _ravel_coords(x_arr_coo, arr_shape)
  577. break
  578. # find map from x_coord slice axes to index axes
  579. x_ax = 0
  580. x_axes = {}
  581. for i, idx in enumerate(index):
  582. if i == pos:
  583. x_ax += len(arr_shape)
  584. if isinstance(idx, slice):
  585. x_axes[i] = x_ax
  586. x_ax += 1
  587. # Build new_coords and new_data
  588. new_coords = [None] * self.ndim
  589. new_nnz = len(x_data)
  590. for i, idx in enumerate(index):
  591. if isintlike(idx):
  592. new_coords[i] = (np.broadcast_to(idx, (new_nnz,)))
  593. continue
  594. elif isinstance(idx, slice):
  595. start, stop, step = idx.indices(self.shape[i])
  596. new_coords[i] = (start + x_coords[x_axes[i]] * step)
  597. else: # array idx
  598. new_coords[i] = idx.ravel()[x_arr_coo_ravel]
  599. # seems like a copy is prudent when setting data in this array
  600. new_data = x_data.copy()
  601. # deduplicate entries created by multiple coords matching in the array index
  602. # NumPy does not specify which value is put into the spot (last one assigned)
  603. # so only one value should appear if we want to match NumPy.
  604. # If matching NumPy is not crucial, we could make dups a feature where
  605. # integer array indices with repeated indices creates duplicate values.
  606. # Not doing that here. We are just removing duplicates (keep 1st data found)
  607. new_coords = np.array(new_coords)
  608. _, ind = np.unique(new_coords, axis=1, return_index=True)
  609. # update values with stack of old and new data and coords.
  610. self.data = np.hstack([old_data, new_data[ind]])
  611. self.coords = tuple(np.hstack(c) for c in zip(old_coords, new_coords[:, ind]))
  612. self.has_canonical_format = False
  613. def _zero_many(self, index):
  614. # handle int, slice and integer-array indices
  615. # index_mask accumulates a bool array of nonzeros that match index
  616. index_mask=np.ones(len(self.data), dtype=np.bool_)
  617. arr_coords = []
  618. arr_indices = []
  619. for i, (idx, co) in enumerate(zip(index, self.coords)):
  620. if isinstance(idx, int):
  621. index_mask &= (co == idx)
  622. elif isinstance(idx, slice) and idx != slice(None):
  623. start, stop, step = idx.indices(self.shape[i])
  624. if step != 1:
  625. if step < 0:
  626. in_range = (co <= start) & (co > stop)
  627. else:
  628. in_range = (co >= start) & (co < stop)
  629. m = np.mod(co - start, step)
  630. index_mask &= (m == 0) & in_range
  631. else:
  632. in_range = (co >= start) & (co < stop)
  633. index_mask &= in_range
  634. elif isinstance(idx, slice) and idx == slice(None):
  635. # slice is full axis so no changes to index_mask
  636. pass
  637. else: # array
  638. arr_coords.append(co)
  639. arr_indices.append(idx)
  640. # match array indices with masked coords. See comments in __getitem__
  641. if arr_indices:
  642. keyarr = np.array(arr_indices).reshape(len(arr_indices), 1, -1)
  643. arr_coords = np.array([co[index_mask] for co in arr_coords])[:, :, None]
  644. found = (keyarr == arr_coords).all(axis=0)
  645. arr_coo, _ = found.nonzero()
  646. arr_index_mask = np.zeros_like(index_mask)
  647. arr_index_mask[index_mask.nonzero()[0][arr_coo]] = True
  648. index_mask &= arr_index_mask
  649. # remove matching coords and data to set them to zero
  650. pruned_coords = [co[~index_mask] for co in self.coords]
  651. pruned_data = self.data[~index_mask]
  652. return pruned_data, pruned_coords
  653. def sum_duplicates(self) -> None:
  654. """Eliminate duplicate entries by adding them together
  655. This is an *in place* operation
  656. """
  657. if self.has_canonical_format:
  658. return
  659. summed = self._sum_duplicates(self.coords, self.data)
  660. self.coords, self.data = summed
  661. self.has_canonical_format = True
  662. def _sum_duplicates(self, coords, data):
  663. # Assumes coords not in canonical format.
  664. if len(data) == 0:
  665. return coords, data
  666. # Sort coords w.r.t. rows, then cols. This corresponds to C-order,
  667. # which we rely on for argmin/argmax to return the first index in the
  668. # same way that numpy does (in the case of ties).
  669. order = np.lexsort(coords[::-1])
  670. coords = tuple(idx[order] for idx in coords)
  671. data = data[order]
  672. unique_mask = np.logical_or.reduce([
  673. idx[1:] != idx[:-1] for idx in coords
  674. ])
  675. unique_mask = np.append(True, unique_mask)
  676. coords = tuple(idx[unique_mask] for idx in coords)
  677. unique_inds, = np.nonzero(unique_mask)
  678. data = np.add.reduceat(data, downcast_intp_index(unique_inds), dtype=self.dtype)
  679. return coords, data
  680. def eliminate_zeros(self):
  681. """Remove zero entries from the array/matrix
  682. This is an *in place* operation
  683. """
  684. mask = self.data != 0
  685. self.data = self.data[mask]
  686. self.coords = tuple(idx[mask] for idx in self.coords)
  687. #######################
  688. # Arithmetic handlers #
  689. #######################
  690. def _add_dense(self, other):
  691. if other.shape != self.shape:
  692. raise ValueError(f'Incompatible shapes ({self.shape} and {other.shape})')
  693. dtype = upcast_char(self.dtype.char, other.dtype.char)
  694. result = np.array(other, dtype=dtype, copy=True)
  695. fortran = int(result.flags.f_contiguous)
  696. if self.ndim == 1:
  697. coo_todense_nd(np.array([1]), self.nnz, self.ndim,
  698. self.coords[0], self.data, result.ravel('A'), fortran)
  699. elif self.ndim == 2:
  700. M, N = self._shape_as_2d
  701. coo_todense(M, N, self.nnz, self.row, self.col, self.data,
  702. result.ravel('A'), fortran)
  703. else:
  704. if fortran:
  705. strides = np.append(1, np.cumprod(self.shape[:-1]))
  706. else:
  707. strides = np.append(np.cumprod(self.shape[1:][::-1])[::-1], 1)
  708. coords = np.concatenate(self.coords)
  709. coo_todense_nd(strides, self.nnz, self.ndim,
  710. coords, self.data, result.ravel('A'), fortran)
  711. return self._container(result, copy=False)
  712. def _add_sparse(self, other):
  713. if self.ndim < 3:
  714. return self.tocsr()._add_sparse(other)
  715. if other.shape != self.shape:
  716. raise ValueError(f'Incompatible shapes ({self.shape} and {other.shape})')
  717. other = self.__class__(other)
  718. new_data = np.concatenate((self.data, other.data))
  719. new_coords = tuple(np.concatenate((self.coords, other.coords), axis=1))
  720. A = self.__class__((new_data, new_coords), shape=self.shape)
  721. return A
  722. def _sub_sparse(self, other):
  723. if self.ndim < 3:
  724. return self.tocsr()._sub_sparse(other)
  725. if other.shape != self.shape:
  726. raise ValueError(f'Incompatible shapes ({self.shape} and {other.shape})')
  727. other = self.__class__(other)
  728. new_data = np.concatenate((self.data, -other.data))
  729. new_coords = tuple(np.concatenate((self.coords, other.coords), axis=1))
  730. A = coo_array((new_data, new_coords), shape=self.shape)
  731. return A
  732. def _matmul_vector(self, other):
  733. if self.ndim > 2:
  734. result = np.zeros(math.prod(self.shape[:-1]),
  735. dtype=upcast_char(self.dtype.char, other.dtype.char))
  736. shape = np.array(self.shape)
  737. strides = np.append(np.cumprod(shape[:-1][::-1])[::-1][1:], 1)
  738. coords = np.concatenate(self.coords)
  739. coo_matvec_nd(self.nnz, len(self.shape), strides, coords, self.data,
  740. other, result)
  741. result = result.reshape(self.shape[:-1])
  742. return result
  743. # self.ndim <= 2
  744. result_shape = self.shape[0] if self.ndim > 1 else 1
  745. result = np.zeros(result_shape,
  746. dtype=upcast_char(self.dtype.char, other.dtype.char))
  747. if self.ndim == 2:
  748. col = self.col
  749. row = self.row
  750. elif self.ndim == 1:
  751. col = self.coords[0]
  752. row = np.zeros_like(col)
  753. else:
  754. raise NotImplementedError(
  755. f"coo_matvec not implemented for ndim={self.ndim}")
  756. coo_matvec(self.nnz, row, col, self.data, other, result)
  757. # Array semantics return a scalar here, not a single-element array.
  758. if isinstance(self, sparray) and result_shape == 1:
  759. return result[0]
  760. return result
  761. def _rmatmul_dispatch(self, other):
  762. if isscalarlike(other):
  763. return self._mul_scalar(other)
  764. else:
  765. # Don't use asarray unless we have to
  766. try:
  767. o_ndim = other.ndim
  768. except AttributeError:
  769. other = np.asarray(other)
  770. o_ndim = other.ndim
  771. perm = tuple(range(o_ndim)[:-2]) + tuple(range(o_ndim)[-2:][::-1])
  772. tr = other.transpose(perm)
  773. s_ndim = self.ndim
  774. perm = tuple(range(s_ndim)[:-2]) + tuple(range(s_ndim)[-2:][::-1])
  775. ret = self.transpose(perm)._matmul_dispatch(tr)
  776. if ret is NotImplemented:
  777. return NotImplemented
  778. if s_ndim == 1 or o_ndim == 1:
  779. perm = range(ret.ndim)
  780. else:
  781. perm = tuple(range(ret.ndim)[:-2]) + tuple(range(ret.ndim)[-2:][::-1])
  782. return ret.transpose(perm)
  783. def _matmul_dispatch(self, other):
  784. if isscalarlike(other):
  785. return self.multiply(other)
  786. if not (issparse(other) or isdense(other)):
  787. # If it's a list or whatever, treat it like an array
  788. other_a = np.asanyarray(other)
  789. if other_a.ndim == 0 and other_a.dtype == np.object_:
  790. # Not interpretable as an array; return NotImplemented so that
  791. # other's __rmatmul__ can kick in if that's implemented.
  792. return NotImplemented
  793. # Allow custom sparse class indicated by attr sparse gh-6520
  794. try:
  795. other.shape
  796. except AttributeError:
  797. other = other_a
  798. if self.ndim < 3 and other.ndim < 3:
  799. return _spbase._matmul_dispatch(self, other)
  800. N = self.shape[-1]
  801. err_prefix = "matmul: dimension mismatch with signature"
  802. if other.__class__ is np.ndarray:
  803. if other.shape == (N,):
  804. return self._matmul_vector(other)
  805. if other.shape == (N, 1):
  806. result = self._matmul_vector(other.ravel())
  807. return result.reshape(*self.shape[:-1], 1)
  808. if other.ndim == 1:
  809. msg = f"{err_prefix} (n,k={N}),(k={other.shape[0]},)->(n,)"
  810. raise ValueError(msg)
  811. if other.shape[-2] == N:
  812. # check for batch dimensions compatibility
  813. batch_shape_A = self.shape[:-2]
  814. batch_shape_B = other.shape[:-2]
  815. if batch_shape_A != batch_shape_B:
  816. try:
  817. # This will raise an error if the shapes are not broadcastable
  818. np.broadcast_shapes(batch_shape_A, batch_shape_B)
  819. except ValueError:
  820. raise ValueError("Batch dimensions are not broadcastable")
  821. return self._matmul_multivector(other)
  822. else:
  823. raise ValueError(
  824. f"{err_prefix} (n,..,k={N}),(k={other.shape[-2]},..,m)->(n,..,m)"
  825. )
  826. if isscalarlike(other):
  827. # scalar value
  828. return self._mul_scalar(other)
  829. if issparse(other):
  830. self_is_1d = self.ndim == 1
  831. other_is_1d = other.ndim == 1
  832. # reshape to 2-D if self or other is 1-D
  833. if self_is_1d:
  834. self = self.reshape(self._shape_as_2d) # prepend 1 to shape
  835. if other_is_1d:
  836. other = other.reshape((other.shape[0], 1)) # append 1 to shape
  837. # Check if the inner dimensions match for matrix multiplication
  838. if N != other.shape[-2]:
  839. raise ValueError(
  840. f"{err_prefix} (n,..,k={N}),(k={other.shape[-2]},..,m)->(n,..,m)"
  841. )
  842. # If A or B has more than 2 dimensions, check for
  843. # batch dimensions compatibility
  844. if self.ndim > 2 or other.ndim > 2:
  845. batch_shape_A = self.shape[:-2]
  846. batch_shape_B = other.shape[:-2]
  847. if batch_shape_A != batch_shape_B:
  848. try:
  849. # This will raise an error if the shapes are not broadcastable
  850. np.broadcast_shapes(batch_shape_A, batch_shape_B)
  851. except ValueError:
  852. raise ValueError("Batch dimensions are not broadcastable")
  853. result = self._matmul_sparse(other)
  854. # reshape back if a or b were originally 1-D
  855. if self_is_1d:
  856. # if self was originally 1-D, reshape result accordingly
  857. result = result.reshape(tuple(result.shape[:-2]) +
  858. tuple(result.shape[-1:]))
  859. if other_is_1d:
  860. result = result.reshape(result.shape[:-1])
  861. return result
  862. def _matmul_multivector(self, other):
  863. result_dtype = upcast_char(self.dtype.char, other.dtype.char)
  864. if self.ndim >= 3 or other.ndim >= 3:
  865. # if self has shape (N,), reshape to (1,N)
  866. if self.ndim == 1:
  867. result = self.reshape(1, self.shape[0])._matmul_multivector(other)
  868. return result.reshape(tuple(other.shape[:-2]) + tuple(other.shape[-1:]))
  869. broadcast_shape = np.broadcast_shapes(self.shape[:-2], other.shape[:-2])
  870. self_shape = broadcast_shape + self.shape[-2:]
  871. other_shape = broadcast_shape + other.shape[-2:]
  872. self = self._broadcast_to(self_shape)
  873. other = np.broadcast_to(other, other_shape)
  874. result_shape = broadcast_shape + self.shape[-2:-1] + other.shape[-1:]
  875. result = np.zeros(result_shape, dtype=result_dtype)
  876. coo_matmat_dense_nd(self.nnz, len(self.shape), other.shape[-1],
  877. np.array(other_shape), np.array(result_shape),
  878. np.concatenate(self.coords),
  879. self.data, other.ravel('C'), result)
  880. return result
  881. if self.ndim == 2:
  882. result_shape = (self.shape[0], other.shape[1])
  883. col = self.col
  884. row = self.row
  885. elif self.ndim == 1:
  886. result_shape = (other.shape[1],)
  887. col = self.coords[0]
  888. row = np.zeros_like(col)
  889. result = np.zeros(result_shape, dtype=result_dtype)
  890. coo_matmat_dense(self.nnz, other.shape[-1], row, col,
  891. self.data, other.ravel('C'), result)
  892. return result.view(type=type(other))
  893. def dot(self, other):
  894. """Return the dot product of two arrays.
  895. Strictly speaking a dot product involves two vectors.
  896. But in the sense that an array with ndim >= 1 is a collection
  897. of vectors, the function computes the collection of dot products
  898. between each vector in the first array with each vector in the
  899. second array. The axis upon which the sum of products is performed
  900. is the last axis of the first array and the second to last axis of
  901. the second array. If the second array is 1-D, the last axis is used.
  902. Thus, if both arrays are 1-D, the inner product is returned.
  903. If both are 2-D, we have matrix multiplication. If `other` is 1-D,
  904. the sum product is taken along the last axis of each array. If
  905. `other` is N-D for N>=2, the sum product is over the last axis of
  906. the first array and the second-to-last axis of the second array.
  907. Parameters
  908. ----------
  909. other : array_like (dense or sparse)
  910. Second array
  911. Returns
  912. -------
  913. output : array (sparse or dense)
  914. The dot product of this array with `other`.
  915. It will be dense/sparse if `other` is dense/sparse.
  916. Examples
  917. --------
  918. >>> import numpy as np
  919. >>> from scipy.sparse import coo_array
  920. >>> A = coo_array([[1, 2, 0], [0, 0, 3], [4, 0, 5]])
  921. >>> v = np.array([1, 0, -1])
  922. >>> A.dot(v)
  923. array([ 1, -3, -1], dtype=int64)
  924. For 2-D arrays it is the matrix product:
  925. >>> A = coo_array([[1, 0], [0, 1]])
  926. >>> B = coo_array([[4, 1], [2, 2]])
  927. >>> A.dot(B).toarray()
  928. array([[4, 1],
  929. [2, 2]])
  930. For 3-D arrays the shape extends unused axes by other unused axes.
  931. >>> A = coo_array(np.arange(3*4*5*6)).reshape((3,4,5,6))
  932. >>> B = coo_array(np.arange(3*4*5*6)).reshape((5,4,6,3))
  933. >>> A.dot(B).shape
  934. (3, 4, 5, 5, 4, 3)
  935. """
  936. # handle non-array input: lists, ints, etc
  937. if not (issparse(other) or isdense(other) or isscalarlike(other)):
  938. # If it's a list or whatever, treat it like an array
  939. o_array = np.asanyarray(other)
  940. if o_array.ndim == 0 and o_array.dtype == np.object_:
  941. raise TypeError(f"dot argument not supported type: '{type(other)}'")
  942. try:
  943. other.shape
  944. except AttributeError:
  945. other = o_array
  946. # Handle scalar multiplication
  947. if isscalarlike(other):
  948. return self * other
  949. # other.shape[-2:][0] gets last index of 1d, next to last index of >1d
  950. if self.shape[-1] != other.shape[-2:][0]:
  951. raise ValueError(f"shapes {self.shape} and {other.shape}"
  952. " are not aligned for n-D dot")
  953. if self.ndim < 3 and other.ndim < 3:
  954. return self @ other
  955. if isdense(other):
  956. return self._dense_dot(other)
  957. return self._sparse_dot(other.tocoo())
  958. def _sparse_dot(self, other):
  959. # already checked: at least one is >2d, neither scalar, both are coo
  960. # Ravel non-reduced axes coordinates
  961. self_2d, s_new_shape = _convert_to_2d(self, [self.ndim - 1])
  962. other_2d, o_new_shape = _convert_to_2d(other, [max(0, other.ndim - 2)])
  963. prod = self_2d @ other_2d.T # routes via 2-D CSR
  964. prod = prod.tocoo()
  965. # Combine the shapes of the non-contracted axes
  966. combined_shape = s_new_shape + o_new_shape
  967. # Unravel the 2D coordinates to get multi-dimensional coordinates
  968. coords = []
  969. new_shapes = (s_new_shape, o_new_shape) if s_new_shape else (o_new_shape,)
  970. for c, s in zip(prod.coords, new_shapes):
  971. coords.extend(np.unravel_index(c, s))
  972. # Construct the resulting COO array with coords and shape
  973. return coo_array((prod.data, coords), shape=combined_shape)
  974. def _dense_dot(self, other):
  975. # already checked: self is >0d, other is dense and >0d
  976. # Ravel non-reduced axes coordinates
  977. s_ndim = self.ndim
  978. if s_ndim <= 2:
  979. s_new_shape = () if s_ndim == 1 else (self.shape[0],)
  980. self_2d = self
  981. else:
  982. self_2d, s_new_shape = _convert_to_2d(self, [self.ndim - 1])
  983. o_ndim = other.ndim
  984. if o_ndim <= 2:
  985. o_new_shape = () if o_ndim == 1 else (other.shape[-1],)
  986. other_2d = other
  987. else:
  988. o_new_shape = other.shape[:-2] + other.shape[-1:]
  989. reorder_dims = (o_ndim - 2, *range(o_ndim - 2), o_ndim - 1)
  990. o_reorg = np.transpose(other, reorder_dims)
  991. other_2d = o_reorg.reshape((other.shape[-2], math.prod(o_new_shape)))
  992. prod = self_2d @ other_2d # routes via 2-D CSR
  993. # Combine the shapes of the non-contracted axes
  994. combined_shape = s_new_shape + o_new_shape
  995. return prod.reshape(combined_shape)
  996. def tensordot(self, other, axes=2):
  997. """Return the tensordot product with another array along the given axes.
  998. The tensordot differs from dot and matmul in that any axis can be
  999. chosen for each of the first and second array and the sum of the
  1000. products is computed just like for matrix multiplication, only not
  1001. just for the rows of the first times the columns of the second. It
  1002. takes the dot product of the collection of vectors along the specified
  1003. axes. Here we can even take the sum of the products along two or even
  1004. more axes if desired. So, tensordot is a dot product computation
  1005. applied to arrays of any dimension >= 1. It is like matmul but over
  1006. arbitrary axes for each matrix.
  1007. Given two tensors, `a` and `b`, and the desired axes specified as a
  1008. 2-tuple/list/array containing two sequences of axis numbers,
  1009. ``(a_axes, b_axes)``, sum the products of `a`'s and `b`'s elements
  1010. (components) over the axes specified by ``a_axes`` and ``b_axes``.
  1011. The `axes` input can be a single non-negative integer, ``N``;
  1012. if it is, then the last ``N`` dimensions of `a` and the first
  1013. ``N`` dimensions of `b` are summed over.
  1014. Parameters
  1015. ----------
  1016. a, b : array_like
  1017. Tensors to "dot".
  1018. axes : int or (2,) array_like
  1019. * integer_like
  1020. If an int N, sum over the last N axes of `a` and the first N axes
  1021. of `b` in order. The sizes of the corresponding axes must match.
  1022. * (2,) array_like
  1023. A 2-tuple of sequences of axes to be summed over, the first applying
  1024. to `a`, the second to `b`. The sequences must be the same length.
  1025. The shape of the corresponding axes must match between `a` and `b`.
  1026. Returns
  1027. -------
  1028. output : coo_array
  1029. The tensor dot product of this array with `other`.
  1030. It will be dense/sparse if `other` is dense/sparse.
  1031. See Also
  1032. --------
  1033. dot
  1034. Examples
  1035. --------
  1036. >>> import numpy as np
  1037. >>> import scipy.sparse
  1038. >>> A = scipy.sparse.coo_array([[[2, 3], [0, 0]], [[0, 1], [0, 5]]])
  1039. >>> A.shape
  1040. (2, 2, 2)
  1041. Integer axes N are shorthand for (range(-N, 0), range(0, N)):
  1042. >>> A.tensordot(A, axes=1).toarray()
  1043. array([[[[ 4, 9],
  1044. [ 0, 15]],
  1045. <BLANKLINE>
  1046. [[ 0, 0],
  1047. [ 0, 0]]],
  1048. <BLANKLINE>
  1049. <BLANKLINE>
  1050. [[[ 0, 1],
  1051. [ 0, 5]],
  1052. <BLANKLINE>
  1053. [[ 0, 5],
  1054. [ 0, 25]]]])
  1055. >>> A.tensordot(A, axes=2).toarray()
  1056. array([[ 4, 6],
  1057. [ 0, 25]])
  1058. >>> A.tensordot(A, axes=3)
  1059. array(39)
  1060. Using tuple for axes:
  1061. >>> a = scipy.sparse.coo_array(np.arange(60).reshape(3,4,5))
  1062. >>> b = np.arange(24).reshape(4,3,2)
  1063. >>> c = a.tensordot(b, axes=([1,0],[0,1]))
  1064. >>> c.shape
  1065. (5, 2)
  1066. >>> c
  1067. array([[4400, 4730],
  1068. [4532, 4874],
  1069. [4664, 5018],
  1070. [4796, 5162],
  1071. [4928, 5306]])
  1072. """
  1073. if not isdense(other) and not issparse(other):
  1074. # If it's a list or whatever, treat it like an array
  1075. other_array = np.asanyarray(other)
  1076. if other_array.ndim == 0 and other_array.dtype == np.object_:
  1077. raise TypeError(f"tensordot arg not supported type: '{type(other)}'")
  1078. try:
  1079. other.shape
  1080. except AttributeError:
  1081. other = other_array
  1082. axes_self, axes_other = _process_axes(self.ndim, other.ndim, axes)
  1083. # Check for shape compatibility along specified axes
  1084. if any(self.shape[ax] != other.shape[bx]
  1085. for ax, bx in zip(axes_self, axes_other)):
  1086. raise ValueError("sizes of the corresponding axes must match")
  1087. if isdense(other):
  1088. return self._dense_tensordot(other, axes_self, axes_other)
  1089. else:
  1090. return self._sparse_tensordot(other, axes_self, axes_other)
  1091. def _sparse_tensordot(self, other, s_axes, o_axes):
  1092. # Prepare the tensors for tensordot operation
  1093. # Ravel non-reduced axes coordinates
  1094. self_2d, s_new_shape = _convert_to_2d(self, s_axes)
  1095. other_2d, o_new_shape = _convert_to_2d(other, o_axes)
  1096. # Perform matrix multiplication (routed via 2-D CSR)
  1097. prod = self_2d @ other_2d.T
  1098. # handle case of scalar result (axis includes all axes for both)
  1099. if not issparse(prod):
  1100. return prod
  1101. prod = prod.tocoo()
  1102. # Combine the shapes of the non-contracted axes
  1103. combined_shape = s_new_shape + o_new_shape
  1104. # Unravel the 2D coordinates to get multi-dimensional coordinates
  1105. coords = []
  1106. new_shapes = (s_new_shape, o_new_shape) if s_new_shape else (o_new_shape,)
  1107. for c, s in zip(prod.coords, new_shapes):
  1108. if s:
  1109. coords.extend(np.unravel_index(c, s))
  1110. # Construct the resulting COO array with coords and shape
  1111. return coo_array((prod.data, coords), shape=combined_shape)
  1112. def _dense_tensordot(self, other, s_axes, o_axes):
  1113. s_ndim = len(self.shape)
  1114. o_ndim = len(other.shape)
  1115. s_non_axes = [i for i in range(s_ndim) if i not in s_axes]
  1116. s_axes_shape = [self.shape[i] for i in s_axes]
  1117. s_non_axes_shape = [self.shape[i] for i in s_non_axes]
  1118. o_non_axes = [i for i in range(o_ndim) if i not in o_axes]
  1119. o_axes_shape = [other.shape[i] for i in o_axes]
  1120. o_non_axes_shape = [other.shape[i] for i in o_non_axes]
  1121. left = self.transpose(s_non_axes + s_axes)
  1122. right = np.transpose(other, o_non_axes[:-1] + o_axes + o_non_axes[-1:])
  1123. reshape_left = (*s_non_axes_shape, math.prod(s_axes_shape))
  1124. reshape_right = (*o_non_axes_shape[:-1], math.prod(o_axes_shape),
  1125. *o_non_axes_shape[-1:])
  1126. return left.reshape(reshape_left).dot(right.reshape(reshape_right))
  1127. def _matmul_sparse(self, other):
  1128. """
  1129. Perform sparse-sparse matrix multiplication for two n-D COO arrays.
  1130. The method converts input n-D arrays to 2-D block array format,
  1131. uses csr_matmat to multiply them, and then converts the
  1132. result back to n-D COO array.
  1133. Parameters:
  1134. self (COO): The first n-D sparse array in COO format.
  1135. other (COO): The second n-D sparse array in COO format.
  1136. Returns:
  1137. prod (COO): The resulting n-D sparse array after multiplication.
  1138. """
  1139. if self.ndim < 3 and other.ndim < 3:
  1140. return _spbase._matmul_sparse(self, other)
  1141. # Get the shapes of self and other
  1142. self_shape = self.shape
  1143. other_shape = other.shape
  1144. # Determine the new shape to broadcast self and other
  1145. broadcast_shape = np.broadcast_shapes(self_shape[:-2], other_shape[:-2])
  1146. self_new_shape = tuple(broadcast_shape) + self_shape[-2:]
  1147. other_new_shape = tuple(broadcast_shape) + other_shape[-2:]
  1148. self_broadcasted = self._broadcast_to(self_new_shape)
  1149. other_broadcasted = other._broadcast_to(other_new_shape)
  1150. # Convert n-D COO arrays to 2-D block diagonal arrays
  1151. self_block_diag = _block_diag(self_broadcasted)
  1152. other_block_diag = _block_diag(other_broadcasted)
  1153. # Use csr_matmat to perform sparse matrix multiplication
  1154. prod_block_diag = (self_block_diag @ other_block_diag).tocoo()
  1155. # Convert the 2-D block diagonal array back to n-D
  1156. return _extract_block_diag(
  1157. prod_block_diag,
  1158. shape=(*broadcast_shape, self.shape[-2], other.shape[-1]),
  1159. )
  1160. def _broadcast_to(self, new_shape, copy=False):
  1161. if self.shape == new_shape:
  1162. return self.copy() if copy else self
  1163. old_shape = self.shape
  1164. # Check if the new shape is compatible for broadcasting
  1165. if len(new_shape) < len(old_shape):
  1166. raise ValueError("New shape must have at least as many dimensions"
  1167. " as the current shape")
  1168. # Add leading ones to shape to ensure same length as `new_shape`
  1169. shape = (1,) * (len(new_shape) - len(old_shape)) + tuple(old_shape)
  1170. # Ensure the old shape can be broadcast to the new shape
  1171. if any((o != 1 and o != n) for o, n in zip(shape, new_shape)):
  1172. raise ValueError(f"current shape {old_shape} cannot be "
  1173. "broadcast to new shape {new_shape}")
  1174. # Reshape the COO array to match the new dimensions
  1175. self = self.reshape(shape)
  1176. idx_dtype = get_index_dtype(self.coords, maxval=max(new_shape))
  1177. coords = self.coords
  1178. new_data = self.data
  1179. new_coords = coords[-1:] # Copy last coordinate to start
  1180. cum_repeat = 1 # Cumulative repeat factor for broadcasting
  1181. if shape[-1] != new_shape[-1]: # broadcasting the n-th (col) dimension
  1182. repeat_count = new_shape[-1]
  1183. cum_repeat *= repeat_count
  1184. new_data = np.tile(new_data, repeat_count)
  1185. new_dim = np.repeat(np.arange(0, repeat_count, dtype=idx_dtype), self.nnz)
  1186. new_coords = (new_dim,)
  1187. for i in range(-2, -(len(shape)+1), -1):
  1188. if shape[i] != new_shape[i]:
  1189. repeat_count = new_shape[i] # number of times to repeat data, coords
  1190. cum_repeat *= repeat_count # update cumulative repeat factor
  1191. nnz = len(new_data) # Number of non-zero elements so far
  1192. # Tile data and coordinates to match the new repeat count
  1193. new_data = np.tile(new_data, repeat_count)
  1194. new_coords = tuple(np.tile(new_coords[i+1:], repeat_count))
  1195. # Create new dimensions and stack them
  1196. new_dim = np.repeat(np.arange(0, repeat_count, dtype=idx_dtype), nnz)
  1197. new_coords = (new_dim,) + new_coords
  1198. else:
  1199. # If no broadcasting needed, tile the coordinates
  1200. new_dim = np.tile(coords[i], cum_repeat)
  1201. new_coords = (new_dim,) + new_coords
  1202. return coo_array((new_data, new_coords), new_shape)
  1203. def _sum_nd(self, axis, res_dtype, out):
  1204. # axis and out are preprocessed. out.shape is new_shape
  1205. A2d, new_shape = _convert_to_2d(self, axis)
  1206. ones = np.ones((A2d.shape[1], 1), dtype=res_dtype)
  1207. # sets dtype while loading into out
  1208. out[...] = (A2d @ ones).reshape(new_shape)
  1209. return out
  1210. def _min_or_max_axis_nd(self, axis, min_or_max, explicit):
  1211. A2d, new_shape = _convert_to_2d(self, axis)
  1212. res = A2d._min_or_max_axis(1, min_or_max, explicit)
  1213. unraveled_coords = np.unravel_index(res.coords[0], new_shape)
  1214. return coo_array((res.data, unraveled_coords), new_shape)
  1215. def _argminmax_axis_nd(self, axis, argminmax, compare, explicit):
  1216. A2d, new_shape = _convert_to_2d(self, axis)
  1217. res_flat = A2d._argminmax_axis(1, argminmax, compare, explicit)
  1218. return res_flat.reshape(new_shape)
  1219. def _block_diag(self):
  1220. """
  1221. Converts an N-D COO array into a 2-D COO array in block diagonal form.
  1222. Parameters:
  1223. self (coo_array): An N-Dimensional COO sparse array.
  1224. Returns:
  1225. coo_array: A 2-Dimensional COO sparse array in block diagonal form.
  1226. """
  1227. if self.ndim<2:
  1228. raise ValueError("array must have atleast dim=2")
  1229. num_blocks = math.prod(self.shape[:-2])
  1230. n_col = self.shape[-1]
  1231. n_row = self.shape[-2]
  1232. res_arr = self.reshape((num_blocks, n_row, n_col))
  1233. new_coords = (
  1234. res_arr.coords[1] + res_arr.coords[0] * res_arr.shape[1],
  1235. res_arr.coords[2] + res_arr.coords[0] * res_arr.shape[2],
  1236. )
  1237. new_shape = (num_blocks * n_row, num_blocks * n_col)
  1238. return coo_array((self.data, tuple(new_coords)), shape=new_shape)
  1239. def _extract_block_diag(self, shape):
  1240. n_row, n_col = shape[-2], shape[-1]
  1241. # Extract data and coordinates from the block diagonal COO array
  1242. data = self.data
  1243. row, col = self.row, self.col
  1244. # Initialize new coordinates array
  1245. new_coords = np.empty((len(shape), self.nnz), dtype=int)
  1246. # Calculate within-block indices
  1247. new_coords[-2] = row % n_row
  1248. new_coords[-1] = col % n_col
  1249. # Calculate coordinates for higher dimensions
  1250. temp_block_idx = row // n_row
  1251. for i in range(len(shape) - 3, -1, -1):
  1252. size = shape[i]
  1253. new_coords[i] = temp_block_idx % size
  1254. temp_block_idx = temp_block_idx // size
  1255. # Create the new COO array with the original n-D shape
  1256. return coo_array((data, tuple(new_coords)), shape=shape)
  1257. def _get_sparse_data_and_coords(x, new_shape, dtype):
  1258. x = x.tocoo()
  1259. x.sum_duplicates()
  1260. x_coords = list(x.coords)
  1261. x_data = x.data.astype(dtype, copy=False)
  1262. x_shape = x.shape
  1263. if new_shape == x_shape:
  1264. return x_data, x_coords
  1265. # broadcasting needed
  1266. len_diff = len(new_shape) - len(x_shape)
  1267. if len_diff > 0:
  1268. # prepend ones to shape of x to match ndim
  1269. x_shape = [1] * len_diff + list(x_shape)
  1270. coord_zeros = np.zeroslike(x_coords[0])
  1271. x_coords = tuple([coord_zeros] * len_diff + x_coords)
  1272. # taking away axes (squeezing) is not part of broadcasting, but long
  1273. # spmatrix history of using 2d vectors in 1d space, so we manually
  1274. # squeeze the front and back axes here to be compatible
  1275. if len_diff < 0:
  1276. for _ in range(-len_diff):
  1277. if x_shape[0] == 1:
  1278. x_shape = x_shape[1:]
  1279. x_coords = x_coords[1:]
  1280. elif x_shape[-1] == 1:
  1281. x_shape = x_shape[:-1]
  1282. x_coords = x_coords[:-1]
  1283. else:
  1284. raise ValueError("shape mismatch in assignment")
  1285. # broadcast with copy (will need to copy eventually anyway)
  1286. tot_expand = 1
  1287. for i, (nn, nx) in enumerate(zip(new_shape, x_shape)):
  1288. if nn == nx:
  1289. continue
  1290. if nx != 1:
  1291. raise ValueError("shape mismatch in assignment")
  1292. x_nnz = len(x_coords[0])
  1293. x_coords[i] = np.repeat(np.arange(nn), x_nnz)
  1294. for j, co in enumerate(x_coords):
  1295. if j == i:
  1296. continue
  1297. x_coords[j] = np.tile(co, nn)
  1298. tot_expand *= nn
  1299. x_data = np.tile(x_data.ravel(), tot_expand)
  1300. return x_data, x_coords
  1301. def _get_dense_data_and_coords(x, new_shape):
  1302. if x.shape != new_shape:
  1303. x = np.broadcast_to(x.squeeze(), new_shape)
  1304. # shift scalar input to 1d so has coords
  1305. if new_shape == ():
  1306. x_coords = tuple([np.array([0])] * len(new_shape))
  1307. x_data = x.ravel()
  1308. else:
  1309. x_coords = x.nonzero()
  1310. x_data = x[x_coords]
  1311. return x_data, x_coords
  1312. def _process_axes(ndim_a, ndim_b, axes):
  1313. if isinstance(axes, int):
  1314. if axes < 1 or axes > min(ndim_a, ndim_b):
  1315. raise ValueError("axes integer is out of bounds for input arrays")
  1316. axes_a = list(range(ndim_a - axes, ndim_a))
  1317. axes_b = list(range(axes))
  1318. elif isinstance(axes, tuple | list):
  1319. if len(axes) != 2:
  1320. raise ValueError("axes must be a tuple/list of length 2")
  1321. axes_a, axes_b = axes
  1322. if len(axes_a) != len(axes_b):
  1323. raise ValueError("axes lists/tuples must be of the same length")
  1324. if any(ax >= ndim_a or ax < -ndim_a for ax in axes_a) or \
  1325. any(bx >= ndim_b or bx < -ndim_b for bx in axes_b):
  1326. raise ValueError("axes indices are out of bounds for input arrays")
  1327. else:
  1328. raise TypeError("axes must be an integer or a tuple/list of integers")
  1329. axes_a = [axis + ndim_a if axis < 0 else axis for axis in axes_a]
  1330. axes_b = [axis + ndim_b if axis < 0 else axis for axis in axes_b]
  1331. return axes_a, axes_b
  1332. def _convert_to_2d(coo, axis):
  1333. axis_coords = tuple(coo.coords[i] for i in axis)
  1334. axis_shape = tuple(coo.shape[i] for i in axis)
  1335. axis_ravel = _ravel_coords(axis_coords, axis_shape)
  1336. ndim = len(coo.coords)
  1337. non_axis = tuple(i for i in range(ndim) if i not in axis)
  1338. if non_axis:
  1339. non_axis_coords = tuple(coo.coords[i] for i in non_axis)
  1340. non_axis_shape = tuple(coo.shape[i] for i in non_axis)
  1341. non_axis_ravel = _ravel_coords(non_axis_coords, non_axis_shape)
  1342. coords_2d = (non_axis_ravel, axis_ravel)
  1343. shape_2d = (math.prod(non_axis_shape), math.prod(axis_shape))
  1344. else: # all axes included in axis so result will have 1 element
  1345. coords_2d = (axis_ravel,)
  1346. shape_2d = (math.prod(axis_shape),)
  1347. non_axis_shape = ()
  1348. new_coo = coo_array((coo.data, coords_2d), shape=shape_2d)
  1349. return new_coo, non_axis_shape
  1350. def _ravel_coords(coords, shape, order='C'):
  1351. """Like np.ravel_multi_index, but avoids some overflow issues."""
  1352. if len(coords) == 1:
  1353. return coords[0]
  1354. # Handle overflow as in https://github.com/scipy/scipy/pull/9132
  1355. if len(coords) == 2:
  1356. nrows, ncols = shape
  1357. row, col = coords
  1358. if order == 'C':
  1359. maxval = (ncols * max(0, nrows - 1) + max(0, ncols - 1))
  1360. idx_dtype = get_index_dtype(maxval=maxval)
  1361. return np.multiply(ncols, row, dtype=idx_dtype) + col
  1362. elif order == 'F':
  1363. maxval = (nrows * max(0, ncols - 1) + max(0, nrows - 1))
  1364. idx_dtype = get_index_dtype(maxval=maxval)
  1365. return np.multiply(nrows, col, dtype=idx_dtype) + row
  1366. else:
  1367. raise ValueError("'order' must be 'C' or 'F'")
  1368. return np.ravel_multi_index(coords, shape, order=order)
  1369. def isspmatrix_coo(x):
  1370. """Is `x` of coo_matrix type?
  1371. Parameters
  1372. ----------
  1373. x
  1374. object to check for being a coo matrix
  1375. Returns
  1376. -------
  1377. bool
  1378. True if `x` is a coo matrix, False otherwise
  1379. Examples
  1380. --------
  1381. >>> from scipy.sparse import coo_array, coo_matrix, csr_matrix, isspmatrix_coo
  1382. >>> isspmatrix_coo(coo_matrix([[5]]))
  1383. True
  1384. >>> isspmatrix_coo(coo_array([[5]]))
  1385. False
  1386. >>> isspmatrix_coo(csr_matrix([[5]]))
  1387. False
  1388. """
  1389. return isinstance(x, coo_matrix)
  1390. # This namespace class separates array from matrix with isinstance
  1391. class coo_array(_coo_base, sparray):
  1392. """
  1393. A sparse array in COOrdinate format.
  1394. Also known as the 'ijv' or 'triplet' format.
  1395. This can be instantiated in several ways:
  1396. coo_array(D)
  1397. where D is an ndarray
  1398. coo_array(S)
  1399. with another sparse array or matrix S (equivalent to S.tocoo())
  1400. coo_array(shape, [dtype])
  1401. to construct an empty sparse array with shape `shape`
  1402. dtype is optional, defaulting to dtype='d'.
  1403. coo_array((data, coords), [shape])
  1404. to construct from existing data and index arrays:
  1405. 1. data[:] the entries of the sparse array, in any order
  1406. 2. coords[i][:] the axis-i coordinates of the data entries
  1407. Where ``A[coords] = data``, and coords is a tuple of index arrays.
  1408. When shape is not specified, it is inferred from the index arrays.
  1409. Attributes
  1410. ----------
  1411. dtype : dtype
  1412. Data type of the sparse array
  1413. shape : tuple of integers
  1414. Shape of the sparse array
  1415. ndim : int
  1416. Number of dimensions of the sparse array
  1417. nnz
  1418. size
  1419. data
  1420. COO format data array of the sparse array
  1421. coords
  1422. COO format tuple of index arrays
  1423. has_canonical_format : bool
  1424. Whether the matrix has sorted coordinates and no duplicates
  1425. format
  1426. T
  1427. Notes
  1428. -----
  1429. Sparse arrays can be used in arithmetic operations: they support
  1430. addition, subtraction, multiplication, division, and matrix power.
  1431. Advantages of the COO format
  1432. - facilitates fast conversion among sparse formats
  1433. - permits duplicate entries (see example)
  1434. - very fast conversion to and from CSR/CSC formats
  1435. Disadvantages of the COO format
  1436. - does not directly support:
  1437. + arithmetic operations
  1438. + slicing
  1439. Intended Usage
  1440. - COO is a fast format for constructing sparse arrays
  1441. - Once a COO array has been constructed, convert to CSR or
  1442. CSC format for fast arithmetic and matrix vector operations
  1443. - By default when converting to CSR or CSC format, duplicate (i,j)
  1444. entries will be summed together. This facilitates efficient
  1445. construction of finite element matrices and the like. (see example)
  1446. Canonical format
  1447. - Entries and coordinates sorted by row, then column.
  1448. - There are no duplicate entries (i.e. duplicate (i,j) locations)
  1449. - Data arrays MAY have explicit zeros.
  1450. Examples
  1451. --------
  1452. >>> # Constructing an empty sparse array
  1453. >>> import numpy as np
  1454. >>> from scipy.sparse import coo_array
  1455. >>> coo_array((3, 4), dtype=np.int8).toarray()
  1456. array([[0, 0, 0, 0],
  1457. [0, 0, 0, 0],
  1458. [0, 0, 0, 0]], dtype=int8)
  1459. >>> # Constructing a sparse array using ijv format
  1460. >>> row = np.array([0, 3, 1, 0])
  1461. >>> col = np.array([0, 3, 1, 2])
  1462. >>> data = np.array([4, 5, 7, 9])
  1463. >>> coo_array((data, (row, col)), shape=(4, 4)).toarray()
  1464. array([[4, 0, 9, 0],
  1465. [0, 7, 0, 0],
  1466. [0, 0, 0, 0],
  1467. [0, 0, 0, 5]])
  1468. >>> # Constructing a sparse array with duplicate coordinates
  1469. >>> row = np.array([0, 0, 1, 3, 1, 0, 0])
  1470. >>> col = np.array([0, 2, 1, 3, 1, 0, 0])
  1471. >>> data = np.array([1, 1, 1, 1, 1, 1, 1])
  1472. >>> coo = coo_array((data, (row, col)), shape=(4, 4))
  1473. >>> # Duplicate coordinates are maintained until implicitly or explicitly summed
  1474. >>> np.max(coo.data)
  1475. 1
  1476. >>> coo.toarray()
  1477. array([[3, 0, 1, 0],
  1478. [0, 2, 0, 0],
  1479. [0, 0, 0, 0],
  1480. [0, 0, 0, 1]])
  1481. """
  1482. class coo_matrix(spmatrix, _coo_base):
  1483. """
  1484. A sparse matrix in COOrdinate format.
  1485. Also known as the 'ijv' or 'triplet' format.
  1486. This can be instantiated in several ways:
  1487. coo_matrix(D)
  1488. where D is a 2-D ndarray
  1489. coo_matrix(S)
  1490. with another sparse array or matrix S (equivalent to S.tocoo())
  1491. coo_matrix((M, N), [dtype])
  1492. to construct an empty matrix with shape (M, N)
  1493. dtype is optional, defaulting to dtype='d'.
  1494. coo_matrix((data, (i, j)), [shape=(M, N)])
  1495. to construct from three arrays:
  1496. 1. data[:] the entries of the matrix, in any order
  1497. 2. i[:] the row indices of the matrix entries
  1498. 3. j[:] the column indices of the matrix entries
  1499. Where ``A[i[k], j[k]] = data[k]``. When shape is not
  1500. specified, it is inferred from the index arrays
  1501. Attributes
  1502. ----------
  1503. dtype : dtype
  1504. Data type of the matrix
  1505. shape : 2-tuple
  1506. Shape of the matrix
  1507. ndim : int
  1508. Number of dimensions (this is always 2)
  1509. nnz
  1510. size
  1511. data
  1512. COO format data array of the matrix
  1513. row
  1514. COO format row index array of the matrix
  1515. col
  1516. COO format column index array of the matrix
  1517. has_canonical_format : bool
  1518. Whether the matrix has sorted indices and no duplicates
  1519. format
  1520. T
  1521. Notes
  1522. -----
  1523. Sparse matrices can be used in arithmetic operations: they support
  1524. addition, subtraction, multiplication, division, and matrix power.
  1525. Advantages of the COO format
  1526. - facilitates fast conversion among sparse formats
  1527. - permits duplicate entries (see example)
  1528. - very fast conversion to and from CSR/CSC formats
  1529. Disadvantages of the COO format
  1530. - does not directly support:
  1531. + arithmetic operations
  1532. + slicing
  1533. Intended Usage
  1534. - COO is a fast format for constructing sparse matrices
  1535. - Once a COO matrix has been constructed, convert to CSR or
  1536. CSC format for fast arithmetic and matrix vector operations
  1537. - By default when converting to CSR or CSC format, duplicate (i,j)
  1538. entries will be summed together. This facilitates efficient
  1539. construction of finite element matrices and the like. (see example)
  1540. Canonical format
  1541. - Entries and coordinates sorted by row, then column.
  1542. - There are no duplicate entries (i.e. duplicate (i,j) locations)
  1543. - Data arrays MAY have explicit zeros.
  1544. Examples
  1545. --------
  1546. >>> # Constructing an empty matrix
  1547. >>> import numpy as np
  1548. >>> from scipy.sparse import coo_matrix
  1549. >>> coo_matrix((3, 4), dtype=np.int8).toarray()
  1550. array([[0, 0, 0, 0],
  1551. [0, 0, 0, 0],
  1552. [0, 0, 0, 0]], dtype=int8)
  1553. >>> # Constructing a matrix using ijv format
  1554. >>> row = np.array([0, 3, 1, 0])
  1555. >>> col = np.array([0, 3, 1, 2])
  1556. >>> data = np.array([4, 5, 7, 9])
  1557. >>> coo_matrix((data, (row, col)), shape=(4, 4)).toarray()
  1558. array([[4, 0, 9, 0],
  1559. [0, 7, 0, 0],
  1560. [0, 0, 0, 0],
  1561. [0, 0, 0, 5]])
  1562. >>> # Constructing a matrix with duplicate coordinates
  1563. >>> row = np.array([0, 0, 1, 3, 1, 0, 0])
  1564. >>> col = np.array([0, 2, 1, 3, 1, 0, 0])
  1565. >>> data = np.array([1, 1, 1, 1, 1, 1, 1])
  1566. >>> coo = coo_matrix((data, (row, col)), shape=(4, 4))
  1567. >>> # Duplicate coordinates are maintained until implicitly or explicitly summed
  1568. >>> np.max(coo.data)
  1569. 1
  1570. >>> coo.toarray()
  1571. array([[3, 0, 1, 0],
  1572. [0, 2, 0, 0],
  1573. [0, 0, 0, 0],
  1574. [0, 0, 0, 1]])
  1575. """
  1576. def __setstate__(self, state):
  1577. if 'coords' not in state:
  1578. # For retro-compatibility with the previous attributes
  1579. # storing nnz coordinates for 2D COO matrix.
  1580. state['coords'] = (state.pop('row'), state.pop('col'))
  1581. self.__dict__.update(state)
  1582. def __getitem__(self, key):
  1583. raise TypeError("'coo_matrix' object is not subscriptable")
  1584. def __setitem__(self, key, x):
  1585. raise TypeError("'coo_matrix' object does not support item assignment")