From 484ea2269f6d6272c85026d3354505f709c4e916 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 25 Jun 2026 14:09:43 +0200 Subject: [PATCH 01/37] Fix intersect1d crash with empty arrays --- .../cupy/logic_tests/test_truth.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/dpnp/tests/third_party/cupy/logic_tests/test_truth.py b/dpnp/tests/third_party/cupy/logic_tests/test_truth.py index 0ba353972109..0170f83a6dea 100644 --- a/dpnp/tests/third_party/cupy/logic_tests/test_truth.py +++ b/dpnp/tests/third_party/cupy/logic_tests/test_truth.py @@ -265,6 +265,36 @@ def test_multiple_instances(self, xp, dtype): b = xp.array([4, 6, 2, 5, 7, 6], dtype=dtype) return xp.intersect1d(a, b, return_indices=True) + @testing.numpy_cupy_array_equal() + def test_intersect1d_both_empty(self, xp): + return xp.intersect1d(xp.array([]), xp.array([])) + + @testing.for_all_dtypes() + @testing.numpy_cupy_array_equal() + def test_intersect1d_empty_array(self, xp, dtype): + a = xp.array([], dtype=dtype) + b = xp.array([0], dtype=dtype) + return xp.intersect1d(a, b, return_indices=True) + + @testing.for_all_dtypes() + @testing.numpy_cupy_array_equal() + def test_intersect1d_second_empty_array(self, xp, dtype): + a = xp.array([0], dtype=dtype) + b = xp.array([], dtype=dtype) + return xp.intersect1d(a, b, return_indices=True) + + @testing.numpy_cupy_array_equal() + def test_intersect1d_mixed_dtypes_empty(self, xp): + a = xp.array([0], dtype=xp.int64) + b = xp.array([], dtype=xp.float64) + return xp.intersect1d(a, b) + + @testing.numpy_cupy_array_equal() + def test_intersect1d_mixed_dtypes_empty_with_indices(self, xp): + a = xp.array([0], dtype=xp.int64) + b = xp.array([], dtype=xp.float64) + return xp.intersect1d(a, b, return_indices=True) + @pytest.mark.skip("union1d() is not supported yet") class TestUnion1d: From 853c88a09e84a7b9d5479488315aac8bd62c35be Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 25 Jun 2026 14:26:37 +0200 Subject: [PATCH 02/37] Make 
meshgrid return a tuple not list --- dpnp/tests/third_party/cupy/creation_tests/test_ranges.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py b/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py index ce716b10dd37..636d4387f4d3 100644 --- a/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py +++ b/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py @@ -346,13 +346,16 @@ def test_meshgrid0(self, dtype): ) assert out == () + @testing.with_requires("numpy>=2.5") @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() def test_meshgrid1(self, xp, dtype): x = xp.arange(2).astype(dtype) - return xp.meshgrid( + result = xp.meshgrid( x, indexing=self.indexing, sparse=self.sparse, copy=self.copy ) + assert isinstance(result, tuple) + return result @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() From 45f7db981cbc59637f529d71b8e219b0b0af0fad Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 25 Jun 2026 15:03:23 +0200 Subject: [PATCH 03/37] Relax dtype check in views (including zero-copy array constructors) --- .../cupy/creation_tests/test_basic.py | 20 +++++++ .../cupy/creation_tests/test_from_data.py | 60 +++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/dpnp/tests/third_party/cupy/creation_tests/test_basic.py b/dpnp/tests/third_party/cupy/creation_tests/test_basic.py index a9e382d22798..05b2e5e711b2 100644 --- a/dpnp/tests/third_party/cupy/creation_tests/test_basic.py +++ b/dpnp/tests/third_party/cupy/creation_tests/test_basic.py @@ -542,3 +542,23 @@ def test_full_like_reshape_cupy_only(self, dtype): c = cupy.full(self.shape, 1, dtype=dtype) testing.assert_array_equal(b, c) + + +@pytest.mark.skip("void dtypes are not supported") +class TestDTypeUnchecked: + def test_void_dtype(self): + arr = cupy.zeros(3, dtype="V10") + assert not arr.get().view("uint8").any() + + np_arr = numpy.array([b"1", b"2", b"3"], dtype="V10") + arr = 
cupy.array(np_arr) + testing.assert_array_equal(arr.get(), np_arr) + + def test_subarray_rejected(self): + with pytest.raises(ValueError, match="Unsupported dtype"): + cupy.empty(3, dtype="3i") + + def test_empty_void_rejected(self): + # We could try to allow V0 explicitly, but for now... + with pytest.raises(ValueError, match="Unsupported dtype"): + cupy.empty(3, dtype="V") diff --git a/dpnp/tests/third_party/cupy/creation_tests/test_from_data.py b/dpnp/tests/third_party/cupy/creation_tests/test_from_data.py index a2496c855b02..04b3c4446a7b 100644 --- a/dpnp/tests/third_party/cupy/creation_tests/test_from_data.py +++ b/dpnp/tests/third_party/cupy/creation_tests/test_from_data.py @@ -716,6 +716,66 @@ def test_big_endian(self): cupy.asarray(b) +@pytest.mark.skip("CUDA array interface is not supported") +class TestCudaArrayInterfaceNonBuiltinDtype: + # CuPy can accept non-builtin dtypes as containers when wrapping + # existing GPU memory via CAI. See cupy/cupy#9709 and cupy/cupy#9712 + # for details. 
+ + @pytest.mark.parametrize("ver", range(max_cuda_array_interface_version + 1)) + @pytest.mark.parametrize("strides", [False, None, True]) + @pytest.mark.parametrize( + "typestr", + [ + "; field info is lost + assert c.dtype == numpy.dtype(dtype.str) + assert c.shape == (3,) + + @pytest.mark.parametrize("ver", range(max_cuda_array_interface_version + 1)) + @pytest.mark.parametrize("strides", [False, None, True]) + def test_datetime64_view(self, ver, strides): + dtype = numpy.dtype("datetime64[ns]") + a = cupy.zeros(4, dtype="i8") + b = DummyObjectWithCudaArrayInterface( + (a.shape, a.strides, dtype.str, dtype.descr, a.data.ptr), + ver, + strides, + ) + c = cupy.asarray(b) + # Should be able to view back as int64 + d = c.view("i8") + assert d.dtype == numpy.dtype("i8") + + @testing.parameterize( *testing.product( { From 4a6385622997fee946682122420eaa4fe7c440e5 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 25 Jun 2026 15:03:48 +0200 Subject: [PATCH 04/37] Implemented inverted_cdf method for cp.quantile and cp.percentile --- dpnp/tests/third_party/cupy/statistics_tests/test_order.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/tests/third_party/cupy/statistics_tests/test_order.py b/dpnp/tests/third_party/cupy/statistics_tests/test_order.py index f35617e18619..ba15fc16b078 100644 --- a/dpnp/tests/third_party/cupy/statistics_tests/test_order.py +++ b/dpnp/tests/third_party/cupy/statistics_tests/test_order.py @@ -12,7 +12,7 @@ from dpnp.tests.third_party.cupy import testing _all_methods = ( - # 'inverted_cdf', # TODO(takagi) Not implemented + "inverted_cdf", # 'averaged_inverted_cdf', # TODO(takagi) Not implemented # 'closest_observation', # TODO(takagi) Not implemented # 'interpolated_inverted_cdf', # TODO(takagi) Not implemented From ae14f468618a386bb670e45b0904c75711e2a77a Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 10:53:52 +0200 Subject: [PATCH 05/37] Allow cupy.ndarray as repeats argument to cupy.repeat --- 
.../cupy/manipulation_tests/test_tiling.py | 239 ++++++++++++++++-- 1 file changed, 224 insertions(+), 15 deletions(-) diff --git a/dpnp/tests/third_party/cupy/manipulation_tests/test_tiling.py b/dpnp/tests/third_party/cupy/manipulation_tests/test_tiling.py index a8a1f06da47f..a37544656735 100644 --- a/dpnp/tests/third_party/cupy/manipulation_tests/test_tiling.py +++ b/dpnp/tests/third_party/cupy/manipulation_tests/test_tiling.py @@ -24,21 +24,6 @@ def test_array_repeat(self, xp): return xp.repeat(x, self.repeats, self.axis) -class TestRepeatRepeatsNdarray(unittest.TestCase): - - def test_func(self): - a = testing.shaped_arange((2, 3, 4), cupy) - repeats = cupy.array([2, 3], dtype=cupy.int32) - with pytest.raises(ValueError, match=r"repeats"): - cupy.repeat(a, repeats) - - def test_method(self): - a = testing.shaped_arange((2, 3, 4), cupy) - repeats = cupy.array([2, 3], dtype=cupy.int32) - with pytest.raises(ValueError, match=r"repeats"): - a.repeat(repeats) - - @testing.parameterize( {"repeats": [2], "axis": None}, {"repeats": [2], "axis": 1}, @@ -100,6 +85,230 @@ def test_repeat_failure(self): xp.repeat(x, self.repeats, self.axis) +@testing.parameterize( + # 1-D + {"shape": (6,), "reps": [1, 3, 2, 1, 1, 2], "axis": None}, + {"shape": (6,), "reps": [2], "axis": None}, + # 2-D + {"shape": (2, 3), "reps": [2, 1], "axis": 0}, + {"shape": (2, 3), "reps": [1, 3, 2], "axis": 1}, + {"shape": (2, 3), "reps": [2], "axis": 0}, + {"shape": (2, 3), "reps": [2], "axis": 1}, + # 3-D + {"shape": (2, 3, 4), "reps": [1, 2, 3, 4], "axis": 2}, + {"shape": (2, 3, 4), "reps": [0, 3], "axis": 0}, + {"shape": (2, 3, 4), "reps": [1, 2, 3], "axis": 1}, + {"shape": (2, 3, 4), "reps": [4], "axis": 2}, + # negative axis + {"shape": (2, 3, 4), "reps": [1, 2, 3, 4], "axis": -1}, + {"shape": (2, 3, 4), "reps": [1, 2, 3], "axis": -2}, + # axis=None + {"shape": (2, 3), "reps": [1, 2, 3, 4, 5, 0], "axis": None}, + {"shape": (4,), "reps": [0, 0, 0, 0], "axis": None}, + {"shape": (4,), "reps": [5, 
0, 3, 1], "axis": None}, + # zeros in reps + {"shape": (4,), "reps": [0, 2, 0, 1], "axis": 0}, + {"shape": (2, 3), "reps": [0, 3, 0], "axis": 1}, + # broadcast + {"shape": (2, 3), "reps": [0], "axis": 0}, + {"shape": (2, 3), "reps": [1], "axis": 1}, + {"shape": (3, 4), "reps": [2], "axis": None}, + # 4-D + {"shape": (2, 3, 4, 5), "reps": [2, 1, 3], "axis": 1}, + # empty + {"shape": (0, 3), "reps": [2], "axis": 0}, + {"shape": (2, 3), "reps": [0, 0, 0], "axis": 1}, +) +class TestRepeatNdarrayRepeats: + """ndarray repeats matches numpy for diverse shapes, axes, and reps.""" + + @testing.numpy_cupy_array_equal() + def test_repeat(self, xp): + x = testing.shaped_arange(self.shape, xp) + return xp.repeat(x, xp.array(self.reps), self.axis) + + +@testing.parameterize( + *[ + {"rep_dtype": d} + for d in [ + numpy.int8, + numpy.int16, + numpy.int32, + numpy.int64, + numpy.uint8, + numpy.uint16, + numpy.uint32, + ] + ] +) +class TestRepeatNdarrayRepsDtype: + """Various integer dtypes for reps are accepted.""" + + @testing.numpy_cupy_array_equal() + def test_repeat(self, xp): + x = testing.shaped_arange((4,), xp) + return xp.repeat(x, xp.array([1, 2, 3, 4], dtype=self.rep_dtype), 0) + + +@testing.parameterize( + *[ + {"a_dtype": d} + for d in [ + numpy.bool_, + numpy.int32, + numpy.float32, + numpy.float64, + numpy.complex64, + ] + ] +) +class TestRepeatNdarrayArrayDtype: + """Output dtype matches input dtype.""" + + @testing.numpy_cupy_array_equal() + def test_dtype_preserved(self, xp): + x = testing.shaped_arange((3, 4), xp, dtype=self.a_dtype) + return xp.repeat(x, xp.array([1, 2, 3, 4]), axis=1) + + +class TestRepeatNdarrayNonContiguous: + + @testing.numpy_cupy_array_equal() + def test_transposed(self, xp): + x = testing.shaped_arange((4, 3), xp).T + return xp.repeat(x, xp.array([2, 1, 3, 0]), axis=1) + + @testing.numpy_cupy_array_equal() + def test_strided(self, xp): + x = testing.shaped_arange((3, 8), xp)[:, ::2] + return xp.repeat(x, xp.array([1, 2, 3, 0]), axis=1) + + 
@testing.numpy_cupy_array_equal() + def test_reversed(self, xp): + x = testing.shaped_arange((5,), xp)[::-1] + return xp.repeat(x, xp.array([0, 1, 2, 1, 0])) + + +class TestRepeatNdarrayDtypeEdges: + + @testing.numpy_cupy_array_equal() + def test_bool_perelement(self, xp): + return xp.repeat(xp.arange(3), xp.array([True, False, True])) + + @testing.numpy_cupy_array_equal() + def test_bool_broadcast(self, xp): + return xp.repeat( + testing.shaped_arange((3, 4), xp), xp.array([True]), axis=0 + ) + + @testing.numpy_cupy_array_equal() + def test_uint32_accepted(self, xp): + return xp.repeat( + xp.arange(4), xp.array([1, 2, 3, 4], dtype=numpy.uint32) + ) + + +class TestRepeatNdarrayLarge: + + @testing.numpy_cupy_array_equal() + def test_large_single(self, xp): + return xp.repeat( + testing.shaped_arange((3,), xp), xp.array([0, 100000, 0]) + ) + + @testing.numpy_cupy_array_equal() + def test_large_broadcast(self, xp): + return xp.repeat(testing.shaped_arange((3,), xp), xp.array([50000])) + + +class TestRepeatScalarEquivalence: + """All scalar-like repeats inputs produce identical results.""" + + def _check_all_equal(self, a, n, axis): + expected = cupy.array(numpy.repeat(cupy.asnumpy(a), n, axis)) + for form in [ + n, + [n], + cupy.array([n]), + cupy.array(n), + ]: # numpy.intp(n) is not supported + testing.assert_array_equal(cupy.repeat(a, form, axis), expected) + + def test_equivalence(self): + a = cupy.arange(6).reshape(2, 3) + for n, axis in [(3, None), (2, 0), (4, 1), (0, 0), (1, 0)]: + self._check_all_equal(a, n, axis) + + def test_negative_raises(self): + a = cupy.arange(3) + for form in [ + -1, + [-1], + cupy.array([-1]), + cupy.array(-1), + ]: # numpy.intp(n) is not supported + with pytest.raises(ValueError, match=r"positive"): + cupy.repeat(a, form) + + @pytest.mark.skip("dpnp.repeat does not accept numpy scalars as repeats") + def test_numpy_scalar_accepted(self): + a = cupy.arange(3) + testing.assert_array_equal( + cupy.repeat(a, numpy.int64(2)), 
cupy.repeat(a, 2) + ) + + def test_numpy_ndarray_rejected(self): + a = cupy.arange(3) + with pytest.raises(TypeError, match="numpy.ndarray"): + cupy.repeat(a, numpy.array([1, 2, 3])) + + +class TestRepeatNdarrayErrors: + + def test_length_mismatch(self): + with pytest.raises(ValueError, match=r"must be broadcastable"): + cupy.repeat(cupy.arange(4), cupy.array([1, 2]), axis=0) + + def test_negative(self): + with pytest.raises(ValueError, match=r"positive"): + cupy.repeat(cupy.arange(3), cupy.array([-1, 1, 2])) + + def test_float_dtype_matches_numpy(self): + # Both NumPy and CuPy raise TypeError for unsafe cast + for xp in (numpy, cupy): + with pytest.raises(TypeError): + xp.repeat(xp.arange(3), xp.array([1.0, 1.0, 1.0]), 0) + + @pytest.mark.skip("dpnp accepts uint64 repeats, unlike numpy") + def test_uint64_matches_numpy(self): + # Both reject uint64 (unsigned → signed is unsafe) + for xp in (numpy, cupy): + with pytest.raises(TypeError): + xp.repeat(xp.arange(3), xp.array([1, 2, 3], dtype=numpy.uint64)) + + def test_ndim_gt1_matches_numpy(self): + for xp in (numpy, cupy): + with pytest.raises(ValueError): + xp.repeat(xp.arange(6), xp.array([[1, 2, 3, 4, 5, 6]])) + + @pytest.mark.skip("different message for nested lists") + def test_ndim_gt1_list_rejected(self): + with pytest.raises(ValueError, match=r"too deep"): + cupy.repeat(cupy.arange(6), [[1, 2, 3, 4, 5, 6]]) + + def test_bad_axis(self): + with pytest.raises(Exception): + cupy.repeat( + cupy.arange(12).reshape(3, 4), cupy.array([1, 2, 3]), axis=5 + ) + + def test_method_interface(self): + a = cupy.arange(4) + reps = cupy.array([1, 2, 0, 3]) + testing.assert_array_equal(a.repeat(reps), cupy.repeat(a, reps)) + + @testing.parameterize( {"reps": 0}, {"reps": 1}, From c0de37dd880d9a56ff16ae2015ba7bf777f21cc3 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 11:14:44 +0200 Subject: [PATCH 06/37] Fix ZeroDivisionError when sorting along zero-length axis --- 
dpnp/tests/third_party/cupy/sorting_tests/test_sort.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py b/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py index ba64ef949cb0..3bf1c405d0d0 100644 --- a/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py +++ b/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py @@ -37,6 +37,11 @@ def test_external_sort_zero_dim(self): with pytest.raises(AxisError): xp.sort(a) + @testing.numpy_cupy_array_equal() + def test_sort_zero_length_axis(self, xp): + """Sorting along a zero-length axis is a no-op (#9816).""" + return xp.sort(xp.empty((2, 0)), axis=-1) + @testing.numpy_cupy_array_equal() def test_sort_two_or_more_dim(self, xp): a = testing.shaped_random((2, 3, 3), xp) From d4f36c928efeecf4f382926110171d34c85ac357 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 11:40:55 +0200 Subject: [PATCH 07/37] Fix integer comparisons --- .../cupy/logic_tests/test_comparison.py | 68 ++++++++++++++++--- 1 file changed, 59 insertions(+), 9 deletions(-) diff --git a/dpnp/tests/third_party/cupy/logic_tests/test_comparison.py b/dpnp/tests/third_party/cupy/logic_tests/test_comparison.py index 5215191987b4..91a9dc727924 100644 --- a/dpnp/tests/third_party/cupy/logic_tests/test_comparison.py +++ b/dpnp/tests/third_party/cupy/logic_tests/test_comparison.py @@ -36,16 +36,18 @@ def test_equal(self): self.check_binary("equal") -class TestComparisonOperator(unittest.TestCase): +operators = [ + operator.lt, + operator.le, + operator.eq, + operator.ne, + operator.gt, + operator.ge, +] - operators = [ - operator.lt, - operator.le, - operator.eq, - operator.ne, - operator.gt, - operator.ge, - ] + +class TestComparisonOperator: + operators = operators @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_array_equal() @@ -75,6 +77,52 @@ def test_binary_array_pyscalar(self, xp, dtype): b = 3 return [op(a, b) for op in self.operators] + 
@pytest.mark.skip("SAT-8549") + @pytest.mark.parametrize( + "dtype", [numpy.int8, numpy.int64, numpy.uint8, numpy.uint64] + ) + @pytest.mark.parametrize("scalar", [-1, 0, 2**32, 2**63, 2**64 - 1]) + @pytest.mark.parametrize("op", operators) + @testing.numpy_cupy_array_equal() + @numpy.errstate(over="ignore") + def test_binary_array_pyscalar_int(self, xp, dtype, scalar, op): + # This test also checks large mixed unsigned/signed comparisons. + min_, max_ = numpy.iinfo(dtype).min, numpy.iinfo(dtype).max + + a = xp.array([min_, 0, max_, xp.dtype(dtype).type(0) - 1], dtype=dtype) + b = scalar + return [op(a, b), op(b, a)] + + @pytest.mark.parametrize( + "dtype", [numpy.float16, numpy.float32, numpy.float64] + ) + @pytest.mark.parametrize( + "scalar", [-1, 0, 2**32, 2**31 - 1, 2**31 + 1, 2**63, 2**64 - 1] + ) + @pytest.mark.parametrize("op", operators) + @testing.numpy_cupy_array_equal() + @numpy.errstate(over="ignore") + def test_binary_array_pyscalar_int_and_float(self, xp, dtype, scalar, op): + a = xp.array( + [-1, 0, 2**31 - 1, 2**31 + 1, 2**32, 2**63 - 1, 2**62, 2**62 + 1] + ) + a = a.astype(dtype) # cast (overflow OK) + b = scalar + return [op(a, b), op(b, a)] + + @pytest.mark.skip("SAT-8549") + @pytest.mark.parametrize( + "scalar,safe_scalar", + [(2**63, 2), (2**63 + 100, 2), (-(2**63), -1)], + ) + @pytest.mark.parametrize("op", operators) + def test_binary_array_pyscalar_int_and_bool(self, scalar, safe_scalar, op): + # As of 2.5, NumPy uses the default integer and fails for very large + # Python scalars. But CuPy uses uint64 and succeeds. 
+ a = cupy.array([True, False]) + testing.assert_array_equal(op(a, scalar), op(a, safe_scalar)) + testing.assert_array_equal(op(scalar, a), op(safe_scalar, a)) + class TestArrayEqual(unittest.TestCase): @@ -207,6 +255,7 @@ def test_allclose_finite(self, xp, dtype): @testing.for_all_dtypes() @testing.numpy_cupy_equal() + @numpy.errstate(over="ignore") def test_allclose_min_int(self, xp, dtype): a = xp.array([0]).astype(dtype) b = xp.array([numpy.iinfo("i").min]).astype(dtype) @@ -253,6 +302,7 @@ def test_is_close_finite(self, xp, dtype): @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_array_equal() + @numpy.errstate(over="ignore") def test_is_close_min_int(self, xp, dtype): # In numpy<1.10 this test fails when dtype is bool a = xp.array([0]).astype(dtype) From 3516f892f600e3fb21ef8cfbdd567a281fadaf89 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 11:48:33 +0200 Subject: [PATCH 08/37] Fix delete incompatibilities with NumPy --- .../manipulation_tests/test_add_remove.py | 48 ++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/dpnp/tests/third_party/cupy/manipulation_tests/test_add_remove.py b/dpnp/tests/third_party/cupy/manipulation_tests/test_add_remove.py index 31bbc9691889..ba29b67ba6e3 100644 --- a/dpnp/tests/third_party/cupy/manipulation_tests/test_add_remove.py +++ b/dpnp/tests/third_party/cupy/manipulation_tests/test_add_remove.py @@ -2,6 +2,7 @@ import pytest +import dpnp as cupy from dpnp.tests.helper import has_support_aspect64 from dpnp.tests.third_party.cupy import testing from dpnp.tests.third_party.cupy.testing._loops import ( @@ -10,7 +11,7 @@ ) -class TestDelete(unittest.TestCase): +class TestDelete: @testing.numpy_cupy_array_equal() def test_delete_with_no_axis(self, xp): @@ -56,6 +57,51 @@ def test_delete_with_indices_as_int(self, xp): # pytest.xfail("HIP may have a bug") return xp.delete(arr, indices) + def test_delete_array_like_input(self): + arr = [[0, 1, 2], [3, 4, 5]] + with 
pytest.raises((TypeError, ValueError)): + cupy.delete(arr, [1], axis=1) + + @pytest.mark.parametrize( + "make_obj", + [ + pytest.param(lambda xp: [], id="empty_list"), + pytest.param(lambda xp: (), id="empty_tuple"), + pytest.param(lambda xp: [0, 2], id="int_list"), + pytest.param( + lambda xp: [True, False, True], id="matching_bool_list" + ), + pytest.param(lambda xp: (0, True), id="mixed_tuple"), + pytest.param( + lambda xp: xp.array([], dtype=xp.int_), id="empty_int_array" + ), + pytest.param(lambda xp: xp.array([0, 2]), id="int_array"), + pytest.param( + lambda xp: xp.array([-1, -3]), id="negative_int_array" + ), + # The following raise ValueError (wrong-size or scalar bool masks). + pytest.param(lambda xp: True, id="scalar_true"), + pytest.param(lambda xp: False, id="scalar_false"), + pytest.param(lambda xp: [True, False], id="wrong_size_bool_list"), + pytest.param(lambda xp: xp.array(True), id="zerodim_bool_array"), + pytest.param( + lambda xp: xp.array([True, False]), id="wrong_size_bool_array" + ), + pytest.param( + lambda xp: xp.array([], dtype=xp.bool_), id="empty_bool_array" + ), + # The following raise IndexError (non-integer index arrays). 
+ pytest.param(lambda xp: xp.array([1.5]), id="float_array_single"), + pytest.param( + lambda xp: xp.array([0.0, 2.0]), id="float_array_multi" + ), + ], + ) + @testing.numpy_cupy_array_equal(accept_error=(ValueError, IndexError)) + def test_delete_obj_variants(self, xp, make_obj): + arr = xp.array([10, 20, 30]) + return xp.delete(arr, make_obj(xp)) + class TestAppend(unittest.TestCase): From 062a6e61d834b15cd41359bdff77a2b47d827227 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 11:52:19 +0200 Subject: [PATCH 09/37] Add fast-path for gufunc (specifically matmul) --- .../cupy/core_tests/test_gufuncs.py | 219 +++++++++++++++++- 1 file changed, 211 insertions(+), 8 deletions(-) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_gufuncs.py b/dpnp/tests/third_party/cupy/core_tests/test_gufuncs.py index 977313e266f9..c14511df0753 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_gufuncs.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_gufuncs.py @@ -13,16 +13,39 @@ class TestGUFuncSignature: @pytest.mark.parametrize( "signature", [ - ("(i,j)->(i,j)", [("i", "j")], [("i", "j")]), - ("->(i)", [()], [("i",)]), - ("(i,j),(j,k)->(k,l)", [("i", "j"), ("j", "k")], [("k", "l")]), - ("()->()", [()], [()]), + ( + "(i,j)->(i,j)", + [(("i", False, False), ("j", False, False))], + [(("i", False, False), ("j", False, False))], + 2, + ), + ("->(i)", [()], [(("i", False, False),)], 1), + ( + "(i,j),(j,k)->(k,l)", + [ + (("i", False, False), ("j", False, False)), + (("j", False, False), ("k", False, False)), + ], + [(("k", False, False), ("l", False, False))], + 4, + ), + ("()->()", [()], [()], 0), + ( + "(i?,j|1),(i?,j)->(i?,j)", + [ + (("i", True, False), ("j", False, True)), + (("i", True, False), ("j", False, False)), + ], + [(("i", True, False), ("j", False, False))], + 2, + ), ], ) def test_signature_parsing(self, signature): - i, o = cupy._core._gufuncs._parse_gufunc_signature(signature[0]) + i, o, n_cd = 
cupy._core._gufuncs._parse_gufunc_signature(signature[0]) assert i == signature[1] assert o == signature[2] + assert n_cd == signature[3] @pytest.mark.parametrize( "signature", @@ -53,6 +76,15 @@ def func(x): return _GUFunc(func, signature) + def _get_gufunc_scalar_supports_all(self, signature): + def func(x, out=None): + # Does not use keepdims, but gufunc supports it. + return x.sum(axis=-1, out=out) + + return _GUFunc( + func, signature, supports_batched=True, supports_out=True + ) + @pytest.mark.parametrize( "axes", [ @@ -101,14 +133,61 @@ def test_axes_selection_single(self, xp, axes): else: return numpy.moveaxis(x, axes[0], axes[1]) + @pytest.mark.parametrize( + "axes", + [ + [(0, 1), (0, 1), (0, 1)], + [(0, 1), (0, 1), (1, 0)], + [(-2, -1), (-3, 0), (-1, -3)], + ], + ) + @pytest.mark.parametrize("use_out", [True, False]) + @testing.numpy_cupy_array_equal() + def test_axes_matmul(self, xp, axes, use_out): + # Do not use a weird shape, but rather rely on each + # arange transpose giving a unique result. + x = testing.shaped_arange((3, 3, 3, 3), xp=xp) + y = testing.shaped_arange((3, 3, 3, 3), xp=xp) + if use_out: + out = xp.empty((3, 3, 3, 3)) + else: + out = None + + return xp.matmul(x, y, axes=axes, out=out) + + @pytest.mark.parametrize("ax,outer_ax", [(0, 1), (1, 0), ((-1,), 0)]) + @testing.numpy_cupy_array_equal(accept_error=numpy.exceptions.AxisError) + def test_axes_single_matmul(self, xp, ax, outer_ax): + # We do not allow this (just as NumPy), although it may be possible + # to define it in principle. + x = xp.ones((2, 3)) + y = xp.ones((2, 3)) + xp.matmul(x, y, axes=[ax] * 2 + [()]) + # no return, should raise error. 
+ + @pytest.mark.parametrize("axis", [0, 1, 2, 3]) + @pytest.mark.parametrize("keepdims", [True, False]) + @testing.numpy_cupy_array_equal() + def test_axis(self, xp, axis, keepdims): + x = testing.shaped_arange((2, 3, 4, 5), xp=xp) + if xp is cupy: + return self._get_gufunc_scalar("(i)->()")( + x, axis=axis, keepdims=keepdims + ) + else: + return x.sum(axis=axis, keepdims=keepdims) + @pytest.mark.parametrize("axis", [0, 1, 2, 3]) + @pytest.mark.parametrize("keepdims", [True, False]) @testing.numpy_cupy_array_equal() - def test_axis(self, xp, axis): + def test_axis_full_core_support(self, xp, axis, keepdims): x = testing.shaped_arange((2, 3, 4, 5), xp=xp) if xp is cupy: - return self._get_gufunc_scalar("(i)->()")(x, axis=axis) + return self._get_gufunc_scalar_supports_all("(i)->()")( + x, axis=axis, keepdims=keepdims + ) else: - return x.sum(axis=axis) + return x.sum(axis=axis, keepdims=keepdims) def test_axis_invalid(self): x = testing.shaped_arange((2, 3, 4, 5)) @@ -306,3 +385,127 @@ def default(x, y): y = x with pytest.raises(TypeError): gu_func(x, y, casting="unsafe", signature=sig) + + +class TestGUFuncOptional: + def _get_gufunc_ridiculous_optional(self): + signature = "(a?,b,c,d?),(i?,j?,k,l)->(b,c,a?,d?,k,l,j?,i?)" + + def func(x, y): + # The ufunc is always passed all dimensions (filled in with 1) + # if omitted and optional. 
+ res_shape = x.shape[1:-1] + (x.shape[0], x.shape[-1]) + res_shape += y.shape[2:] + (y.shape[1], y.shape[0]) + return cupy.ones(res_shape) + + return _GUFunc(func, signature) + + def _get_forbidden_optional(self): + signature = "(a?,b?),(b,a?)->(a?,b?)" + + def func(x, y): + raise RuntimeError("this will not be called") + + return _GUFunc(func, signature) + + @pytest.mark.parametrize( + "x_ndim, y_ndim", + [ + (2, 2), + (3, 2), + (2, 3), + (3, 3), + (4, 2), + (2, 4), + (4, 3), + (3, 4), + (4, 4), + (6, 6), + ], + ) + def test_ridiculous_optional(self, x_ndim, y_ndim): + gufunc = self._get_gufunc_ridiculous_optional() + + x_shape = tuple(range(1, x_ndim + 1)) + y_shape = tuple(range(1, y_ndim + 1)) + x = cupy.ones(x_shape) + y = cupy.ones(y_shape) + # Succeeds if the correct `func` above matches with allocated output. + res = gufunc(x, y) + + if x_ndim == 6 and y_ndim == 6: + # only test where this is the case + x_shape = x_shape[2:] + y_shape = y_shape[2:] + outer_shape = (1, 2) + else: + outer_shape = () + + # Check that the result shape is actually what we expect it to be. 
+ if x.ndim == 2: # b, c + core_shape = x_shape + elif x.ndim == 3: # b, c, d -> b, c, d + core_shape = x_shape[:-1] + (x_shape[-1],) + else: # a, b, c, d -> b, c, a, d + core_shape = x_shape[1:-1] + (x_shape[0], x_shape[-1]) + + if y.ndim == 2: # k, l + core_shape += y_shape + elif y.ndim == 3: # j, k, l -> k, l, j + core_shape += y_shape[1:] + (y_shape[0],) + else: # i, j, k, l -> k, l, j, i + core_shape += y_shape[2:] + (y_shape[1], y_shape[0]) + + assert res.shape == outer_shape + core_shape + + def test_forbidden_optional(self): + gufunc = self._get_forbidden_optional() + x = cupy.ones(2) + y = cupy.ones((2, 2)) + with pytest.raises(ValueError): + # first op is missing a at front but second is not + gufunc(x, y) + + with pytest.raises(ValueError): + # second op is missing a at end but first is not + gufunc(y, x) + + +class TestGUFuncBroadcastable: + def _get_gufunc(self): + def func(x, y): + shape = cupy.broadcast_shapes(x.shape, y.shape) + return cupy.ones(shape) + + return _GUFunc(func, "(i|1,j|1),(i|1,j)->(i,j)") + + @pytest.mark.parametrize( + "x_shape, y_shape", + [ + ((2, 1), (2, 3)), + ((1, 1), (2, 1)), + ((2, 3), (1, 3)), + ((1, 1), (1, 1)), + ], + ) + def test_broadcastable(self, x_shape, y_shape): + func = self._get_gufunc() + x = cupy.ones(x_shape) + y = cupy.ones(y_shape) + + res = func(x, y) + assert res.shape == cupy.broadcast_shapes(x_shape, y_shape) + + @pytest.mark.parametrize( + "x_shape, y_shape", + [ + ((2, 3), (2, 1)), # second operand 1 is not broadcastable + ], + ) + def test_not_broadcastable(self, x_shape, y_shape): + func = self._get_gufunc() + x = cupy.ones(x_shape) + y = cupy.ones(y_shape) + + with pytest.raises(ValueError): + func(x, y) From b432cae4bcea76e5196bdcd529be1e2d00639814 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 12:17:24 +0200 Subject: [PATCH 10/37] Do not unload modules/code that have been used --- .../cupy/core_tests/test_multithreading.py | 119 ++++++++++++++++++ 1 file changed, 119 
insertions(+) create mode 100644 dpnp/tests/third_party/cupy/core_tests/test_multithreading.py diff --git a/dpnp/tests/third_party/cupy/core_tests/test_multithreading.py b/dpnp/tests/third_party/cupy/core_tests/test_multithreading.py new file mode 100644 index 000000000000..c5e77d38daa0 --- /dev/null +++ b/dpnp/tests/third_party/cupy/core_tests/test_multithreading.py @@ -0,0 +1,119 @@ +from __future__ import annotations + +import concurrent.futures +import threading + +import pytest + +import dpnp as cupy + +pytest.skip( + "ElementwiseKernel / create_ufunc internals are not supported", + allow_module_level=True, +) + +# thread_unsafe marker requires pytest-run-parallel, which is not used by dpnp +# pytestmark = pytest.mark.thread_unsafe( +# reason="tests in this module are already explicitly multi-threaded" +# ) + + +def run_threaded( + func, + max_workers=8, + pass_count=False, + pass_barrier=False, + outer_iterations=1, + prepare_args=None, +): + """Runs a function many times in parallel + + This function has been taken from NumPy: + https://github.com/numpy/numpy/blob/a90ef57574c501a780fe834123b20fcea1329f90/numpy/testing/_private/utils.py#L2807 + """ + for _ in range(outer_iterations): + with concurrent.futures.ThreadPoolExecutor( + max_workers=max_workers + ) as tpe: + if prepare_args is None: + args = [] + else: + args = prepare_args() + if pass_barrier: + barrier = threading.Barrier(max_workers) + args.append(barrier) + if pass_count: + all_args = [(func, i, *args) for i in range(max_workers)] + else: + all_args = [(func, *args) for i in range(max_workers)] + try: + futures = [] + for arg in all_args: + futures.append(tpe.submit(*arg)) + except RuntimeError as e: + pytest.skip( + f"Spawning {max_workers} threads failed with " + f"error {e!r} (likely due to resource limits on " + "the system running the tests)" + ) + finally: + if len(futures) < max_workers and pass_barrier: + barrier.abort() + for f in futures: + f.result() + + +@pytest.mark.slow +def 
test_elementwise_kernel_cache(): + """Checks that a thread always uses the same compiled kernel + which means that we don't unload a kernel that was ever used. + + This matters for graph capture, although in some cases just + unloading a module may create problems and that isn't prevented. + I.e. a race can still mean that multiple threads compile the same code. + When this happens, we test that one version is used everywhere. + """ + + def prepare_args(): + kernel = cupy.ElementwiseKernel("T x", "T y", "y = x;") + assert not kernel._elementwise_kernel_memo + arr = cupy.ones(10) + return [kernel, arr] + + def func(kernel, arr, barrier): + barrier.wait() + kernel(arr) + assert len(kernel._elementwise_kernel_memo) == 1 + cached_obj1 = next(iter(kernel._elementwise_kernel_memo.values())) + kernel(arr) + assert len(kernel._elementwise_kernel_memo) == 1 + cached_obj2 = next(iter(kernel._elementwise_kernel_memo.values())) + assert cached_obj1 is cached_obj2 + + run_threaded( + func, outer_iterations=20, pass_barrier=True, prepare_args=prepare_args + ) + + +@pytest.mark.slow +def test_ufunc_kernel_cache(): + # See test_elementwise_kernel_cache for more details. 
+ def prepare_args(): + ufunc = cupy._core.create_ufunc("cache_test", ("d->d",), "out0 = in0") + assert not ufunc._kernel_memo + arr = cupy.ones(10) + return [ufunc, arr] + + def func(ufunc, arr, barrier): + barrier.wait() + ufunc(arr) + assert len(ufunc._kernel_memo) == 1 + cached_obj1 = next(iter(ufunc._kernel_memo.values())) + ufunc(arr) + assert len(ufunc._kernel_memo) == 1 + cached_obj2 = next(iter(ufunc._kernel_memo.values())) + assert cached_obj1 is cached_obj2 + + run_threaded( + func, outer_iterations=20, pass_barrier=True, prepare_args=prepare_args + ) From 51f12ef5d8018d2ab6ccbdde673d1d44fb89b5f0 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 12:24:28 +0200 Subject: [PATCH 11/37] Support cp.from_dlpack with ml_dtypes.bfloat16 Optionally --- dpnp/tests/third_party/cupy/core_tests/test_dlpack.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py index eb9e958fad0b..5515886bf0c4 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py @@ -21,6 +21,9 @@ def _gen_array(dtype, alloc_q=None): array = numpy.random.random((2, 3)) elif dtype == cupy.bool_: array = numpy.random.randint(0, 2, size=(2, 3)) + # bfloat16 is not supported by dpnp + # elif dtype.name == "bfloat16": + # array = numpy.random.rand(2, 3) else: assert False, f"unrecognized dtype: {dtype}" return cupy.asarray(array, sycl_queue=alloc_q).astype(dtype) @@ -89,6 +92,14 @@ def test_conversion(self, dtype): testing.assert_array_equal(orig_array, out_array) testing.assert_array_equal(orig_array.data.ptr, out_array.data.ptr) + @pytest.mark.skip("bfloat16 dtype is not supported") + def test_conversion_bfloat16(self): + ml_dtypes = pytest.importorskip("ml_dtypes") + orig_array = _gen_array(numpy.dtype(ml_dtypes.bfloat16)) + out_array = cupy.from_dlpack(orig_array) + 
testing.assert_array_equal(orig_array, out_array) + testing.assert_array_equal(orig_array.data.ptr, out_array.data.ptr) + @pytest.mark.skip("no limitations in from_dlpack()") def test_from_dlpack_and_conv_errors(self): orig_array = _gen_array("int8") From 8409000a480e90d957ec4b967ec037bf1133777c Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 12:26:42 +0200 Subject: [PATCH 12/37] Deprecate jitify=True support (and jitify=False) --- .../third_party/cupy/core_tests/test_raw.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_raw.py b/dpnp/tests/third_party/cupy/core_tests/test_raw.py index 59581674a856..b00d56c85c3a 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_raw.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_raw.py @@ -1176,6 +1176,7 @@ def test_compile_module(self): # Finally, we test NVCC {"backend": "nvcc", "in_memory": False}, ) +@pytest.mark.filterwarnings("ignore:.*jitify=False:DeprecationWarning") class TestRaw(_TestRawBase, unittest.TestCase): pass @@ -1196,6 +1197,7 @@ class TestRaw(_TestRawBase, unittest.TestCase): @pytest.mark.thread_unsafe( reason="Jitify seems to have problems, skip as largely unmaintained." ) +@pytest.mark.filterwarnings("ignore:jitify=True:DeprecationWarning") class TestRawWithJitify(_TestRawBase, unittest.TestCase): pass @@ -1512,6 +1514,7 @@ def test_jitify5(self): @unittest.skipIf(cupy.cuda.runtime.is_hip, "Jitify does not support ROCm/HIP") @testing.slow +@pytest.mark.filterwarnings("ignore:.*jitify=False:DeprecationWarning") class TestRawJitifyNoJitify(_TestRawJitify, unittest.TestCase): jitify = False @@ -1521,5 +1524,29 @@ class TestRawJitifyNoJitify(_TestRawJitify, unittest.TestCase): @pytest.mark.thread_unsafe( reason="Jitify seems to have problems, skip as largely unmaintained." 
) +@pytest.mark.filterwarnings("ignore:jitify=True:DeprecationWarning") class TestRawJitifyJitify(_TestRawJitify, unittest.TestCase): jitify = True + + +@pytest.mark.parametrize( + "jitify,match", + [(True, ".*"), (False, "Avoid passing.*jitify=False")], +) +@unittest.skipIf(cupy.cuda.runtime.is_hip, "Jitify does not support ROCm/HIP") +@testing.slow +@pytest.mark.thread_unsafe(reason="uses temporary cache dir") +@use_temporary_cache_dir() +def test_jitify_deprecation_warning(jitify, match): + with pytest.warns(DeprecationWarning, match=match): + cupy.RawKernel( + _test_source1, "test_sum", backend="nvrtc", jitify=jitify + ) + + with pytest.warns(DeprecationWarning, match=match): + cupy.RawModule(code=_test_source1, backend="nvrtc", jitify=jitify) + + # Not technically part of the rawkernel, but test warning in compile here: + with pytest.warns(DeprecationWarning, match=match): + # compiler is not imported in dpnp (module is skipped) + compiler.compile_using_nvrtc("", options=(), jitify=jitify) From cfebec00a95d20523802103622e6392651ffa0a3 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 12:31:43 +0200 Subject: [PATCH 13/37] Slightly bump SVD test tolerance (but tighten it for float64) --- .../tests/third_party/cupy/linalg_tests/test_decomposition.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dpnp/tests/third_party/cupy/linalg_tests/test_decomposition.py b/dpnp/tests/third_party/cupy/linalg_tests/test_decomposition.py index 697e4ee7988d..5f4e9d48e9ce 100644 --- a/dpnp/tests/third_party/cupy/linalg_tests/test_decomposition.py +++ b/dpnp/tests/third_party/cupy/linalg_tests/test_decomposition.py @@ -298,7 +298,9 @@ def check_usv(self, shape, dtype): ) else: a_gpu_usv = cupy.matmul(u_gpu * s_gpu[..., None, :], vh_gpu) - testing.assert_allclose(a_gpu, a_gpu_usv, rtol=1e-4, atol=1e-4) + + tol = numpy.finfo(a_gpu_usv.dtype).eps * 1024 + testing.assert_allclose(a_gpu, a_gpu_usv, rtol=tol, atol=tol) # assert unitary u_len = 
u_gpu.shape[-1] From 73935f46a36298e06366f8e5349bcf36b76f961d Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 12:34:58 +0200 Subject: [PATCH 14/37] Implement kernel cache save/load abstraction --- .../third_party/cupy/core_tests/test_raw.py | 6 +- .../third_party/cupy/cuda_tests/__init__.py | 0 .../cupy/cuda_tests/test_compiler_cache.py | 98 +++++++++++++++++++ 3 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 dpnp/tests/third_party/cupy/cuda_tests/__init__.py create mode 100644 dpnp/tests/third_party/cupy/cuda_tests/test_compiler_cache.py diff --git a/dpnp/tests/third_party/cupy/core_tests/test_raw.py b/dpnp/tests/third_party/cupy/core_tests/test_raw.py index b00d56c85c3a..2d3b8c153b86 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_raw.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_raw.py @@ -356,11 +356,11 @@ def use_temporary_cache_dir(): # Note uses mock, so not thread-safe (except at class/method level) # tempdir fixture could be used instead. 
- target1 = "cupy.cuda.compiler.get_cache_dir" + target1 = "cupy.cuda.compiler._kernel_cache_backend._cache_dir" target2 = "cupy.cuda.compiler._empty_file_preprocess_cache" temp_cache = {} with tempfile.TemporaryDirectory() as path: - with mock.patch(target1, lambda: path): + with mock.patch(target1, path): with mock.patch(target2, temp_cache): yield path @@ -391,7 +391,7 @@ def find_nvcc_ver(): cmd = cupy.cuda.get_nvcc_path().split() cmd += ["--version"] - output = compiler._run_cc(cmd, cupy.cuda.compiler.get_cache_dir(), "nvcc") + output = compiler._run_cc(cmd, None, "nvcc") match = re.search(nvcc_ver_pattern, output) assert match diff --git a/dpnp/tests/third_party/cupy/cuda_tests/__init__.py b/dpnp/tests/third_party/cupy/cuda_tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/dpnp/tests/third_party/cupy/cuda_tests/test_compiler_cache.py b/dpnp/tests/third_party/cupy/cuda_tests/test_compiler_cache.py new file mode 100644 index 000000000000..3e21e7bd8cc5 --- /dev/null +++ b/dpnp/tests/third_party/cupy/cuda_tests/test_compiler_cache.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +import os +import tempfile + +import pytest + +pytest.skip( + "cupy.cuda._compiler_cache (kernel cache backend) is not supported", + allow_module_level=True, +) + +# _compiler_cache is a CuPy CUDA internal with no dpnp equivalent +# from cupy.cuda._compiler_cache import ( +# DiskKernelCacheBackend, +# _hash_length, +# _default_cache_dir, +# ) + + +class TestDiskKernelCacheBackend: + """Tests for DiskKernelCacheBackend implementation.""" + + def test_init_cache_dir(self): + """Test initialization with default cache directory.""" + backend = DiskKernelCacheBackend() + cupy_cache_dir = os.environ.get("CUPY_CACHE_DIR") + if cupy_cache_dir is None: + assert backend._cache_dir == _default_cache_dir + else: + assert backend._cache_dir == cupy_cache_dir + assert os.path.isdir(backend._cache_dir) + + def test_init_custom_cache_dir(self): + """Test 
initialization with custom cache directory.""" + with tempfile.TemporaryDirectory() as tmpdir: + cache_dir = os.path.join(tmpdir, "custom_cache") + backend = DiskKernelCacheBackend(cache_dir=cache_dir) + assert backend._cache_dir == cache_dir + assert os.path.isdir(cache_dir) + + def test_save_and_load(self): + """Test basic save and load operations.""" + with tempfile.TemporaryDirectory() as tmpdir: + backend = DiskKernelCacheBackend(cache_dir=tmpdir) + + name = "test_kernel.cubin" + cubin = b"compiled_kernel_binary" + source = 'extern "C" __global__ void test() {}' + + # Save the kernel + backend.save(name, cubin, source) + + # Load it back + loaded_cubin = backend.load(name) + assert loaded_cubin == cubin + + def test_load_nonexistent_file(self): + """Test loading a file that doesn't exist.""" + with tempfile.TemporaryDirectory() as tmpdir: + backend = DiskKernelCacheBackend(cache_dir=tmpdir) + + result = backend.load("nonexistent.cubin") + assert result is None + + def test_load_file_too_short(self): + """Test loading a file that's too short to contain a hash.""" + with tempfile.TemporaryDirectory() as tmpdir: + backend = DiskKernelCacheBackend(cache_dir=tmpdir) + + # Write a file with less than _hash_length bytes + name = "short.cubin" + data = b"too_short" + assert len(data) < _hash_length + path = os.path.join(tmpdir, name) + with open(path, "wb") as f: + f.write(data) + + result = backend.load(name) + assert result is None + + def test_load_corrupted_hash(self): + """Test that corrupted cache files are rejected.""" + with tempfile.TemporaryDirectory() as tmpdir: + backend = DiskKernelCacheBackend(cache_dir=tmpdir) + + name = "corrupted.cubin" + path = os.path.join(tmpdir, name) + + # Write file with wrong hash + cubin = b"kernel_data" + wrong_hash = b"0" * _hash_length # Wrong hash + with open(path, "wb") as f: + f.write(wrong_hash + cubin) + + # Load should return None due to hash mismatch + result = backend.load(name) + assert result is None From 
1629b040313b54d63288990732317a3aa6ccc091 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 12:39:08 +0200 Subject: [PATCH 15/37] Update test_assumed_runtime_version --- .../cupy/cuda_tests/test_runtime.py | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 dpnp/tests/third_party/cupy/cuda_tests/test_runtime.py diff --git a/dpnp/tests/third_party/cupy/cuda_tests/test_runtime.py b/dpnp/tests/third_party/cupy/cuda_tests/test_runtime.py new file mode 100644 index 000000000000..0e432da5b653 --- /dev/null +++ b/dpnp/tests/third_party/cupy/cuda_tests/test_runtime.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +import pickle +import sys + +import pytest + +import dpnp as cupy + +pytest.skip( + "cupy.cuda.runtime / nvrtc internals are not supported", + allow_module_level=True, +) + +# CUDA runtime internals have no dpnp equivalent +# from cupy.cuda import driver +# from cupy.cuda import nvrtc +# from cupy.cuda import runtime + + +class TestExceptionPicklable: + + def test(self): + e1 = runtime.CUDARuntimeError(1) + e2 = pickle.loads(pickle.dumps(e1)) + assert e1.args == e2.args + assert str(e1) == str(e2) + + +class TestMemPool: + + @pytest.mark.skipif( + runtime.is_hip, reason="HIP does not support async allocator" + ) + @pytest.mark.skipif( + driver._is_cuda_python() and runtime.runtimeGetVersion() < 11020, + reason="cudaMemPool_t is supported since CUDA 11.2", + ) + @pytest.mark.skipif( + not driver._is_cuda_python() and driver.get_build_version() < 11020, + reason="cudaMemPool_t is supported since CUDA 11.2", + ) + @pytest.mark.skipif( + runtime.deviceGetAttribute(runtime.cudaDevAttrMemoryPoolsSupported, 0) + == 0, + reason="cudaMemPool_t is not supported on device 0", + ) + def test_mallocFromPoolAsync(self): + # also test create/destroy a pool + props = runtime.MemPoolProps( + runtime.cudaMemAllocationTypePinned, + runtime.cudaMemHandleTypeNone, + runtime.cudaMemLocationTypeDevice, + 0, + ) # on device 0 + pool = 
runtime.memPoolCreate(props) + assert pool > 0 + s = cupy.cuda.Stream() + ptr = runtime.mallocFromPoolAsync(128, pool, s.ptr) + assert ptr > 0 + runtime.freeAsync(ptr, s.ptr) + runtime.memPoolDestroy(pool) + + +@pytest.mark.skipif( + runtime.is_hip, reason="This assumption is correct only in CUDA" +) +def test_assumed_runtime_version(): + # When CUDA Python is enabled, CuPy calculates the CUDA runtime version + # from NVRTC version. This test ensures that the assumption is correct + # by running the same logic in non-CUDA Python environment. + # When this fails, `runtime.runtimeGetVersion()` logic needs to be fixed. + major, minor = nvrtc.getVersion() + local_ver = runtime._getLocalRuntimeVersion() + # On Windows, starting from CUDA 13.0, cudaRuntimeGetVersion() always + # returns major * 1000 regardless of the minor version (nvbugs 5955788, + # 5523579). Accept either form on Windows + CUDA >= 13. + if sys.platform == "win32" and major >= 13: + assert local_ver in (major * 1000, major * 1000 + minor * 10) + else: + assert local_ver == major * 1000 + minor * 10 + + +def test_major_version(): + major = runtime._getCUDAMajorVersion() + if runtime.is_hip: + assert major == 0 + else: + assert 10 < major < 20 From f55b32802887d3a961f41a2d3e39ddeafe49eaf7 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 12:42:06 +0200 Subject: [PATCH 16/37] Restructure SingleDeviceMemoryPool and locking --- .../cupy/core_tests/test_multithreading.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_multithreading.py b/dpnp/tests/third_party/cupy/core_tests/test_multithreading.py index c5e77d38daa0..ee577b187cff 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_multithreading.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_multithreading.py @@ -1,12 +1,17 @@ from __future__ import annotations import concurrent.futures +import gc +import random import threading import pytest import dpnp as cupy 
+# cupy.cuda.memory.alloc has no dpnp equivalent +# from cupy.cuda.memory import alloc + pytest.skip( "ElementwiseKernel / create_ufunc internals are not supported", allow_module_level=True, @@ -117,3 +122,51 @@ def func(ufunc, arr, barrier): run_threaded( func, outer_iterations=20, pass_barrier=True, prepare_args=prepare_args ) + + +@pytest.mark.slow +# NOTE: With clean=False, this test can OOM, since the cycles may not +# be cleaned up sufficiently in the `gc.collect()` we do on OOM. +@pytest.mark.parametrize("clean", [True, False]) +def test_default_memory_pool_threaded(clean, iterations=500): + # This test is designed to stress-test the memory pool, we will + # create various usage patterns and mix them in a threaded way. + # To seriously stress-test it make the iterations very large and watch + # the long-term behavior. + + def random_allocation(): + # choose a random allocation size, hopefully this will (occasionally) + # lead to allocations being split. + size = random.randint(1, 50_000) + return alloc(size) + + def make_allocations(): + allocations = [] + for i in range(random.randint(1, 50)): + allocations.append(random_allocation()) + + # And now let's make a few that can't be cleaned up easily. + first = [None, random_allocation()] + curr = first + for i in range(2, 50): + node = [curr, random_allocation()] + curr = node + + first[0] = curr # close the circle + + return allocations + + def func(): + for i in range(iterations): # increase to test for longer + _ = make_allocations() + # once in a while, we either collect or free all blocks + # to stress those paths more. But hitting the high-water mark + # with clean=False is also interesting. 
+ if clean: + if i % 10 == 0: + gc.collect() + elif i % 10 == 5: + cupy.get_default_memory_pool().free_all_blocks() + _ = make_allocations() + + run_threaded(func) From 8f6ae4747f169e4f07098419d0687c4abfd4b115 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 12:43:58 +0200 Subject: [PATCH 17/37] Make sure local cache is warmed up at job start time --- .../cupy/cuda_tests/test_compiler_cache.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/dpnp/tests/third_party/cupy/cuda_tests/test_compiler_cache.py b/dpnp/tests/third_party/cupy/cuda_tests/test_compiler_cache.py index 3e21e7bd8cc5..a95a496ec10f 100644 --- a/dpnp/tests/third_party/cupy/cuda_tests/test_compiler_cache.py +++ b/dpnp/tests/third_party/cupy/cuda_tests/test_compiler_cache.py @@ -96,3 +96,37 @@ def test_load_corrupted_hash(self): # Load should return None due to hash mismatch result = backend.load(name) assert result is None + + def test_encode_decode_roundtrip(self): + """Test that _encode_cubin/_decode_cubin form a correct round-trip.""" + with tempfile.TemporaryDirectory() as tmpdir: + backend = DiskKernelCacheBackend(cache_dir=tmpdir) + cubin = b"some_kernel_binary" + encoded = backend._encode_cubin(cubin) + # Encoded form is larger and starts with the ASCII hash + assert len(encoded) == _hash_length + len(cubin) + assert encoded[_hash_length:] == cubin + # Decode recovers the original cubin + assert backend._decode_cubin(encoded) == cubin + + def test_decode_cubin_too_short(self): + """Test that _decode_cubin returns None for data shorter than hash.""" + with tempfile.TemporaryDirectory() as tmpdir: + backend = DiskKernelCacheBackend(cache_dir=tmpdir) + assert backend._decode_cubin(b"short") is None + + def test_decode_cubin_bad_hash(self): + """Test that _decode_cubin returns None when hash does not match.""" + with tempfile.TemporaryDirectory() as tmpdir: + backend = DiskKernelCacheBackend(cache_dir=tmpdir) + bad_data = b"0" * _hash_length + b"kernel_data" 
+ assert backend._decode_cubin(bad_data) is None + + def test_write_encoded_readable_by_load(self): + """Test that _write_encoded writes data that load() can read back.""" + with tempfile.TemporaryDirectory() as tmpdir: + backend = DiskKernelCacheBackend(cache_dir=tmpdir) + cubin = b"another_kernel_binary" + name = "test.cubin" + backend._write_encoded(name, backend._encode_cubin(cubin)) + assert backend.load(name) == cubin From b13072dbde672bb4e1fa4f28c4d5cfa2c2225ff8 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 12:56:54 +0200 Subject: [PATCH 18/37] Skip many tests when running with pytest-run-parallel --- .../cupy/creation_tests/test_basic.py | 8 ++++ .../cupy/creation_tests/test_from_data.py | 2 + .../third_party/cupy/fft_tests/test_fft.py | 16 ++++++++ .../cupy/manipulation_tests/test_basic.py | 10 +++++ .../cupy/manipulation_tests/test_join.py | 2 + .../cupy/math_tests/test_sumprod.py | 14 ++++++- .../cupy/random_tests/common_distributions.py | 38 ++++++++++--------- .../cupy/random_tests/test_generator.py | 26 ++++++++----- .../cupy/random_tests/test_generator_api.py | 6 ++- .../cupy/random_tests/test_permutations.py | 4 ++ .../cupy/random_tests/test_random.py | 2 + .../cupy/sorting_tests/test_search.py | 6 +++ .../third_party/cupy/test_numpy_interop.py | 2 + 13 files changed, 106 insertions(+), 30 deletions(-) diff --git a/dpnp/tests/third_party/cupy/creation_tests/test_basic.py b/dpnp/tests/third_party/cupy/creation_tests/test_basic.py index 05b2e5e711b2..c6d5f371e52f 100644 --- a/dpnp/tests/third_party/cupy/creation_tests/test_basic.py +++ b/dpnp/tests/third_party/cupy/creation_tests/test_basic.py @@ -24,6 +24,8 @@ def test_empty(self, xp, dtype, order): return a @testing.slow + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="too large allocations") def test_empty_huge_size(self): a = cupy.empty((1024, 2048, 1024), dtype="b") a.fill(123) @@ -33,6 +35,8 @@ def 
test_empty_huge_size(self): # cupy.get_default_memory_pool().free_all_blocks() @testing.slow + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="too large allocations") def test_empty_huge_size_fill0(self): a = cupy.empty((1024, 2048, 1024), dtype="b") a.fill(0) @@ -66,6 +70,8 @@ def test_empty_int(self, xp, dtype, order): return a @testing.slow + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="too large allocations") def test_empty_int_huge_size(self): a = cupy.empty(2**31, dtype="b") a.fill(123) @@ -75,6 +81,8 @@ def test_empty_int_huge_size(self): cupy.get_default_memory_pool().free_all_blocks() @testing.slow + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="too large allocations") def test_empty_int_huge_size_fill0(self): a = cupy.empty(2**31, dtype="b") a.fill(0) diff --git a/dpnp/tests/third_party/cupy/creation_tests/test_from_data.py b/dpnp/tests/third_party/cupy/creation_tests/test_from_data.py index 04b3c4446a7b..11ea8a75f20a 100644 --- a/dpnp/tests/third_party/cupy/creation_tests/test_from_data.py +++ b/dpnp/tests/third_party/cupy/creation_tests/test_from_data.py @@ -799,6 +799,8 @@ def test_masked_array(self, dtype): # marked slow as either numpy or cupy could go OOM in this test @testing.slow +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="too large allocations") @pytest.mark.skip("CUDA array interface is not supported") class TestCudaArrayInterfaceBigArray(unittest.TestCase): def test_with_over_size_array(self): diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_fft.py b/dpnp/tests/third_party/cupy/fft_tests/test_fft.py index 369409ba001c..969bb0ae721d 100644 --- a/dpnp/tests/third_party/cupy/fft_tests/test_fft.py +++ b/dpnp/tests/third_party/cupy/fft_tests/test_fft.py @@ -281,6 +281,8 @@ def test_ifft(self, 
xp, dtype): @pytest.mark.skip("default FFT function is not supported") @testing.with_requires("numpy>=2.0") +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="`nd_planning_states` is not thread-safe") class TestDefaultPlanType: @nd_planning_states() @@ -404,6 +406,8 @@ def test_fft_allocate(self): ) ) ) +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="`nd_planning_states` is not thread-safe") class TestFft2: @nd_planning_states() @@ -496,6 +500,8 @@ def test_ifft2(self, xp, dtype, order, enable_nd): ) ) ) +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="`nd_planning_states` is not thread-safe") class TestFftn: @nd_planning_states() @@ -584,6 +590,8 @@ def test_ifftn(self, xp, dtype, order, enable_nd): ) ) ) +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="`nd_planning_states` is not thread-safe") class TestPlanCtxManagerFftn: @pytest.fixture(autouse=True) @@ -851,6 +859,8 @@ def test_fft_error_on_wrong_plan(self, dtype): ) ) @pytest.mark.skip("default FFT function is not supported") +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="`nd_planning_states` is not thread-safe") class TestFftnContiguity: @nd_planning_states([True]) @@ -1120,6 +1130,8 @@ def test_irfft2(self, dtype): ) ) ) +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="`nd_planning_states` is not thread-safe") class TestRfftn: @nd_planning_states() @@ -1189,6 +1201,8 @@ def test_irfftn(self, xp, dtype, order, enable_nd): ) ) @pytest.mark.skip("get_fft_plan() is not supported") +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="`nd_planning_states` is not thread-safe") class 
TestPlanCtxManagerRfftn: @pytest.fixture(autouse=True) @@ -1269,6 +1283,8 @@ def test_irfftn(self, xp, dtype, enable_nd): ) ) @pytest.mark.skip("default FFT function is not supported") +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="`nd_planning_states` is not thread-safe") class TestRfftnContiguity: @nd_planning_states([True]) diff --git a/dpnp/tests/third_party/cupy/manipulation_tests/test_basic.py b/dpnp/tests/third_party/cupy/manipulation_tests/test_basic.py index 5b0b486d5c81..1d790eeb7597 100644 --- a/dpnp/tests/third_party/cupy/manipulation_tests/test_basic.py +++ b/dpnp/tests/third_party/cupy/manipulation_tests/test_basic.py @@ -154,18 +154,24 @@ def get_numpy(): @pytest.mark.skip("multi GPU is not supported") @testing.multi_gpu(2) @testing.for_all_dtypes() + # cupy._util.PerformanceWarning has no dpnp equivalent + # @pytest.mark.filterwarnings("ignore::cupy._util.PerformanceWarning") def test_copyto_where_multigpu_raises(self, dtype): self._check_copyto_where_multigpu_raises(dtype, 2) @pytest.mark.skip("multi GPU is not supported") @testing.multi_gpu(4) @testing.for_all_dtypes() + # cupy._util.PerformanceWarning has no dpnp equivalent + # @pytest.mark.filterwarnings("ignore::cupy._util.PerformanceWarning") def test_copyto_where_multigpu_raises_4(self, dtype): self._check_copyto_where_multigpu_raises(dtype, 4) @pytest.mark.skip("multi GPU is not supported") @testing.multi_gpu(6) @testing.for_all_dtypes() + # cupy._util.PerformanceWarning has no dpnp equivalent + # @pytest.mark.filterwarnings("ignore::cupy._util.PerformanceWarning") def test_copyto_where_multigpu_raises_6(self, dtype): self._check_copyto_where_multigpu_raises(dtype, 6) @@ -173,6 +179,8 @@ def test_copyto_where_multigpu_raises_6(self, dtype): @testing.multi_gpu(2) @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() + # cupy._util.PerformanceWarning has no dpnp equivalent + # 
@pytest.mark.filterwarnings("ignore::cupy._util.PerformanceWarning") def test_copyto_multigpu(self, xp, dtype): with cuda.Device(0): a = testing.shaped_arange((2, 3, 4), xp, dtype) @@ -184,6 +192,8 @@ def test_copyto_multigpu(self, xp, dtype): @pytest.mark.skip("multi GPU is not supported") @testing.multi_gpu(2) @testing.for_all_dtypes() + # cupy._util.PerformanceWarning has no dpnp equivalent + # @pytest.mark.filterwarnings("ignore::cupy._util.PerformanceWarning") def test_copyto_multigpu_noncontinguous(self, dtype): with cuda.Device(0): src = testing.shaped_arange((2, 3, 4), cupy, dtype) diff --git a/dpnp/tests/third_party/cupy/manipulation_tests/test_join.py b/dpnp/tests/third_party/cupy/manipulation_tests/test_join.py index 9e8a6b027e85..09c874661aec 100644 --- a/dpnp/tests/third_party/cupy/manipulation_tests/test_join.py +++ b/dpnp/tests/third_party/cupy/manipulation_tests/test_join.py @@ -135,6 +135,8 @@ def test_concatenate_many_multi_dtype(self, xp): return xp.concatenate((a, b) * 1024, axis=1) @testing.slow + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="too large allocations") def test_concatenate_32bit_boundary(self): a = cupy.zeros((2**30,), dtype=cupy.int8) b = cupy.zeros((2**30,), dtype=cupy.int8) diff --git a/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py b/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py index b1c1e569ae2f..6d8c803e567f 100644 --- a/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py +++ b/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py @@ -65,6 +65,8 @@ def test_sum_axis(self, xp, dtype): @testing.slow @testing.numpy_cupy_allclose() + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="too large allocations") def test_sum_axis_huge(self, xp): a = testing.shaped_random((2048, 1, 1024), xp, "b") a = xp.broadcast_to(a, (2048, 1024, 1024)) @@ -232,6 +234,8 @@ def setUp(self): 
_acc.set_routine_accelerators(old_routine_accelerators) _acc.set_reduction_accelerators(old_reduction_accelerators) + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="unsafe AssertFunctionIsCalled.") @testing.for_contiguous_axes() # sum supports less dtypes; don't test float16 as it's not as accurate? @testing.for_dtypes("qQfdFD") @@ -283,6 +287,8 @@ def test_cub_sum_empty_axis(self, xp, dtype): a = xp.asfortranarray(a) return a.sum(axis=()) + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="unsafe AssertFunctionIsCalled.") @testing.for_contiguous_axes() # prod supports less dtypes; don't test float16 as it's not as accurate? @testing.for_dtypes("qQfdFD") @@ -325,6 +331,8 @@ def test_cub_prod(self, xp, dtype, axis): # TODO(leofang): test axis after support is added # don't test float16 as it's not as accurate? + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="unsafe AssertFunctionIsCalled.") @testing.for_dtypes("bhilBHILfdF") @testing.numpy_cupy_allclose(rtol=1e-4) def test_cub_cumsum(self, xp, dtype): @@ -350,6 +358,8 @@ def test_cub_cumsum(self, xp, dtype): # TODO(leofang): test axis after support is added # don't test float16 as it's not as accurate? 
+ # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="unsafe AssertFunctionIsCalled.") @testing.for_dtypes("bhilBHILfdF") @testing.numpy_cupy_allclose(rtol=1e-4) def test_cub_cumprod(self, xp, dtype): @@ -400,8 +410,8 @@ def _mitigate_cumprod(self, xp, dtype, result): @pytest.mark.skip("cutensor is not supported") class TestCuTensorReduction: - @pytest.fixture(autouse=True) - def setUp(self): + @pytest.fixture(autouse=True, scope="class") + def setup(self): old_accelerators = cupy._core.get_routine_accelerators() cupy._core.set_routine_accelerators(["cutensor"]) yield diff --git a/dpnp/tests/third_party/cupy/random_tests/common_distributions.py b/dpnp/tests/third_party/cupy/random_tests/common_distributions.py index 9c500d17258b..c7f7f9776316 100644 --- a/dpnp/tests/third_party/cupy/random_tests/common_distributions.py +++ b/dpnp/tests/third_party/cupy/random_tests/common_distributions.py @@ -38,28 +38,29 @@ class BaseGeneratorTestCase(unittest.TestCase): target_method = None def get_rng(self, xp, seed): - pass + raise NotImplementedError - def set_rng_seed(self, seed): - pass + def set_rng_seed(self, rng, seed): + raise NotImplementedError - def setUp(self): - self.__seed = testing.generate_seed() + def _get_rng_and_seed(self): + seed = testing.generate_seed() # rng will be a new or old generator API object - self.rng = self.get_rng(cupy, self.__seed) + rng = self.get_rng(cupy, seed) + return rng, seed - def _get_generator_func(self, *args, **kwargs): + def _get_generator_func(self, rng, *args, **kwargs): assert isinstance( self.target_method, str ), "generate_method must be overridden" - f = getattr(self.rng, self.target_method) + f = getattr(rng, self.target_method) return lambda: f(*args, **kwargs) - def _generate_check_repro(self, func, seed): + def _generate_check_repro(self, func, rng, seed): # Sample a random array while checking reproducibility - self.set_rng_seed(seed) + self.set_rng_seed(rng, seed) 
x = func() - self.set_rng_seed(seed) + self.set_rng_seed(rng, seed) y = func() testing.assert_array_equal( x, y, "Randomly generated arrays with the same seed did not match" @@ -69,8 +70,9 @@ def _generate_check_repro(self, func, seed): def generate(self, *args, **kwargs): # Pick one sample from generator. # Reproducibility is checked by repeating seed-and-sample cycle twice. - func = self._get_generator_func(*args, **kwargs) - return self._generate_check_repro(func, self.__seed) + rng, seed = self._get_rng_and_seed() + func = self._get_generator_func(rng, *args, **kwargs) + return self._generate_check_repro(func, rng, seed) def generate_many(self, *args, **kwargs): # Pick many samples from generator. @@ -78,12 +80,13 @@ def generate_many(self, *args, **kwargs): # because it's very slow to set seed every time. _count = kwargs.pop("_count", None) assert _count is not None, "_count is required" - func = self._get_generator_func(*args, **kwargs) + rng, seed = self._get_rng_and_seed() + func = self._get_generator_func(rng, *args, **kwargs) if _count == 0: return [] - vals = [self._generate_check_repro(func, self.__seed)] + vals = [self._generate_check_repro(func, rng, seed)] for _ in range(1, _count): vals.append(func()) return vals @@ -99,7 +102,8 @@ def _check_ks( assert "size" in kwargs # cupy - func = self._get_generator_func(*args, **kwargs) + rng, seed = self._get_rng_and_seed() + func = self._get_generator_func(rng, *args, **kwargs) vals_cupy = func() assert vals_cupy.size > 0 count = 1 + (cupy_len - 1) // vals_cupy.size @@ -111,7 +115,7 @@ def _check_ks( # numpy kwargs["size"] = numpy_len dtype = kwargs.pop("dtype", None) - numpy_rng = self.get_rng(numpy, self.__seed) + numpy_rng = self.get_rng(numpy, seed) vals_numpy = getattr(numpy_rng, self.target_method)(*args, **kwargs) if dtype is not None: vals_numpy = vals_numpy.astype(dtype, copy=False) diff --git a/dpnp/tests/third_party/cupy/random_tests/test_generator.py 
b/dpnp/tests/third_party/cupy/random_tests/test_generator.py index 23a86d88d8ff..5bef94c6d57d 100644 --- a/dpnp/tests/third_party/cupy/random_tests/test_generator.py +++ b/dpnp/tests/third_party/cupy/random_tests/test_generator.py @@ -82,8 +82,8 @@ class RandomGeneratorTestCase(common_distributions.BaseGeneratorTestCase): def get_rng(self, xp, seed): return xp.random.RandomState(seed=seed) - def set_rng_seed(self, seed): - self.rng.seed(seed) + def set_rng_seed(self, rng, seed): + rng.seed(seed) def _xp_random(xp, method_name): @@ -103,12 +103,8 @@ def f(*args, **kwargs): @testing.fix_random() class TestRandomState(unittest.TestCase): - - def setUp(self): - self.rs = _generator.RandomState(seed=testing.generate_seed()) - def check_seed(self, seed): - rs = self.rs + rs = _generator.RandomState(seed=testing.generate_seed()) rs.seed(seed) xs1 = [rs.uniform() for _ in range(100)] @@ -131,13 +127,15 @@ def test_seed_not_none(self, dtype): @testing.for_dtypes([numpy.complex128]) def test_seed_invalid_type_complex(self, dtype): + rs = _generator.RandomState(seed=testing.generate_seed()) with self.assertRaises(TypeError): - self.rs.seed(dtype(0)) + rs.seed(dtype(0)) @testing.for_float_dtypes() def test_seed_invalid_type_float(self, dtype): + rs = _generator.RandomState(seed=testing.generate_seed()) with self.assertRaises(TypeError): - self.rs.seed(dtype(0)) + rs.seed(dtype(0)) def test_array_seed(self): self.check_seed(numpy.random.randint(0, 2**31, size=40)) @@ -1263,12 +1261,16 @@ def test_choice_invalid_value(self): class TestResetStates(unittest.TestCase): + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="mutates global _generator.") def test_reset_states(self): _generator._random_states = "dummy" _generator.reset_states() assert {} == _generator._random_states +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="mutates global _generator.") class 
TestGetRandomState(unittest.TestCase): def setUp(self): @@ -1294,6 +1296,8 @@ def test_get_random_state_memoized(self): assert "expected" == rs +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="mutates global _generator.") class TestSetRandomState(unittest.TestCase): def setUp(self): @@ -1335,6 +1339,8 @@ def test_triangular(self): ) +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="Mutates global rng instance.") class TestRandomStateThreadSafe(unittest.TestCase): def setUp(self): @@ -1389,6 +1395,8 @@ def _f(func, args=()): assert cupy.random.get_random_state() is rs +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="mutates global random states") class TestGetRandomState2(unittest.TestCase): def setUp(self): diff --git a/dpnp/tests/third_party/cupy/random_tests/test_generator_api.py b/dpnp/tests/third_party/cupy/random_tests/test_generator_api.py index 74c689ce664a..1bd858c7f0b8 100644 --- a/dpnp/tests/third_party/cupy/random_tests/test_generator_api.py +++ b/dpnp/tests/third_party/cupy/random_tests/test_generator_api.py @@ -26,8 +26,8 @@ def get_rng(self, xp, seed): else: return numpy.random.Generator(numpy.random.MT19937(seed)) - def set_rng_seed(self, seed): - self.rng.bit_generator = random._bit_generator.Philox4x3210(seed=seed) + def set_rng_seed(self, rng, seed): + rng.bit_generator = random._bit_generator.Philox4x3210(seed=seed) class InvalidOutsMixin: @@ -333,6 +333,8 @@ class TestDrichlet(common_distributions.Dirichlet, GeneratorTestCase): @testing.slow class TestLarge: + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="allocates large memory") def test_large(self): gen = random.Generator(random.XORWOW(1234)) gen.random(2**31 + 1, dtype=cupy.int8) diff --git a/dpnp/tests/third_party/cupy/random_tests/test_permutations.py 
b/dpnp/tests/third_party/cupy/random_tests/test_permutations.py index 9a2d3b19f90f..ac36d985b2c0 100644 --- a/dpnp/tests/third_party/cupy/random_tests/test_permutations.py +++ b/dpnp/tests/third_party/cupy/random_tests/test_permutations.py @@ -66,6 +66,8 @@ def test_permutation_sort_ndim(self, dtype): # Test seed + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="relies on global random state") @testing.for_all_dtypes() def test_permutation_seed1(self, dtype): flag = cupy.issubdtype(dtype, cupy.unsignedinteger) @@ -128,6 +130,8 @@ def test_shuffle_sort_ndim(self, dtype): # Test seed + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="relies on global random state") @testing.for_all_dtypes() def test_shuffle_seed1(self, dtype): flag = cupy.issubdtype(dtype, cupy.unsignedinteger) diff --git a/dpnp/tests/third_party/cupy/random_tests/test_random.py b/dpnp/tests/third_party/cupy/random_tests/test_random.py index 60b1f391dce4..994667bdcde8 100644 --- a/dpnp/tests/third_party/cupy/random_tests/test_random.py +++ b/dpnp/tests/third_party/cupy/random_tests/test_random.py @@ -9,6 +9,8 @@ @pytest.mark.skip("random.get_random_state() is not supported yet") class TestResetSeed(unittest.TestCase): + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="uses global random state") @testing.for_float_dtypes(no_float16=True) def test_reset_seed(self, dtype): rs = random.get_random_state() diff --git a/dpnp/tests/third_party/cupy/sorting_tests/test_search.py b/dpnp/tests/third_party/cupy/sorting_tests/test_search.py index 70b076ce1aaa..1e41e8853012 100644 --- a/dpnp/tests/third_party/cupy/sorting_tests/test_search.py +++ b/dpnp/tests/third_party/cupy/sorting_tests/test_search.py @@ -83,6 +83,8 @@ def test_argmax_zero_size_axis1(self, xp, dtype): return a.argmax(axis=1) @testing.slow + # thread_unsafe marker 
requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="allocation too large.") def test_argmax_int32_overflow(self): a = testing.shaped_arange((2**32 + 1,), cupy, numpy.float64) assert a.argmax().item() == 2**32 @@ -162,6 +164,8 @@ def test_argmin_zero_size_axis1(self, xp, dtype): return a.argmin(axis=1) @testing.slow + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="allocation too large.") def test_argmin_int32_overflow(self): a = testing.shaped_arange((2**32 + 1,), cupy, numpy.float64) cupy.negative(a, out=a) @@ -186,6 +190,8 @@ def _skip_cuda90(dtype): } ) ) +# thread_unsafe marker requires pytest-run-parallel, not used by dpnp +# @pytest.mark.thread_unsafe(reason="unsafe setUp and counts function calls.") @pytest.mark.skip("The CUB routine is not enabled") class TestCubReduction: diff --git a/dpnp/tests/third_party/cupy/test_numpy_interop.py b/dpnp/tests/third_party/cupy/test_numpy_interop.py index 0409c3fdaadc..c9f86d9a5353 100644 --- a/dpnp/tests/third_party/cupy/test_numpy_interop.py +++ b/dpnp/tests/third_party/cupy/test_numpy_interop.py @@ -164,6 +164,8 @@ def test_asnumpy_out(self): reason="blocking or not is irrelevant when zero-copy is on", ) @pytest.mark.parametrize("blocking", (True, False)) + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="allocation too large.") def test_asnumpy_blocking(self, blocking): prefactor = 4 a = cupy.random.random( From 4975bfa7621a34593e27c1d682162aaa8e99467c Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 12:59:45 +0200 Subject: [PATCH 19/37] Prevent hypergeometric infinite loops and other consequences of invalid inputs --- .../cupy/random_tests/test_generator.py | 40 +++++++++++++++++++ .../cupy/random_tests/test_generator_api.py | 40 +++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git 
a/dpnp/tests/third_party/cupy/random_tests/test_generator.py b/dpnp/tests/third_party/cupy/random_tests/test_generator.py index 5bef94c6d57d..8b3f61c7b4de 100644 --- a/dpnp/tests/third_party/cupy/random_tests/test_generator.py +++ b/dpnp/tests/third_party/cupy/random_tests/test_generator.py @@ -238,6 +238,46 @@ class TestHypergeometric( pass +class TestHypergeometricValidation: + + @pytest.fixture(autouse=True) + def setup(self): + self.rs = _generator.RandomState(seed=0) + + def test_hypergeometric_ngood_negative(self): + with pytest.raises(ValueError): + self.rs.hypergeometric(-1, 10, 5, size=10) + + def test_hypergeometric_nbad_negative(self): + with pytest.raises(ValueError): + self.rs.hypergeometric(10, -1, 5, size=10) + + def test_hypergeometric_nsample_zero(self): + with pytest.raises(ValueError): + self.rs.hypergeometric(10, 10, 0, size=10) + + def test_hypergeometric_nsample_negative(self): + with pytest.raises(ValueError): + self.rs.hypergeometric(10, 10, -1, size=10) + + def test_hypergeometric_nsample_too_large(self): + with pytest.raises(ValueError): + self.rs.hypergeometric(5, 10, 16, size=10) + + def test_hypergeometric_nsample_equals_total(self): + # nsample == ngood + nbad is valid (deterministic) + out = self.rs.hypergeometric(5, 10, 15, size=10) + testing.assert_array_equal(out, cupy.full(10, 5)) + + def test_hypergeometric_ngood_zero(self): + out = self.rs.hypergeometric(0, 10, 5, size=10) + testing.assert_array_equal(out, cupy.zeros(10)) + + def test_hypergeometric_nbad_zero(self): + out = self.rs.hypergeometric(5, 0, 5, size=10) + testing.assert_array_equal(out, cupy.full(10, 5)) + + @testing.fix_random() class TestLaplace(RandomGeneratorTestCase): diff --git a/dpnp/tests/third_party/cupy/random_tests/test_generator_api.py b/dpnp/tests/third_party/cupy/random_tests/test_generator_api.py index 1bd858c7f0b8..3ed6032029d2 100644 --- a/dpnp/tests/third_party/cupy/random_tests/test_generator_api.py +++ 
b/dpnp/tests/third_party/cupy/random_tests/test_generator_api.py @@ -265,6 +265,46 @@ class TestHypergeometric( pass +class TestHypergeometricValidation: + + @pytest.fixture(autouse=True) + def setup(self): + self.gen = random.default_rng(seed=0) + + def test_hypergeometric_ngood_negative(self): + with pytest.raises(ValueError): + self.gen.hypergeometric(-1, 10, 5, size=10) + + def test_hypergeometric_nbad_negative(self): + with pytest.raises(ValueError): + self.gen.hypergeometric(10, -1, 5, size=10) + + def test_hypergeometric_nsample_negative(self): + with pytest.raises(ValueError): + self.gen.hypergeometric(10, 10, -1, size=10) + + def test_hypergeometric_nsample_too_large(self): + with pytest.raises(ValueError): + self.gen.hypergeometric(5, 10, 16, size=10) + + def test_hypergeometric_nsample_zero(self): + # Generator API allows nsample=0 (returns zeros), unlike legacy API + out = self.gen.hypergeometric(5, 10, 0, size=10) + testing.assert_array_equal(out, cupy.zeros(10, dtype=cupy.int64)) + + def test_hypergeometric_nsample_equals_total(self): + out = self.gen.hypergeometric(5, 10, 15, size=10) + testing.assert_array_equal(out, cupy.full(10, 5, dtype=cupy.int64)) + + def test_hypergeometric_ngood_zero(self): + out = self.gen.hypergeometric(0, 10, 5, size=10) + testing.assert_array_equal(out, cupy.zeros(10, dtype=cupy.int64)) + + def test_hypergeometric_nbad_zero(self): + out = self.gen.hypergeometric(5, 0, 5, size=10) + testing.assert_array_equal(out, cupy.full(10, 5, dtype=cupy.int64)) + + @testing.parameterize(*common_distributions.power_params) @testing.fix_random() class TestPower(common_distributions.Power, GeneratorTestCase): From 09f2ce6f6f1471d7ed0f9b0f13632046201b43db Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:02:32 +0200 Subject: [PATCH 20/37] Fixup some more tests (mainly cupyx) for free-threading --- dpnp/tests/third_party/cupy/core_tests/test_raw.py | 6 ++++-- dpnp/tests/third_party/cupy/statistics_tests/test_order.py | 
2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_raw.py b/dpnp/tests/third_party/cupy/core_tests/test_raw.py index 2d3b8c153b86..e582a4f4f31e 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_raw.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_raw.py @@ -8,6 +8,7 @@ import subprocess import sys import tempfile +import threading import unittest from unittest import mock @@ -580,8 +581,9 @@ def _generate_file(self, ext: str): code = compiler._convert_to_hip_source(_test_source5, None, False) # split() is needed because nvcc could come from the env var NVCC cmd = cc.split() - source = "{}/test_load_cubin.cu".format(self.cache_dir) - file_path = self.cache_dir + "test_load_cubin" + thread_id = threading.get_ident() + source = f"{self.cache_dir}/test_load_cubin_{thread_id}.cu" + file_path = self.cache_dir + f"test_load_cubin_{thread_id}" with open(source, "w") as f: f.write(code) if not cupy.cuda.runtime.is_hip: diff --git a/dpnp/tests/third_party/cupy/statistics_tests/test_order.py b/dpnp/tests/third_party/cupy/statistics_tests/test_order.py index ba15fc16b078..97d464fa63f2 100644 --- a/dpnp/tests/third_party/cupy/statistics_tests/test_order.py +++ b/dpnp/tests/third_party/cupy/statistics_tests/test_order.py @@ -65,6 +65,8 @@ def test_percentile_unexpected_method(self, dtype): # See gh-4453 @testing.for_float_dtypes() + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="allocator setting not thread-safe") def test_percentile_memory_access(self, dtype): # Create an allocator that guarantees array allocated in # cupy.percentile call will be followed by a NaN From b10344f8ea62e70a1ba51fa1575fabd8c26e7797 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:06:02 +0200 Subject: [PATCH 21/37] Fix incomplete size guard for CUB segmented reduce and scan --- .../cupy/math_tests/test_sumprod.py | 117 ++++++++++++++++++ 1 file 
changed, 117 insertions(+) diff --git a/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py b/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py index 6d8c803e567f..86998a0adf1f 100644 --- a/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py +++ b/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py @@ -398,6 +398,123 @@ def _mitigate_cumprod(self, xp, dtype, result): return result +INT32_MAX = numpy.iinfo(numpy.int32).max + + +# CUB is not supported by dpnp; the original skipif on cupy.cuda.cub.available +# cannot be evaluated (dpnp has no cupy.cuda), so skip unconditionally. +# @pytest.mark.skipif( +# not cupy.cuda.cub.available, reason="The CUB routine is not enabled") +@pytest.mark.skip("CUB reduction is not supported") +@testing.slow +class TestReductionSizeOverInt32Max: + + @pytest.fixture(autouse=True) + def _cub_device_and_memory(self): + cupy.get_default_memory_pool().free_all_blocks() + cupy.get_default_pinned_memory_pool().free_all_blocks() + old_routine = _acc.get_routine_accelerators() + old_red = _acc.get_reduction_accelerators() + _acc.set_routine_accelerators(["cub"]) + _acc.set_reduction_accelerators([]) + yield + _acc.set_routine_accelerators(old_routine) + _acc.set_reduction_accelerators(old_red) + cupy.get_default_memory_pool().free_all_blocks() + cupy.get_default_pinned_memory_pool().free_all_blocks() + + @pytest.mark.parametrize( + "shape,axis,dtype,part", + [ + ((INT32_MAX + 1024,), None, "int8", "first_part"), + ((4, 2**30 + 512), 1, "float32", "second_part"), + ((INT32_MAX + 1024, 2), 0, "int8", "first_part"), + ((INT32_MAX + 1024, 2), 1, "int32", "second_part"), + ], + ) + def test_reduce(self, shape, axis, dtype, part): + try: + a = cupy.ones(shape, dtype=dtype) + # Make first and last element along each slice interesting + if axis is None: + a[[0, -1]] = [3, -1] + elif axis == 0: + a[[0, -1], :] = [[3], [-1]] + else: + a[:, [0, -1]] = [[3, -1]] + + # Test only half of the reductions per test for better speed + # (it is still 
very slow.) + if part == "first_part": + if axis is None: + # Full reduction: one segment, one 2 and (size-1) ones + assert a.sum() == a.size + assert a.max() == 3 + assert a.argmin() == a.size - 1 + else: + s = a.sum(axis=axis) + expected_sum = shape[axis] + testing.assert_array_equal( + s, cupy.full(s.shape, expected_sum, dtype=s.dtype) + ) + testing.assert_array_equal( + a.max(axis=axis), cupy.full(s.shape, 3, dtype=dtype) + ) + testing.assert_array_equal( + a.argmin(axis), + cupy.full(s.shape, a.shape[axis] - 1), + ) + else: + if axis is None: + # Full reduction: one segment, one 2 and (size-1) ones + assert a.prod() == -3 + assert a.min() == -1 + assert a.argmax() == 0 + else: + p = a.prod(axis=axis) + testing.assert_array_equal( + p, cupy.full(p.shape, -3, dtype=p.dtype) + ) + testing.assert_array_equal( + a.min(axis=axis), cupy.full(p.shape, -1, dtype=dtype) + ) + testing.assert_array_equal( + a.argmax(axis), cupy.full(p.shape, 0) + ) + except MemoryError: + pytest.skip("out of memory in test.") + + @pytest.mark.parametrize("dtype", [numpy.int8, numpy.int32, numpy.float32]) + def test_cumsum_size_over_int32_max(self, dtype): + """CUB device_scan with size > INT32_MAX.""" + try: + n = INT32_MAX + 1024 + a = cupy.ones(n, dtype=dtype) + a[0] = 3 + a[-1] = -1 + out = a.cumsum() + expected = n + if dtype in (numpy.float32, numpy.float64): + testing.assert_allclose(float(out[-1]), expected, rtol=2e-4) + else: + assert int(out[-1]) == expected + except MemoryError: + pytest.skip("out of memory in test.") + + @pytest.mark.parametrize("dtype", [numpy.int8, numpy.int32, numpy.float32]) + def test_cumprod_size_over_int32_max(self, dtype): + """CUB device_scan (cumprod) with size > INT32_MAX.""" + try: + n = INT32_MAX + 1024 + a = cupy.ones(n, dtype=dtype) + a[0] = 2 + a[-1] = 3 + out = a.cumprod() + assert out[-1] == 6 # product of array + except MemoryError: + pytest.skip("out of memory in test.") + + # This class compares cuTENSOR results against NumPy's 
@testing.parameterize( *testing.product( From 4f5e9712ef170fcd7f24663075c091c2e09eeda6 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:17:37 +0200 Subject: [PATCH 22/37] Fix regression for 32bit index flag in .real and broadcast --- .../cupy/creation_tests/test_basic.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/dpnp/tests/third_party/cupy/creation_tests/test_basic.py b/dpnp/tests/third_party/cupy/creation_tests/test_basic.py index c6d5f371e52f..9a5e349bf1a1 100644 --- a/dpnp/tests/third_party/cupy/creation_tests/test_basic.py +++ b/dpnp/tests/third_party/cupy/creation_tests/test_basic.py @@ -337,6 +337,61 @@ def test_full_like_subok(self): with pytest.raises(NotImplementedError): cupy.full_like(a, 1, subok=True) + @pytest.mark.skip("_index_32_bits attribute is not supported by dpnp") + @pytest.mark.slow + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe(reason="large allocations") + @pytest.mark.parametrize( + "arr_factory,expected", + [ + (lambda: cupy.empty(2**31 - 1, dtype=cupy.int8), True), + (lambda: cupy.empty(2**31, dtype=cupy.int8), True), + (lambda: cupy.empty(2**31 + 1, dtype=cupy.int8)[::2], False), + (lambda: cupy.empty(2**31 // 8, dtype=cupy.complex64), True), + (lambda: cupy.empty(2**31 // 8 + 1, dtype=cupy.complex64), False), + # Regression test for gh-9750: + (lambda: cupy.empty(2**31 // 8, dtype=cupy.complex64).real, True), + ( + lambda: cupy.empty(2**31 // 8 + 1, dtype=cupy.complex64).real, + False, + ), + # broadcasting also causes this, test both broadcast_to and normal: + ( + lambda: cupy.broadcast_to( + cupy.empty(2**30 + 1, dtype=cupy.int8), (2, 2**30 + 1) + ), + False, + ), + ( + lambda: cupy.broadcast_arrays( + cupy.empty(2**30 + 1, dtype=cupy.int8), cupy.empty((2, 1)) + )[0], + False, + ), + # Also test raw "broadcasting path": + ( + lambda: cupy.ndarray( + shape=(2**30 + 1, 2), strides=(1, 0), dtype=cupy.int8 + ), + False, + ), + # These 
ones are debatable, the start pointers are OK, but the + # range extends beyond 32bits on a byte level: + (lambda: cupy.empty((2**31 + 1) // 3, dtype="i1,i1,i1"), False), + # Same cupy.byte_bounds as above, but strided + # (size * itemsize is OK): + ( + lambda: cupy.empty((2**31 + 1) // 3, dtype="i1,i1,i1")[ + ::2 + ].view(), + False, + ), + ], + ) + def test_index_32_bits(self, arr_factory, expected): + assert arr_factory()._index_32_bits == expected + cupy.get_default_memory_pool().free_all_blocks() + @testing.parameterize( *testing.product( From 764216be3126d2d57492ae57dd21cac3a01fca12 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:20:36 +0200 Subject: [PATCH 23/37] Skip test_solve_singular_empty on NumPy >= 2.4 --- dpnp/tests/third_party/cupy/linalg_tests/test_solve.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dpnp/tests/third_party/cupy/linalg_tests/test_solve.py b/dpnp/tests/third_party/cupy/linalg_tests/test_solve.py index bac6591bb7f0..856a8f66af2d 100644 --- a/dpnp/tests/third_party/cupy/linalg_tests/test_solve.py +++ b/dpnp/tests/third_party/cupy/linalg_tests/test_solve.py @@ -80,6 +80,7 @@ def check_shape(self, a_shape, b_shape, error_types): # NumPy with OpenBLAS returns an empty array # while numpy with OneMKL raises LinAlgError @pytest.mark.skip("Undefined behavior") + @testing.with_requires("numpy<2.4") @testing.numpy_cupy_allclose() def test_solve_singular_empty(self, xp): a = xp.zeros((3, 3)) # singular From aeed1edfb32663d624d6aac654784b9a55e25fd6 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:23:58 +0200 Subject: [PATCH 24/37] Cherry pick rocm fixes --- .../third_party/cupy/random_tests/test_bit_generator.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/dpnp/tests/third_party/cupy/random_tests/test_bit_generator.py b/dpnp/tests/third_party/cupy/random_tests/test_bit_generator.py index a94202cf19b7..339b2457c490 100644 --- 
a/dpnp/tests/third_party/cupy/random_tests/test_bit_generator.py +++ b/dpnp/tests/third_party/cupy/random_tests/test_bit_generator.py @@ -49,9 +49,6 @@ def test_array_seed(self): @testing.with_requires("numpy>=1.17.0") @testing.fix_random() -@pytest.mark.skipif( - cupy.cuda.runtime.is_hip, reason="HIP does not support this" -) class TestBitGeneratorXORWOW(BitGeneratorTestCase, unittest.TestCase): def setUp(self): super().setUp() @@ -60,9 +57,6 @@ def setUp(self): @testing.with_requires("numpy>=1.17.0") @testing.fix_random() -@pytest.mark.skipif( - cupy.cuda.runtime.is_hip, reason="HIP does not support this" -) class TestBitGeneratorMRG32k3a(BitGeneratorTestCase, unittest.TestCase): def setUp(self): super().setUp() @@ -71,9 +65,6 @@ def setUp(self): @testing.with_requires("numpy>=1.17.0") @testing.fix_random() -@pytest.mark.skipif( - cupy.cuda.runtime.is_hip, reason="HIP does not support this" -) class TestBitGeneratorPhilox4x3210(BitGeneratorTestCase, unittest.TestCase): def setUp(self): super().setUp() From 65197e2e14f7f4b65edaca0f94ea922ea1a1191f Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:25:53 +0200 Subject: [PATCH 25/37] Make cutensor bindings threadsafe (and some small fixes) --- dpnp/tests/third_party/cupy/core_tests/test_raw.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_raw.py b/dpnp/tests/third_party/cupy/core_tests/test_raw.py index e582a4f4f31e..43002cba815e 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_raw.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_raw.py @@ -450,12 +450,8 @@ def tearDown(self): # kernel uses nvcc, with which I/O cannot be avoided files = os.listdir(self.cache_dir) for f in files: - if f == "test_load_cubin.cu": - count = 1 - break - else: - count = 0 - assert len(files) == count + # only test_load_cubin_*.cu files should be present + assert re.match(r"test_load_cubin_(\d+)\.cu", f) 
self.in_memory_context.__exit__(*sys.exc_info()) self.temporary_cache_dir_context.__exit__(*sys.exc_info()) From 6c56f3b6fc51fc1128e37a35a39430bc92b3f900 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:27:44 +0200 Subject: [PATCH 26/37] Validate hypergeometric inputs without syncing --- .../third_party/cupy/random_tests/test_generator.py | 11 +++++++---- .../cupy/random_tests/test_generator_api.py | 11 +++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/dpnp/tests/third_party/cupy/random_tests/test_generator.py b/dpnp/tests/third_party/cupy/random_tests/test_generator.py index 8b3f61c7b4de..ecd545c7cd63 100644 --- a/dpnp/tests/third_party/cupy/random_tests/test_generator.py +++ b/dpnp/tests/third_party/cupy/random_tests/test_generator.py @@ -260,15 +260,18 @@ def test_hypergeometric_nsample_negative(self): with pytest.raises(ValueError): self.rs.hypergeometric(10, 10, -1, size=10) - def test_hypergeometric_nsample_too_large(self): - with pytest.raises(ValueError): - self.rs.hypergeometric(5, 10, 16, size=10) - def test_hypergeometric_nsample_equals_total(self): # nsample == ngood + nbad is valid (deterministic) out = self.rs.hypergeometric(5, 10, 15, size=10) testing.assert_array_equal(out, cupy.full(10, 5)) + def test_hypergeometric_nsample_exceeds_total(self): + # nsample > ngood + nbad would previously cause an infinite + # loop in the HRUA kernel. The kernel now routes this through + # the HYP path which handles it safely. 
+ out = self.rs.hypergeometric(5, 10, 16, size=10) + testing.assert_array_equal(out, cupy.full(10, 5)) + def test_hypergeometric_ngood_zero(self): out = self.rs.hypergeometric(0, 10, 5, size=10) testing.assert_array_equal(out, cupy.zeros(10)) diff --git a/dpnp/tests/third_party/cupy/random_tests/test_generator_api.py b/dpnp/tests/third_party/cupy/random_tests/test_generator_api.py index 3ed6032029d2..1e03d0a4246b 100644 --- a/dpnp/tests/third_party/cupy/random_tests/test_generator_api.py +++ b/dpnp/tests/third_party/cupy/random_tests/test_generator_api.py @@ -283,10 +283,6 @@ def test_hypergeometric_nsample_negative(self): with pytest.raises(ValueError): self.gen.hypergeometric(10, 10, -1, size=10) - def test_hypergeometric_nsample_too_large(self): - with pytest.raises(ValueError): - self.gen.hypergeometric(5, 10, 16, size=10) - def test_hypergeometric_nsample_zero(self): # Generator API allows nsample=0 (returns zeros), unlike legacy API out = self.gen.hypergeometric(5, 10, 0, size=10) @@ -296,6 +292,13 @@ def test_hypergeometric_nsample_equals_total(self): out = self.gen.hypergeometric(5, 10, 15, size=10) testing.assert_array_equal(out, cupy.full(10, 5, dtype=cupy.int64)) + def test_hypergeometric_nsample_exceeds_total(self): + # nsample > ngood + nbad would previously cause an infinite + # loop in the HRUA kernel. The kernel now routes this through + # the HYP path which handles it safely. 
+ out = self.gen.hypergeometric(5, 10, 16, size=10) + testing.assert_array_equal(out, cupy.full(10, 5, dtype=cupy.int64)) + def test_hypergeometric_ngood_zero(self): out = self.gen.hypergeometric(0, 10, 5, size=10) testing.assert_array_equal(out, cupy.zeros(10, dtype=cupy.int64)) From b6a26dcf3632ddb8ce8baf712c8c43670a8732f1 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:29:37 +0200 Subject: [PATCH 27/37] Remove NumericTraits specializations for complex types --- dpnp/tests/third_party/cupy/math_tests/test_sumprod.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py b/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py index 86998a0adf1f..c102a88a8153 100644 --- a/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py +++ b/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py @@ -333,7 +333,7 @@ def test_cub_prod(self, xp, dtype, axis): # don't test float16 as it's not as accurate? # thread_unsafe marker requires pytest-run-parallel, not used by dpnp # @pytest.mark.thread_unsafe(reason="unsafe AssertFunctionIsCalled.") - @testing.for_dtypes("bhilBHILfdF") + @testing.for_dtypes("bhilBHILfdFD") @testing.numpy_cupy_allclose(rtol=1e-4) def test_cub_cumsum(self, xp, dtype): if self.backend == "block": @@ -360,7 +360,7 @@ def test_cub_cumsum(self, xp, dtype): # don't test float16 as it's not as accurate? 
# thread_unsafe marker requires pytest-run-parallel, not used by dpnp # @pytest.mark.thread_unsafe(reason="unsafe AssertFunctionIsCalled.") - @testing.for_dtypes("bhilBHILfdF") + @testing.for_dtypes("bhilBHILfdFD") @testing.numpy_cupy_allclose(rtol=1e-4) def test_cub_cumprod(self, xp, dtype): if self.backend == "block": From 60dd6168622defb42e96a7786933c9b2cef82fc5 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:33:04 +0200 Subject: [PATCH 28/37] Fix silent corruption in thrust sort/argsort/lexsort under --- .../cupy/sorting_tests/test_sort.py | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py b/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py index 3bf1c405d0d0..9a6ab8981711 100644 --- a/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py +++ b/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py @@ -454,6 +454,88 @@ def test_sort_complex_nan(self, xp, dtype): return a, xp.sort_complex(a) +@pytest.mark.skip( + "cupy.cuda.using_allocator / memory pool internals are not supported" +) +class TestThrustWorkspaceOOM: + """Regression tests for cupy/cupy#9894. + + When thrust's workspace allocation fails, sort/argsort/lexsort must + raise ``MemoryError`` instead of silently producing corrupt results. + + Each op may make several pre-thrust allocations (e.g. ``data.copy()`` + and ``idx_array``) before reaching thrust. Failing the *first* + allocation only exercises pre-existing OOM behavior, not this fix. + To target thrust's workspace specifically, we count allocations during + a successful run, then re-run with the *last* allocation forced to + fail. Since thrust is called last in each routine, the final + allocation is always inside thrust's workspace request. 
+ """ + + @staticmethod + def _verify_workspace_oom_raises(op): + pool = cupy.get_default_memory_pool() + n = [0] + + def counting(size): + n[0] += 1 + return pool.malloc(size) + + with cupy.cuda.using_allocator(counting): + op() + assert n[0] >= 1, "expected at least one allocation" + total = n[0] + + seen = [0] + + def fail_on_last(size): + seen[0] += 1 + if seen[0] >= total: + raise cupy.cuda.memory.OutOfMemoryError(size, 0, 0) + return pool.malloc(size) + + with cupy.cuda.using_allocator(fail_on_last): + with pytest.raises(MemoryError): + op() + + def test_sort_workspace_oom(self): + self._verify_workspace_oom_raises( + lambda: cupy.arange(100_000, dtype=cupy.float32).sort() + ) + + def test_argsort_workspace_oom(self): + self._verify_workspace_oom_raises( + lambda: cupy.arange(100_000, dtype=cupy.float32).argsort() + ) + + def test_lexsort_workspace_oom(self): + self._verify_workspace_oom_raises( + lambda: cupy.lexsort( + cupy.arange(100_000, dtype=cupy.float32).reshape(2, 50_000) + ) + ) + + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe( + # reason="contextlib.redirect_stderr replaces sys.stderr globally") + def test_no_stderr_noise_on_workspace_oom(self): + # The thrust allocator's `noexcept`-driven stderr trace was + # confusing to users (cupy/cupy#9894). After the fix, OOM produces a + # clean MemoryError with no "Exception ignored" trace and no + # OutOfMemoryError print on stderr. 
+ import contextlib + import io + + buf = io.StringIO() + with contextlib.redirect_stderr(buf): + self._verify_workspace_oom_raises( + lambda: cupy.arange(100_000, dtype=cupy.float32).sort() + ) + stderr = buf.getvalue() + assert "Exception ignored" not in stderr, stderr + assert "OutOfMemoryError" not in stderr, stderr + + @testing.parameterize( *testing.product( { From b72960efa54931eda1dae0b5ee5e5ecdfc214895 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:35:42 +0200 Subject: [PATCH 29/37] Remove test_assumed_runtime_version --- .../cupy/cuda_tests/test_runtime.py | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/dpnp/tests/third_party/cupy/cuda_tests/test_runtime.py b/dpnp/tests/third_party/cupy/cuda_tests/test_runtime.py index 0e432da5b653..d6a8b3bd5f57 100644 --- a/dpnp/tests/third_party/cupy/cuda_tests/test_runtime.py +++ b/dpnp/tests/third_party/cupy/cuda_tests/test_runtime.py @@ -1,7 +1,6 @@ from __future__ import annotations import pickle -import sys import pytest @@ -14,7 +13,6 @@ # CUDA runtime internals have no dpnp equivalent # from cupy.cuda import driver -# from cupy.cuda import nvrtc # from cupy.cuda import runtime @@ -62,25 +60,6 @@ def test_mallocFromPoolAsync(self): runtime.memPoolDestroy(pool) -@pytest.mark.skipif( - runtime.is_hip, reason="This assumption is correct only in CUDA" -) -def test_assumed_runtime_version(): - # When CUDA Python is enabled, CuPy calculates the CUDA runtime version - # from NVRTC version. This test ensures that the assumption is correct - # by running the same logic in non-CUDA Python environment. - # When this fails, `runtime.runtimeGetVersion()` logic needs to be fixed. - major, minor = nvrtc.getVersion() - local_ver = runtime._getLocalRuntimeVersion() - # On Windows, starting from CUDA 13.0, cudaRuntimeGetVersion() always - # returns major * 1000 regardless of the minor version (nvbugs 5955788, - # 5523579). Accept either form on Windows + CUDA >= 13. 
- if sys.platform == "win32" and major >= 13: - assert local_ver in (major * 1000, major * 1000 + minor * 10) - else: - assert local_ver == major * 1000 + minor * 10 - - def test_major_version(): major = runtime._getCUDAMajorVersion() if runtime.is_hip: From 1fc346184826853bc4a11bb9ff898115e0c109c8 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:40:06 +0200 Subject: [PATCH 30/37] Avoid hard pytest dependency in cupy.testing (and test) --- dpnp/tests/third_party/cupy/test_init.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/dpnp/tests/third_party/cupy/test_init.py b/dpnp/tests/third_party/cupy/test_init.py index a4f1ad78f7f5..2a533e94dc89 100644 --- a/dpnp/tests/third_party/cupy/test_init.py +++ b/dpnp/tests/third_party/cupy/test_init.py @@ -57,6 +57,28 @@ def test_import_error(self): assert stdoutdata in (b"", b"RuntimeError\n") +@pytest.mark.skip("dpnp has no public dpnp.testing module") +def test_testing_import_does_not_require_pytest(): + # cupy.testing import is lazy, but some environments tend to inspect + # it anyway. Check that even an * import doesn't require pytest. 
+ returncode, stdoutdata, stderrdata = _run_script(""" +import sys + +class BlockPytest: + def find_spec(self, fullname, path=None, target=None): + if fullname == 'pytest' or fullname.startswith('_pytest'): + raise ModuleNotFoundError(fullname) + return None +import sys +sys.meta_path.insert(0, BlockPytest()) + +# non-lazy import should succeed even if pytest is not available +from dpnp.testing import * +""") + assert returncode == 0, "stderr: {!r}".format(stderrdata) + assert stdoutdata == b"" + + # if not cupy.cuda.runtime.is_hip: # visible = "CUDA_VISIBLE_DEVICES" # else: From 5b5747b276fe1947f196a14e16620a503380a9f1 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:42:28 +0200 Subject: [PATCH 31/37] Advertise free-threading support and add linux CI run --- dpnp/tests/third_party/cupy/core_tests/test_cub_reduction.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_cub_reduction.py b/dpnp/tests/third_party/cupy/core_tests/test_cub_reduction.py index 2adcbfe6ed32..9765f4c10fa0 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_cub_reduction.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_cub_reduction.py @@ -148,6 +148,9 @@ def test_can_use_cub_oversize_input4(self): b = cupy.empty((), dtype=cupy.int8) assert self.can_use([a], [b], (1,), (0,)) is None + # thread_unsafe marker requires pytest-run-parallel, not used by dpnp + # @pytest.mark.thread_unsafe( + # reason="AssertFunctionIsCalled and accelerate mutation.") def test_can_use_accelerator_set_unset(self): # ensure we use CUB block reduction and not CUB device reduction old_routine_accelerators = _accelerator.get_routine_accelerators() From 1f981c489dd0bfb42c7d81fadc00bd9ab71e52fc Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:44:32 +0200 Subject: [PATCH 32/37] Use cuda.pathfinder for CUDA component discovery --- .../third_party/cupy/core_tests/test_include.py | 13 ++++++++----- 1 file changed, 8 
insertions(+), 5 deletions(-) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_include.py b/dpnp/tests/third_party/cupy/core_tests/test_include.py index 1e738f7977bf..cc593d0c9a77 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_include.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_include.py @@ -1,7 +1,5 @@ from __future__ import annotations -import os - import pytest import dpnp as cupy @@ -58,12 +56,17 @@ def _get_cuda_archs(self): return archs def _get_options(self): - return ( + from cuda.pathfinder import find_nvidia_header_directory + + include_dir = find_nvidia_header_directory("cudart") + opts = [ "-std=c++17", *cupy._core.core._get_cccl_include_options(), "-I{}".format(cupy._core.core._get_header_dir_path()), - "-I{}".format(os.path.join(cupy.cuda.get_cuda_path(), "include")), - ) + ] + if include_dir is not None: + opts.append("-I{}".format(include_dir)) + return tuple(opts) def test_nvcc(self): options = self._get_options() From e629924ea46635b12c622dc48b6e36b47f5622cc Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:51:22 +0200 Subject: [PATCH 33/37] Assert cupy.linalg.solve throws LinAlgError --- .../cupy/linalg_tests/test_solve.py | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/dpnp/tests/third_party/cupy/linalg_tests/test_solve.py b/dpnp/tests/third_party/cupy/linalg_tests/test_solve.py index 856a8f66af2d..3b789480fd8b 100644 --- a/dpnp/tests/third_party/cupy/linalg_tests/test_solve.py +++ b/dpnp/tests/third_party/cupy/linalg_tests/test_solve.py @@ -76,18 +76,31 @@ def check_shape(self, a_shape, b_shape, error_types): with pytest.raises(error_type): xp.linalg.solve(a, b) - # Undefined behavior is implementation-dependent: - # NumPy with OpenBLAS returns an empty array - # while numpy with OneMKL raises LinAlgError - @pytest.mark.skip("Undefined behavior") - @testing.with_requires("numpy<2.4") + def test_solve_singular_empty(self): + a = cupy.zeros((3, 3)) # singular + 
b = cupy.empty((3, 0)) # nrhs = 0 + c = cupy.linalg.solve(a, b) + assert c.size == 0 + @testing.numpy_cupy_allclose() - def test_solve_singular_empty(self, xp): - a = xp.zeros((3, 3)) # singular + def test_solve_non_singular_empty(self, xp): + a = xp.eye(3) # non-singular b = xp.empty((3, 0)) # nrhs = 0 - # LinAlgError("Singular matrix") is not raised return xp.linalg.solve(a, b) + @pytest.mark.skip("cupyx.errstate(linalg='raise') is not supported") + def test_solve_singular_empty__assert_raises(self): + # OpenBLAS with NumPy 2.4.3 started raising a LinAlgError here, + # which seems correct. We raise currently (do not test against + # NumPy as the behavior may depend on the BLAS version used) + a = cupy.zeros((3, 3)) # singular + b = cupy.empty((3, 0)) # nrhs = 0 + # errstate is 'ignore' by default since enabling it causes + # synchronization + with cupyx.errstate(linalg="raise"): + with pytest.raises(numpy.linalg.LinAlgError): + cupy.linalg.solve(a, b) + @testing.with_requires("numpy>=2.0") def test_invalid_shape(self): linalg_errors = { From 52aaf4871543e42cee8ccc07cb4dccc13ddb83f5 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:54:54 +0200 Subject: [PATCH 34/37] Drop stale xfail on TestChoiceChi.test_goodness_of_fit_2 --- dpnp/tests/third_party/cupy/random_tests/test_generator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dpnp/tests/third_party/cupy/random_tests/test_generator.py b/dpnp/tests/third_party/cupy/random_tests/test_generator.py index ecd545c7cd63..1cda3a3dc7cf 100644 --- a/dpnp/tests/third_party/cupy/random_tests/test_generator.py +++ b/dpnp/tests/third_party/cupy/random_tests/test_generator.py @@ -893,7 +893,6 @@ def test_goodness_of_fit(self): assert _hypothesis.chi_square_test(counts, expected) @_condition.repeat(3, 10) - # @pytest.mark.xfail(runtime.is_hip, reason="ROCm/HIP may have a bug") def test_goodness_of_fit_2(self): vals = self.generate(3, (5, 20), True, [0.3, 0.3, 0.4]).get() counts = numpy.histogram(vals, 
bins=numpy.arange(4))[0] From f099e5900c87a3b4d6508e849f8389d89751ca3b Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 13:57:40 +0200 Subject: [PATCH 35/37] Make new pytest versions happy --- dpnp/tests/third_party/cupy/math_tests/test_sumprod.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py b/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py index c102a88a8153..e6411e298fdd 100644 --- a/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py +++ b/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py @@ -528,7 +528,8 @@ def test_cumprod_size_over_int32_max(self, dtype): class TestCuTensorReduction: @pytest.fixture(autouse=True, scope="class") - def setup(self): + @classmethod + def setup(cls): old_accelerators = cupy._core.get_routine_accelerators() cupy._core.set_routine_accelerators(["cutensor"]) yield From 4787a251d55a99eda6476e65a916db3224c63e1e Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 14:15:12 +0200 Subject: [PATCH 36/37] Add pytest support to @testing.for_contiguous_axes decorator --- .../cupy/core_tests/test_cub_reduction.py | 39 +- .../cupy/core_tests/test_ndarray_reduction.py | 351 ++++++++--------- .../cupy/math_tests/test_sumprod.py | 358 ++++++++---------- dpnp/tests/third_party/cupy/testing/_loops.py | 6 +- 4 files changed, 341 insertions(+), 413 deletions(-) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_cub_reduction.py b/dpnp/tests/third_party/cupy/core_tests/test_cub_reduction.py index 9765f4c10fa0..0bbc1296a3f7 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_cub_reduction.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_cub_reduction.py @@ -1,7 +1,6 @@ from __future__ import annotations import sys -import unittest from itertools import combinations import pytest @@ -21,13 +20,14 @@ # This test class and its children below only test if CUB backend can be used # or not; they don't verify its 
correctness as it's already extensively covered # by existing tests -class CubReductionTestBase(unittest.TestCase): +class CubReductionTestBase: """ Note: call self.can_use() when arrays are already allocated, otherwise call self._test_can_use(). """ - def setUp(self): + @pytest.fixture(autouse=True) + def configure(self): if _environment.get_cub_path() is None: pytest.skip("CUB not found") if cupy.cuda.runtime.is_hip: @@ -38,8 +38,7 @@ def setUp(self): self.old_accelerators = _accelerator.get_reduction_accelerators() _accelerator.set_reduction_accelerators(["cub"]) - - def tearDown(self): + yield _accelerator.set_reduction_accelerators(self.old_accelerators) def _test_can_use(self, i_shape, o_shape, r_axis, o_axis, order, expected): @@ -53,40 +52,32 @@ def _test_can_use(self, i_shape, o_shape, r_axis, o_axis, order, expected): assert result is expected -@testing.parameterize( - *testing.product( - { - "shape": [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)], - "order": ("C", "F"), - } - ) -) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +@pytest.mark.parametrize("order", ["C", "F"]) class TestSimpleCubReductionKernelContiguity(CubReductionTestBase): @testing.for_contiguous_axes() - def test_can_use_cub_contiguous(self, axis): + def test_can_use_cub_contiguous(self, axis, shape, order): r_axis = axis - i_shape = self.shape + i_shape = shape o_axis = tuple(i for i in range(len(i_shape)) if i not in r_axis) - o_shape = tuple(self.shape[i] for i in o_axis) - self._test_can_use(i_shape, o_shape, r_axis, o_axis, self.order, True) + o_shape = tuple(shape[i] for i in o_axis) + self._test_can_use(i_shape, o_shape, r_axis, o_axis, order, True) @testing.for_contiguous_axes() - def test_can_use_cub_non_contiguous(self, axis): + def test_can_use_cub_non_contiguous(self, axis, shape, order): # array is contiguous, but reduce_axis is not - dim = len(self.shape) + dim = len(shape) r_dim = len(axis) non_contiguous_axes = [ i for i in combinations(range(dim), 
r_dim) if i != axis ] - i_shape = self.shape + i_shape = shape for r_axis in non_contiguous_axes: o_axis = tuple(i for i in range(dim) if i not in r_axis) - o_shape = tuple(self.shape[i] for i in o_axis) - self._test_can_use( - i_shape, o_shape, r_axis, o_axis, self.order, False - ) + o_shape = tuple(shape[i] for i in o_axis) + self._test_can_use(i_shape, o_shape, r_axis, o_axis, order, False) class TestSimpleCubReductionKernelMisc(CubReductionTestBase): diff --git a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_reduction.py b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_reduction.py index a0fa09d7661d..b774cdff4394 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_reduction.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_reduction.py @@ -1,5 +1,7 @@ from __future__ import annotations +from itertools import combinations + import numpy import pytest @@ -7,13 +9,7 @@ from dpnp.tests.third_party.cupy import testing -@testing.parameterize( - *testing.product( - { - "order": ("C", "F"), - } - ) -) +@pytest.mark.parametrize("order", ["C", "F"]) class TestArrayReduction: @pytest.fixture(scope="class") @@ -21,17 +17,17 @@ class TestArrayReduction: def exclude_cutensor(cls): # cuTENSOR seems to have issues in handling inf/nan in reduction-based # routines, so we use this fixture to skip testing it - # self.old_routine_accelerators = _acc.get_routine_accelerators() - # self.old_reduction_accelerators = _acc.get_reduction_accelerators() + # old_routine_accelerators = _acc.get_routine_accelerators() + # old_reduction_accelerators = _acc.get_reduction_accelerators() - # rot_acc = self.old_routine_accelerators.copy() + # rot_acc = old_routine_accelerators.copy() # try: # rot_acc.remove(_acc.ACCELERATOR_CUTENSOR) # except ValueError: # pass # _acc.set_routine_accelerators(rot_acc) - # red_acc = self.old_reduction_accelerators.copy() + # red_acc = old_reduction_accelerators.copy() # try: # red_acc.remove(_acc.ACCELERATOR_CUTENSOR) # except 
ValueError: @@ -40,343 +36,318 @@ def exclude_cutensor(cls): # yield - # _acc.set_routine_accelerators(self.old_routine_accelerators) - # _acc.set_reduction_accelerators(self.old_reduction_accelerators) + # _acc.set_routine_accelerators(old_routine_accelerators) + # _acc.set_reduction_accelerators(old_reduction_accelerators) pass @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_max_all(self, xp, dtype): - a = testing.shaped_random((2, 3), xp, dtype, order=self.order) + def test_max_all(self, xp, dtype, order): + a = testing.shaped_random((2, 3), xp, dtype, order=order) return a.max() @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_max_all_keepdims(self, xp, dtype): - a = testing.shaped_random((2, 3), xp, dtype, order=self.order) + def test_max_all_keepdims(self, xp, dtype, order): + a = testing.shaped_random((2, 3), xp, dtype, order=order) return a.max(keepdims=True) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_max_axis_large(self, xp, dtype): - a = testing.shaped_random((3, 1000), xp, dtype, order=self.order) + def test_max_axis_large(self, xp, dtype, order): + a = testing.shaped_random((3, 1000), xp, dtype, order=order) return a.max(axis=0) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_max_axis0(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_max_axis0(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.max(axis=0) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_max_axis1(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_max_axis1(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.max(axis=1) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) 
- def test_max_axis2(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_max_axis2(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.max(axis=2) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_max_multiple_axes(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_max_multiple_axes(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.max(axis=(1, 2)) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_max_multiple_axes_keepdims(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_max_multiple_axes_keepdims(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.max(axis=(1, 2), keepdims=True) @testing.for_float_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_max_nan(self, xp, dtype, exclude_cutensor): - a = xp.array([float("nan"), 1, -1], dtype, order=self.order) + def test_max_nan(self, exclude_cutensor, xp, dtype, order): + a = xp.array([float("nan"), 1, -1], dtype, order=order) return a.max() @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_max_nan_real(self, xp, dtype): - a = xp.array([float("nan"), 1, -1], dtype, order=self.order) + def test_max_nan_real(self, xp, dtype, order): + a = xp.array([float("nan"), 1, -1], dtype, order=order) return a.max() @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_max_nan_imag(self, xp, dtype): - a = xp.array( - [float("nan") * 1.0j, 1.0j, -1.0j], dtype, order=self.order - ) + def test_max_nan_imag(self, xp, dtype, order): + a = xp.array([float("nan") * 1.0j, 1.0j, -1.0j], dtype, order=order) return a.max() @testing.for_float_dtypes() 
@testing.numpy_cupy_allclose(contiguous_check=False) - def test_max_inf(self, exclude_cutensor, xp, dtype): + def test_max_inf(self, exclude_cutensor, xp, dtype, order): # cupy/cupy#8180 - a = xp.array([-float("inf"), -float("inf")], dtype, order=self.order) + a = xp.array([-float("inf"), -float("inf")], dtype, order=order) return a.max() @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_min_all(self, xp, dtype): - a = testing.shaped_random((2, 3), xp, dtype, order=self.order) + def test_min_all(self, xp, dtype, order): + a = testing.shaped_random((2, 3), xp, dtype, order=order) return a.min() @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_min_all_keepdims(self, xp, dtype): - a = testing.shaped_random((2, 3), xp, dtype, order=self.order) + def test_min_all_keepdims(self, xp, dtype, order): + a = testing.shaped_random((2, 3), xp, dtype, order=order) return a.min(keepdims=True) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_min_axis_large(self, xp, dtype): - a = testing.shaped_random((3, 1000), xp, dtype, order=self.order) + def test_min_axis_large(self, xp, dtype, order): + a = testing.shaped_random((3, 1000), xp, dtype, order=order) return a.min(axis=0) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_min_axis0(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_min_axis0(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.min(axis=0) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_min_axis1(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_min_axis1(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.min(axis=1) @testing.for_all_dtypes() 
@testing.numpy_cupy_allclose(contiguous_check=False) - def test_min_axis2(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_min_axis2(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.min(axis=2) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_min_multiple_axes(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_min_multiple_axes(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.min(axis=(1, 2)) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_min_multiple_axes_keepdims(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_min_multiple_axes_keepdims(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.min(axis=(1, 2), keepdims=True) @testing.for_float_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_min_nan(self, xp, dtype, exclude_cutensor): - a = xp.array([float("nan"), 1, -1], dtype, order=self.order) + def test_min_nan(self, exclude_cutensor, xp, dtype, order): + a = xp.array([float("nan"), 1, -1], dtype, order=order) return a.min() @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_min_nan_real(self, xp, dtype): - a = xp.array([float("nan"), 1, -1], dtype, order=self.order) + def test_min_nan_real(self, xp, dtype, order): + a = xp.array([float("nan"), 1, -1], dtype, order=order) return a.min() @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_min_nan_imag(self, xp, dtype): - a = xp.array( - [float("nan") * 1.0j, 1.0j, -1.0j], dtype, order=self.order - ) + def test_min_nan_imag(self, xp, dtype, order): + a = xp.array([float("nan") * 1.0j, 1.0j, -1.0j], dtype, order=order) return a.min() 
@testing.for_float_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_min_inf(self, xp, dtype, exclude_cutensor): + def test_min_inf(self, exclude_cutensor, xp, dtype, order): # cupy/cupy#8180 - a = xp.array([float("inf"), float("inf")], dtype, order=self.order) + a = xp.array([float("inf"), float("inf")], dtype, order=order) return a.min() @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmax_all(self, xp, dtype): - a = testing.shaped_random((2, 3), xp, dtype, order=self.order) + def test_argmax_all(self, xp, dtype, order): + a = testing.shaped_random((2, 3), xp, dtype, order=order) return a.argmax() @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmax_axis_large(self, xp, dtype): - a = testing.shaped_random((3, 1000), xp, dtype, order=self.order) + def test_argmax_axis_large(self, xp, dtype, order): + a = testing.shaped_random((3, 1000), xp, dtype, order=order) return a.argmax(axis=0) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmax_axis0(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_argmax_axis0(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.argmax(axis=0) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmax_axis1(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_argmax_axis1(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.argmax(axis=1) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmax_axis2(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_argmax_axis2(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.argmax(axis=2) 
@testing.for_float_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmax_nan(self, xp, dtype, exclude_cutensor): - a = xp.array([float("nan"), 1, -1], dtype, order=self.order) + def test_argmax_nan(self, exclude_cutensor, xp, dtype, order): + a = xp.array([float("nan"), 1, -1], dtype, order=order) return a.argmax() @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmax_nan_real(self, xp, dtype): - a = xp.array([float("nan"), 1, -1], dtype, order=self.order) + def test_argmax_nan_real(self, xp, dtype, order): + a = xp.array([float("nan"), 1, -1], dtype, order=order) return a.argmax() @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmax_nan_imag(self, xp, dtype): - a = xp.array( - [float("nan") * 1.0j, 1.0j, -1.0j], dtype, order=self.order - ) + def test_argmax_nan_imag(self, xp, dtype, order): + a = xp.array([float("nan") * 1.0j, 1.0j, -1.0j], dtype, order=order) return a.argmax() @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmin_all(self, xp, dtype): - a = testing.shaped_random((2, 3), xp, dtype, order=self.order) + def test_argmin_all(self, xp, dtype, order): + a = testing.shaped_random((2, 3), xp, dtype, order=order) return a.argmin() @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmin_axis_large(self, xp, dtype): - a = testing.shaped_random((3, 1000), xp, dtype, order=self.order) + def test_argmin_axis_large(self, xp, dtype, order): + a = testing.shaped_random((3, 1000), xp, dtype, order=order) return a.argmin(axis=0) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmin_axis0(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_argmin_axis0(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.argmin(axis=0) 
@testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmin_axis1(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_argmin_axis1(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.argmin(axis=1) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmin_axis2(self, xp, dtype): - a = testing.shaped_random((2, 3, 4), xp, dtype, order=self.order) + def test_argmin_axis2(self, xp, dtype, order): + a = testing.shaped_random((2, 3, 4), xp, dtype, order=order) return a.argmin(axis=2) @testing.for_float_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmin_nan(self, xp, dtype, exclude_cutensor): - a = xp.array([float("nan"), 1, -1], dtype, order=self.order) + def test_argmin_nan(self, exclude_cutensor, xp, dtype, order): + a = xp.array([float("nan"), 1, -1], dtype, order=order) return a.argmin() @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmin_nan_real(self, xp, dtype): - a = xp.array([float("nan"), 1, -1], dtype, order=self.order) + def test_argmin_nan_real(self, xp, dtype, order): + a = xp.array([float("nan"), 1, -1], dtype, order=order) return a.argmin() @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_argmin_nan_imag(self, xp, dtype): - a = xp.array( - [float("nan") * 1.0j, 1.0j, -1.0j], dtype, order=self.order - ) + def test_argmin_nan_imag(self, xp, dtype, order): + a = xp.array([float("nan") * 1.0j, 1.0j, -1.0j], dtype, order=order) return a.argmin() -@testing.parameterize( - *testing.product( - { - # TODO(leofang): make a @testing.for_all_axes decorator - "shape_and_axis": [ - ((), None), - ((0,), (0,)), - ((0, 2), (0,)), - ((0, 2), (1,)), - ((0, 2), (0, 1)), - ((2, 0), (0,)), - ((2, 0), (1,)), - ((2, 0), (0, 1)), - ((0, 2, 3), (0,)), - ((0, 2, 3), (1,)), - ((0, 2, 3), (2,)), - 
((0, 2, 3), (0, 1)), - ((0, 2, 3), (1, 2)), - ((0, 2, 3), (0, 2)), - ((0, 2, 3), (0, 1, 2)), - ((2, 0, 3), (0,)), - ((2, 0, 3), (1,)), - ((2, 0, 3), (2,)), - ((2, 0, 3), (0, 1)), - ((2, 0, 3), (1, 2)), - ((2, 0, 3), (0, 2)), - ((2, 0, 3), (0, 1, 2)), - ((2, 3, 0), (0,)), - ((2, 3, 0), (1,)), - ((2, 3, 0), (2,)), - ((2, 3, 0), (0, 1)), - ((2, 3, 0), (1, 2)), - ((2, 3, 0), (0, 2)), - ((2, 3, 0), (0, 1, 2)), - ], - "order": ("C", "F"), - "func": ("min", "max", "argmax", "argmin"), - } - ) +def _axes_for_shape(shape): + if shape == (): + return [None] + ndim = len(shape) + return [ + combo + for r in range(1, ndim + 1) + for combo in combinations(range(ndim), r) + ] + + +@pytest.mark.parametrize( + "shape,axis", + [ + (shape, axis) + for shape in [ + (), + (0,), + (0, 2), + (2, 0), + (0, 2, 3), + (2, 0, 3), + (2, 3, 0), + ] + for axis in _axes_for_shape(shape) + ], ) +@pytest.mark.parametrize("order", ["C", "F"]) +@pytest.mark.parametrize("func", ["min", "max", "argmax", "argmin"]) class TestArrayReductionZeroSize: @testing.numpy_cupy_allclose( contiguous_check=False, accept_error=ValueError ) - def test_zero_size(self, xp): - shape, axis = self.shape_and_axis + def test_zero_size(self, xp, shape, axis, order, func): # NumPy only supports axis being an int - if self.func in ("argmax", "argmin"): + if func in ("argmax", "argmin"): if axis is not None and len(axis) == 1: axis = axis[0] else: - pytest.skip( - f"NumPy does not support axis={axis} for {self.func}" - ) + pytest.skip(f"NumPy does not support axis={axis} for {func}") # dtype is irrelevant here, just pick one - a = testing.shaped_random(shape, xp, xp.float32, order=self.order) - return getattr(a, self.func)(axis=axis) + a = testing.shaped_random(shape, xp, xp.float32, order=order) + return getattr(a, func)(axis=axis) # This class compares CUB results against NumPy's. ("fallback" is CuPy's # original kernel, also tested here to reduce code duplication.) 
-@testing.parameterize( - *testing.product( - { - "shape": [ - (10,), - (10, 20), - (10, 20, 30), - (10, 20, 30, 40), - # skip (2, 3, 0) because it would not hit the CUB code path - (0,), - (2, 0), - (0, 2), - (0, 2, 3), - (2, 3, 0), - ], - "order": ("C", "F"), - "backend": ("device", "block", "fallback"), - } - ) +@pytest.mark.parametrize( + "shape", + [ + (10,), + (10, 20), + (10, 20, 30), + (10, 20, 30, 40), + # skip (2, 3, 0) because it would not hit the CUB code path + (0,), + (2, 0), + (0, 2), + (0, 2, 3), + (2, 3, 0), + ], ) +@pytest.mark.parametrize("order", ["C", "F"]) +@pytest.mark.parametrize("backend", ["device", "block", "fallback"]) @pytest.mark.skip("CUB reduction is not supported") # @pytest.mark.skipif( # not cupy.cuda.cub.available, reason="The CUB routine is not enabled" @@ -385,16 +356,16 @@ def test_zero_size(self, xp): class TestCubReduction: @pytest.fixture(autouse=True) - def setUp(self): + def setUp(self, backend): self.old_routine_accelerators = _acc.get_routine_accelerators() self.old_reduction_accelerators = _acc.get_reduction_accelerators() - if self.backend == "device": + if backend == "device": _acc.set_routine_accelerators(["cub"]) _acc.set_reduction_accelerators([]) - elif self.backend == "block": + elif backend == "block": _acc.set_routine_accelerators([]) _acc.set_reduction_accelerators(["cub"]) - elif self.backend == "fallback": + elif backend == "fallback": _acc.set_routine_accelerators([]) _acc.set_reduction_accelerators([]) yield @@ -406,28 +377,28 @@ def setUp(self): @testing.numpy_cupy_allclose( contiguous_check=False, accept_error=ValueError ) - def test_cub_min(self, xp, dtype, axis): - a = testing.shaped_random(self.shape, xp, dtype, order=self.order) + def test_cub_min(self, xp, dtype, axis, shape, order, backend): + a = testing.shaped_random(shape, xp, dtype, order=order) if xp is numpy: return a.min(axis=axis) # xp is cupy, first ensure we really use CUB ret = cupy.empty(()) # Cython checks return type, need to fool it 
- if self.backend == "device": + if backend == "device": func_name = "cupy._core._routines_statistics.cub." - if len(axis) == len(self.shape): + if len(axis) == len(shape): func_name += "device_reduce" else: func_name += "device_segmented_reduce" with testing.AssertFunctionIsCalled(func_name, return_value=ret): a.min(axis=axis) - elif self.backend == "block": + elif backend == "block": # this is the only function we can mock; the rest is cdef'd func_name = "cupy._core._cub_reduction." func_name += "_SimpleCubReductionKernel_get_cached_function" func = _cub_reduction._SimpleCubReductionKernel_get_cached_function - if len(axis) == len(self.shape): + if len(axis) == len(shape): times_called = 2 # two passes else: times_called = 1 # one pass @@ -437,15 +408,17 @@ def test_cub_min(self, xp, dtype, axis): func_name, wraps=func, times_called=times_called ): a.min(axis=axis) - elif self.backend == "fallback": + elif backend == "fallback": pass # ...then perform the actual computation return a.min(axis=axis) @testing.for_all_dtypes(no_bool=True, no_float16=True) @testing.numpy_cupy_allclose(contiguous_check=False) - def test_cub_min_empty_axis(self, xp, dtype, contiguous_check=False): - a = testing.shaped_random(self.shape, xp, dtype, order=self.order) + def test_cub_min_empty_axis( + self, xp, dtype, shape, order, contiguous_check=False + ): + a = testing.shaped_random(shape, xp, dtype, order=order) return a.min(axis=()) @testing.for_contiguous_axes() @@ -453,28 +426,28 @@ def test_cub_min_empty_axis(self, xp, dtype, contiguous_check=False): @testing.numpy_cupy_allclose( contiguous_check=False, accept_error=ValueError ) - def test_cub_max(self, xp, dtype, axis): - a = testing.shaped_random(self.shape, xp, dtype, order=self.order) + def test_cub_max(self, xp, dtype, axis, shape, order, backend): + a = testing.shaped_random(shape, xp, dtype, order=order) if xp is numpy: return a.max(axis=axis) # xp is cupy, first ensure we really use CUB ret = cupy.empty(()) # Cython checks 
return type, need to fool it - if self.backend == "device": + if backend == "device": func_name = "cupy._core._routines_statistics.cub." - if len(axis) == len(self.shape): + if len(axis) == len(shape): func_name += "device_reduce" else: func_name += "device_segmented_reduce" with testing.AssertFunctionIsCalled(func_name, return_value=ret): a.max(axis=axis) - elif self.backend == "block": + elif backend == "block": # this is the only function we can mock; the rest is cdef'd func_name = "cupy._core._cub_reduction." func_name += "_SimpleCubReductionKernel_get_cached_function" func = _cub_reduction._SimpleCubReductionKernel_get_cached_function - if len(axis) == len(self.shape): + if len(axis) == len(shape): times_called = 2 # two passes else: times_called = 1 # one pass @@ -484,13 +457,13 @@ def test_cub_max(self, xp, dtype, axis): func_name, wraps=func, times_called=times_called ): a.max(axis=axis) - elif self.backend == "fallback": + elif backend == "fallback": pass # ...then perform the actual computation return a.max(axis=axis) @testing.for_all_dtypes(no_bool=True, no_float16=True) @testing.numpy_cupy_allclose(contiguous_check=False) - def test_cub_max_empty_axis(self, xp, dtype): - a = testing.shaped_random(self.shape, xp, dtype, order=self.order) + def test_cub_max_empty_axis(self, xp, dtype, shape, order): + a = testing.shaped_random(shape, xp, dtype, order=order) return a.max(axis=()) diff --git a/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py b/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py index e6411e298fdd..7fc5a3b80d2e 100644 --- a/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py +++ b/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py @@ -1,4 +1,5 @@ import math +from itertools import product as iproduct import numpy import pytest @@ -208,26 +209,22 @@ def test_prod_dtype(self, xp, src_dtype, dst_dtype): # This class compares CUB results against NumPy's -@testing.parameterize( - *testing.product( - { - "shape": [(10,), (10, 20), 
(10, 20, 30), (10, 20, 30, 40)], - "order": ("C", "F"), - "backend": ("device", "block"), - } - ) +@pytest.mark.parametrize( + "shape", [(10,), (10, 20), (10, 20, 30), (10, 20, 30, 40)] ) +@pytest.mark.parametrize("order", ["C", "F"]) +@pytest.mark.parametrize("backend", ["device", "block"]) @pytest.mark.skip("_cub_reduction is not supported") class TestCubReduction: @pytest.fixture(autouse=True) - def setUp(self): + def setUp(self, backend): old_routine_accelerators = _acc.get_routine_accelerators() old_reduction_accelerators = _acc.get_reduction_accelerators() - if self.backend == "device": + if backend == "device": _acc.set_routine_accelerators(["cub"]) _acc.set_reduction_accelerators([]) - elif self.backend == "block": + elif backend == "block": _acc.set_routine_accelerators([]) _acc.set_reduction_accelerators(["cub"]) yield @@ -240,11 +237,11 @@ def setUp(self): # sum supports less dtypes; don't test float16 as it's not as accurate? @testing.for_dtypes("qQfdFD") @testing.numpy_cupy_allclose(rtol=1e-5) - def test_cub_sum(self, xp, dtype, axis): - a = testing.shaped_random(self.shape, xp, dtype) - if self.order in ("c", "C"): + def test_cub_sum(self, xp, dtype, axis, shape, order, backend): + a = testing.shaped_random(shape, xp, dtype) + if order in ("c", "C"): a = xp.ascontiguousarray(a) - elif self.order in ("f", "F"): + elif order in ("f", "F"): a = xp.asfortranarray(a) if xp is numpy: @@ -252,20 +249,20 @@ def test_cub_sum(self, xp, dtype, axis): # xp is cupy, first ensure we really use CUB ret = cupy.empty(()) # Cython checks return type, need to fool it - if self.backend == "device": + if backend == "device": func_name = "cupy._core._routines_math.cub." 
- if len(axis) == len(self.shape): + if len(axis) == len(shape): func_name += "device_reduce" else: func_name += "device_segmented_reduce" with testing.AssertFunctionIsCalled(func_name, return_value=ret): a.sum(axis=axis) - elif self.backend == "block": + elif backend == "block": # this is the only function we can mock; the rest is cdef'd func_name = "cupy._core._cub_reduction." func_name += "_SimpleCubReductionKernel_get_cached_function" func = _cub_reduction._SimpleCubReductionKernel_get_cached_function - if len(axis) == len(self.shape): + if len(axis) == len(shape): times_called = 2 # two passes else: times_called = 1 # one pass @@ -279,11 +276,11 @@ def test_cub_sum(self, xp, dtype, axis): # sum supports less dtypes; don't test float16 as it's not as accurate? @testing.for_dtypes("qQfdFD") @testing.numpy_cupy_allclose(rtol=1e-5, contiguous_check=False) - def test_cub_sum_empty_axis(self, xp, dtype): - a = testing.shaped_random(self.shape, xp, dtype) - if self.order in ("c", "C"): + def test_cub_sum_empty_axis(self, xp, dtype, shape, order, backend): + a = testing.shaped_random(shape, xp, dtype) + if order in ("c", "C"): a = xp.ascontiguousarray(a) - elif self.order in ("f", "F"): + elif order in ("f", "F"): a = xp.asfortranarray(a) return a.sum(axis=()) @@ -293,11 +290,11 @@ def test_cub_sum_empty_axis(self, xp, dtype): # prod supports less dtypes; don't test float16 as it's not as accurate? 
@testing.for_dtypes("qQfdFD") @testing.numpy_cupy_allclose(rtol=1e-5) - def test_cub_prod(self, xp, dtype, axis): - a = testing.shaped_random(self.shape, xp, dtype) - if self.order in ("c", "C"): + def test_cub_prod(self, xp, dtype, axis, shape, order, backend): + a = testing.shaped_random(shape, xp, dtype) + if order in ("c", "C"): a = xp.ascontiguousarray(a) - elif self.order in ("f", "F"): + elif order in ("f", "F"): a = xp.asfortranarray(a) if xp is numpy: @@ -305,20 +302,20 @@ def test_cub_prod(self, xp, dtype, axis): # xp is cupy, first ensure we really use CUB ret = cupy.empty(()) # Cython checks return type, need to fool it - if self.backend == "device": + if backend == "device": func_name = "cupy._core._routines_math.cub." - if len(axis) == len(self.shape): + if len(axis) == len(shape): func_name += "device_reduce" else: func_name += "device_segmented_reduce" with testing.AssertFunctionIsCalled(func_name, return_value=ret): a.prod(axis=axis) - elif self.backend == "block": + elif backend == "block": # this is the only function we can mock; the rest is cdef'd func_name = "cupy._core._cub_reduction." 
func_name += "_SimpleCubReductionKernel_get_cached_function" func = _cub_reduction._SimpleCubReductionKernel_get_cached_function - if len(axis) == len(self.shape): + if len(axis) == len(shape): times_called = 2 # two passes else: times_called = 1 # one pass @@ -335,14 +332,14 @@ def test_cub_prod(self, xp, dtype, axis): # @pytest.mark.thread_unsafe(reason="unsafe AssertFunctionIsCalled.") @testing.for_dtypes("bhilBHILfdFD") @testing.numpy_cupy_allclose(rtol=1e-4) - def test_cub_cumsum(self, xp, dtype): - if self.backend == "block": + def test_cub_cumsum(self, xp, dtype, shape, order, backend): + if backend == "block": pytest.skip("does not support") - a = testing.shaped_random(self.shape, xp, dtype) - if self.order in ("c", "C"): + a = testing.shaped_random(shape, xp, dtype) + if order in ("c", "C"): a = xp.ascontiguousarray(a) - elif self.order in ("f", "F"): + elif order in ("f", "F"): a = xp.asfortranarray(a) if xp is numpy: @@ -362,14 +359,14 @@ def test_cub_cumsum(self, xp, dtype): # @pytest.mark.thread_unsafe(reason="unsafe AssertFunctionIsCalled.") @testing.for_dtypes("bhilBHILfdFD") @testing.numpy_cupy_allclose(rtol=1e-4) - def test_cub_cumprod(self, xp, dtype): - if self.backend == "block": + def test_cub_cumprod(self, xp, dtype, shape, order, backend): + if backend == "block": pytest.skip("does not support") - a = testing.shaped_random(self.shape, xp, dtype) - if self.order in ("c", "C"): + a = testing.shaped_random(shape, xp, dtype) + if order in ("c", "C"): a = xp.ascontiguousarray(a) - elif self.order in ("f", "F"): + elif order in ("f", "F"): a = xp.asfortranarray(a) if xp is numpy: @@ -516,14 +513,10 @@ def test_cumprod_size_over_int32_max(self, dtype): # This class compares cuTENSOR results against NumPy's -@testing.parameterize( - *testing.product( - { - "shape": [(10,), (10, 20), (10, 20, 30), (10, 20, 30, 40)], - "order": ("C", "F"), - } - ) +@pytest.mark.parametrize( + "shape", [(10,), (10, 20), (10, 20, 30), (10, 20, 30, 40)] ) 
+@pytest.mark.parametrize("order", ["C", "F"]) @pytest.mark.skip("cutensor is not supported") class TestCuTensorReduction: @@ -539,11 +532,11 @@ def setup(cls): # sum supports less dtypes; don't test float16 as it's not as accurate? @testing.for_dtypes("qQfdFD") @testing.numpy_cupy_allclose(rtol=1e-5, contiguous_check=False) - def test_cutensor_sum(self, xp, dtype, axis): - a = testing.shaped_random(self.shape, xp, dtype) - if self.order in ("c", "C"): + def test_cutensor_sum(self, xp, dtype, axis, shape, order): + a = testing.shaped_random(shape, xp, dtype) + if order in ("c", "C"): a = xp.ascontiguousarray(a) - elif self.order in ("f", "F"): + elif order in ("f", "F"): a = xp.asfortranarray(a) if xp is numpy: @@ -560,134 +553,121 @@ def test_cutensor_sum(self, xp, dtype, axis): # sum supports less dtypes; don't test float16 as it's not as accurate? @testing.for_dtypes("qQfdFD") @testing.numpy_cupy_allclose(rtol=1e-5, contiguous_check=False) - def test_cutensor_sum_empty_axis(self, xp, dtype): - a = testing.shaped_random(self.shape, xp, dtype) - if self.order in ("c", "C"): + def test_cutensor_sum_empty_axis(self, xp, dtype, shape, order): + a = testing.shaped_random(shape, xp, dtype) + if order in ("c", "C"): a = xp.ascontiguousarray(a) - elif self.order in ("f", "F"): + elif order in ("f", "F"): a = xp.asfortranarray(a) return a.sum(axis=()) -@testing.parameterize( - *testing.product( - { - "shape": [(2, 3, 4), (20, 30, 40)], - "axis": [0, 1], - "transpose_axes": [True, False], - "keepdims": [True, False], - "func": ["nansum", "nanprod"], - } - ) -) +@pytest.mark.parametrize("shape", [(2, 3, 4), (20, 30, 40)]) +@pytest.mark.parametrize("axis", [0, 1]) +@pytest.mark.parametrize("transpose_axes", [True, False]) +@pytest.mark.parametrize("keepdims", [True, False]) +@pytest.mark.parametrize("func", ["nansum", "nanprod"]) class TestNansumNanprodLong: - def _do_transposed_axis_test(self): - return not self.transpose_axes and self.axis != 1 + def 
_do_transposed_axis_test(self, transpose_axes, axis): + return not transpose_axes and axis != 1 - def _numpy_nanprod_implemented(self): + def _numpy_nanprod_implemented(self, func): return ( - self.func == "nanprod" + func == "nanprod" and numpy.__version__ >= numpy.lib.NumpyVersion("1.10.0") ) - def _test(self, xp, dtype): - shape = self.shape + def _test(self, xp, dtype, shape, axis, transpose_axes, keepdims, func): # Reduce the shape of the input array to avoid overflow warning # for nanprod with float32, shape=(20, 30, 40), axis=0 and transpose_axes=False if ( - self.func == "nanprod" + func == "nanprod" and dtype == xp.float32 - and self.shape == (20, 30, 40) - and self.axis == 0 - and not self.transpose_axes + and shape == (20, 30, 40) + and axis == 0 + and not transpose_axes ): shape = (10, 20, 30) a = testing.shaped_arange(shape, xp, dtype) - if self.transpose_axes: + if transpose_axes: a = a.transpose(2, 0, 1) if not issubclass(dtype, xp.integer): a[:, 1] = xp.nan - func = getattr(xp, self.func) - return func(a, axis=self.axis, keepdims=self.keepdims) + func = getattr(xp, func) + return func(a, axis=axis, keepdims=keepdims) @testing.for_all_dtypes(no_bool=True, no_float16=True) @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) - def test_nansum_all(self, xp, dtype): - if ( - not self._numpy_nanprod_implemented() - or not self._do_transposed_axis_test() - ): + def test_nansum_all( + self, xp, dtype, shape, axis, transpose_axes, keepdims, func + ): + if not self._numpy_nanprod_implemented( + func + ) or not self._do_transposed_axis_test(transpose_axes, axis): return xp.array(()) - return self._test(xp, dtype) + return self._test( + xp, dtype, shape, axis, transpose_axes, keepdims, func + ) @testing.for_all_dtypes(no_bool=True, no_float16=True) @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) - def test_nansum_axis_transposed(self, xp, dtype): - if ( - not self._numpy_nanprod_implemented() - or not self._do_transposed_axis_test() 
- ): + def test_nansum_axis_transposed( + self, xp, dtype, shape, axis, transpose_axes, keepdims, func + ): + if not self._numpy_nanprod_implemented( + func + ) or not self._do_transposed_axis_test(transpose_axes, axis): return xp.array(()) - return self._test(xp, dtype) + return self._test( + xp, dtype, shape, axis, transpose_axes, keepdims, func + ) -@testing.parameterize( - *testing.product( - { - "shape": [(2, 3, 4), (20, 30, 40)], - } - ) -) +@pytest.mark.parametrize("shape", [(2, 3, 4), (20, 30, 40)]) class TestNansumNanprodExtra: - def test_nansum_axis_float16(self): + def test_nansum_axis_float16(self, shape): # Note that the above test example overflows in float16. We use a # smaller array instead, just return if array is too large. - if numpy.prod(self.shape) > 24: + if numpy.prod(shape) > 24: return - a = testing.shaped_arange(self.shape, dtype="e") + a = testing.shaped_arange(shape, dtype="e") a[:, 1] = cupy.nan sa = cupy.nansum(a, axis=1) - b = testing.shaped_arange(self.shape, numpy, dtype="f") + b = testing.shaped_arange(shape, numpy, dtype="f") b[:, 1] = numpy.nan sb = numpy.nansum(b, axis=1) testing.assert_allclose(sa, sb.astype("e")) @testing.for_all_dtypes(no_bool=True, no_float16=True) @testing.numpy_cupy_allclose() - def test_nansum_out(self, xp, dtype): - a = testing.shaped_arange(self.shape, xp, dtype) + def test_nansum_out(self, xp, dtype, shape): + a = testing.shaped_arange(shape, xp, dtype) if not issubclass(dtype, xp.integer): a[:, 1] = xp.nan - b = xp.empty((self.shape[0], self.shape[2]), dtype=dtype) + b = xp.empty((shape[0], shape[2]), dtype=dtype) xp.nansum(a, axis=1, out=b) return b - def test_nansum_out_wrong_shape(self): - a = testing.shaped_arange(self.shape) + def test_nansum_out_wrong_shape(self, shape): + a = testing.shaped_arange(shape) a[:, 1] = cupy.nan b = cupy.empty((2, 3)) with pytest.raises(ValueError): cupy.nansum(a, axis=1, out=b) -@testing.parameterize( - *testing.product( - { - "shape": [(2, 3, 4, 5), (20, 30, 40, 
50)], - "axis": [(1, 3), (0, 2, 3)], - } - ) -) +@pytest.mark.parametrize("shape", [(2, 3, 4, 5), (20, 30, 40, 50)]) +@pytest.mark.parametrize("axis", [(1, 3), (0, 2, 3)]) class TestNansumNanprodAxes: @testing.for_all_dtypes(no_bool=True, no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-6) - def test_nansum_axes(self, xp, dtype): - a = testing.shaped_arange(self.shape, xp, dtype) + def test_nansum_axes(self, xp, dtype, shape, axis): + a = testing.shaped_arange(shape, xp, dtype) if not issubclass(dtype, xp.integer): a[:, 1] = xp.nan - return xp.nansum(a, axis=self.axis) + return xp.nansum(a, axis=axis) class TestNansumNanprodHuge: @@ -715,7 +695,6 @@ def test_nansum_axis_huge_halfnan(self, xp): axes = [0, 1, 2] -@testing.parameterize(*testing.product({"axis": axes})) class TestCumsum: def _cumsum(self, xp, a, *args, **kwargs): @@ -752,48 +731,53 @@ def test_cumsum_2dim(self, xp, dtype): a = testing.shaped_arange((4, 5), xp, dtype) return self._cumsum(xp, a) + @pytest.mark.parametrize("axis", axes) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_cumsum_axis(self, xp, dtype): + def test_cumsum_axis(self, xp, dtype, axis): n = len(axes) a = testing.shaped_arange(tuple(range(4, 4 + n)), xp, dtype) - return self._cumsum(xp, a, axis=self.axis) + return self._cumsum(xp, a, axis=axis) + @pytest.mark.parametrize("axis", axes) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() - def test_cumsum_axis_out(self, xp, dtype): + def test_cumsum_axis_out(self, xp, dtype, axis): n = len(axes) shape = tuple(range(4, 4 + n)) a = testing.shaped_arange(shape, xp, dtype) out = xp.zeros(shape, dtype=dtype) - self._cumsum(xp, a, axis=self.axis, out=out) + self._cumsum(xp, a, axis=axis, out=out) return out + @pytest.mark.parametrize("axis", axes) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() - def test_cumsum_axis_out_noncontiguous(self, xp, dtype): + def test_cumsum_axis_out_noncontiguous(self, xp, dtype, axis): n = len(axes) 
shape = tuple(range(4, 4 + n)) a = testing.shaped_arange(shape, xp, dtype) out = xp.zeros((8,) + shape[1:], dtype=dtype)[ ::2 ] # Non contiguous view - self._cumsum(xp, a, axis=self.axis, out=out) + self._cumsum(xp, a, axis=axis, out=out) return out + @pytest.mark.parametrize("axis", axes) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) - def test_ndarray_cumsum_axis(self, xp, dtype): + def test_ndarray_cumsum_axis(self, xp, dtype, axis): n = len(axes) a = testing.shaped_arange(tuple(range(4, 4 + n)), xp, dtype) - return a.cumsum(axis=self.axis) + return a.cumsum(axis=axis) + @pytest.mark.parametrize("axis", axes) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() - def test_cumsum_axis_empty(self, xp, dtype): + def test_cumsum_axis_empty(self, xp, dtype, axis): n = len(axes) a = testing.shaped_arange(tuple(range(0, n)), xp, dtype) - return self._cumsum(xp, a, axis=self.axis) + return self._cumsum(xp, a, axis=axis) @testing.for_all_dtypes() def test_invalid_axis_lower1(self, dtype): @@ -930,29 +914,21 @@ def test_cumprod_numpy_array(self, dtype): @pytest.mark.usefixtures("suppress_invalid_numpy_warnings") -@testing.parameterize( - *testing.product( - { - "shape": [(20,), (7, 6), (3, 4, 5)], - "axis": [None, 0, 1, 2], - "func": ("nancumsum", "nancumprod"), - } - ) -) +@pytest.mark.parametrize("shape", [(20,), (7, 6), (3, 4, 5)]) +@pytest.mark.parametrize("axis", [None, 0, 1, 2]) +@pytest.mark.parametrize("func", ["nancumsum", "nancumprod"]) class TestNanCumSumProd: zero_density = 0.25 - def _make_array(self, dtype): + def _make_array(self, dtype, shape): dtype = numpy.dtype(dtype) if dtype.char in "efdFD": r_dtype = dtype.char.lower() - a = testing.shaped_random(self.shape, numpy, dtype=r_dtype, scale=1) + a = testing.shaped_random(shape, numpy, dtype=r_dtype, scale=1) if dtype.char in "FD": ai = a - aj = testing.shaped_random( - self.shape, numpy, dtype=r_dtype, scale=1 - ) + aj = testing.shaped_random(shape, numpy, 
dtype=r_dtype, scale=1) ai[ai < math.sqrt(self.zero_density)] = 0 aj[aj < math.sqrt(self.zero_density)] = 0 a = ai + 1j * aj @@ -960,30 +936,30 @@ def _make_array(self, dtype): a[a < self.zero_density] = 0 a = a / a else: - a = testing.shaped_random(self.shape, numpy, dtype=dtype) + a = testing.shaped_random(shape, numpy, dtype=dtype) return a @testing.for_all_dtypes() @testing.numpy_cupy_allclose() - def test_nancumsumprod(self, xp, dtype): - if self.axis is not None and self.axis >= len(self.shape): + def test_nancumsumprod(self, xp, dtype, shape, axis, func): + if axis is not None and axis >= len(shape): pytest.skip() - a = xp.array(self._make_array(dtype)) - out = getattr(xp, self.func)(a, axis=self.axis) + a = xp.array(self._make_array(dtype, shape)) + out = getattr(xp, func)(a, axis=axis) return xp.ascontiguousarray(out) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() - def test_nancumsumprod_out(self, xp, dtype): + def test_nancumsumprod_out(self, xp, dtype, shape, axis, func): dtype = numpy.dtype(dtype) - if self.axis is not None and self.axis >= len(self.shape): + if axis is not None and axis >= len(shape): pytest.skip() - if len(self.shape) > 1 and self.axis is None: + if len(shape) > 1 and axis is None: # Skip the cases where np.nancum{sum|prod} raise AssertionError. 
pytest.skip() - a = xp.array(self._make_array(dtype)) - out = xp.empty(self.shape, dtype=dtype) - getattr(xp, self.func)(a, axis=self.axis, out=out) + a = xp.array(self._make_array(dtype, shape)) + out = xp.empty(shape, dtype=dtype) + getattr(xp, func)(a, axis=axis, out=out) return xp.ascontiguousarray(out) @@ -1057,35 +1033,30 @@ def test_diff_invalid_axis(self): # This class compares CUB results against NumPy's -@testing.parameterize( - *testing.product_dict( - testing.product( - { - "shape": [()], - "axis": [None, ()], - "spacing": [(), (1.2,)], - } - ) - + testing.product( - { - "shape": [(33,)], - "axis": [None, 0, -1, (0,)], - "spacing": [(), (1.2,), "sequence of int", "arrays"], - } +@pytest.mark.parametrize( + "shape,axis,spacing", + list(iproduct([()], [None, ()], [(), (1.2,)])) + + list( + iproduct( + [(33,)], + [None, 0, -1, (0,)], + [(), (1.2,), "sequence of int", "arrays"], ) - + testing.product( - { - "shape": [(10, 20), (10, 20, 30)], - "axis": [None, 0, -1, (0, -1), (1, 0)], - "spacing": [(), (1.2,), "sequence of int", "arrays", "mixed"], - } - ), - testing.product( - { - "edge_order": [1, 2], - } - ), ) + + list( + iproduct( + [(10, 20), (10, 20, 30)], + [None, 0, -1, (0, -1), (1, 0)], + [(), (1.2,), "sequence of int", "arrays", "mixed"], + ) + ), +) +@pytest.mark.parametrize( + "edge_order", + [ + pytest.param(1, id="edge_order"), + pytest.param(2, id="edge_order"), + ], ) class TestGradient: @@ -1123,10 +1094,10 @@ def _gradient(self, xp, dtype, shape, spacing, axis, edge_order): @testing.for_dtypes("fFdD") @testing.numpy_cupy_allclose(atol=1e-6, rtol=1e-5) - def test_gradient_floating(self, xp, dtype): - return self._gradient( - xp, dtype, self.shape, self.spacing, self.axis, self.edge_order - ) + def test_gradient_floating( + self, xp, dtype, shape, axis, spacing, edge_order + ): + return self._gradient(xp, dtype, shape, spacing, axis, edge_order) # unsigned int behavior fixed in 1.18.1 # https://github.com/numpy/numpy/issues/15207 @@ -1135,20 
+1106,13 @@ def test_gradient_floating(self, xp, dtype): @testing.numpy_cupy_allclose( atol=1e-6, rtol=1e-5, type_check=has_support_aspect64() ) - def test_gradient_int(self, xp, dtype): - return self._gradient( - xp, dtype, self.shape, self.spacing, self.axis, self.edge_order - ) + def test_gradient_int(self, xp, dtype, shape, axis, spacing, edge_order): + return self._gradient(xp, dtype, shape, spacing, axis, edge_order) @testing.numpy_cupy_allclose(atol=2e-2, rtol=1e-3) - def test_gradient_float16(self, xp): + def test_gradient_float16(self, xp, shape, axis, spacing, edge_order): return self._gradient( - xp, - numpy.float16, - self.shape, - self.spacing, - self.axis, - self.edge_order, + xp, numpy.float16, shape, spacing, axis, edge_order ) diff --git a/dpnp/tests/third_party/cupy/testing/_loops.py b/dpnp/tests/third_party/cupy/testing/_loops.py index 03232642b221..c85481e20928 100644 --- a/dpnp/tests/third_party/cupy/testing/_loops.py +++ b/dpnp/tests/third_party/cupy/testing/_loops.py @@ -1523,8 +1523,8 @@ def for_contiguous_axes(name="axis"): def decorator(impl): @_wraps_partial(impl, name) def test_func(self, *args, **kw): - ndim = len(self.shape) - order = self.order + ndim = len(kw["shape"]) + order = kw["order"] for i in range(ndim): a = () if order in ("c", "C"): @@ -1546,7 +1546,7 @@ def test_func(self, *args, **kw): ", ndim is", ndim, ", shape is", - self.shape, + kw["shape"], ", order is", order, ) From d02678ac3fde62bfc46798e17fe51a9a3fcdd681 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Jun 2026 15:14:23 +0200 Subject: [PATCH 37/37] Update new tests to handle a device with no fp64 support --- dpnp/tests/third_party/cupy/linalg_tests/test_solve.py | 2 +- dpnp/tests/third_party/cupy/logic_tests/test_comparison.py | 6 ++---- .../third_party/cupy/manipulation_tests/test_tiling.py | 3 ++- dpnp/tests/third_party/cupy/sorting_tests/test_sort.py | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git 
a/dpnp/tests/third_party/cupy/linalg_tests/test_solve.py b/dpnp/tests/third_party/cupy/linalg_tests/test_solve.py index 3b789480fd8b..eeb5712d84ef 100644 --- a/dpnp/tests/third_party/cupy/linalg_tests/test_solve.py +++ b/dpnp/tests/third_party/cupy/linalg_tests/test_solve.py @@ -82,7 +82,7 @@ def test_solve_singular_empty(self): c = cupy.linalg.solve(a, b) assert c.size == 0 - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_solve_non_singular_empty(self, xp): a = xp.eye(3) # non-singular b = xp.empty((3, 0)) # nrhs = 0 diff --git a/dpnp/tests/third_party/cupy/logic_tests/test_comparison.py b/dpnp/tests/third_party/cupy/logic_tests/test_comparison.py index 91a9dc727924..de788ba0d473 100644 --- a/dpnp/tests/third_party/cupy/logic_tests/test_comparison.py +++ b/dpnp/tests/third_party/cupy/logic_tests/test_comparison.py @@ -93,16 +93,14 @@ def test_binary_array_pyscalar_int(self, xp, dtype, scalar, op): b = scalar return [op(a, b), op(b, a)] - @pytest.mark.parametrize( - "dtype", [numpy.float16, numpy.float32, numpy.float64] - ) @pytest.mark.parametrize( "scalar", [-1, 0, 2**32, 2**31 - 1, 2**31 + 1, 2**63, 2**64 - 1] ) @pytest.mark.parametrize("op", operators) + @testing.for_float_dtypes(no_float16=False) @testing.numpy_cupy_array_equal() @numpy.errstate(over="ignore") - def test_binary_array_pyscalar_int_and_float(self, xp, dtype, scalar, op): + def test_binary_array_pyscalar_int_and_float(self, xp, scalar, op, dtype): a = xp.array( [-1, 0, 2**31 - 1, 2**31 + 1, 2**32, 2**63 - 1, 2**62, 2**62 + 1] ) diff --git a/dpnp/tests/third_party/cupy/manipulation_tests/test_tiling.py b/dpnp/tests/third_party/cupy/manipulation_tests/test_tiling.py index a37544656735..a0ea1f35865f 100644 --- a/dpnp/tests/third_party/cupy/manipulation_tests/test_tiling.py +++ b/dpnp/tests/third_party/cupy/manipulation_tests/test_tiling.py @@ -4,6 +4,7 @@ import pytest import dpnp as cupy +from dpnp.tests.helper import has_support_aspect64 
from dpnp.tests.third_party.cupy import testing @@ -166,7 +167,7 @@ def test_repeat(self, xp): class TestRepeatNdarrayArrayDtype: """Output dtype matches input dtype.""" - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_array_equal(type_check=has_support_aspect64()) def test_dtype_preserved(self, xp): x = testing.shaped_arange((3, 4), xp, dtype=self.a_dtype) return xp.repeat(x, xp.array([1, 2, 3, 4]), axis=1) diff --git a/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py b/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py index 9a6ab8981711..196f64fffa74 100644 --- a/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py +++ b/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py @@ -37,7 +37,7 @@ def test_external_sort_zero_dim(self): with pytest.raises(AxisError): xp.sort(a) - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_array_equal(type_check=has_support_aspect64()) def test_sort_zero_length_axis(self, xp): """Sorting along a zero-length axis is a no-op (#9816).""" return xp.sort(xp.empty((2, 0)), axis=-1)