From 8ab72a9906643f4440958fde6df08e986a1b94cd Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue, 10 Feb 2026 13:49:52 -0500
Subject: [PATCH 01/11] feat(pathfinder): add CTK root canary probe for
 non-standard-path libs

Libraries like nvvm, whose shared object lives in a subdirectory
($CTK_ROOT/nvvm/lib64/) that is not on the system linker path, cannot
be found via a bare dlopen on system CTK installs when CUDA_HOME is
unset.

Add a "canary probe" search step: when direct system search fails,
system-load a well-known CTK lib that IS on the linker path (cudart),
derive the CTK installation root from its resolved path, and look for
the target lib relative to that root via the existing anchor-point
logic. The mechanism is generic -- any future lib with a non-standard
path just needs its entry in _find_lib_dir_using_anchor_point.

The canary probe is intentionally placed after CUDA_HOME in the search
cascade to preserve backward compatibility: users who have CUDA_HOME
set expect it to be authoritative, and existing code relying on that
ordering should not silently change behavior.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../_dynamic_libs/find_nvidia_dynamic_lib.py  |  61 +++++
 .../_dynamic_libs/load_nvidia_dynamic_lib.py  |  60 ++++-
 .../tests/test_ctk_root_discovery.py          | 251 ++++++++++++++++++
 3 files changed, 368 insertions(+), 4 deletions(-)
 create mode 100644 cuda_pathfinder/tests/test_ctk_root_discovery.py

diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
index 65c9f4bf3c..d5c376012c 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
@@ -152,6 +152,57 @@ def _find_dll_using_lib_dir(
     return None
 
 
+def _derive_ctk_root_linux(resolved_lib_path: str) -> str | None:
+    """Derive the CTK installation root from a resolved library path on Linux.
+
+    Standard system CTK layout: ``$CTK_ROOT/lib64/libfoo.so.XX``
+    (some installs use ``lib`` instead of ``lib64``).
+
+    Returns None if the path doesn't match a recognized layout.
+    """
+    lib_dir = os.path.dirname(resolved_lib_path)
+    basename = os.path.basename(lib_dir)
+    if basename in ("lib64", "lib"):
+        return os.path.dirname(lib_dir)
+    return None
+
+
+def _derive_ctk_root_windows(resolved_lib_path: str) -> str | None:
+    """Derive the CTK installation root from a resolved library path on Windows.
+
+    Handles two CTK layouts:
+    - CTK 13: ``$CTK_ROOT/bin/x64/foo.dll``
+    - CTK 12: ``$CTK_ROOT/bin/foo.dll``
+
+    Returns None if the path doesn't match a recognized layout.
+
+    Uses ``ntpath`` explicitly so the function is testable on any platform.
+    """
+    import ntpath
+
+    lib_dir = ntpath.dirname(resolved_lib_path)
+    basename = ntpath.basename(lib_dir).lower()
+    if basename == "x64":
+        parent = ntpath.dirname(lib_dir)
+        if ntpath.basename(parent).lower() == "bin":
+            return ntpath.dirname(parent)
+    elif basename == "bin":
+        return ntpath.dirname(lib_dir)
+    return None
+
+
+def derive_ctk_root(resolved_lib_path: str) -> str | None:
+    """Derive the CTK installation root from a resolved library path.
+
+    Given the absolute path of a loaded CTK shared library, walk up the
+    directory tree to find the CTK root.  Returns None if the path doesn't
+    match any recognized CTK directory layout.
+    """
+    if IS_WINDOWS:
+        return _derive_ctk_root_windows(resolved_lib_path)
+    return _derive_ctk_root_linux(resolved_lib_path)
+
+
 class _FindNvidiaDynamicLib:
     def __init__(self, libname: str):
         self.libname = libname
@@ -185,6 +236,16 @@ def try_with_conda_prefix(self) -> str | None:
     def try_with_cuda_home(self) -> str | None:
         return self._find_using_lib_dir(_find_lib_dir_using_cuda_home(self.libname))
 
+    def try_via_ctk_root(self, ctk_root: str) -> str | None:
+        """Find the library under a derived CTK root directory.
+
+        Uses :func:`_find_lib_dir_using_anchor_point` which already knows
+        about non-standard sub-paths (e.g. ``nvvm/lib64`` for nvvm).
+        """
+        return self._find_using_lib_dir(
+            _find_lib_dir_using_anchor_point(self.libname, anchor_point=ctk_root, linux_lib_dir="lib64")
+        )
+
     def _find_using_lib_dir(self, lib_dir: str | None) -> str | None:
         if lib_dir is None:
             return None
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
index 8de2a5511e..cf249bce56 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
@@ -5,7 +5,10 @@
 import struct
 import sys
 
-from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import _FindNvidiaDynamicLib
+from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import (
+    _FindNvidiaDynamicLib,
+    derive_ctk_root,
+)
 from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError, LoadedDL, load_dependencies
 from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
     SUPPORTED_LINUX_SONAMES,
@@ -60,6 +63,37 @@ def _load_driver_lib_no_cache(libname: str) -> LoadedDL:
     )
 
 
+# Libs that reside on the standard linker path in system CTK installs.
+# Used to discover the CTK root when a lib with a non-standard path
+# (e.g. nvvm under $CTK_ROOT/nvvm/lib64) can't be found directly.
+_CTK_ROOT_CANARY_LIBNAMES = ("cudart",)
+
+
+def _try_ctk_root_canary(finder: _FindNvidiaDynamicLib) -> str | None:
+    """Derive the CTK root from a system-installed canary lib.
+
+    For libs like nvvm whose shared object doesn't reside on the standard
+    linker path, we locate a well-known CTK lib that IS on the linker path
+    via system search, derive the CTK installation root from its resolved
+    path, and then look for the target lib relative to that root.
+
+    The canary lib is loaded as a side-effect but this is harmless: it stays
+    loaded (handles are never closed) and will be reused by
+    :func:`load_nvidia_dynamic_lib` if requested later.
+    """
+    for canary_libname in _CTK_ROOT_CANARY_LIBNAMES:
+        canary = load_with_system_search(canary_libname)
+        if canary is None or canary.abs_path is None:
+            continue
+        ctk_root = derive_ctk_root(canary.abs_path)
+        if ctk_root is None:
+            continue
+        abs_path = finder.try_via_ctk_root(ctk_root)
+        if abs_path is not None:
+            return abs_path
+    return None
+
+
 def _load_lib_no_cache(libname: str) -> LoadedDL:
     if libname in _DRIVER_ONLY_LIBNAMES:
         return _load_driver_lib_no_cache(libname)
@@ -90,11 +124,21 @@ def _load_lib_no_cache(libname: str) -> LoadedDL:
         loaded = load_with_system_search(libname)
         if loaded is not None:
             return loaded
+
         abs_path = finder.try_with_cuda_home()
-        if abs_path is None:
-            finder.raise_not_found_error()
-        else:
+        if abs_path is not None:
             found_via = "CUDA_HOME"
+        else:
+            # Canary probe: if the direct system search and CUDA_HOME both
+            # failed (e.g. nvvm isn't on the linker path and CUDA_HOME is
+            # unset), try to discover the CTK root by system-loading a
+            # well-known CTK lib that IS on the linker path, then look for
+            # the target lib relative to that root.
+            abs_path = _try_ctk_root_canary(finder)
+            if abs_path is not None:
+                found_via = "system-ctk-root"
+            else:
+                finder.raise_not_found_error()
 
     return load_with_abs_path(libname, abs_path, found_via)
 
@@ -164,6 +208,14 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
 
            - If set, use ``CUDA_HOME`` or ``CUDA_PATH`` (in that order).
 
+        5. **CTK root canary probe**
+
+           - For libraries whose shared object doesn't reside on the standard
+             linker path (e.g. ``libnvvm.so`` lives under ``$CTK_ROOT/nvvm/lib64``),
+             attempt to discover the CTK installation root by system-loading a
+             well-known CTK library (``cudart``) that *is* on the linker path, then
+             derive the root from its resolved absolute path.
+
     **Driver libraries** (``"cuda"``, ``"nvml"``):
 
         These are part of the NVIDIA display driver (not the CUDA Toolkit) and
diff --git a/cuda_pathfinder/tests/test_ctk_root_discovery.py b/cuda_pathfinder/tests/test_ctk_root_discovery.py
new file mode 100644
index 0000000000..b85015e11e
--- /dev/null
+++ b/cuda_pathfinder/tests/test_ctk_root_discovery.py
@@ -0,0 +1,251 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import (
+    _derive_ctk_root_linux,
+    _derive_ctk_root_windows,
+    _FindNvidiaDynamicLib,
+    derive_ctk_root,
+)
+from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
+from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import (
+    _load_lib_no_cache,
+    _try_ctk_root_canary,
+)
+
+# ---------------------------------------------------------------------------
+# derive_ctk_root
+# ---------------------------------------------------------------------------
+
+
+def test_derive_ctk_root_linux_lib64():
+    assert _derive_ctk_root_linux("/usr/local/cuda-13/lib64/libcudart.so.13") == "/usr/local/cuda-13"
+
+
+def test_derive_ctk_root_linux_lib():
+    assert _derive_ctk_root_linux("/opt/cuda/lib/libcudart.so.12") == "/opt/cuda"
+
+
+def test_derive_ctk_root_linux_unrecognized():
+    assert _derive_ctk_root_linux("/some/weird/path/libcudart.so.13") is None
+
+
+def test_derive_ctk_root_linux_root_level():
+    assert _derive_ctk_root_linux("/lib64/libcudart.so.13") == "/"
+
+
+def test_derive_ctk_root_windows_ctk13():
+    path = r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\bin\x64\cudart64_13.dll"
+    assert _derive_ctk_root_windows(path) == r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0"
+
+
+def test_derive_ctk_root_windows_ctk12():
+    path = r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin\cudart64_12.dll"
+    assert _derive_ctk_root_windows(path) == r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8"
+
+
+def test_derive_ctk_root_windows_unrecognized():
+    assert _derive_ctk_root_windows(r"C:\weird\cudart64_13.dll") is None
+
+
+def test_derive_ctk_root_windows_case_insensitive_bin():
+    assert _derive_ctk_root_windows(r"C:\CUDA\Bin\cudart64_12.dll") == r"C:\CUDA"
+
+
+def test_derive_ctk_root_windows_case_insensitive_x64():
+    assert _derive_ctk_root_windows(r"C:\CUDA\BIN\X64\cudart64_13.dll") == r"C:\CUDA"
+
+
+def test_derive_ctk_root_dispatches_to_linux():
+    with patch("cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib.IS_WINDOWS", False):
+        assert derive_ctk_root("/usr/local/cuda/lib64/libcudart.so.13") == "/usr/local/cuda"
+
+
+def test_derive_ctk_root_dispatches_to_windows():
+    with patch("cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib.IS_WINDOWS", True):
+        assert derive_ctk_root(r"C:\CUDA\v13\bin\cudart64_13.dll") == r"C:\CUDA\v13"
+
+
+# ---------------------------------------------------------------------------
+# _FindNvidiaDynamicLib.try_via_ctk_root
+# ---------------------------------------------------------------------------
+
+
+def test_try_via_ctk_root_finds_nvvm(tmp_path):
+    ctk_root = tmp_path / "cuda-13"
+    nvvm_dir = ctk_root / "nvvm" / "lib64"
+    nvvm_dir.mkdir(parents=True)
+    nvvm_so = nvvm_dir / "libnvvm.so"
+    nvvm_so.write_bytes(b"fake")
+
+    assert _FindNvidiaDynamicLib("nvvm").try_via_ctk_root(str(ctk_root)) == str(nvvm_so)
+
+
+def test_try_via_ctk_root_returns_none_when_dir_missing(tmp_path):
+    ctk_root = tmp_path / "cuda-13"
+    ctk_root.mkdir()
+
+    assert _FindNvidiaDynamicLib("nvvm").try_via_ctk_root(str(ctk_root)) is None
+
+
+def test_try_via_ctk_root_regular_lib(tmp_path):
+    ctk_root = tmp_path / "cuda-13"
+    lib_dir = ctk_root / "lib64"
+    lib_dir.mkdir(parents=True)
+    cudart_so = lib_dir / "libcudart.so"
+    cudart_so.write_bytes(b"fake")
+
+    assert _FindNvidiaDynamicLib("cudart").try_via_ctk_root(str(ctk_root)) == str(cudart_so)
+
+
+# ---------------------------------------------------------------------------
+# _try_ctk_root_canary
+# ---------------------------------------------------------------------------
+
+
+def _patch_system_search(return_value):
+    return patch(
+        "cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib.load_with_system_search",
+        return_value=return_value,
+    )
+
+
+def test_canary_finds_nvvm(tmp_path):
+    ctk_root = tmp_path / "cuda-13"
+    (ctk_root / "lib64").mkdir(parents=True)
+    nvvm_dir = ctk_root / "nvvm" / "lib64"
+    nvvm_dir.mkdir(parents=True)
+    nvvm_so = nvvm_dir / "libnvvm.so"
+    nvvm_so.write_bytes(b"fake")
+
+    canary = LoadedDL(str(ctk_root / "lib64" / "libcudart.so.13"), False, 0xDEAD, "system-search")
+
+    with _patch_system_search(canary):
+        assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) == str(nvvm_so)
+
+
+def test_canary_returns_none_when_system_search_fails():
+    with _patch_system_search(None):
+        assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
+
+
+def test_canary_returns_none_when_ctk_root_unrecognized():
+    canary = LoadedDL("/weird/path/libcudart.so.13", False, 0xDEAD, "system-search")
+    with _patch_system_search(canary):
+        assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
+
+
+def test_canary_returns_none_when_nvvm_not_in_ctk_root(tmp_path):
+    ctk_root = tmp_path / "cuda-13"
+    (ctk_root / "lib64").mkdir(parents=True)
+
+    canary = LoadedDL(str(ctk_root / "lib64" / "libcudart.so.13"), False, 0xDEAD, "system-search")
+    with _patch_system_search(canary):
+        assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
+
+
+def test_canary_skips_when_abs_path_none():
+    canary = LoadedDL(None, False, 0xDEAD, "system-search")
+    with _patch_system_search(canary):
+        assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
+
+
+# ---------------------------------------------------------------------------
+# _load_lib_no_cache search-order
+# ---------------------------------------------------------------------------
+
+
+def _make_loaded_dl(path, found_via):
+    return LoadedDL(path, False, 0xDEAD, found_via)
+
+
+_MODULE = "cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib"
+_FIND_MODULE = "cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib"
+
+
+@pytest.fixture
+def _isolate_load_cascade():
+    """Disable the search steps that run before system-search in _load_lib_no_cache.
+
+    This lets the ordering tests focus on system-search, CUDA_HOME, and the
+    canary probe without needing a real site-packages or conda environment.
+    """
+    with (
+        # No wheels installed
+        patch.object(_FindNvidiaDynamicLib, "try_site_packages", return_value=None),
+        # No conda env
+        patch.object(_FindNvidiaDynamicLib, "try_with_conda_prefix", return_value=None),
+        # Lib not already loaded by another component
+        patch(f"{_MODULE}.check_if_already_loaded_from_elsewhere", return_value=None),
+        # Skip transitive dependency loading
+        patch(f"{_MODULE}.load_dependencies"),
+    ):
+        yield
+
+
+@pytest.mark.usefixtures("_isolate_load_cascade")
+def test_cuda_home_takes_priority_over_canary(tmp_path):
+    # Two competing CTK roots: one from CUDA_HOME, one the canary would find.
+    cuda_home_root = tmp_path / "cuda-home"
+    nvvm_home = cuda_home_root / "nvvm" / "lib64"
+    nvvm_home.mkdir(parents=True)
+    nvvm_home_so = nvvm_home / "libnvvm.so"
+    nvvm_home_so.write_bytes(b"home")
+
+    canary_root = tmp_path / "cuda-system"
+    (canary_root / "lib64").mkdir(parents=True)
+    nvvm_canary = canary_root / "nvvm" / "lib64"
+    nvvm_canary.mkdir(parents=True)
+    (nvvm_canary / "libnvvm.so").write_bytes(b"canary")
+
+    canary_mock = MagicMock(
+        return_value=_make_loaded_dl(str(canary_root / "lib64" / "libcudart.so.13"), "system-search")
+    )
+
+    with (
+        # System search finds nothing for nvvm; canary would find cudart
+        patch(
+            f"{_MODULE}.load_with_system_search", side_effect=lambda name: None if name == "nvvm" else canary_mock(name)
+        ),
+        # CUDA_HOME points to a separate root that also has nvvm
+        patch(f"{_FIND_MODULE}.get_cuda_home_or_path", return_value=str(cuda_home_root)),
+        # Capture the final load call
+        patch(f"{_MODULE}.load_with_abs_path", side_effect=lambda _libname, path, via: _make_loaded_dl(path, via)),
+    ):
+        result = _load_lib_no_cache("nvvm")
+
+    # CUDA_HOME must win; the canary should never have been consulted
+    assert result.found_via == "CUDA_HOME"
+    assert result.abs_path == str(nvvm_home_so)
+    canary_mock.assert_not_called()
+
+
+@pytest.mark.usefixtures("_isolate_load_cascade")
+def test_canary_fires_only_after_all_earlier_steps_fail(tmp_path):
+    canary_root = tmp_path / "cuda-system"
+    (canary_root / "lib64").mkdir(parents=True)
+    nvvm_dir = canary_root / "nvvm" / "lib64"
+    nvvm_dir.mkdir(parents=True)
+    nvvm_so = nvvm_dir / "libnvvm.so"
+    nvvm_so.write_bytes(b"canary")
+
+    canary_result = _make_loaded_dl(str(canary_root / "lib64" / "libcudart.so.13"), "system-search")
+
+    with (
+        # System search: nvvm not on linker path, but cudart (canary) is
+        patch(
+            f"{_MODULE}.load_with_system_search", side_effect=lambda name: canary_result if name == "cudart" else None
+        ),
+        # No CUDA_HOME set
+        patch(f"{_FIND_MODULE}.get_cuda_home_or_path", return_value=None),
+        # Capture the final load call
+        patch(f"{_MODULE}.load_with_abs_path", side_effect=lambda _libname, path, via: _make_loaded_dl(path, via)),
+    ):
+        result = _load_lib_no_cache("nvvm")
+
+    assert result.found_via == "system-ctk-root"
+    assert result.abs_path == str(nvvm_so)

From 30bc53d08ac03a871868763ef4f0dd6791668faa Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue, 10 Feb 2026 13:50:51 -0500
Subject: [PATCH 02/11] style(pathfinder): update copyright header date in test
 file

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 cuda_pathfinder/tests/test_ctk_root_discovery.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cuda_pathfinder/tests/test_ctk_root_discovery.py b/cuda_pathfinder/tests/test_ctk_root_discovery.py
index b85015e11e..2351a7e17f 100644
--- a/cuda_pathfinder/tests/test_ctk_root_discovery.py
+++ b/cuda_pathfinder/tests/test_ctk_root_discovery.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 from unittest.mock import MagicMock, patch

From 718cfa740557748f40ad6e52d88fa8e7d108818d Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue, 10 Feb 2026 13:53:22 -0500
Subject: [PATCH 03/11] refactor(pathfinder): use pytest-mock instead of
 unittest.mock in tests

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../tests/test_ctk_root_discovery.py          | 148 +++++++++---------
 1 file changed, 71 insertions(+), 77 deletions(-)

diff --git a/cuda_pathfinder/tests/test_ctk_root_discovery.py b/cuda_pathfinder/tests/test_ctk_root_discovery.py
index 2351a7e17f..fc85f9aaea 100644
--- a/cuda_pathfinder/tests/test_ctk_root_discovery.py
+++ b/cuda_pathfinder/tests/test_ctk_root_discovery.py
@@ -1,8 +1,6 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-from unittest.mock import MagicMock, patch
-
 import pytest
 
 from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import (
@@ -17,6 +15,10 @@
     _try_ctk_root_canary,
 )
 
+_MODULE = "cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib"
+_FIND_MODULE = "cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib"
+
+
 # ---------------------------------------------------------------------------
 # derive_ctk_root
 # ---------------------------------------------------------------------------
@@ -60,14 +62,14 @@ def test_derive_ctk_root_windows_case_insensitive_x64():
     assert _derive_ctk_root_windows(r"C:\CUDA\BIN\X64\cudart64_13.dll") == r"C:\CUDA"
 
 
-def test_derive_ctk_root_dispatches_to_linux():
-    with patch("cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib.IS_WINDOWS", False):
-        assert derive_ctk_root("/usr/local/cuda/lib64/libcudart.so.13") == "/usr/local/cuda"
+def test_derive_ctk_root_dispatches_to_linux(mocker):
+    mocker.patch(f"{_FIND_MODULE}.IS_WINDOWS", False)
+    assert derive_ctk_root("/usr/local/cuda/lib64/libcudart.so.13") == "/usr/local/cuda"
 
 
-def test_derive_ctk_root_dispatches_to_windows():
-    with patch("cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib.IS_WINDOWS", True):
-        assert derive_ctk_root(r"C:\CUDA\v13\bin\cudart64_13.dll") == r"C:\CUDA\v13"
+def test_derive_ctk_root_dispatches_to_windows(mocker):
+    mocker.patch(f"{_FIND_MODULE}.IS_WINDOWS", True)
+    assert derive_ctk_root(r"C:\CUDA\v13\bin\cudart64_13.dll") == r"C:\CUDA\v13"
 
 
 # ---------------------------------------------------------------------------
@@ -107,14 +109,11 @@ def test_try_via_ctk_root_regular_lib(tmp_path):
 # ---------------------------------------------------------------------------
 
 
-def _patch_system_search(return_value):
-    return patch(
-        "cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib.load_with_system_search",
-        return_value=return_value,
-    )
+def _make_loaded_dl(path, found_via):
+    return LoadedDL(path, False, 0xDEAD, found_via)
 
 
-def test_canary_finds_nvvm(tmp_path):
+def test_canary_finds_nvvm(tmp_path, mocker):
     ctk_root = tmp_path / "cuda-13"
     (ctk_root / "lib64").mkdir(parents=True)
     nvvm_dir = ctk_root / "nvvm" / "lib64"
@@ -122,36 +121,36 @@ def test_canary_finds_nvvm(tmp_path):
     nvvm_so = nvvm_dir / "libnvvm.so"
     nvvm_so.write_bytes(b"fake")
 
-    canary = LoadedDL(str(ctk_root / "lib64" / "libcudart.so.13"), False, 0xDEAD, "system-search")
+    canary = _make_loaded_dl(str(ctk_root / "lib64" / "libcudart.so.13"), "system-search")
+    mocker.patch(f"{_MODULE}.load_with_system_search", return_value=canary)
 
-    with _patch_system_search(canary):
-        assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) == str(nvvm_so)
+    assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) == str(nvvm_so)
 
 
-def test_canary_returns_none_when_system_search_fails():
-    with _patch_system_search(None):
-        assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
+def test_canary_returns_none_when_system_search_fails(mocker):
+    mocker.patch(f"{_MODULE}.load_with_system_search", return_value=None)
+    assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
 
 
-def test_canary_returns_none_when_ctk_root_unrecognized():
-    canary = LoadedDL("/weird/path/libcudart.so.13", False, 0xDEAD, "system-search")
-    with _patch_system_search(canary):
-        assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
+def test_canary_returns_none_when_ctk_root_unrecognized(mocker):
+    canary = _make_loaded_dl("/weird/path/libcudart.so.13", "system-search")
+    mocker.patch(f"{_MODULE}.load_with_system_search", return_value=canary)
+    assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
 
 
-def test_canary_returns_none_when_nvvm_not_in_ctk_root(tmp_path):
+def test_canary_returns_none_when_nvvm_not_in_ctk_root(tmp_path, mocker):
     ctk_root = tmp_path / "cuda-13"
     (ctk_root / "lib64").mkdir(parents=True)
 
-    canary = LoadedDL(str(ctk_root / "lib64" / "libcudart.so.13"), False, 0xDEAD, "system-search")
-    with _patch_system_search(canary):
-        assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
+    canary = _make_loaded_dl(str(ctk_root / "lib64" / "libcudart.so.13"), "system-search")
+    mocker.patch(f"{_MODULE}.load_with_system_search", return_value=canary)
+    assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
 
 
-def test_canary_skips_when_abs_path_none():
-    canary = LoadedDL(None, False, 0xDEAD, "system-search")
-    with _patch_system_search(canary):
-        assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
+def test_canary_skips_when_abs_path_none(mocker):
+    canary = _make_loaded_dl(None, "system-search")
+    mocker.patch(f"{_MODULE}.load_with_system_search", return_value=canary)
+    assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
 
 
 # ---------------------------------------------------------------------------
@@ -159,36 +158,25 @@ def test_canary_skips_when_abs_path_none():
 # ---------------------------------------------------------------------------
 
 
-def _make_loaded_dl(path, found_via):
-    return LoadedDL(path, False, 0xDEAD, found_via)
-
-
-_MODULE = "cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib"
-_FIND_MODULE = "cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib"
-
-
 @pytest.fixture
-def _isolate_load_cascade():
+def _isolate_load_cascade(mocker):
     """Disable the search steps that run before system-search in _load_lib_no_cache.
 
     This lets the ordering tests focus on system-search, CUDA_HOME, and the
     canary probe without needing a real site-packages or conda environment.
     """
-    with (
-        # No wheels installed
-        patch.object(_FindNvidiaDynamicLib, "try_site_packages", return_value=None),
-        # No conda env
-        patch.object(_FindNvidiaDynamicLib, "try_with_conda_prefix", return_value=None),
-        # Lib not already loaded by another component
-        patch(f"{_MODULE}.check_if_already_loaded_from_elsewhere", return_value=None),
-        # Skip transitive dependency loading
-        patch(f"{_MODULE}.load_dependencies"),
-    ):
-        yield
+    # No wheels installed
+    mocker.patch.object(_FindNvidiaDynamicLib, "try_site_packages", return_value=None)
+    # No conda env
+    mocker.patch.object(_FindNvidiaDynamicLib, "try_with_conda_prefix", return_value=None)
+    # Lib not already loaded by another component
+    mocker.patch(f"{_MODULE}.check_if_already_loaded_from_elsewhere", return_value=None)
+    # Skip transitive dependency loading
+    mocker.patch(f"{_MODULE}.load_dependencies")
 
 
 @pytest.mark.usefixtures("_isolate_load_cascade")
-def test_cuda_home_takes_priority_over_canary(tmp_path):
+def test_cuda_home_takes_priority_over_canary(tmp_path, mocker):
     # Two competing CTK roots: one from CUDA_HOME, one the canary would find.
     cuda_home_root = tmp_path / "cuda-home"
     nvvm_home = cuda_home_root / "nvvm" / "lib64"
@@ -202,21 +190,24 @@ def test_cuda_home_takes_priority_over_canary(tmp_path):
     nvvm_canary.mkdir(parents=True)
     (nvvm_canary / "libnvvm.so").write_bytes(b"canary")
 
-    canary_mock = MagicMock(
+    canary_mock = mocker.MagicMock(
         return_value=_make_loaded_dl(str(canary_root / "lib64" / "libcudart.so.13"), "system-search")
     )
 
-    with (
-        # System search finds nothing for nvvm; canary would find cudart
-        patch(
-            f"{_MODULE}.load_with_system_search", side_effect=lambda name: None if name == "nvvm" else canary_mock(name)
-        ),
-        # CUDA_HOME points to a separate root that also has nvvm
-        patch(f"{_FIND_MODULE}.get_cuda_home_or_path", return_value=str(cuda_home_root)),
-        # Capture the final load call
-        patch(f"{_MODULE}.load_with_abs_path", side_effect=lambda _libname, path, via: _make_loaded_dl(path, via)),
-    ):
-        result = _load_lib_no_cache("nvvm")
+    # System search finds nothing for nvvm; canary would find cudart
+    mocker.patch(
+        f"{_MODULE}.load_with_system_search",
+        side_effect=lambda name: None if name == "nvvm" else canary_mock(name),
+    )
+    # CUDA_HOME points to a separate root that also has nvvm
+    mocker.patch(f"{_FIND_MODULE}.get_cuda_home_or_path", return_value=str(cuda_home_root))
+    # Capture the final load call
+    mocker.patch(
+        f"{_MODULE}.load_with_abs_path",
+        side_effect=lambda _libname, path, via: _make_loaded_dl(path, via),
+    )
+
+    result = _load_lib_no_cache("nvvm")
 
     # CUDA_HOME must win; the canary should never have been consulted
     assert result.found_via == "CUDA_HOME"
@@ -225,7 +216,7 @@ def test_cuda_home_takes_priority_over_canary(tmp_path):
 
 
 @pytest.mark.usefixtures("_isolate_load_cascade")
-def test_canary_fires_only_after_all_earlier_steps_fail(tmp_path):
+def test_canary_fires_only_after_all_earlier_steps_fail(tmp_path, mocker):
     canary_root = tmp_path / "cuda-system"
     (canary_root / "lib64").mkdir(parents=True)
     nvvm_dir = canary_root / "nvvm" / "lib64"
@@ -235,17 +226,20 @@ def test_canary_fires_only_after_all_earlier_steps_fail(tmp_path):
 
     canary_result = _make_loaded_dl(str(canary_root / "lib64" / "libcudart.so.13"), "system-search")
 
-    with (
-        # System search: nvvm not on linker path, but cudart (canary) is
-        patch(
-            f"{_MODULE}.load_with_system_search", side_effect=lambda name: canary_result if name == "cudart" else None
-        ),
-        # No CUDA_HOME set
-        patch(f"{_FIND_MODULE}.get_cuda_home_or_path", return_value=None),
-        # Capture the final load call
-        patch(f"{_MODULE}.load_with_abs_path", side_effect=lambda _libname, path, via: _make_loaded_dl(path, via)),
-    ):
-        result = _load_lib_no_cache("nvvm")
+    # System search: nvvm not on linker path, but cudart (canary) is
+    mocker.patch(
+        f"{_MODULE}.load_with_system_search",
+        side_effect=lambda name: canary_result if name == "cudart" else None,
+    )
+    # No CUDA_HOME set
+    mocker.patch(f"{_FIND_MODULE}.get_cuda_home_or_path", return_value=None)
+    # Capture the final load call
+    mocker.patch(
+        f"{_MODULE}.load_with_abs_path",
+        side_effect=lambda _libname, path, via: _make_loaded_dl(path, via),
+    )
+
+    result = _load_lib_no_cache("nvvm")
 
     assert result.found_via == "system-ctk-root"
     assert result.abs_path == str(nvvm_so)

From b2b78173661e3c9abc3d4f22ade59c69d0b1a16e Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue, 10 Feb 2026 13:59:42 -0500
Subject: [PATCH 04/11] chore: fix typing

---
 .../cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
index cf249bce56..9085812b51 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
@@ -88,7 +88,7 @@ def _try_ctk_root_canary(finder: _FindNvidiaDynamicLib) -> str | None:
         ctk_root = derive_ctk_root(canary.abs_path)
         if ctk_root is None:
             continue
-        abs_path = finder.try_via_ctk_root(ctk_root)
+        abs_path: str | None = finder.try_via_ctk_root(ctk_root)
         if abs_path is not None:
             return abs_path
     return None

From 52a9ccaac5b11284ac1db00d6a70c41c8814c961 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue, 10 Feb 2026 14:32:10 -0500
Subject: [PATCH 05/11] fix(pathfinder): make CTK root discovery tests
 platform-aware

Tests that create fake CTK directory layouts were hardcoded to Linux
paths (lib64/, libnvvm.so) and failed on Windows where the code
expects Windows layouts (bin/, nvvm64.dll).

Extract platform-aware helpers (_create_nvvm_in_ctk, _create_cudart_in_ctk,
_fake_canary_path) that create the right layout and filenames based on
IS_WINDOWS.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../tests/test_ctk_root_discovery.py          | 99 ++++++++++++-------
 1 file changed, 63 insertions(+), 36 deletions(-)

diff --git a/cuda_pathfinder/tests/test_ctk_root_discovery.py b/cuda_pathfinder/tests/test_ctk_root_discovery.py
index fc85f9aaea..40b84899be 100644
--- a/cuda_pathfinder/tests/test_ctk_root_discovery.py
+++ b/cuda_pathfinder/tests/test_ctk_root_discovery.py
@@ -1,6 +1,8 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
+import os
+
 import pytest
 
 from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import (
@@ -14,11 +16,52 @@
     _load_lib_no_cache,
     _try_ctk_root_canary,
 )
+from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
 
 _MODULE = "cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib"
 _FIND_MODULE = "cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib"
 
 
+# ---------------------------------------------------------------------------
+# Platform-aware test helpers
+# ---------------------------------------------------------------------------
+
+
+def _create_nvvm_in_ctk(ctk_root):
+    """Create a fake nvvm lib in the platform-appropriate CTK subdirectory."""
+    if IS_WINDOWS:
+        nvvm_dir = ctk_root / "nvvm" / "bin"
+        nvvm_dir.mkdir(parents=True)
+        nvvm_lib = nvvm_dir / "nvvm64.dll"
+    else:
+        nvvm_dir = ctk_root / "nvvm" / "lib64"
+        nvvm_dir.mkdir(parents=True)
+        nvvm_lib = nvvm_dir / "libnvvm.so"
+    nvvm_lib.write_bytes(b"fake")
+    return nvvm_lib
+
+
+def _create_cudart_in_ctk(ctk_root):
+    """Create a fake cudart lib in the platform-appropriate CTK subdirectory."""
+    if IS_WINDOWS:
+        lib_dir = ctk_root / "bin"
+        lib_dir.mkdir(parents=True)
+        lib_file = lib_dir / "cudart64_12.dll"
+    else:
+        lib_dir = ctk_root / "lib64"
+        lib_dir.mkdir(parents=True)
+        lib_file = lib_dir / "libcudart.so"
+    lib_file.write_bytes(b"fake")
+    return lib_file
+
+
+def _fake_canary_path(ctk_root):
+    """Return the path a system-loaded canary lib would resolve to."""
+    if IS_WINDOWS:
+        return str(ctk_root / "bin" / "cudart64_13.dll")
+    return str(ctk_root / "lib64" / "libcudart.so.13")
+
+
 # ---------------------------------------------------------------------------
 # derive_ctk_root
 # ---------------------------------------------------------------------------
@@ -79,12 +122,9 @@ def test_derive_ctk_root_dispatches_to_windows(mocker):
 
 def test_try_via_ctk_root_finds_nvvm(tmp_path):
     ctk_root = tmp_path / "cuda-13"
-    nvvm_dir = ctk_root / "nvvm" / "lib64"
-    nvvm_dir.mkdir(parents=True)
-    nvvm_so = nvvm_dir / "libnvvm.so"
-    nvvm_so.write_bytes(b"fake")
+    nvvm_lib = _create_nvvm_in_ctk(ctk_root)
 
-    assert _FindNvidiaDynamicLib("nvvm").try_via_ctk_root(str(ctk_root)) == str(nvvm_so)
+    assert _FindNvidiaDynamicLib("nvvm").try_via_ctk_root(str(ctk_root)) == str(nvvm_lib)
 
 
 def test_try_via_ctk_root_returns_none_when_dir_missing(tmp_path):
@@ -96,12 +136,9 @@ def test_try_via_ctk_root_returns_none_when_dir_missing(tmp_path):
 
 def test_try_via_ctk_root_regular_lib(tmp_path):
     ctk_root = tmp_path / "cuda-13"
-    lib_dir = ctk_root / "lib64"
-    lib_dir.mkdir(parents=True)
-    cudart_so = lib_dir / "libcudart.so"
-    cudart_so.write_bytes(b"fake")
+    cudart_lib = _create_cudart_in_ctk(ctk_root)
 
-    assert _FindNvidiaDynamicLib("cudart").try_via_ctk_root(str(ctk_root)) == str(cudart_so)
+    assert _FindNvidiaDynamicLib("cudart").try_via_ctk_root(str(ctk_root)) == str(cudart_lib)
 
 
 # ---------------------------------------------------------------------------
@@ -115,16 +152,13 @@ def _make_loaded_dl(path, found_via):
 
 def test_canary_finds_nvvm(tmp_path, mocker):
     ctk_root = tmp_path / "cuda-13"
-    (ctk_root / "lib64").mkdir(parents=True)
-    nvvm_dir = ctk_root / "nvvm" / "lib64"
-    nvvm_dir.mkdir(parents=True)
-    nvvm_so = nvvm_dir / "libnvvm.so"
-    nvvm_so.write_bytes(b"fake")
+    _create_cudart_in_ctk(ctk_root)
+    nvvm_lib = _create_nvvm_in_ctk(ctk_root)
 
-    canary = _make_loaded_dl(str(ctk_root / "lib64" / "libcudart.so.13"), "system-search")
+    canary = _make_loaded_dl(_fake_canary_path(ctk_root), "system-search")
     mocker.patch(f"{_MODULE}.load_with_system_search", return_value=canary)
 
-    assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) == str(nvvm_so)
+    assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) == str(nvvm_lib)
 
 
 def test_canary_returns_none_when_system_search_fails(mocker):
@@ -140,9 +174,10 @@ def test_canary_returns_none_when_ctk_root_unrecognized(mocker):
 
 def test_canary_returns_none_when_nvvm_not_in_ctk_root(tmp_path, mocker):
     ctk_root = tmp_path / "cuda-13"
-    (ctk_root / "lib64").mkdir(parents=True)
+    # Create only the canary lib dir, not nvvm
+    _create_cudart_in_ctk(ctk_root)
 
-    canary = _make_loaded_dl(str(ctk_root / "lib64" / "libcudart.so.13"), "system-search")
+    canary = _make_loaded_dl(_fake_canary_path(ctk_root), "system-search")
     mocker.patch(f"{_MODULE}.load_with_system_search", return_value=canary)
     assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
 
@@ -179,19 +214,14 @@ def _isolate_load_cascade(mocker):
 def test_cuda_home_takes_priority_over_canary(tmp_path, mocker):
     # Two competing CTK roots: one from CUDA_HOME, one the canary would find.
     cuda_home_root = tmp_path / "cuda-home"
-    nvvm_home = cuda_home_root / "nvvm" / "lib64"
-    nvvm_home.mkdir(parents=True)
-    nvvm_home_so = nvvm_home / "libnvvm.so"
-    nvvm_home_so.write_bytes(b"home")
+    nvvm_home_lib = _create_nvvm_in_ctk(cuda_home_root)
 
     canary_root = tmp_path / "cuda-system"
-    (canary_root / "lib64").mkdir(parents=True)
-    nvvm_canary = canary_root / "nvvm" / "lib64"
-    nvvm_canary.mkdir(parents=True)
-    (nvvm_canary / "libnvvm.so").write_bytes(b"canary")
+    _create_cudart_in_ctk(canary_root)
+    _create_nvvm_in_ctk(canary_root)
 
     canary_mock = mocker.MagicMock(
-        return_value=_make_loaded_dl(str(canary_root / "lib64" / "libcudart.so.13"), "system-search")
+        return_value=_make_loaded_dl(_fake_canary_path(canary_root), "system-search")
     )
 
     # System search finds nothing for nvvm; canary would find cudart
@@ -211,20 +241,17 @@ def test_cuda_home_takes_priority_over_canary(tmp_path, mocker):
 
     # CUDA_HOME must win; the canary should never have been consulted
     assert result.found_via == "CUDA_HOME"
-    assert result.abs_path == str(nvvm_home_so)
+    assert result.abs_path == str(nvvm_home_lib)
     canary_mock.assert_not_called()
 
 
 @pytest.mark.usefixtures("_isolate_load_cascade")
 def test_canary_fires_only_after_all_earlier_steps_fail(tmp_path, mocker):
     canary_root = tmp_path / "cuda-system"
-    (canary_root / "lib64").mkdir(parents=True)
-    nvvm_dir = canary_root / "nvvm" / "lib64"
-    nvvm_dir.mkdir(parents=True)
-    nvvm_so = nvvm_dir / "libnvvm.so"
-    nvvm_so.write_bytes(b"canary")
+    _create_cudart_in_ctk(canary_root)
+    nvvm_lib = _create_nvvm_in_ctk(canary_root)
 
-    canary_result = _make_loaded_dl(str(canary_root / "lib64" / "libcudart.so.13"), "system-search")
+    canary_result = _make_loaded_dl(_fake_canary_path(canary_root), "system-search")
 
     # System search: nvvm not on linker path, but cudart (canary) is
     mocker.patch(
@@ -242,4 +269,4 @@ def test_canary_fires_only_after_all_earlier_steps_fail(tmp_path, mocker):
     result = _load_lib_no_cache("nvvm")
 
     assert result.found_via == "system-ctk-root"
-    assert result.abs_path == str(nvvm_so)
+    assert result.abs_path == str(nvvm_lib)

From ce7da17f74f991705bca71807dd01fc487c7da55 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue, 10 Feb 2026 14:34:47 -0500
Subject: [PATCH 06/11] chore: style

---
 cuda_pathfinder/tests/test_ctk_root_discovery.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/cuda_pathfinder/tests/test_ctk_root_discovery.py b/cuda_pathfinder/tests/test_ctk_root_discovery.py
index 40b84899be..a1668112d0 100644
--- a/cuda_pathfinder/tests/test_ctk_root_discovery.py
+++ b/cuda_pathfinder/tests/test_ctk_root_discovery.py
@@ -1,7 +1,6 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-import os
 
 import pytest
 
@@ -220,9 +219,7 @@ def test_cuda_home_takes_priority_over_canary(tmp_path, mocker):
     _create_cudart_in_ctk(canary_root)
     _create_nvvm_in_ctk(canary_root)
 
-    canary_mock = mocker.MagicMock(
-        return_value=_make_loaded_dl(_fake_canary_path(canary_root), "system-search")
-    )
+    canary_mock = mocker.MagicMock(return_value=_make_loaded_dl(_fake_canary_path(canary_root), "system-search"))
 
     # System search finds nothing for nvvm; canary would find cudart
     mocker.patch(

From 038a4cbbc4723a373396cccadfe264180f70319e Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue, 10 Feb 2026 15:50:44 -0500
Subject: [PATCH 07/11] fix(pathfinder): normalize paths from
 _find_lib_dir_using_anchor_point

The rel_paths for nvvm use forward slashes (e.g. "nvvm/bin") which
os.path.join on Windows doesn't normalize, producing mixed-separator
paths like "...\nvvm/bin\nvvm64.dll". Apply os.path.normpath to the
returned directory so all separators are consistent.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
index d5c376012c..6265992e4a 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
@@ -101,7 +101,7 @@ def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_
     for rel_path in rel_paths:
         for dirname in sorted(glob.glob(os.path.join(anchor_point, rel_path))):
             if os.path.isdir(dirname):
-                return dirname
+                return os.path.normpath(dirname)
 
     return None
 

From 65997b984069bf8c476a990a0e0d4e18c273e714 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Wed, 11 Feb 2026 17:51:14 -0500
Subject: [PATCH 08/11] refactor(pathfinder): isolate CTK canary probe in
 subprocess

Resolve CTK canary absolute paths in a spawned Python process so that
probing cudart does not mutate loader state in the caller process,
while preserving the nvvm discovery fallback order. Keep JSON as the
child-to-parent wire format because it cleanly represents both the
path and no-result states and avoids fragile stdout/path parsing
across platforms.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../_dynamic_libs/canary_probe_subprocess.py  | 32 ++++++++++
 .../_dynamic_libs/load_nvidia_dynamic_lib.py  | 61 +++++++++++++++----
 .../tests/test_ctk_root_discovery.py          | 49 ++++++++-------
 3 files changed, 110 insertions(+), 32 deletions(-)
 create mode 100644 cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py

diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py
new file mode 100644
index 0000000000..96c9ab46a9
--- /dev/null
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import sys
+
+from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
+
+if IS_WINDOWS:
+    from cuda.pathfinder._dynamic_libs.load_dl_windows import load_with_system_search
+else:
+    from cuda.pathfinder._dynamic_libs.load_dl_linux import load_with_system_search
+
+
+def _probe_canary_abs_path(libname: str) -> str | None:
+    loaded = load_with_system_search(libname)
+    if loaded is None:
+        return None
+    return loaded.abs_path
+
+
+def main(argv: list[str] | None = None) -> int:
+    args = sys.argv[1:] if argv is None else argv
+    if len(args) != 1:
+        return 2
+    print(json.dumps(_probe_canary_abs_path(args[0])))  # noqa: T201
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
index 9085812b51..0c6b21b520 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import functools
+import json
+import subprocess
 import struct
 import sys
 
@@ -69,6 +71,44 @@ def _load_driver_lib_no_cache(libname: str) -> LoadedDL:
 _CTK_ROOT_CANARY_LIBNAMES = ("cudart",)
 
 
+def _resolve_system_loaded_abs_path_in_subprocess(libname: str) -> str | None:
+    """Resolve a library's system-search absolute path in a child process.
+
+    This keeps any side-effects of loading the canary library scoped to the
+    child process instead of polluting the current process.
+    """
+    cmd = [
+        sys.executable,
+        "-m",
+        "cuda.pathfinder._dynamic_libs.canary_probe_subprocess",
+        libname,
+    ]
+    try:
+        result = subprocess.run(  # noqa: S603
+            cmd,
+            check=False,
+            capture_output=True,
+            text=True,
+            timeout=10.0,
+        )
+    except (OSError, subprocess.SubprocessError):
+        return None
+    if result.returncode != 0:
+        return None
+
+    # Read the final non-empty stdout line in case earlier lines are emitted.
+    lines = [line for line in result.stdout.splitlines() if line.strip()]
+    if not lines:
+        return None
+    try:
+        payload = json.loads(lines[-1])
+    except json.JSONDecodeError:
+        return None
+    if isinstance(payload, str):
+        return payload
+    return None
+
+
 def _try_ctk_root_canary(finder: _FindNvidiaDynamicLib) -> str | None:
     """Derive the CTK root from a system-installed canary lib.
 
@@ -77,15 +117,14 @@ def _try_ctk_root_canary(finder: _FindNvidiaDynamicLib) -> str | None:
     via system search, derive the CTK installation root from its resolved
     path, and then look for the target lib relative to that root.
 
-    The canary lib is loaded as a side-effect but this is harmless: it stays
-    loaded (handles are never closed) and will be reused by
-    :func:`load_nvidia_dynamic_lib` if requested later.
+    The canary load is performed in a subprocess to avoid introducing loader
+    state into the current process.
     """
     for canary_libname in _CTK_ROOT_CANARY_LIBNAMES:
-        canary = load_with_system_search(canary_libname)
-        if canary is None or canary.abs_path is None:
+        canary_abs_path = _resolve_system_loaded_abs_path_in_subprocess(canary_libname)
+        if canary_abs_path is None:
             continue
-        ctk_root = derive_ctk_root(canary.abs_path)
+        ctk_root = derive_ctk_root(canary_abs_path)
         if ctk_root is None:
             continue
         abs_path: str | None = finder.try_via_ctk_root(ctk_root)
@@ -131,9 +170,9 @@ def _load_lib_no_cache(libname: str) -> LoadedDL:
         else:
             # Canary probe: if the direct system search and CUDA_HOME both
             # failed (e.g. nvvm isn't on the linker path and CUDA_HOME is
-            # unset), try to discover the CTK root by system-loading a
-            # well-known CTK lib that IS on the linker path, then look for
-            # the target lib relative to that root.
+            # unset), try to discover the CTK root by loading a well-known CTK
+            # lib in a subprocess, then look for the target lib relative to
+            # that root.
             abs_path = _try_ctk_root_canary(finder)
             if abs_path is not None:
                 found_via = "system-ctk-root"
@@ -213,8 +252,8 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
            - For libraries whose shared object doesn't reside on the standard
              linker path (e.g. ``libnvvm.so`` lives under ``$CTK_ROOT/nvvm/lib64``),
              attempt to discover the CTK installation root by system-loading a
-             well-known CTK library (``cudart``) that *is* on the linker path, then
-             derive the root from its resolved absolute path.
+             well-known CTK library (``cudart``) in a subprocess, then derive
+             the root from its resolved absolute path.
 
     **Driver libraries** (``"cuda"``, ``"nvml"``):
 
diff --git a/cuda_pathfinder/tests/test_ctk_root_discovery.py b/cuda_pathfinder/tests/test_ctk_root_discovery.py
index a1668112d0..71a61c86c0 100644
--- a/cuda_pathfinder/tests/test_ctk_root_discovery.py
+++ b/cuda_pathfinder/tests/test_ctk_root_discovery.py
@@ -154,20 +154,27 @@ def test_canary_finds_nvvm(tmp_path, mocker):
     _create_cudart_in_ctk(ctk_root)
     nvvm_lib = _create_nvvm_in_ctk(ctk_root)
 
-    canary = _make_loaded_dl(_fake_canary_path(ctk_root), "system-search")
-    mocker.patch(f"{_MODULE}.load_with_system_search", return_value=canary)
+    probe = mocker.patch(
+        f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess",
+        return_value=_fake_canary_path(ctk_root),
+    )
+    parent_system_loader = mocker.patch(f"{_MODULE}.load_with_system_search")
 
     assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) == str(nvvm_lib)
+    probe.assert_called_once_with("cudart")
+    parent_system_loader.assert_not_called()
 
 
-def test_canary_returns_none_when_system_search_fails(mocker):
-    mocker.patch(f"{_MODULE}.load_with_system_search", return_value=None)
+def test_canary_returns_none_when_subprocess_probe_fails(mocker):
+    mocker.patch(f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", return_value=None)
     assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
 
 
 def test_canary_returns_none_when_ctk_root_unrecognized(mocker):
-    canary = _make_loaded_dl("/weird/path/libcudart.so.13", "system-search")
-    mocker.patch(f"{_MODULE}.load_with_system_search", return_value=canary)
+    mocker.patch(
+        f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess",
+        return_value="/weird/path/libcudart.so.13",
+    )
     assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
 
 
@@ -176,14 +183,15 @@ def test_canary_returns_none_when_nvvm_not_in_ctk_root(tmp_path, mocker):
     # Create only the canary lib dir, not nvvm
     _create_cudart_in_ctk(ctk_root)
 
-    canary = _make_loaded_dl(_fake_canary_path(ctk_root), "system-search")
-    mocker.patch(f"{_MODULE}.load_with_system_search", return_value=canary)
+    mocker.patch(
+        f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess",
+        return_value=_fake_canary_path(ctk_root),
+    )
     assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
 
 
 def test_canary_skips_when_abs_path_none(mocker):
-    canary = _make_loaded_dl(None, "system-search")
-    mocker.patch(f"{_MODULE}.load_with_system_search", return_value=canary)
+    mocker.patch(f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", return_value=None)
     assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None
 
 
@@ -219,13 +227,12 @@ def test_cuda_home_takes_priority_over_canary(tmp_path, mocker):
     _create_cudart_in_ctk(canary_root)
     _create_nvvm_in_ctk(canary_root)
 
-    canary_mock = mocker.MagicMock(return_value=_make_loaded_dl(_fake_canary_path(canary_root), "system-search"))
+    canary_mock = mocker.MagicMock(return_value=_fake_canary_path(canary_root))
 
-    # System search finds nothing for nvvm; canary would find cudart
-    mocker.patch(
-        f"{_MODULE}.load_with_system_search",
-        side_effect=lambda name: None if name == "nvvm" else canary_mock(name),
-    )
+    # System search finds nothing for nvvm.
+    mocker.patch(f"{_MODULE}.load_with_system_search", return_value=None)
+    # Canary subprocess probe would find cudart if consulted.
+    mocker.patch(f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", side_effect=canary_mock)
     # CUDA_HOME points to a separate root that also has nvvm
     mocker.patch(f"{_FIND_MODULE}.get_cuda_home_or_path", return_value=str(cuda_home_root))
     # Capture the final load call
@@ -248,12 +255,12 @@ def test_canary_fires_only_after_all_earlier_steps_fail(tmp_path, mocker):
     _create_cudart_in_ctk(canary_root)
     nvvm_lib = _create_nvvm_in_ctk(canary_root)
 
-    canary_result = _make_loaded_dl(_fake_canary_path(canary_root), "system-search")
-
-    # System search: nvvm not on linker path, but cudart (canary) is
+    # System search: nvvm not on linker path.
+    mocker.patch(f"{_MODULE}.load_with_system_search", return_value=None)
+    # Canary subprocess probe finds cudart under a system CTK root.
     mocker.patch(
-        f"{_MODULE}.load_with_system_search",
-        side_effect=lambda name: canary_result if name == "cudart" else None,
+        f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess",
+        return_value=_fake_canary_path(canary_root),
     )
     # No CUDA_HOME set
     mocker.patch(f"{_FIND_MODULE}.get_cuda_home_or_path", return_value=None)

From a1d2ebc346c851bab6b5ff14d3896e26f3b0afdb Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Wed, 11 Feb 2026 18:05:17 -0500
Subject: [PATCH 09/11] fix(pathfinder): satisfy pre-commit typing for canary
 probe

Make canary subprocess path extraction explicitly typed and validated
so that mypy does not treat platform-specific loader results as Any,
while keeping probe behavior unchanged. Keep import ordering aligned
with Ruff so that pre-commit is green.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../pathfinder/_dynamic_libs/canary_probe_subprocess.py   | 8 ++++++--
 .../pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py   | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py
index 96c9ab46a9..4435b7e433 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py
@@ -5,6 +5,7 @@
 import json
 import sys
 
+from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
 from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
 
 if IS_WINDOWS:
@@ -14,10 +15,13 @@
 
 
 def _probe_canary_abs_path(libname: str) -> str | None:
-    loaded = load_with_system_search(libname)
+    loaded: LoadedDL | None = load_with_system_search(libname)
     if loaded is None:
         return None
-    return loaded.abs_path
+    abs_path = loaded.abs_path
+    if not isinstance(abs_path, str):
+        return None
+    return abs_path
 
 
 def main(argv: list[str] | None = None) -> int:
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
index 0c6b21b520..7c645108db 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
@@ -3,8 +3,8 @@
 
 import functools
 import json
-import subprocess
 import struct
+import subprocess
 import sys
 
 from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import (

From a4ea4b7ba2b756f8162d5a9c979484eaf276cf5b Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Fri, 20 Feb 2026 10:54:49 -0500
Subject: [PATCH 10/11] fix(pathfinder): use spawn isolation for CTK canary
 probing

Switch canary path resolution from subprocess.run to a shared
multiprocessing spawn runner so child probes do not inherit
potentially preloaded CUDA libraries from a forked parent. Move the
runner into cuda.pathfinder._utils and reuse it from the tests to keep
one implementation of the spawned-process behavior.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../_dynamic_libs/canary_probe_subprocess.py  |   6 +-
 .../_dynamic_libs/load_nvidia_dynamic_lib.py  |  19 +--
 .../_utils/spawned_process_runner.py          | 127 +++++++++++++++++
 .../tests/spawned_process_runner.py           | 132 +-----------------
 4 files changed, 145 insertions(+), 139 deletions(-)
 create mode 100644 cuda_pathfinder/cuda/pathfinder/_utils/spawned_process_runner.py

diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py
index 4435b7e433..902b57d6e4 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py
@@ -24,11 +24,15 @@ def _probe_canary_abs_path(libname: str) -> str | None:
     return abs_path
 
 
+def probe_canary_abs_path_and_print_json(libname: str) -> None:
+    print(json.dumps(_probe_canary_abs_path(libname)))  # noqa: T201
+
+
 def main(argv: list[str] | None = None) -> int:
     args = sys.argv[1:] if argv is None else argv
     if len(args) != 1:
         return 2
-    print(json.dumps(_probe_canary_abs_path(args[0])))  # noqa: T201
+    probe_canary_abs_path_and_print_json(args[0])
     return 0
 
 
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
index 7c645108db..1597a5b8b8 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
@@ -4,9 +4,9 @@
 import functools
 import json
 import struct
-import subprocess
 import sys
 
+from cuda.pathfinder._dynamic_libs.canary_probe_subprocess import probe_canary_abs_path_and_print_json
 from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import (
     _FindNvidiaDynamicLib,
     derive_ctk_root,
@@ -17,6 +17,7 @@
     SUPPORTED_WINDOWS_DLLS,
 )
 from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
+from cuda.pathfinder._utils.spawned_process_runner import run_in_spawned_child_process
 
 if IS_WINDOWS:
     from cuda.pathfinder._dynamic_libs.load_dl_windows import (
@@ -77,21 +78,13 @@ def _resolve_system_loaded_abs_path_in_subprocess(libname: str) -> str | None:
     This keeps any side-effects of loading the canary library scoped to the
     child process instead of polluting the current process.
     """
-    cmd = [
-        sys.executable,
-        "-m",
-        "cuda.pathfinder._dynamic_libs.canary_probe_subprocess",
-        libname,
-    ]
     try:
-        result = subprocess.run(  # noqa: S603
-            cmd,
-            check=False,
-            capture_output=True,
-            text=True,
+        result = run_in_spawned_child_process(
+            probe_canary_abs_path_and_print_json,
+            args=(libname,),
             timeout=10.0,
         )
-    except (OSError, subprocess.SubprocessError):
+    except (OSError, RuntimeError):
         return None
     if result.returncode != 0:
         return None
diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/spawned_process_runner.py b/cuda_pathfinder/cuda/pathfinder/_utils/spawned_process_runner.py
new file mode 100644
index 0000000000..1908695fe5
--- /dev/null
+++ b/cuda_pathfinder/cuda/pathfinder/_utils/spawned_process_runner.py
@@ -0,0 +1,127 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import multiprocessing
+import queue  # for Empty
+import sys
+import traceback
+from collections.abc import Callable, Sequence
+from dataclasses import dataclass
+from io import StringIO
+from typing import Any
+
+PROCESS_KILLED = -9
+PROCESS_NO_RESULT = -999
+
+
+# Similar to https://docs.python.org/3/library/subprocess.html#subprocess.CompletedProcess
+# (args, check_returncode() are intentionally not supported here.)
+@dataclass
+class CompletedProcess:
+    returncode: int
+    stdout: str
+    stderr: str
+
+
+class ChildProcessWrapper:
+    def __init__(self, result_queue, target, args, kwargs):
+        self.target = target
+        self.args = () if args is None else args
+        self.kwargs = {} if kwargs is None else kwargs
+        self.result_queue = result_queue
+
+    def __call__(self):
+        # Capture stdout/stderr
+        old_stdout = sys.stdout
+        old_stderr = sys.stderr
+        sys.stdout = StringIO()
+        sys.stderr = StringIO()
+
+        try:
+            self.target(*self.args, **self.kwargs)
+            returncode = 0
+        except SystemExit as e:  # Handle sys.exit()
+            returncode = e.code if isinstance(e.code, int) else 0
+        except BaseException:
+            traceback.print_exc()
+            returncode = 1
+        finally:
+            # Collect outputs and restore streams
+            stdout = sys.stdout.getvalue()
+            stderr = sys.stderr.getvalue()
+            sys.stdout = old_stdout
+            sys.stderr = old_stderr
+            try:  # noqa: SIM105
+                self.result_queue.put((returncode, stdout, stderr))
+            except Exception:  # noqa: S110
+                # If the queue is broken (e.g., parent gone), best effort logging
+                pass
+
+
+def run_in_spawned_child_process(
+    target: Callable[..., None],
+    *,
+    args: Sequence[Any] | None = None,
+    kwargs: dict[str, Any] | None = None,
+    timeout: float | None = None,
+    rethrow: bool = False,
+) -> CompletedProcess:
+    """Run `target` in a spawned child process, capturing stdout/stderr.
+
+    The provided `target` must be defined at the top level of a module, and must
+    be importable in the spawned child process. Lambdas, closures, or interactively
+    defined functions (e.g., in Jupyter notebooks) will not work.
+
+    If `rethrow=True` and the child process exits with a nonzero code,
+    raises ChildProcessError with the captured stderr.
+    """
+    ctx = multiprocessing.get_context("spawn")
+    result_queue = ctx.Queue()
+    process = ctx.Process(target=ChildProcessWrapper(result_queue, target, args, kwargs))
+    process.start()
+
+    try:
+        process.join(timeout)
+        if process.is_alive():
+            process.terminate()
+            process.join()
+            result = CompletedProcess(
+                returncode=PROCESS_KILLED,
+                stdout="",
+                stderr=f"Process timed out after {timeout} seconds and was terminated.",
+            )
+        else:
+            try:
+                returncode, stdout, stderr = result_queue.get(timeout=1.0)
+            except (queue.Empty, EOFError):
+                result = CompletedProcess(
+                    returncode=PROCESS_NO_RESULT,
+                    stdout="",
+                    stderr="Process exited or crashed before returning results.",
+                )
+            else:
+                result = CompletedProcess(
+                    returncode=returncode,
+                    stdout=stdout,
+                    stderr=stderr,
+                )
+
+        if rethrow and result.returncode != 0:
+            raise ChildProcessError(
+                f"Child process exited with code {result.returncode}.\n"
+                "--- stderr-from-child-process ---\n"
+                f"{result.stderr}"
+                "<end-of-stderr-from-child-process>\n"
+            )
+
+        return result
+
+    finally:
+        try:
+            result_queue.close()
+            result_queue.join_thread()
+        except Exception:  # noqa: S110
+            pass
+        if process.is_alive():
+            process.kill()
+            process.join()
diff --git a/cuda_pathfinder/tests/spawned_process_runner.py b/cuda_pathfinder/tests/spawned_process_runner.py
index f4440743f5..34850851e0 100644
--- a/cuda_pathfinder/tests/spawned_process_runner.py
+++ b/cuda_pathfinder/tests/spawned_process_runner.py
@@ -1,127 +1,9 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-import multiprocessing
-import queue  # for Empty
-import sys
-import traceback
-from collections.abc import Callable, Sequence
-from dataclasses import dataclass
-from io import StringIO
-from typing import Any
-
-PROCESS_KILLED = -9
-PROCESS_NO_RESULT = -999
-
-
-# Similar to https://docs.python.org/3/library/subprocess.html#subprocess.CompletedProcess
-# (args, check_returncode() are intentionally not supported here.)
-@dataclass
-class CompletedProcess:
-    returncode: int
-    stdout: str
-    stderr: str
-
-
-class ChildProcessWrapper:
-    def __init__(self, result_queue, target, args, kwargs):
-        self.target = target
-        self.args = () if args is None else args
-        self.kwargs = {} if kwargs is None else kwargs
-        self.result_queue = result_queue
-
-    def __call__(self):
-        # Capture stdout/stderr
-        old_stdout = sys.stdout
-        old_stderr = sys.stderr
-        sys.stdout = StringIO()
-        sys.stderr = StringIO()
-
-        try:
-            self.target(*self.args, **self.kwargs)
-            returncode = 0
-        except SystemExit as e:  # Handle sys.exit()
-            returncode = e.code if isinstance(e.code, int) else 0
-        except BaseException:
-            traceback.print_exc()
-            returncode = 1
-        finally:
-            # Collect outputs and restore streams
-            stdout = sys.stdout.getvalue()
-            stderr = sys.stderr.getvalue()
-            sys.stdout = old_stdout
-            sys.stderr = old_stderr
-            try:  # noqa: SIM105
-                self.result_queue.put((returncode, stdout, stderr))
-            except Exception:  # noqa: S110
-                # If the queue is broken (e.g., parent gone), best effort logging
-                pass
-
-
-def run_in_spawned_child_process(
-    target: Callable[..., None],
-    *,
-    args: Sequence[Any] | None = None,
-    kwargs: dict[str, Any] | None = None,
-    timeout: float | None = None,
-    rethrow: bool = False,
-) -> CompletedProcess:
-    """Run `target` in a spawned child process, capturing stdout/stderr.
-
-    The provided `target` must be defined at the top level of a module, and must
-    be importable in the spawned child process. Lambdas, closures, or interactively
-    defined functions (e.g., in Jupyter notebooks) will not work.
-
-    If `rethrow=True` and the child process exits with a nonzero code,
-    raises ChildProcessError with the captured stderr.
-    """
-    ctx = multiprocessing.get_context("spawn")
-    result_queue = ctx.Queue()
-    process = ctx.Process(target=ChildProcessWrapper(result_queue, target, args, kwargs))
-    process.start()
-
-    try:
-        process.join(timeout)
-        if process.is_alive():
-            process.terminate()
-            process.join()
-            result = CompletedProcess(
-                returncode=PROCESS_KILLED,
-                stdout="",
-                stderr=f"Process timed out after {timeout} seconds and was terminated.",
-            )
-        else:
-            try:
-                returncode, stdout, stderr = result_queue.get(timeout=1.0)
-            except (queue.Empty, EOFError):
-                result = CompletedProcess(
-                    returncode=PROCESS_NO_RESULT,
-                    stdout="",
-                    stderr="Process exited or crashed before returning results.",
-                )
-            else:
-                result = CompletedProcess(
-                    returncode=returncode,
-                    stdout=stdout,
-                    stderr=stderr,
-                )
-
-        if rethrow and result.returncode != 0:
-            raise ChildProcessError(
-                f"Child process exited with code {result.returncode}.\n"
-                "--- stderr-from-child-process ---\n"
-                f"{result.stderr}"
-                "<end-of-stderr-from-child-process>\n"
-            )
-
-        return result
-
-    finally:
-        try:
-            result_queue.close()
-            result_queue.join_thread()
-        except Exception:  # noqa: S110
-            pass
-        if process.is_alive():
-            process.kill()
-            process.join()
+from cuda.pathfinder._utils.spawned_process_runner import (
+    PROCESS_KILLED,
+    PROCESS_NO_RESULT,
+    CompletedProcess,
+    run_in_spawned_child_process,
+)

From fbdfc5ae06e9de8eeb4c453eaa93a56df925a04c Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Fri, 20 Feb 2026 11:33:11 -0500
Subject: [PATCH 11/11] fix(pathfinder): satisfy pre-commit for spawned runner
 utilities

Add the missing type annotations required by mypy to ChildProcessWrapper,
and narrow the test shim so that it re-exports only the runner entry
point (declared via __all__), so lint checks pass cleanly.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../cuda/pathfinder/_utils/spawned_process_runner.py   | 10 ++++++++--
 cuda_pathfinder/tests/spawned_process_runner.py        |  9 +++------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/spawned_process_runner.py b/cuda_pathfinder/cuda/pathfinder/_utils/spawned_process_runner.py
index 1908695fe5..cba0390861 100644
--- a/cuda_pathfinder/cuda/pathfinder/_utils/spawned_process_runner.py
+++ b/cuda_pathfinder/cuda/pathfinder/_utils/spawned_process_runner.py
@@ -24,13 +24,19 @@ class CompletedProcess:
 
 
 class ChildProcessWrapper:
-    def __init__(self, result_queue, target, args, kwargs):
+    def __init__(
+        self,
+        result_queue: Any,
+        target: Callable[..., None],
+        args: Sequence[Any] | None,
+        kwargs: dict[str, Any] | None,
+    ) -> None:
         self.target = target
         self.args = () if args is None else args
         self.kwargs = {} if kwargs is None else kwargs
         self.result_queue = result_queue
 
-    def __call__(self):
+    def __call__(self) -> None:
         # Capture stdout/stderr
         old_stdout = sys.stdout
         old_stderr = sys.stderr
diff --git a/cuda_pathfinder/tests/spawned_process_runner.py b/cuda_pathfinder/tests/spawned_process_runner.py
index 34850851e0..ac0418445c 100644
--- a/cuda_pathfinder/tests/spawned_process_runner.py
+++ b/cuda_pathfinder/tests/spawned_process_runner.py
@@ -1,9 +1,6 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-from cuda.pathfinder._utils.spawned_process_runner import (
-    PROCESS_KILLED,
-    PROCESS_NO_RESULT,
-    CompletedProcess,
-    run_in_spawned_child_process,
-)
+from cuda.pathfinder._utils.spawned_process_runner import run_in_spawned_child_process
+
+__all__ = ["run_in_spawned_child_process"]