diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index cf10aa23f..000000000 --- a/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -dask_cuda/_version.py export-subst diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 9bfa630e1..be9daacfb 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,10 +1,14 @@ #python code owners dask_cuda/ @rapidsai/daskcuda-python-codeowners -#build/ops code owners -.github/ @rapidsai/ops-codeowners -ci/ @rapidsai/ops-codeowners -conda/ @rapidsai/ops-codeowners -**/Dockerfile @rapidsai/ops-codeowners -**/.dockerignore @rapidsai/ops-codeowners -dependencies.yaml @rapidsai/ops-codeowners +#CI code owners +/.github/ @rapidsai/ci-codeowners +/ci/ @rapidsai/ci-codeowners +/.pre-commit-config.yaml @rapidsai/ci-codeowners + +#packaging code owners +/.devcontainer/ @rapidsai/packaging-codeowners +/conda/ @rapidsai/packaging-codeowners +/dependencies.yaml @rapidsai/packaging-codeowners +/build.sh @rapidsai/packaging-codeowners +pyproject.toml @rapidsai/packaging-codeowners diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 56fed450a..69b0de5f5 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: conda-python-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.08 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: if: github.ref_type == 'branch' needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: upload-conda: needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.08 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -59,7 +59,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.08 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -72,7 +72,7 @@ jobs: wheel-publish: needs: wheel-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.08 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 6688d0ff7..4e56d24d2 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -18,26 +18,26 @@ jobs: - docs-build - wheel-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.08 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.08 conda-python-build: needs: checks secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.08 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.08 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -46,7 +46,7 @@ jobs: run_script: "ci/build_docs.sh" wheel-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.08 with: build_type: pull-request # Package is pure Python and only ever requires one build. diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2424729d7..7a884c5c6 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.08 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 492c96f2c..335080816 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,8 +32,12 @@ repos: additional_dependencies: [types-cachetools] args: ["--module=dask_cuda", "--ignore-missing-imports"] pass_filenames: false + - repo: https://github.com/rapidsai/pre-commit-hooks + rev: v0.3.0 + hooks: + - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.8.0 + rev: v1.13.11 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ea704c1f..37c588511 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,29 @@ +# dask-cuda 24.08.00 (7 Aug 2024) + +## 🐛 Bug Fixes + +- Fix partitioning in explicit-comms shuffle ([#1356](https://github.com/rapidsai/dask-cuda/pull/1356)) [@rjzamora](https://github.com/rjzamora) +- Update cuDF's `assert_eq` import ([#1353](https://github.com/rapidsai/dask-cuda/pull/1353)) [@pentschev](https://github.com/pentschev) + +## 🚀 New Features + +- Add arguments to enable cuDF spilling and set statistics ([#1362](https://github.com/rapidsai/dask-cuda/pull/1362)) [@pentschev](https://github.com/pentschev) +- Allow disabling RMM in benchmarks ([#1352](https://github.com/rapidsai/dask-cuda/pull/1352)) [@pentschev](https://github.com/pentschev) + +## 🛠️ Improvements + +- consolidate cuda_suffixed=false blocks in dependencies.yaml, fix update-version.sh ([#1367](https://github.com/rapidsai/dask-cuda/pull/1367)) [@jameslamb](https://github.com/jameslamb) +- split up CUDA-suffixed dependencies in dependencies.yaml ([#1364](https://github.com/rapidsai/dask-cuda/pull/1364)) [@jameslamb](https://github.com/jameslamb) +- Use verify-alpha-spec hook ([#1360](https://github.com/rapidsai/dask-cuda/pull/1360)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Use workflow branch 24.08 again ([#1359](https://github.com/rapidsai/dask-cuda/pull/1359)) 
[@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Build and test with CUDA 12.5.1 ([#1357](https://github.com/rapidsai/dask-cuda/pull/1357)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Drop `setup.py` ([#1354](https://github.com/rapidsai/dask-cuda/pull/1354)) [@jakirkham](https://github.com/jakirkham) +- remove .gitattributes ([#1350](https://github.com/rapidsai/dask-cuda/pull/1350)) [@jameslamb](https://github.com/jameslamb) +- make conda recipe data-loading stricter ([#1349](https://github.com/rapidsai/dask-cuda/pull/1349)) [@jameslamb](https://github.com/jameslamb) +- Adopt CI/packaging codeowners ([#1347](https://github.com/rapidsai/dask-cuda/pull/1347)) [@bdice](https://github.com/bdice) +- Remove text builds of documentation ([#1346](https://github.com/rapidsai/dask-cuda/pull/1346)) [@vyasr](https://github.com/vyasr) +- use rapids-build-backend ([#1343](https://github.com/rapidsai/dask-cuda/pull/1343)) [@jameslamb](https://github.com/jameslamb) + # dask-cuda 24.06.00 (5 Jun 2024) ## 🐛 Bug Fixes diff --git a/VERSION b/VERSION index 0bff6981a..ec8489fda 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.06.00 +24.08.00 diff --git a/ci/build_docs.sh b/ci/build_docs.sh index a727d6daf..c2a65a414 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -7,7 +7,7 @@ rapids-logger "Create test conda environment" rapids-dependency-file-generator \ --output conda \ - --file_key docs \ + --file-key docs \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml rapids-mamba-retry env create --yes -f env.yaml -n docs @@ -23,16 +23,14 @@ rapids-mamba-retry install \ --channel "${PYTHON_CHANNEL}" \ dask-cuda -export RAPIDS_VERSION_NUMBER="24.06" +export RAPIDS_VERSION_NUMBER="24.08" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build Python docs" pushd docs sphinx-build -b dirhtml ./source _html -sphinx-build -b text ./source _text -mkdir -p "${RAPIDS_DOCS_DIR}/dask-cuda/"{html,txt} +mkdir -p "${RAPIDS_DOCS_DIR}/dask-cuda/"html mv _html/* "${RAPIDS_DOCS_DIR}/dask-cuda/html" -mv _text/* "${RAPIDS_DOCS_DIR}/dask-cuda/txt" popd rapids-upload-docs diff --git a/ci/build_python.sh b/ci/build_python.sh index e2429e98c..48cece328 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -13,18 +13,12 @@ export CMAKE_GENERATOR=Ninja rapids-print-env -package_name="dask_cuda" - -version=$(rapids-generate-version) -commit=$(git rev-parse HEAD) - -echo "${version}" | tr -d '"' > VERSION -sed -i "/^__git_commit__/ s/= .*/= \"${commit}\"/g" "${package_name}/_version.py" +rapids-generate-version > ./VERSION rapids-logger "Begin py build" conda config --set path_conflict prevent -RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \ +RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ conda/recipes/dask-cuda rapids-upload-conda-to-s3 python diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 9ec826733..828972dc2 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -6,22 +6,7 @@ set -euo pipefail source rapids-configure-sccache source rapids-date-string -version=$(rapids-generate-version) -commit=$(git rev-parse HEAD) - -echo "${version}" | tr -d '"' > VERSION -sed -i "/^__git_commit__/ s/= .*/= \"${commit}\"/g" "dask_cuda/_version.py" - -# For nightlies we want to ensure that we're pulling in alphas as well. 
The -# easiest way to do so is to augment the spec with a constraint containing a -# min alpha version that doesn't affect the version bounds but does allow usage -# of alpha versions for that dependency without --pre -alpha_spec='' -if ! rapids-is-release-build; then - alpha_spec=',>=0.0.0a0' -fi - -sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" pyproject.toml +rapids-generate-version > ./VERSION python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check diff --git a/ci/check_style.sh b/ci/check_style.sh index 9bc26fe71..f8bc16525 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -8,7 +8,7 @@ rapids-logger "Create checks conda environment" rapids-dependency-file-generator \ --output conda \ - --file_key checks \ + --file-key checks \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml rapids-mamba-retry env create --yes -f env.yaml -n checks diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 0d1b8b1a5..a9fe1d02e 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -36,8 +36,8 @@ function sed_runner() { echo "${NEXT_FULL_TAG}" | tr -d '"' > VERSION # Bump testing dependencies -sed_runner "s/ucx-py==.*/ucx-py==${NEXT_UCXPY_VERSION}.*/g" dependencies.yaml -sed_runner "s/ucxx==.*/ucxx==${NEXT_UCXPY_VERSION}.*/g" dependencies.yaml +sed_runner "s/ucx-py==.*/ucx-py==${NEXT_UCXPY_VERSION}.*,>=0.0.0a0/g" dependencies.yaml +sed_runner "s/ucxx==.*/ucxx==${NEXT_UCXPY_VERSION}.*,>=0.0.0a0/g" dependencies.yaml DEPENDENCIES=( cudf @@ -45,10 +45,23 @@ DEPENDENCIES=( kvikio rapids-dask-dependency ) -for FILE in dependencies.yaml conda/environments/*.yaml; do - for DEP in "${DEPENDENCIES[@]}"; do - sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*/g" "${FILE}" +for DEP in "${DEPENDENCIES[@]}"; do + for FILE in dependencies.yaml conda/environments/*.yaml; do + sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" done + sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0\"/g" pyproject.toml +done + +UCX_DEPENDENCIES=( + distributed-ucxx + ucx-py + ucxx +) +for DEP in "${UCX_DEPENDENCIES[@]}"; do + for FILE in dependencies.yaml conda/environments/*.yaml; do + sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_UCXPY_VERSION}.*,>=0.0.0a0/g" "${FILE}" + done + sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_UCXPY_VERSION}.*,>=0.0.0a0\"/g" pyproject.toml done # CI files diff --git a/ci/test_python.sh b/ci/test_python.sh index b52cbb6d4..78330a403 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -8,7 +8,7 @@ set -euo pipefail rapids-logger "Generate Python testing dependencies" rapids-dependency-file-generator \ --output conda \ - --file_key test_python \ + --file-key test_python \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml rapids-mamba-retry env create --yes -f env.yaml -n test @@ -99,6 +99,59 @@ python dask_cuda/benchmarks/local_cudf_shuffle.py \ --runs 1 \ --backend explicit-comms +DASK_DATAFRAME__QUERY_PLANNING=True \ +python dask_cuda/benchmarks/local_cudf_shuffle.py \ + --disable-rmm \ + --partition-size="1 KiB" \ + -d 0 \ + --runs 1 \ + --backend explicit-comms + +DASK_DATAFRAME__QUERY_PLANNING=True \ +python dask_cuda/benchmarks/local_cudf_shuffle.py \ + --disable-rmm-pool \ + --partition-size="1 KiB" \ + -d 0 \ + --runs 1 \ + --backend 
explicit-comms + +DASK_DATAFRAME__QUERY_PLANNING=True \ +python dask_cuda/benchmarks/local_cudf_shuffle.py \ + --rmm-pool-size 2GiB \ + --partition-size="1 KiB" \ + -d 0 \ + --runs 1 \ + --backend explicit-comms + +DASK_DATAFRAME__QUERY_PLANNING=True \ +python dask_cuda/benchmarks/local_cudf_shuffle.py \ + --rmm-pool-size 2GiB \ + --rmm-maximum-pool-size 4GiB \ + --partition-size="1 KiB" \ + -d 0 \ + --runs 1 \ + --backend explicit-comms + +DASK_DATAFRAME__QUERY_PLANNING=True \ +python dask_cuda/benchmarks/local_cudf_shuffle.py \ + --rmm-pool-size 2GiB \ + --rmm-maximum-pool-size 4GiB \ + --enable-rmm-async \ + --partition-size="1 KiB" \ + -d 0 \ + --runs 1 \ + --backend explicit-comms + +DASK_DATAFRAME__QUERY_PLANNING=True \ +python dask_cuda/benchmarks/local_cudf_shuffle.py \ + --rmm-pool-size 2GiB \ + --rmm-maximum-pool-size 4GiB \ + --enable-rmm-managed \ + --partition-size="1 KiB" \ + -d 0 \ + --runs 1 \ + --backend explicit-comms + rapids-logger "Run local benchmark (legacy dd)" DASK_DATAFRAME__QUERY_PLANNING=False \ python dask_cuda/benchmarks/local_cudf_shuffle.py \ diff --git a/conda/environments/all_cuda-114_arch-x86_64.yaml b/conda/environments/all_cuda-114_arch-x86_64.yaml index afdb516fa..c0fed8e57 100644 --- a/conda/environments/all_cuda-114_arch-x86_64.yaml +++ b/conda/environments/all_cuda-114_arch-x86_64.yaml @@ -10,10 +10,10 @@ dependencies: - click >=8.1 - cuda-version=11.4 - cudatoolkit -- cudf==24.6.* -- dask-cudf==24.6.* -- distributed-ucxx==0.38.* -- kvikio==24.6.* +- cudf==24.8.*,>=0.0.0a0 +- dask-cudf==24.8.*,>=0.0.0a0 +- distributed-ucxx==0.39.*,>=0.0.0a0 +- kvikio==24.8.*,>=0.0.0a0 - numactl-devel-cos7-x86_64 - numba>=0.57 - numpy>=1.23,<2.0a0 @@ -24,13 +24,14 @@ dependencies: - pytest - pytest-cov - python>=3.9,<3.12 -- rapids-dask-dependency==24.6.* +- rapids-build-backend>=0.3.0,<0.4.0dev0 +- rapids-dask-dependency==24.8.*,>=0.0.0a0 - setuptools>=64.0.0 - sphinx - sphinx-click>=2.7.1 - sphinx-rtd-theme>=0.5.1 - ucx-proc=*=gpu -- ucx-py==0.38.* -- ucxx==0.38.* +- ucx-py==0.39.*,>=0.0.0a0 +- ucxx==0.39.*,>=0.0.0a0 - zict>=2.0.0 name: all_cuda-114_arch-x86_64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index a25b7e7a7..d1f6933cd 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -10,10 +10,10 @@ dependencies: - click >=8.1 - cuda-version=11.8 - cudatoolkit -- cudf==24.6.* -- dask-cudf==24.6.* -- distributed-ucxx==0.38.* -- kvikio==24.6.* +- cudf==24.8.*,>=0.0.0a0 +- dask-cudf==24.8.*,>=0.0.0a0 +- distributed-ucxx==0.39.*,>=0.0.0a0 +- kvikio==24.8.*,>=0.0.0a0 - numactl-devel-cos7-x86_64 - numba>=0.57 - numpy>=1.23,<2.0a0 @@ -24,13 +24,14 @@ dependencies: - pytest - pytest-cov - python>=3.9,<3.12 -- rapids-dask-dependency==24.6.* +- rapids-build-backend>=0.3.0,<0.4.0dev0 +- rapids-dask-dependency==24.8.*,>=0.0.0a0 - setuptools>=64.0.0 - sphinx - sphinx-click>=2.7.1 - sphinx-rtd-theme>=0.5.1 - ucx-proc=*=gpu -- ucx-py==0.38.* -- ucxx==0.38.* +- ucx-py==0.39.*,>=0.0.0a0 +- ucxx==0.39.*,>=0.0.0a0 - zict>=2.0.0 name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml similarity index 65% rename from conda/environments/all_cuda-122_arch-x86_64.yaml rename to conda/environments/all_cuda-125_arch-x86_64.yaml index ff2dea696..a27dea728 100644 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml 
@@ -10,11 +10,11 @@ dependencies: - click >=8.1 - cuda-nvcc-impl - cuda-nvrtc -- cuda-version=12.2 -- cudf==24.6.* -- dask-cudf==24.6.* -- distributed-ucxx==0.38.* -- kvikio==24.6.* +- cuda-version=12.5 +- cudf==24.8.*,>=0.0.0a0 +- dask-cudf==24.8.*,>=0.0.0a0 +- distributed-ucxx==0.39.*,>=0.0.0a0 +- kvikio==24.8.*,>=0.0.0a0 - numactl-devel-cos7-x86_64 - numba>=0.57 - numpy>=1.23,<2.0a0 @@ -25,13 +25,14 @@ dependencies: - pytest - pytest-cov - python>=3.9,<3.12 -- rapids-dask-dependency==24.6.* +- rapids-build-backend>=0.3.0,<0.4.0dev0 +- rapids-dask-dependency==24.8.*,>=0.0.0a0 - setuptools>=64.0.0 - sphinx - sphinx-click>=2.7.1 - sphinx-rtd-theme>=0.5.1 - ucx-proc=*=gpu -- ucx-py==0.38.* -- ucxx==0.38.* +- ucx-py==0.39.*,>=0.0.0a0 +- ucxx==0.39.*,>=0.0.0a0 - zict>=2.0.0 -name: all_cuda-122_arch-x86_64 +name: all_cuda-125_arch-x86_64 diff --git a/conda/recipes/dask-cuda/meta.yaml b/conda/recipes/dask-cuda/meta.yaml index 357e6dede..eba1a4fc0 100644 --- a/conda/recipes/dask-cuda/meta.yaml +++ b/conda/recipes/dask-cuda/meta.yaml @@ -21,17 +21,18 @@ build: script: - {{ PYTHON }} -m pip install . -vv entry_points: - {% for e in data.get("project", {}).get("scripts", {}).items() %} - - {{ e|join(" = ") }} + {% for entrypoint in data["project"]["scripts"] %} + - {{ entrypoint ~ ' = ' ~ data["project"]["scripts"][entrypoint] }} {% endfor %} requirements: host: - python - pip + - rapids-build-backend>=0.3.0,<0.4.0.dev0 run: - python - {% for r in data.get("project", {}).get("dependencies", []) %} + {% for r in data["project"]["dependencies"] %} - {{ r }} {% endfor %} @@ -40,18 +41,18 @@ test: - dask_cuda commands: - dask cuda --help - {% for e in data.get("project", {}).get("scripts", {}).keys() %} - - {{ e }} --help - - {{ e|replace("-", " ") }} --help + {% for entrypoint in data["project"]["scripts"] %} + - {{ entrypoint }} --help + - {{ entrypoint|replace("-", " ") }} --help {% endfor %} about: - home: {{ data.get("project", {}).get("urls", {}).get("Homepage", "") }} - license: {{ data.get("project", {}).get("license", {}).get("text", "") }} + home: {{ data["project"]["urls"]["Homepage"] }} + license: {{ data["project"]["license"]["text"] }} license_file: - {% for e in data.get("tool", {}).get("setuptools", {}).get("license-files", []) %} + {% for e in data["tool"]["setuptools"]["license-files"] %} - ../../../{{ e }} {% endfor %} - summary: {{ data.get("project", {}).get("description", "") }} - dev_url: {{ data.get("project", {}).get("urls", {}).get("Source", "") }} - doc_url: {{ data.get("project", {}).get("urls", {}).get("Documentation", "") }} + summary: {{ data["project"]["description"] }} + dev_url: {{ data["project"]["urls"]["Source"] }} + doc_url: {{ data["project"]["urls"]["Documentation"] }} diff --git a/dask_cuda/_version.py b/dask_cuda/_version.py index c54072ba5..820bf10ba 100644 --- a/dask_cuda/_version.py +++ b/dask_cuda/_version.py @@ -15,6 +15,16 @@ import importlib.resources __version__ = ( - importlib.resources.files("dask_cuda").joinpath("VERSION").read_text().strip() + importlib.resources.files(__package__).joinpath("VERSION").read_text().strip() ) -__git_commit__ = "" +try: + __git_commit__ = ( + importlib.resources.files(__package__) + .joinpath("GIT_COMMIT") + .read_text() + .strip() + ) +except FileNotFoundError: + __git_commit__ = "" + +__all__ = ["__git_commit__", "__version__"] diff --git a/dask_cuda/benchmarks/common.py b/dask_cuda/benchmarks/common.py index 1335334ab..7f48d4fae 100644 --- a/dask_cuda/benchmarks/common.py +++ b/dask_cuda/benchmarks/common.py @@ 
-117,16 +117,18 @@ def run(client: Client, args: Namespace, config: Config): wait_for_cluster(client, shutdown_on_failure=True) assert len(client.scheduler_info()["workers"]) > 0 setup_memory_pools( - client, - args.type == "gpu", - args.rmm_pool_size, - args.disable_rmm_pool, - args.enable_rmm_async, - args.enable_rmm_managed, - args.rmm_release_threshold, - args.rmm_log_directory, - args.enable_rmm_statistics, - args.enable_rmm_track_allocations, + client=client, + is_gpu=args.type == "gpu", + disable_rmm=args.disable_rmm, + disable_rmm_pool=args.disable_rmm_pool, + pool_size=args.rmm_pool_size, + maximum_pool_size=args.rmm_maximum_pool_size, + rmm_async=args.enable_rmm_async, + rmm_managed=args.enable_rmm_managed, + release_threshold=args.rmm_release_threshold, + log_directory=args.rmm_log_directory, + statistics=args.enable_rmm_statistics, + rmm_track_allocations=args.enable_rmm_track_allocations, ) address_to_index, results, message_data = gather_bench_results(client, args, config) p2p_bw = peer_to_peer_bandwidths(message_data, address_to_index) diff --git a/dask_cuda/benchmarks/utils.py b/dask_cuda/benchmarks/utils.py index 5ac79a88d..48e4755fb 100644 --- a/dask_cuda/benchmarks/utils.py +++ b/dask_cuda/benchmarks/utils.py @@ -17,6 +17,7 @@ from distributed.comm.addressing import get_address_host from dask_cuda.local_cuda_cluster import LocalCUDACluster +from dask_cuda.utils import parse_device_memory_limit def as_noop(dsk): @@ -93,15 +94,41 @@ def parse_benchmark_args( "'forkserver' can be used to avoid issues with fork not being allowed " "after the networking stack has been initialised.", ) + cluster_args.add_argument( + "--disable-rmm", + action="store_true", + help="Disable RMM.", + ) + cluster_args.add_argument( + "--disable-rmm-pool", + action="store_true", + help="Uses RMM for allocations but without a memory pool.", + ) cluster_args.add_argument( "--rmm-pool-size", default=None, type=parse_bytes, help="The size of the RMM memory pool. Can be an integer (bytes) or a string " - "(like '4GB' or '5000M'). By default, 1/2 of the total GPU memory is used.", + "(like '4GB' or '5000M'). By default, 1/2 of the total GPU memory is used." + "" + ".. note::" + " This size is a per-worker configuration, and not cluster-wide.", ) cluster_args.add_argument( - "--disable-rmm-pool", action="store_true", help="Disable the RMM memory pool" + "--rmm-maximum-pool-size", + default=None, + help="When ``--rmm-pool-size`` is specified, this argument indicates the " + "maximum pool size. Can be an integer (bytes), or a string (like '4GB' or " + "'5000M'). By default, the total available memory on the GPU is used. " + "``rmm_pool_size`` must be specified to use RMM pool and to set the maximum " + "pool size." + "" + ".. note::" + " When paired with `--enable-rmm-async` the maximum size cannot be " + " guaranteed due to fragmentation." + "" + ".. 
note::" + " This size is a per-worker configuration, and not cluster-wide.", ) cluster_args.add_argument( "--enable-rmm-managed", @@ -407,10 +434,29 @@ def get_worker_device(): return -1 +def setup_rmm_resources(statistics=False, rmm_track_allocations=False): + import cupy + + import rmm + from rmm.allocators.cupy import rmm_cupy_allocator + + cupy.cuda.set_allocator(rmm_cupy_allocator) + if statistics: + rmm.mr.set_current_device_resource( + rmm.mr.StatisticsResourceAdaptor(rmm.mr.get_current_device_resource()) + ) + if rmm_track_allocations: + rmm.mr.set_current_device_resource( + rmm.mr.TrackingResourceAdaptor(rmm.mr.get_current_device_resource()) + ) + + def setup_memory_pool( dask_worker=None, + disable_rmm=None, + disable_rmm_pool=None, pool_size=None, - disable_pool=False, + maximum_pool_size=None, rmm_async=False, rmm_managed=False, release_threshold=None, @@ -418,45 +464,66 @@ def setup_memory_pool( statistics=False, rmm_track_allocations=False, ): - import cupy - import rmm - from rmm.allocators.cupy import rmm_cupy_allocator from dask_cuda.utils import get_rmm_log_file_name logging = log_directory is not None - if rmm_async: - rmm.mr.set_current_device_resource( - rmm.mr.CudaAsyncMemoryResource( - initial_pool_size=pool_size, release_threshold=release_threshold - ) - ) - else: - rmm.reinitialize( - pool_allocator=not disable_pool, - managed_memory=rmm_managed, - initial_pool_size=pool_size, - logging=logging, - log_file_name=get_rmm_log_file_name(dask_worker, logging, log_directory), - ) - cupy.cuda.set_allocator(rmm_cupy_allocator) - if statistics: - rmm.mr.set_current_device_resource( - rmm.mr.StatisticsResourceAdaptor(rmm.mr.get_current_device_resource()) + if pool_size is not None: + pool_size = parse_device_memory_limit(pool_size, alignment_size=256) + + if maximum_pool_size is not None: + maximum_pool_size = parse_device_memory_limit( + maximum_pool_size, alignment_size=256 ) - if rmm_track_allocations: - rmm.mr.set_current_device_resource( - rmm.mr.TrackingResourceAdaptor(rmm.mr.get_current_device_resource()) + + if release_threshold is not None: + release_threshold = parse_device_memory_limit( + release_threshold, alignment_size=256 ) + if not disable_rmm: + if rmm_async: + mr = rmm.mr.CudaAsyncMemoryResource( + initial_pool_size=pool_size, + release_threshold=release_threshold, + ) + + if maximum_pool_size is not None: + mr = rmm.mr.LimitingResourceAdaptor( + mr, allocation_limit=maximum_pool_size + ) + + rmm.mr.set_current_device_resource(mr) + + setup_rmm_resources( + statistics=statistics, rmm_track_allocations=rmm_track_allocations + ) + else: + rmm.reinitialize( + pool_allocator=not disable_rmm_pool, + managed_memory=rmm_managed, + initial_pool_size=pool_size, + maximum_pool_size=maximum_pool_size, + logging=logging, + log_file_name=get_rmm_log_file_name( + dask_worker, logging, log_directory + ), + ) + + setup_rmm_resources( + statistics=statistics, rmm_track_allocations=rmm_track_allocations + ) + def setup_memory_pools( client, is_gpu, + disable_rmm, + disable_rmm_pool, pool_size, - disable_pool, + maximum_pool_size, rmm_async, rmm_managed, release_threshold, @@ -468,8 +535,10 @@ def setup_memory_pools( return client.run( setup_memory_pool, + disable_rmm=disable_rmm, + disable_rmm_pool=disable_rmm_pool, pool_size=pool_size, - disable_pool=disable_pool, + maximum_pool_size=maximum_pool_size, rmm_async=rmm_async, rmm_managed=rmm_managed, release_threshold=release_threshold, @@ -482,7 +551,9 @@ def setup_memory_pools( client.run_on_scheduler( setup_memory_pool, 
pool_size=1e9, - disable_pool=disable_pool, + disable_rmm=disable_rmm, + disable_rmm_pool=disable_rmm_pool, + maximum_pool_size=maximum_pool_size, rmm_async=rmm_async, rmm_managed=rmm_managed, release_threshold=release_threshold, diff --git a/dask_cuda/cli.py b/dask_cuda/cli.py index cc2d08437..6a3518e07 100644 --- a/dask_cuda/cli.py +++ b/dask_cuda/cli.py @@ -101,6 +101,20 @@ def cuda(): total device memory), string (like ``"5GB"`` or ``"5000M"``), or ``"auto"`` or 0 to disable spilling to host (i.e. allow full device memory usage).""", ) +@click.option( + "--enable-cudf-spill/--disable-cudf-spill", + default=False, + show_default=True, + help="""Enable automatic cuDF spilling. WARNING: This should NOT be used with + JIT-Unspill.""", +) +@click.option( + "--cudf-spill-stats", + type=int, + default=0, + help="""Set the cuDF spilling statistics level. This option has no effect if + `--enable-cudf-spill` is not specified.""", +) @click.option( "--rmm-pool-size", default=None, @@ -120,6 +134,10 @@ def cuda(): memory on the GPU is used. ``rmm_pool_size`` must be specified to use RMM pool and to set the maximum pool size. + .. note:: + When paired with `--enable-rmm-async` the maximum size cannot be guaranteed due + to fragmentation. + .. note:: This size is a per-worker configuration, and not cluster-wide.""", ) @@ -326,6 +344,8 @@ def worker( name, memory_limit, device_memory_limit, + enable_cudf_spill, + cudf_spill_stats, rmm_pool_size, rmm_maximum_pool_size, rmm_managed_memory, @@ -398,6 +418,8 @@ def worker( name, memory_limit, device_memory_limit, + enable_cudf_spill, + cudf_spill_stats, rmm_pool_size, rmm_maximum_pool_size, rmm_managed_memory, diff --git a/dask_cuda/cuda_worker.py b/dask_cuda/cuda_worker.py index e25a7c142..b88c9bc98 100644 --- a/dask_cuda/cuda_worker.py +++ b/dask_cuda/cuda_worker.py @@ -20,7 +20,7 @@ from .device_host_file import DeviceHostFile from .initialize import initialize -from .plugins import CPUAffinity, PreImport, RMMSetup +from .plugins import CPUAffinity, CUDFSetup, PreImport, RMMSetup from .proxify_host_file import ProxifyHostFile from .utils import ( cuda_visible_devices, @@ -41,6 +41,8 @@ def __init__( name=None, memory_limit="auto", device_memory_limit="auto", + enable_cudf_spill=False, + cudf_spill_stats=0, rmm_pool_size=None, rmm_maximum_pool_size=None, rmm_managed_memory=False, @@ -166,6 +168,12 @@ def del_pid_file(): if device_memory_limit is None and memory_limit is None: data = lambda _: {} elif jit_unspill: + if enable_cudf_spill: + warnings.warn( + "Enabling cuDF spilling and JIT-Unspill together is not " + "safe, consider disabling JIT-Unspill." 
+ ) + data = lambda i: ( ProxifyHostFile, { @@ -217,6 +225,7 @@ def del_pid_file(): track_allocations=rmm_track_allocations, ), PreImport(pre_import), + CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats), }, name=name if nprocs == 1 or name is None else str(name) + "-" + str(i), local_directory=local_directory, diff --git a/dask_cuda/explicit_comms/dataframe/shuffle.py b/dask_cuda/explicit_comms/dataframe/shuffle.py index 3f7b79514..70f123354 100644 --- a/dask_cuda/explicit_comms/dataframe/shuffle.py +++ b/dask_cuda/explicit_comms/dataframe/shuffle.py @@ -8,6 +8,9 @@ from operator import getitem from typing import Any, Callable, Dict, List, Optional, Set, TypeVar +import numpy as np +import pandas as pd + import dask import dask.config import dask.dataframe @@ -155,9 +158,16 @@ def compute_map_index( if column_names[0] == "_partitions": ind = df[column_names[0]] else: - ind = hash_object_dispatch( - df[column_names] if column_names else df, index=False - ) + # Need to cast numerical dtypes to be consistent + # with `dask.dataframe.shuffle.partitioning_index` + dtypes = {} + index = df[column_names] if column_names else df + for col, dtype in index.dtypes.items(): + if pd.api.types.is_numeric_dtype(dtype): + dtypes[col] = np.float64 + if dtypes: + index = index.astype(dtypes, errors="ignore") + ind = hash_object_dispatch(index, index=False) return ind % npartitions @@ -187,15 +197,8 @@ def partition_dataframe( partitions Dict of dataframe-partitions, mapping partition-ID to dataframe """ - if column_names[0] != "_partitions" and hasattr(df, "partition_by_hash"): - return dict( - zip( - range(npartitions), - df.partition_by_hash( - column_names, npartitions, keep_index=not ignore_index - ), - ) - ) + # TODO: Use `partition_by_hash` if/when dtype-casting is added + # (See: https://github.com/rapidsai/cudf/issues/16221) map_index = compute_map_index(df, column_names, npartitions) return group_split_dispatch(df, map_index, npartitions, ignore_index=ignore_index) @@ -529,18 +532,19 @@ def shuffle( # TODO: can we do this without using `submit()` to avoid the overhead # of creating a Future for each dataframe partition? - futures = [] + _futures = {} for rank in ranks: for part_id in rank_to_out_part_ids[rank]: - futures.append( - c.client.submit( - getitem, - shuffle_result[rank], - part_id, - workers=[c.worker_addresses[rank]], - ) + _futures[part_id] = c.client.submit( + getitem, + shuffle_result[rank], + part_id, + workers=[c.worker_addresses[rank]], ) + # Make sure partitions are properly ordered + futures = [_futures.pop(i) for i in range(npartitions)] + # Create a distributed Dataframe from all the pieces divs = [None] * (len(futures) + 1) kwargs = {"meta": df_meta, "divisions": divs, "prefix": "explicit-comms-shuffle"} diff --git a/dask_cuda/local_cuda_cluster.py b/dask_cuda/local_cuda_cluster.py index 7a5c8c13d..202373e9d 100644 --- a/dask_cuda/local_cuda_cluster.py +++ b/dask_cuda/local_cuda_cluster.py @@ -10,7 +10,7 @@ from .device_host_file import DeviceHostFile from .initialize import initialize -from .plugins import CPUAffinity, PreImport, RMMSetup +from .plugins import CPUAffinity, CUDFSetup, PreImport, RMMSetup from .proxify_host_file import ProxifyHostFile from .utils import ( cuda_visible_devices, @@ -73,6 +73,14 @@ class LocalCUDACluster(LocalCluster): starts spilling to host memory. Can be an integer (bytes), float (fraction of total device memory), string (like ``"5GB"`` or ``"5000M"``), or ``"auto"``, 0, or ``None`` to disable spilling to host (i.e. 
allow full device memory usage). + enable_cudf_spill : bool, default False + Enable automatic cuDF spilling. + + .. warning:: + This should NOT be used together with JIT-Unspill. + cudf_spill_stats : int, default 0 + Set the cuDF spilling statistics level. This option has no effect if + ``enable_cudf_spill=False``. local_directory : str or None, default None Path on local machine to store temporary files. Can be a string (like ``"path/to/files"``) or ``None`` to fall back on the value of @@ -114,6 +122,10 @@ class LocalCUDACluster(LocalCluster): memory on the GPU is used. ``rmm_pool_size`` must be specified to use RMM pool and to set the maximum pool size. + .. note:: + When paired with `--enable-rmm-async` the maximum size cannot be guaranteed + due to fragmentation. + .. note:: This size is a per-worker configuration, and not cluster-wide. rmm_managed_memory : bool, default False @@ -205,6 +217,8 @@ def __init__( threads_per_worker=1, memory_limit="auto", device_memory_limit=0.8, + enable_cudf_spill=False, + cudf_spill_stats=0, data=None, local_directory=None, shared_filesystem=None, @@ -255,6 +269,8 @@ def __init__( self.device_memory_limit = parse_device_memory_limit( device_memory_limit, device_index=nvml_device_index(0, CUDA_VISIBLE_DEVICES) ) + self.enable_cudf_spill = enable_cudf_spill + self.cudf_spill_stats = cudf_spill_stats self.rmm_pool_size = rmm_pool_size self.rmm_maximum_pool_size = rmm_maximum_pool_size @@ -298,6 +314,12 @@ def __init__( if device_memory_limit is None and memory_limit is None: data = {} elif jit_unspill: + if enable_cudf_spill: + warnings.warn( + "Enabling cuDF spilling and JIT-Unspill together is not " + "safe, consider disabling JIT-Unspill." + ) + data = ( ProxifyHostFile, { @@ -410,6 +432,7 @@ def new_worker_spec(self): track_allocations=self.rmm_track_allocations, ), PreImport(self.pre_import), + CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats), }, } ) diff --git a/dask_cuda/plugins.py b/dask_cuda/plugins.py index 4eba97f2b..122f93ffa 100644 --- a/dask_cuda/plugins.py +++ b/dask_cuda/plugins.py @@ -14,6 +14,21 @@ def setup(self, worker=None): os.sched_setaffinity(0, self.cores) +class CUDFSetup(WorkerPlugin): + def __init__(self, spill, spill_stats): + self.spill = spill + self.spill_stats = spill_stats + + def setup(self, worker=None): + try: + import cudf + + cudf.set_option("spill", self.spill) + cudf.set_option("spill_stats", self.spill_stats) + except ImportError: + pass + + class RMMSetup(WorkerPlugin): def __init__( self, diff --git a/dask_cuda/tests/test_cudf_builtin_spilling.py b/dask_cuda/tests/test_cudf_builtin_spilling.py index d4c28ba06..80b1d482d 100644 --- a/dask_cuda/tests/test_cudf_builtin_spilling.py +++ b/dask_cuda/tests/test_cudf_builtin_spilling.py @@ -20,7 +20,7 @@ get_global_manager, set_global_manager, ) -from cudf.testing._utils import assert_eq # noqa: E402 +from cudf.testing import assert_eq # noqa: E402 if get_global_manager() is not None: pytest.skip( diff --git a/dask_cuda/tests/test_dask_cuda_worker.py b/dask_cuda/tests/test_dask_cuda_worker.py index 974ad1319..505af12f1 100644 --- a/dask_cuda/tests/test_dask_cuda_worker.py +++ b/dask_cuda/tests/test_dask_cuda_worker.py @@ -231,6 +231,64 @@ def test_rmm_logging(loop): # noqa: F811 assert v is rmm.mr.LoggingResourceAdaptor +def test_cudf_spill_disabled(loop): # noqa: F811 + cudf = pytest.importorskip("cudf") + with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]): + with popen( + [ + "dask", + "cuda", + "worker", + "127.0.0.1:9369", + "--host", + 
"127.0.0.1", + "--no-dashboard", + ] + ): + with Client("127.0.0.1:9369", loop=loop) as client: + assert wait_workers(client, n_gpus=get_n_gpus()) + + cudf_spill = client.run( + cudf.get_option, + "spill", + ) + for v in cudf_spill.values(): + assert v is False + + cudf_spill_stats = client.run(cudf.get_option, "spill_stats") + for v in cudf_spill_stats.values(): + assert v == 0 + + +def test_cudf_spill(loop): # noqa: F811 + cudf = pytest.importorskip("cudf") + with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]): + with popen( + [ + "dask", + "cuda", + "worker", + "127.0.0.1:9369", + "--host", + "127.0.0.1", + "--no-dashboard", + "--enable-cudf-spill", + "--cudf-spill-stats", + "2", + ] + ): + with Client("127.0.0.1:9369", loop=loop) as client: + assert wait_workers(client, n_gpus=get_n_gpus()) + + cudf_spill = client.run(cudf.get_option, "spill") + for v in cudf_spill.values(): + assert v is True + + cudf_spill_stats = client.run(cudf.get_option, "spill_stats") + for v in cudf_spill_stats.values(): + assert v == 2 + + @patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0"}) def test_dashboard_address(loop): # noqa: F811 with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]): diff --git a/dask_cuda/tests/test_explicit_comms.py b/dask_cuda/tests/test_explicit_comms.py index f495648e0..2806dc1cd 100644 --- a/dask_cuda/tests/test_explicit_comms.py +++ b/dask_cuda/tests/test_explicit_comms.py @@ -109,7 +109,14 @@ def test_dataframe_merge_empty_partitions(): def check_partitions(df, npartitions): """Check that all values in `df` hashes to the same""" - hashes = partitioning_index(df, npartitions) + dtypes = {} + for col, dtype in df.dtypes.items(): + if pd.api.types.is_numeric_dtype(dtype): + dtypes[col] = np.float64 + if not dtypes: + dtypes = None + + hashes = partitioning_index(df, npartitions, cast_dtype=dtypes) if len(hashes) > 0: return len(hashes.unique()) == 1 else: @@ -128,11 +135,10 @@ def _test_dataframe_shuffle(backend, protocol, n_workers, _partitions): worker_class=IncreasedCloseTimeoutNanny, processes=True, ) as cluster: - with Client(cluster) as client: - all_workers = list(client.get_worker_logs().keys()) + with Client(cluster): comms.default_comms() np.random.seed(42) - df = pd.DataFrame({"key": np.random.random(100)}) + df = pd.DataFrame({"key": np.random.randint(0, high=100, size=100)}) if backend == "cudf": df = cudf.DataFrame.from_pandas(df) @@ -141,15 +147,13 @@ def _test_dataframe_shuffle(backend, protocol, n_workers, _partitions): for input_nparts in range(1, 5): for output_nparts in range(1, 5): - ddf = dd.from_pandas(df.copy(), npartitions=input_nparts).persist( - workers=all_workers - ) + ddf1 = dd.from_pandas(df.copy(), npartitions=input_nparts) # To reduce test runtime, we change the batchsizes here instead # of using a test parameter. 
for batchsize in (-1, 1, 2): with dask.config.set(explicit_comms_batchsize=batchsize): ddf = explicit_comms_shuffle( - ddf, + ddf1, ["_partitions"] if _partitions else ["key"], npartitions=output_nparts, batchsize=batchsize, @@ -177,6 +181,32 @@ def _test_dataframe_shuffle(backend, protocol, n_workers, _partitions): got = ddf.compute().sort_values("key") assert_eq(got, expected) + # Check that partitioning is consistent with "tasks" + ddf_tasks = ddf1.shuffle( + ["key"], + npartitions=output_nparts, + shuffle_method="tasks", + ) + for i in range(output_nparts): + expected_partition = ddf_tasks.partitions[ + i + ].compute()["key"] + actual_partition = ddf.partitions[i].compute()[ + "key" + ] + if backend == "cudf": + expected_partition = ( + expected_partition.values_host + ) + actual_partition = actual_partition.values_host + else: + expected_partition = expected_partition.values + actual_partition = actual_partition.values + assert all( + np.sort(expected_partition) + == np.sort(actual_partition) + ) + @pytest.mark.parametrize("nworkers", [1, 2, 3]) @pytest.mark.parametrize("backend", ["pandas", "cudf"]) diff --git a/dask_cuda/tests/test_local_cuda_cluster.py b/dask_cuda/tests/test_local_cuda_cluster.py index b05389e4c..b144d1114 100644 --- a/dask_cuda/tests/test_local_cuda_cluster.py +++ b/dask_cuda/tests/test_local_cuda_cluster.py @@ -500,6 +500,54 @@ async def test_worker_fraction_limits(): ) +@gen_test(timeout=20) +async def test_cudf_spill_disabled(): + cudf = pytest.importorskip("cudf") + + async with LocalCUDACluster( + asynchronous=True, + ) as cluster: + async with Client(cluster, asynchronous=True) as client: + cudf_spill = await client.run( + cudf.get_option, + "spill", + ) + for v in cudf_spill.values(): + assert v is False + + cudf_spill_stats = await client.run( + cudf.get_option, + "spill_stats", + ) + for v in cudf_spill_stats.values(): + assert v == 0 + + +@gen_test(timeout=20) +async def test_cudf_spill(): + cudf = pytest.importorskip("cudf") + + async with LocalCUDACluster( + enable_cudf_spill=True, + cudf_spill_stats=2, + asynchronous=True, + ) as cluster: + async with Client(cluster, asynchronous=True) as client: + cudf_spill = await client.run( + cudf.get_option, + "spill", + ) + for v in cudf_spill.values(): + assert v is True + + cudf_spill_stats = await client.run( + cudf.get_option, + "spill_stats", + ) + for v in cudf_spill_stats.values(): + assert v == 2 + + @pytest.mark.parametrize( "protocol", ["ucx", "ucxx"], diff --git a/dask_cuda/tests/test_version.py b/dask_cuda/tests/test_version.py new file mode 100644 index 000000000..f30b2847d --- /dev/null +++ b/dask_cuda/tests/test_version.py @@ -0,0 +1,12 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +import dask_cuda + + +def test_version_constants_are_populated(): + # __git_commit__ will only be non-empty in a built distribution + assert isinstance(dask_cuda.__git_commit__, str) + + # __version__ should always be non-empty + assert isinstance(dask_cuda.__version__, str) + assert len(dask_cuda.__version__) > 0 diff --git a/dependencies.yaml b/dependencies.yaml index 20c6ca05e..c3b629654 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -3,7 +3,7 @@ files: all: output: conda matrix: - cuda: ["11.4", "11.8", "12.2"] + cuda: ["11.4", "11.8", "12.5"] arch: [x86_64] includes: - build_python @@ -74,6 +74,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: + - rapids-build-backend>=0.3.0,<0.4.0dev0 - setuptools>=64.0.0 cuda_version: specific: @@ -99,6 +100,10 @@ dependencies: cuda: "12.2" packages: - cuda-version=12.2 + - matrix: + cuda: "12.5" + packages: + - cuda-version=12.5 cuda: specific: - output_types: conda @@ -153,23 +158,23 @@ dependencies: - numpy>=1.23,<2.0a0 - pandas>=1.3 - pynvml>=11.0.0,<11.5 - - rapids-dask-dependency==24.6.* + - rapids-dask-dependency==24.8.*,>=0.0.0a0 - zict>=2.0.0 test_python: common: - output_types: [conda, requirements, pyproject] packages: - - cudf==24.6.* - - dask-cudf==24.6.* - - kvikio==24.6.* - pytest - pytest-cov - - ucx-py==0.38.* - output_types: [conda] packages: - - distributed-ucxx==0.38.* + - &cudf_unsuffixed cudf==24.8.*,>=0.0.0a0 + - &dask_cudf_unsuffixed dask-cudf==24.8.*,>=0.0.0a0 + - distributed-ucxx==0.39.*,>=0.0.0a0 + - &kvikio_unsuffixed kvikio==24.8.*,>=0.0.0a0 + - &ucx_py_unsuffixed ucx-py==0.39.*,>=0.0.0a0 - ucx-proc=*=gpu - - ucxx==0.38.* + - ucxx==0.39.*,>=0.0.0a0 specific: - output_types: conda matrices: @@ -181,3 +186,27 @@ dependencies: arch: aarch64 packages: - numactl-devel-cos7-aarch64 + - output_types: [requirements, pyproject] + matrices: + # kvikio should be added to the CUDA-version-specific matrices once there are wheels available + # ref: https://github.com/rapidsai/kvikio/pull/369 + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - cudf-cu12==24.8.*,>=0.0.0a0 + - dask-cudf-cu12==24.8.*,>=0.0.0a0 + - ucx-py-cu12==0.39.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - cudf-cu11==24.8.*,>=0.0.0a0 + - dask-cudf-cu11==24.8.*,>=0.0.0a0 + - ucx-py-cu11==0.39.*,>=0.0.0a0 + - matrix: + packages: + - *cudf_unsuffixed + - *dask_cudf_unsuffixed + - *kvikio_unsuffixed + - *ucx_py_unsuffixed diff --git a/docs/source/explicit_comms.rst b/docs/source/explicit_comms.rst index aecbc1fd9..9fde8756a 100644 --- a/docs/source/explicit_comms.rst +++ b/docs/source/explicit_comms.rst @@ -14,4 +14,4 @@ Usage In order to use explicit-comms in Dask/Distributed automatically, simply define the environment variable ``DASK_EXPLICIT_COMMS=True`` or setting the ``"explicit-comms"`` key in the `Dask configuration `_. -It is also possible to use explicit-comms in tasks manually, see the `API <../api/#explicit-comms>`_ and our `implementation of shuffle `_ for guidance. +It is also possible to use explicit-comms in tasks manually, see the `API <../api/#explicit-comms>`_ and our `implementation of shuffle `_ for guidance. 
diff --git a/docs/source/install.rst b/docs/source/install.rst index e522ae3c1..43082a671 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -12,11 +12,11 @@ To use Dask-CUDA on your system, you will need: - A version of NVIDIA CUDA Toolkit compatible with the installed driver version; see Table 1 of `CUDA Compatibility -- Binary Compatibility `_ for an overview of CUDA Toolkit driver requirements Once the proper CUDA Toolkit version has been determined, it can be installed using along with Dask-CUDA using ``conda``. -To install the latest version of Dask-CUDA along with CUDA Toolkit 12.0: +To install the latest version of Dask-CUDA along with CUDA Toolkit 12.5: .. code-block:: bash - conda install -c rapidsai -c conda-forge -c nvidia dask-cuda cuda-version=12.0 + conda install -c rapidsai -c conda-forge -c nvidia dask-cuda cuda-version=12.5 Pip --- diff --git a/pyproject.toml b/pyproject.toml index 35d485381..b6c431d61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,7 @@ [build-system] -build-backend = "setuptools.build_meta" +build-backend = "rapids_build_backend.build" requires = [ + "rapids-build-backend>=0.3.0,<0.4.0dev0", "setuptools>=64.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. @@ -20,7 +21,7 @@ dependencies = [ "numpy>=1.23,<2.0a0", "pandas>=1.3", "pynvml>=11.0.0,<11.5", - "rapids-dask-dependency==24.6.*", + "rapids-dask-dependency==24.8.*,>=0.0.0a0", "zict>=2.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -49,12 +50,12 @@ docs = [ "sphinx-rtd-theme>=0.5.1", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. test = [ - "cudf==24.6.*", - "dask-cudf==24.6.*", - "kvikio==24.6.*", + "cudf==24.8.*,>=0.0.0a0", + "dask-cudf==24.8.*,>=0.0.0a0", + "kvikio==24.8.*,>=0.0.0a0", "pytest", "pytest-cov", - "ucx-py==0.38.*", + "ucx-py==0.39.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] @@ -129,6 +130,12 @@ filterwarnings = [ "ignore:Dask DataFrame implementation is deprecated:DeprecationWarning", ] +[tool.rapids-build-backend] +build-backend = "setuptools.build_meta" +dependencies-file = "dependencies.yaml" +disable-cuda = true +matrix-entry = "cuda_suffixed=true" + [tool.setuptools] license-files = ["LICENSE"] diff --git a/setup.py b/setup.py deleted file mode 100644 index 606849326..000000000 --- a/setup.py +++ /dev/null @@ -1,3 +0,0 @@ -from setuptools import setup - -setup()
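
Note (not part of the diff): the headline user-facing change above is the new cuDF spilling support — the `--enable-cudf-spill` / `--cudf-spill-stats` CLI flags, the matching `enable_cudf_spill` / `cudf_spill_stats` keywords on `LocalCUDACluster`, and the `CUDFSetup` worker plugin that applies them on each worker. The following is a minimal illustrative sketch of how those keywords would be exercised, modeled on the `test_cudf_spill` test added in `dask_cuda/tests/test_local_cuda_cluster.py`; it assumes dask-cuda 24.08 and cudf are installed and a CUDA-capable GPU is available, and is not itself part of this changeset.

    # Illustrative sketch only: enable the cuDF spilling options introduced in
    # dask-cuda 24.08 and verify that the CUDFSetup plugin applied them on workers.
    from dask.distributed import Client

    from dask_cuda import LocalCUDACluster


    def main():
        # enable_cudf_spill / cudf_spill_stats are the new LocalCUDACluster keywords;
        # per the warning added in cuda_worker.py, do not combine them with JIT-Unspill.
        with LocalCUDACluster(enable_cudf_spill=True, cudf_spill_stats=2) as cluster:
            with Client(cluster) as client:
                import cudf

                # client.run returns a dict mapping worker address -> option value.
                spill = client.run(cudf.get_option, "spill")
                spill_stats = client.run(cudf.get_option, "spill_stats")
                assert all(spill.values())
                assert all(v == 2 for v in spill_stats.values())


    if __name__ == "__main__":
        main()

The same options are exposed on the command line as `dask cuda worker --enable-cudf-spill --cudf-spill-stats 2`, as exercised by the new `test_cudf_spill` test in `dask_cuda/tests/test_dask_cuda_worker.py`.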