Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use cuda::mr::memory_resource instead of raw device_memory_resource #1095

Merged
merged 37 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
90adef2
Add cccl repo to build dependencies
miscco Sep 8, 2023
a49fe9c
PoC for the new design of `cuda::mr::{async_}resource_ref`
miscco Sep 8, 2023
0e78d9e
Make `pinned_memory_resource` usable for `pool_memory_resource`
miscco Sep 8, 2023
4d25841
Port `device_buffer` and `device_uvector` to the new interface
miscco Sep 8, 2023
de71dcc
Add cccl repo to build dependencies
miscco Sep 8, 2023
13a990f
PoC for the new design of `cuda::mr::{async_}resource_ref`
miscco Sep 8, 2023
9ceab83
Make `pinned_memory_resource` usable for `pool_memory_resource`
miscco Sep 8, 2023
c0df290
Port `device_buffer` and `device_uvector` to the new interface
miscco Sep 8, 2023
8b1bc04
Use upstream patch for memory resource.
bdice Oct 17, 2023
bccabd1
Add target_link_libraries for libcudacxx.
bdice Oct 17, 2023
5b27e10
Revert "Use upstream patch for memory resource."
bdice Oct 17, 2023
194da78
Merge branch 'memory_resource' of github.com:miscco/rmm into memory_r…
miscco Oct 18, 2023
6e610a3
Add restrictions on memory usage for tests
miscco Oct 18, 2023
edb8f78
Remove `get_current_device_resource_ref`
miscco Oct 18, 2023
4ea703e
Make CI happy
miscco Oct 18, 2023
e9a5ace
Merge branch 'branch-23.12' into memory_resource
harrism Oct 25, 2023
df9c6e9
Merge branch 'branch-23.12' into memory_resource
miscco Nov 1, 2023
8e7c91b
Address review comments
miscco Nov 1, 2023
849c880
Address alignment differences with `cuda::mr::resource_ref`
miscco Nov 1, 2023
2cf03a7
Roll back the changes to `device_u{buffer, vector}`
miscco Nov 2, 2023
76d55fa
Address review comments
miscco Nov 2, 2023
3390fb8
Revert "Roll back the changes to `device_u{buffer, vector}`"
miscco Nov 3, 2023
c0c8504
Merge branch 'branch-23.12' into memory_resource
miscco Nov 3, 2023
559a674
Add a check that we do not store unnecessary data inside a `async_res…
miscco Nov 7, 2023
c1f36d8
Properly test that a `device_memory_resource` is an `async_resource`
miscco Nov 7, 2023
1d422b5
Also port `rmm::mr::thrust_allocator` to `async_resource_ref``
miscco Nov 7, 2023
17021f1
Merge branch 'branch-23.12' into memory_resource
miscco Nov 7, 2023
e867442
Do not put `rmm::bad_alloc` in quotes
miscco Nov 7, 2023
73efc2e
Try to avoid namespaces in comments
miscco Nov 7, 2023
c298fbc
Tell sphinx to skipp documenting libcu++ names
miscco Nov 7, 2023
41c1bea
Merge branch 'branch-23.12' into memory_resource
harrism Nov 15, 2023
80201bd
Update copyright years.
bdice Nov 15, 2023
5937e3e
Fix typos and grammar.
bdice Nov 15, 2023
baf99f3
Drop support for CUDA older than CUDA 11 in tests.
bdice Nov 15, 2023
af12733
Do not propagate the `device_accessible` property for containers
miscco Nov 16, 2023
37c1e3f
Fix deadlock in new tests using fix from #1097
harrism Nov 16, 2023
67d1bdc
Merge branch 'memory_resource' of https://github.com/miscco/rmm into …
harrism Nov 16, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ rapids_cpm_init()

include(cmake/thirdparty/get_fmt.cmake)
include(cmake/thirdparty/get_spdlog.cmake)
include(cmake/thirdparty/get_libcudacxx.cmake)
miscco marked this conversation as resolved.
Show resolved Hide resolved
include(cmake/thirdparty/get_thrust.cmake)

# ##################################################################################################
Expand All @@ -89,11 +90,13 @@ else()
target_link_libraries(rmm INTERFACE CUDA::cudart)
endif()

target_link_libraries(rmm INTERFACE libcudacxx::libcudacxx)
target_link_libraries(rmm INTERFACE rmm::Thrust)
target_link_libraries(rmm INTERFACE fmt::fmt-header-only)
target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only)
target_link_libraries(rmm INTERFACE dl)
target_compile_features(rmm INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>)
target_compile_definitions(rmm INTERFACE LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)

# ##################################################################################################
# * tests and benchmarks ---------------------------------------------------------------------------
Expand Down
23 changes: 23 additions & 0 deletions cmake/thirdparty/get_libcudacxx.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# =============================================================================
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Use CPM to find or clone libcudacxx
function(find_and_configure_libcudacxx)

include(${rapids-cmake-dir}/cpm/libcudacxx.cmake)
rapids_cpm_libcudacxx(BUILD_EXPORT_SET rmm-exports INSTALL_EXPORT_SET rmm-exports)

endfunction()

find_and_configure_libcudacxx()
16 changes: 16 additions & 0 deletions include/rmm/cuda_stream_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

#include <cuda_runtime_api.h>

#include <cuda/stream_ref>

#include <atomic>
#include <cstddef>
#include <cstdint>
Expand Down Expand Up @@ -58,6 +60,13 @@ class cuda_stream_view {
*/
constexpr cuda_stream_view(cudaStream_t stream) noexcept : stream_{stream} {}

/**
* @brief Implicit conversion from stream_ref.
*
* @param stream The underlying stream for this view
*/
constexpr cuda_stream_view(cuda::stream_ref stream) noexcept : stream_{stream.get()} {}

/**
* @brief Get the wrapped stream.
*
Expand All @@ -72,6 +81,13 @@ class cuda_stream_view {
*/
constexpr operator cudaStream_t() const noexcept { return value(); }

/**
* @brief Implicit conversion to stream_ref.
*
* @return stream_ref The underlying stream referenced by this cuda_stream_view
*/
constexpr operator cuda::stream_ref() const noexcept { return value(); }

/**
* @briefreturn{true if the wrapped stream is the CUDA per-thread default stream}
*/
Expand Down
26 changes: 15 additions & 11 deletions include/rmm/device_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#include <stdexcept>
#include <utility>

#include <cuda/memory_resource>

namespace rmm {
/**
* @addtogroup data_containers
Expand Down Expand Up @@ -80,6 +82,8 @@ namespace rmm {
*```
*/
class device_buffer {
using async_resource_ref = cuda::mr::async_resource_ref<cuda::mr::device_accessible>;

public:
// The copy constructor and copy assignment operator without a stream are deleted because they
// provide no way to specify an explicit stream
Expand Down Expand Up @@ -107,7 +111,7 @@ class device_buffer {
*/
explicit device_buffer(std::size_t size,
cuda_stream_view stream,
mr::device_memory_resource* mr = mr::get_current_device_resource())
async_resource_ref mr = mr::get_current_device_resource())
: _stream{stream}, _mr{mr}
{
cuda_set_device_raii dev{_device};
Expand Down Expand Up @@ -136,7 +140,7 @@ class device_buffer {
device_buffer(void const* source_data,
std::size_t size,
cuda_stream_view stream,
mr::device_memory_resource* mr = mr::get_current_device_resource())
async_resource_ref mr = rmm::mr::get_current_device_resource())
: _stream{stream}, _mr{mr}
{
cuda_set_device_raii dev{_device};
Expand Down Expand Up @@ -167,7 +171,7 @@ class device_buffer {
*/
device_buffer(device_buffer const& other,
cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
async_resource_ref mr = rmm::mr::get_current_device_resource())
: device_buffer{other.data(), other.size(), stream, mr}
{
}
Expand Down Expand Up @@ -245,7 +249,6 @@ class device_buffer {
{
cuda_set_device_raii dev{_device};
deallocate_async();
_mr = nullptr;
_stream = cuda_stream_view{};
}

Expand Down Expand Up @@ -407,18 +410,19 @@ class device_buffer {
void set_stream(cuda_stream_view stream) noexcept { _stream = stream; }

/**
* @briefreturn{Pointer to the memory resource used to allocate and deallocate}
* @briefreturn{The async_resource_ref used to allocate and deallocate}
*/
[[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept { return _mr; }
[[nodiscard]] async_resource_ref memory_resource() const noexcept { return _mr; }

private:
void* _data{nullptr}; ///< Pointer to device memory allocation
std::size_t _size{}; ///< Requested size of the device memory allocation
std::size_t _capacity{}; ///< The actual size of the device memory allocation
cuda_stream_view _stream{}; ///< Stream to use for device memory deallocation
mr::device_memory_resource* _mr{
mr::get_current_device_resource()}; ///< The memory resource used to
///< allocate/deallocate device memory

async_resource_ref _mr{
rmm::mr::get_current_device_resource()}; ///< The memory resource used to
///< allocate/deallocate device memory
cuda_device_id _device{get_current_cuda_device()};

/**
Expand All @@ -434,7 +438,7 @@ class device_buffer {
{
_size = bytes;
_capacity = bytes;
_data = (bytes > 0) ? memory_resource()->allocate(bytes, stream()) : nullptr;
_data = (bytes > 0) ? _mr.allocate_async(bytes, stream()) : nullptr;
miscco marked this conversation as resolved.
Show resolved Hide resolved
}

/**
Expand All @@ -448,7 +452,7 @@ class device_buffer {
*/
void deallocate_async() noexcept
{
if (capacity() > 0) { memory_resource()->deallocate(data(), capacity(), stream()); }
if (capacity() > 0) { _mr.deallocate_async(data(), capacity(), stream()); }
_size = 0;
_capacity = 0;
_data = nullptr;
Expand Down
21 changes: 11 additions & 10 deletions include/rmm/device_uvector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#include <cstddef>
#include <vector>

#include <cuda/memory_resource>

namespace rmm {
/**
* @addtogroup data_containers
Expand Down Expand Up @@ -72,6 +74,7 @@ namespace rmm {
*/
template <typename T>
class device_uvector {
using async_resource_ref = cuda::mr::async_resource_ref<cuda::mr::device_accessible>;
static_assert(std::is_trivially_copyable<T>::value,
"device_uvector only supports types that are trivially copyable.");

Expand Down Expand Up @@ -121,10 +124,9 @@ class device_uvector {
* @param stream The stream on which to perform the allocation
* @param mr The resource used to allocate the device storage
*/
explicit device_uvector(
std::size_t size,
cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
explicit device_uvector(std::size_t size,
cuda_stream_view stream,
async_resource_ref mr = rmm::mr::get_current_device_resource())
: _storage{elements_to_bytes(size), stream, mr}
{
}
Expand All @@ -138,10 +140,9 @@ class device_uvector {
* @param stream The stream on which to perform the copy
* @param mr The resource used to allocate device memory for the new vector
*/
explicit device_uvector(
device_uvector const& other,
cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
explicit device_uvector(device_uvector const& other,
cuda_stream_view stream,
async_resource_ref mr = rmm::mr::get_current_device_resource())
: _storage{other._storage, stream, mr}
{
}
Expand Down Expand Up @@ -524,9 +525,9 @@ class device_uvector {
[[nodiscard]] bool is_empty() const noexcept { return size() == 0; }

/**
* @briefreturn{Pointer to underlying resource used to allocate and deallocate the device storage}
* @briefreturn{The async_resource_ref used to allocate and deallocate the device storage}
*/
[[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept
[[nodiscard]] async_resource_ref memory_resource() const noexcept
{
return _storage.memory_resource();
}
Expand Down
4 changes: 2 additions & 2 deletions include/rmm/mr/device/callback_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ class callback_memory_resource final : public device_memory_resource {
throw std::runtime_error("cannot get free / total memory");
}

[[nodiscard]] virtual bool supports_streams() const noexcept { return false; }
[[nodiscard]] virtual bool supports_get_mem_info() const noexcept { return false; }
[[nodiscard]] bool supports_streams() const noexcept override { return false; }
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }

allocate_callback_t allocate_callback_;
deallocate_callback_t deallocate_callback_;
Expand Down
Loading