From eb39019d55615160766af37ff6b36d51c23522a6 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 30 May 2024 16:24:30 -0700 Subject: [PATCH 01/31] remove pinned_host_vector --- cpp/benchmarks/io/text/multibyte_split.cpp | 8 +- .../detail/utilities/pinned_host_vector.hpp | 216 ------------------ .../cudf/detail/utilities/rmm_host_vector.hpp | 8 +- .../detail/utilities/vector_factories.hpp | 38 ++- cpp/src/io/csv/reader_impl.cu | 1 + cpp/src/io/orc/reader_impl_chunking.cu | 1 + cpp/src/io/orc/writer_impl.cu | 5 +- cpp/src/io/parquet/reader_impl_helpers.cpp | 2 + cpp/src/io/parquet/writer_impl.cu | 3 +- cpp/src/io/text/bgzip_data_chunk_source.cu | 16 +- .../io/text/data_chunk_source_factories.cpp | 23 +- cpp/src/io/utilities/hostdevice_vector.hpp | 4 +- 12 files changed, 77 insertions(+), 248 deletions(-) delete mode 100644 cpp/include/cudf/detail/utilities/pinned_host_vector.hpp diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index b5d855d8881..172182c3607 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -132,9 +131,10 @@ static void bench_multibyte_split(nvbench::state& state, auto const delim_factor = static_cast(delim_percent) / 100; std::unique_ptr datasource; - auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim); - auto host_input = std::vector{}; - auto host_pinned_input = cudf::detail::pinned_host_vector{}; + auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim); + auto host_input = std::vector{}; + auto host_pinned_input = + cudf::detail::make_pinned_vector_async(0, cudf::get_default_stream()); if (source_type != data_chunk_source_type::device && source_type != data_chunk_source_type::host_pinned) { diff --git a/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp b/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp deleted file mode 100644 index c22b6a6ba15..00000000000 --- a/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) 2008-2024, NVIDIA CORPORATION - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -#include -#include -#include // for bad_alloc - -namespace cudf::detail { - -/*! \p pinned_allocator is a CUDA-specific host memory allocator - * that employs \c cudaMallocHost for allocation. - * - * This implementation is ported from the experimental/pinned_allocator - * that Thrust used to provide. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template -class pinned_allocator; - -/*! \p pinned_allocator is a CUDA-specific host memory allocator - * that employs \c cudaMallocHost for allocation. - * - * This implementation is ported from the experimental/pinned_allocator - * that Thrust used to provide. 
- * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template <> -class pinned_allocator { - public: - using value_type = void; ///< The type of the elements in the allocator - using pointer = void*; ///< The type returned by address() / allocate() - using const_pointer = void const*; ///< The type returned by address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers - - /** - * @brief converts a `pinned_allocator` to `pinned_allocator` - */ - template - struct rebind { - using other = pinned_allocator; ///< The rebound type - }; -}; - -/*! \p pinned_allocator is a CUDA-specific host memory allocator - * that employs \c cudaMallocHost for allocation. - * - * This implementation is ported from the experimental/pinned_allocator - * that Thrust used to provide. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template -class pinned_allocator { - public: - using value_type = T; ///< The type of the elements in the allocator - using pointer = T*; ///< The type returned by address() / allocate() - using const_pointer = T const*; ///< The type returned by address() - using reference = T&; ///< The parameter type for address() - using const_reference = T const&; ///< The parameter type for address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers - - /** - * @brief converts a `pinned_allocator` to `pinned_allocator` - */ - template - struct rebind { - using other = pinned_allocator; ///< The rebound type - }; - - /** - * @brief pinned_allocator's null constructor does nothing. - */ - __host__ __device__ inline pinned_allocator() {} - - /** - * @brief pinned_allocator's null destructor does nothing. - */ - __host__ __device__ inline ~pinned_allocator() {} - - /** - * @brief pinned_allocator's copy constructor does nothing. - */ - __host__ __device__ inline pinned_allocator(pinned_allocator const&) {} - - /** - * @brief pinned_allocator's copy constructor does nothing. - * - * This version of pinned_allocator's copy constructor - * is templated on the \c value_type of the pinned_allocator - * to copy from. It is provided merely for convenience; it - * does nothing. - */ - template - __host__ __device__ inline pinned_allocator(pinned_allocator const&) - { - } - - /** - * @brief This method returns the address of a \c reference of - * interest. - * - * @param r The \c reference of interest. - * @return \c r's address. - */ - __host__ __device__ inline pointer address(reference r) { return &r; } - - /** - * @brief This method returns the address of a \c const_reference - * of interest. - * - * @param r The \c const_reference of interest. - * @return \c r's address. - */ - __host__ __device__ inline const_pointer address(const_reference r) { return &r; } - - /** - * @brief This method allocates storage for objects in pinned host - * memory. - * - * @param cnt The number of objects to allocate. - * @return a \c pointer to the newly allocated objects. - * @note The second parameter to this function is meant as a - * hint pointer to a nearby memory location, but is - * not used by this allocator. - * @note This method does not invoke \p value_type's constructor. - * It is the responsibility of the caller to initialize the - * objects at the returned \c pointer. 
- */ - __host__ inline pointer allocate(size_type cnt, const_pointer /*hint*/ = 0) - { - if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if - - pointer result(0); - CUDF_CUDA_TRY(cudaMallocHost(reinterpret_cast(&result), cnt * sizeof(value_type))); - return result; - } - - /** - * @brief This method deallocates pinned host memory previously allocated - * with this \c pinned_allocator. - * - * @param p A \c pointer to the previously allocated memory. - * @note The second parameter is the number of objects previously allocated - * but is ignored by this allocator. - * @note This method does not invoke \p value_type's destructor. - * It is the responsibility of the caller to destroy - * the objects stored at \p p. - */ - __host__ inline void deallocate(pointer p, size_type /*cnt*/) - { - auto dealloc_worked = cudaFreeHost(p); - (void)dealloc_worked; - assert(dealloc_worked == cudaSuccess); - } - - /** - * @brief This method returns the maximum size of the \c cnt parameter - * accepted by the \p allocate() method. - * - * @return The maximum number of objects that may be allocated - * by a single call to \p allocate(). - */ - inline size_type max_size() const { return (std::numeric_limits::max)() / sizeof(T); } - - /** - * @brief This method tests this \p pinned_allocator for equality to - * another. - * - * @param x The other \p pinned_allocator of interest. - * @return This method always returns \c true. - */ - __host__ __device__ inline bool operator==(pinned_allocator const& x) const { return true; } - - /** - * @brief This method tests this \p pinned_allocator for inequality - * to another. - * - * @param x The other \p pinned_allocator of interest. - * @return This method always returns \c false. - */ - __host__ __device__ inline bool operator!=(pinned_allocator const& x) const - { - return !operator==(x); - } -}; - -/** - * @brief A vector class with pinned host memory allocator - */ -template -using pinned_host_vector = thrust::host_vector>; - -} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp b/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp index 6901a19473e..6604020c224 100644 --- a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -32,8 +33,6 @@ namespace cudf::detail { /*! \p rmm_host_allocator is a CUDA-specific host memory allocator * that employs \c a `rmm::host_async_resource_ref` for allocation. * - * This implementation is ported from pinned_host_vector in cudf. - * * \see https://en.cppreference.com/w/cpp/memory/allocator */ template @@ -42,8 +41,6 @@ class rmm_host_allocator; /*! \p rmm_host_allocator is a CUDA-specific host memory allocator * that employs \c an `cudf::host_async_resource_ref` for allocation. * - * This implementation is ported from pinned_host_vector in cudf. - * * \see https://en.cppreference.com/w/cpp/memory/allocator */ template <> @@ -70,8 +67,7 @@ class rmm_host_allocator { * The \p rmm_host_allocator provides an interface for host memory allocation through the user * provided \c `rmm::host_async_resource_ref`. The \p rmm_host_allocator does not take ownership of * this reference and therefore it is the user's responsibility to ensure its lifetime for the - * duration of the lifetime of the \p rmm_host_allocator. This implementation is ported from - * pinned_host_vector in cudf. + * duration of the lifetime of the \p rmm_host_allocator. 
* * \see https://en.cppreference.com/w/cpp/memory/allocator */ diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 293a4096c57..6f2287fc1c8 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -21,6 +21,8 @@ * @file vector_factories.hpp */ +#include +#include #include #include #include @@ -380,7 +382,7 @@ thrust::host_vector make_host_vector_async(device_span v, rmm::cuda_ * @brief Asynchronously construct a `std::vector` containing a copy of data from a device * container * - * @note This function synchronizes `stream`. + * @note This function does not synchronize `stream`. * * @tparam Container The type of the container to copy from * @tparam T The type of the data to copy @@ -439,6 +441,40 @@ thrust::host_vector make_host_vector_sync( return make_host_vector_sync(device_span{c}, stream); } +/** + * @brief Asynchronously construct a `rmm_host_vector` of the given size + * + * @note This function does not synchronize `stream`. + * + * @tparam T The type of the vector data + * @param size The number of elements in the created vector + * @param stream The stream on which to allocate memory + * @return A rmm_host_vector of the given size + */ +template +rmm_host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) +{ + return rmm_host_vector(size, {cudf::io::get_host_memory_resource(), stream}); +} + +/** + * @brief Synchronously construct a `rmm_host_vector` of the given size + * + * @note This function synchronizes `stream`. + * + * @tparam T The type of the vector data + * @param size The number of elements in the created vector + * @param stream The stream on which to allocate memory + * @return A rmm_host_vector of the given size + */ +template +rmm_host_vector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) +{ + auto result = make_pinned_vector_async(size, stream); + stream.synchronize(); + return result; +} + } // namespace detail } // namespace cudf diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 5dee0c17a33..05faded651d 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -27,6 +27,7 @@ #include "io/utilities/parsing_utils.cuh" #include +#include #include #include #include diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 5034aa14a95..43301826003 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -22,6 +22,7 @@ #include #include +#include #include #include diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 344e216cdc8..e9e031a407a 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -2339,7 +2338,7 @@ auto convert_table_to_orc_data(table_view const& input, std::move(streams), std::move(stripes), std::move(stripe_dicts.views), - cudf::detail::pinned_host_vector()}; + cudf::detail::make_pinned_vector_async(0, stream)}; } // Allocate intermediate output stream buffer @@ -2407,7 +2406,7 @@ auto convert_table_to_orc_data(table_view const& input, return max_stream_size; }(); - cudf::detail::pinned_host_vector bounce_buffer(max_out_stream_size); + auto bounce_buffer = cudf::detail::make_pinned_vector_async(max_out_stream_size, stream); auto intermediate_stats = gather_statistic_blobs(stats_freq, 
orc_table, segmentation, stream); diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index eb653c6b9ac..9de8a9e2719 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -23,6 +23,8 @@ #include "ipc/Message_generated.h" #include "ipc/Schema_generated.h" +#include + #include #include diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 1dfced94f5b..6d466748c17 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -2278,7 +2277,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, } auto bounce_buffer = - cudf::detail::pinned_host_vector(all_device_write ? 0 : max_write_size); + cudf::detail::make_pinned_vector_async(all_device_write ? 0 : max_write_size, stream); return std::tuple{std::move(agg_meta), std::move(pages), diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index faa09e586ab..190015686df 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -14,13 +14,14 @@ * limitations under the License. */ +#include "cudf/detail/utilities/vector_factories.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/text/device_data_chunks.hpp" #include "io/utilities/config_utils.hpp" #include #include -#include +#include #include #include #include @@ -66,7 +67,7 @@ struct bgzip_nvcomp_transform_functor { class bgzip_data_chunk_reader : public data_chunk_reader { private: template - static void copy_to_device(cudf::detail::pinned_host_vector const& host, + static void copy_to_device(cudf::detail::rmm_host_vector const& host, rmm::device_uvector& device, rmm::cuda_stream_view stream) { @@ -84,9 +85,9 @@ class bgzip_data_chunk_reader : public data_chunk_reader { 1 << 16; // 64k offset allocation, resized on demand cudaEvent_t event; - cudf::detail::pinned_host_vector h_compressed_blocks; - cudf::detail::pinned_host_vector h_compressed_offsets; - cudf::detail::pinned_host_vector h_decompressed_offsets; + cudf::detail::rmm_host_vector h_compressed_blocks; + cudf::detail::rmm_host_vector h_compressed_offsets; + cudf::detail::rmm_host_vector h_decompressed_offsets; rmm::device_uvector d_compressed_blocks; rmm::device_uvector d_decompressed_blocks; rmm::device_uvector d_compressed_offsets; @@ -103,7 +104,10 @@ class bgzip_data_chunk_reader : public data_chunk_reader { bool is_decompressed{}; decompression_blocks(rmm::cuda_stream_view init_stream) - : d_compressed_blocks(0, init_stream), + : h_compressed_blocks{cudf::detail::make_pinned_vector_async(0, init_stream)}, + h_compressed_offsets{cudf::detail::make_pinned_vector_async(0, init_stream)}, + h_decompressed_offsets{cudf::detail::make_pinned_vector_async(0, init_stream)}, + d_compressed_blocks(0, init_stream), d_decompressed_blocks(0, init_stream), d_compressed_offsets(0, init_stream), d_decompressed_offsets(0, init_stream), diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 9d1d0498ace..8278b2c25cb 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -14,10 +14,11 @@ * limitations under the License. 
*/ +#include "cudf/detail/utilities/vector_factories.hpp" #include "io/text/device_data_chunks.hpp" #include -#include +#include #include #include @@ -32,7 +33,7 @@ namespace { struct host_ticket { cudaEvent_t event; - cudf::detail::pinned_host_vector buffer; + std::unique_ptr> buffer; }; /** @@ -84,13 +85,16 @@ class datasource_chunk_reader : public data_chunk_reader { CUDF_CUDA_TRY(cudaEventSynchronize(h_ticket.event)); // resize the host buffer as necessary to contain the requested number of bytes - if (h_ticket.buffer.size() < read_size) { h_ticket.buffer.resize(read_size); } + if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { + h_ticket.buffer = std::make_unique>( + cudf::detail::make_pinned_vector_sync(read_size, stream)); + } - _source->host_read(_offset, read_size, reinterpret_cast(h_ticket.buffer.data())); + _source->host_read(_offset, read_size, reinterpret_cast(h_ticket.buffer->data())); // copy the host-pinned data on to device CUDF_CUDA_TRY(cudaMemcpyAsync( - chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyDefault, stream.value())); + chunk.data(), h_ticket.buffer->data(), read_size, cudaMemcpyDefault, stream.value())); // record the host-to-device copy. CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); @@ -148,10 +152,13 @@ class istream_data_chunk_reader : public data_chunk_reader { CUDF_CUDA_TRY(cudaEventSynchronize(h_ticket.event)); // resize the host buffer as necessary to contain the requested number of bytes - if (h_ticket.buffer.size() < read_size) { h_ticket.buffer.resize(read_size); } + if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { + h_ticket.buffer = std::make_unique>( + cudf::detail::make_pinned_vector_sync(read_size, stream)); + } // read data from the host istream in to the pinned host memory buffer - _datastream->read(h_ticket.buffer.data(), read_size); + _datastream->read(h_ticket.buffer->data(), read_size); // adjust the read size to reflect how many bytes were actually read from the data stream read_size = _datastream->gcount(); @@ -161,7 +168,7 @@ class istream_data_chunk_reader : public data_chunk_reader { // copy the host-pinned data on to device CUDF_CUDA_TRY(cudaMemcpyAsync( - chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyDefault, stream.value())); + chunk.data(), h_ticket.buffer->data(), read_size, cudaMemcpyDefault, stream.value())); // record the host-to-device copy. 
CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index 0883ac3609f..492084bd5bc 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -16,10 +16,10 @@ #pragma once -#include "config_utils.hpp" #include "hostdevice_span.hpp" #include +#include #include #include #include @@ -53,7 +53,7 @@ class hostdevice_vector { } explicit hostdevice_vector(size_t initial_size, size_t max_size, rmm::cuda_stream_view stream) - : h_data({cudf::io::get_host_memory_resource(), stream}), d_data(max_size, stream) + : h_data{make_pinned_vector_async(0, stream)}, d_data(max_size, stream) { CUDF_EXPECTS(initial_size <= max_size, "initial_size cannot be larger than max_size"); From 24b12451ffea31766d01c9cbe8d4e10bbe3734be Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 30 May 2024 16:54:36 -0700 Subject: [PATCH 02/31] switch to host_device resource ref --- cpp/benchmarks/fixture/nvbench_fixture.hpp | 5 +++-- cpp/include/cudf/io/memory_resource.hpp | 7 +++---- cpp/src/io/utilities/config_utils.cpp | 21 +++++++++++---------- cpp/src/io/utilities/hostdevice_vector.hpp | 1 - 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp index ebcbcb17e98..3a5a9bfd2fa 100644 --- a/cpp/benchmarks/fixture/nvbench_fixture.hpp +++ b/cpp/benchmarks/fixture/nvbench_fixture.hpp @@ -81,14 +81,15 @@ struct nvbench_base_fixture { "\nExpecting: cuda, pool, async, arena, managed, or managed_pool"); } - inline rmm::host_async_resource_ref make_cuio_host_pinned() + inline rmm::host_device_async_resource_ref make_cuio_host_pinned() { static std::shared_ptr mr = std::make_shared(); return *mr; } - inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode) + inline rmm::host_device_async_resource_ref create_cuio_host_memory_resource( + std::string const& mode) { if (mode == "pinned") return make_cuio_host_pinned(); if (mode == "pinned_pool") return cudf::io::get_host_memory_resource(); diff --git a/cpp/include/cudf/io/memory_resource.hpp b/cpp/include/cudf/io/memory_resource.hpp index a36e220ae7b..2af5755f824 100644 --- a/cpp/include/cudf/io/memory_resource.hpp +++ b/cpp/include/cudf/io/memory_resource.hpp @@ -33,7 +33,8 @@ namespace cudf::io { * @param mr The rmm resource to be used for host-side allocations * @return The previous resource that was in use */ -rmm::host_async_resource_ref set_host_memory_resource(rmm::host_async_resource_ref mr); +rmm::host_device_async_resource_ref set_host_memory_resource( + rmm::host_device_async_resource_ref mr); /** * @brief Get the rmm resource being used for host memory allocations by @@ -41,7 +42,7 @@ rmm::host_async_resource_ref set_host_memory_resource(rmm::host_async_resource_r * * @return The rmm resource used for host-side allocations */ -rmm::host_async_resource_ref get_host_memory_resource(); +rmm::host_device_async_resource_ref get_host_memory_resource(); /** * @brief Options to configure the default host memory resource @@ -54,8 +55,6 @@ struct host_mr_options { /** * @brief Configure the size of the default host memory resource. 
* - * @throws cudf::logic_error if called after the default host memory resource has been created - * * @param opts Options to configure the default host memory resource * @return True if this call successfully configured the host memory resource, false if a * a resource was already configured. diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index dad1135e766..25649d17c76 100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -209,7 +209,8 @@ static_assert(cuda::mr::resource_with config_size) +CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( + std::optional config_size) { static fixed_pinned_pool_memory_resource mr = [config_size]() { auto const size = [&config_size]() -> size_t { @@ -233,7 +234,7 @@ CUDF_EXPORT rmm::host_async_resource_ref& make_default_pinned_mr(std::optional const& opts, - bool* did_configure = nullptr) +CUDF_EXPORT rmm::host_device_async_resource_ref& make_host_mr( + std::optional const& opts, bool* did_configure = nullptr) { - static rmm::host_async_resource_ref* mr_ref = nullptr; - bool configured = false; + static rmm::host_device_async_resource_ref* mr_ref = nullptr; + bool configured = false; if (mr_ref == nullptr) { configured = true; mr_ref = &make_default_pinned_mr(opts ? opts->pool_size : std::nullopt); @@ -262,13 +263,13 @@ CUDF_EXPORT rmm::host_async_resource_ref& make_host_mr(std::optional #include -#include #include #include #include From 6c896f6ebf2d6177f62903dd719cebf88da08565 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 30 May 2024 18:07:00 -0700 Subject: [PATCH 03/31] rebrand host memory resource --- cpp/CMakeLists.txt | 1 + cpp/benchmarks/fixture/nvbench_fixture.hpp | 8 +- .../io/parquet/parquet_reader_multithread.cpp | 2 +- .../detail/utilities/vector_factories.hpp | 4 +- cpp/include/cudf/io/memory_resource.hpp | 64 ----- cpp/include/cudf/utilities/pinned_memory.hpp | 58 +++++ cpp/src/io/utilities/config_utils.cpp | 215 +--------------- cpp/src/utilities/pinned_memory.cpp | 230 ++++++++++++++++++ cpp/tests/io/json_test.cpp | 6 +- .../utilities_tests/io_utilities_tests.cpp | 8 +- 10 files changed, 305 insertions(+), 291 deletions(-) delete mode 100644 cpp/include/cudf/io/memory_resource.hpp create mode 100644 cpp/include/cudf/utilities/pinned_memory.hpp create mode 100644 cpp/src/utilities/pinned_memory.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f637db66c2c..8a0f56aec53 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -665,6 +665,7 @@ add_library( src/utilities/default_stream.cpp src/utilities/linked_column.cpp src/utilities/logger.cpp + src/utilities/pinned_memory.cpp src/utilities/stacktrace.cpp src/utilities/stream_pool.cpp src/utilities/traits.cpp diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp index 3a5a9bfd2fa..df1492690bb 100644 --- a/cpp/benchmarks/fixture/nvbench_fixture.hpp +++ b/cpp/benchmarks/fixture/nvbench_fixture.hpp @@ -15,8 +15,8 @@ */ #pragma once -#include #include +#include #include #include @@ -92,7 +92,7 @@ struct nvbench_base_fixture { std::string const& mode) { if (mode == "pinned") return make_cuio_host_pinned(); - if (mode == "pinned_pool") return cudf::io::get_host_memory_resource(); + if (mode == "pinned_pool") return cudf::get_pinned_memory_resource(); CUDF_FAIL("Unknown cuio_host_mem parameter: " + mode + "\nExpecting: pinned or pinned_pool"); } @@ -113,14 +113,14 @@ struct nvbench_base_fixture { 
rmm::mr::set_current_device_resource(mr.get()); std::cout << "RMM memory resource = " << rmm_mode << "\n"; - cudf::io::set_host_memory_resource(create_cuio_host_memory_resource(cuio_host_mode)); + cudf::set_pinned_memory_resource(create_cuio_host_memory_resource(cuio_host_mode)); std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n"; } ~nvbench_base_fixture() { // Ensure the the pool is freed before the CUDA context is destroyed: - cudf::io::set_host_memory_resource(this->make_cuio_host_pinned()); + cudf::set_pinned_memory_resource(this->make_cuio_host_pinned()); } std::shared_ptr mr; diff --git a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp index bd80c4e0e88..f340b4aeb7a 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp @@ -20,9 +20,9 @@ #include #include -#include #include #include +#include #include #include diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 6f2287fc1c8..7343ee25c8f 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -22,9 +22,9 @@ */ #include -#include #include #include +#include #include #include @@ -454,7 +454,7 @@ thrust::host_vector make_host_vector_sync( template rmm_host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) { - return rmm_host_vector(size, {cudf::io::get_host_memory_resource(), stream}); + return rmm_host_vector(size, {cudf::get_pinned_memory_resource(), stream}); } /** diff --git a/cpp/include/cudf/io/memory_resource.hpp b/cpp/include/cudf/io/memory_resource.hpp deleted file mode 100644 index 2af5755f824..00000000000 --- a/cpp/include/cudf/io/memory_resource.hpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -namespace cudf::io { - -/** - * @brief Set the rmm resource to be used for host memory allocations by - * cudf::detail::hostdevice_vector - * - * hostdevice_vector is a utility class that uses a pair of host and device-side buffers for - * bouncing state between the cpu and the gpu. The resource set with this function (typically a - * pinned memory allocator) is what it uses to allocate space for it's host-side buffer. 
- * - * @param mr The rmm resource to be used for host-side allocations - * @return The previous resource that was in use - */ -rmm::host_device_async_resource_ref set_host_memory_resource( - rmm::host_device_async_resource_ref mr); - -/** - * @brief Get the rmm resource being used for host memory allocations by - * cudf::detail::hostdevice_vector - * - * @return The rmm resource used for host-side allocations - */ -rmm::host_device_async_resource_ref get_host_memory_resource(); - -/** - * @brief Options to configure the default host memory resource - */ -struct host_mr_options { - std::optional pool_size; ///< The size of the pool to use for the default host memory - ///< resource. If not set, the default pool size is used. -}; - -/** - * @brief Configure the size of the default host memory resource. - * - * @param opts Options to configure the default host memory resource - * @return True if this call successfully configured the host memory resource, false if a - * a resource was already configured. - */ -bool config_default_host_memory_resource(host_mr_options const& opts); - -} // namespace cudf::io diff --git a/cpp/include/cudf/utilities/pinned_memory.hpp b/cpp/include/cudf/utilities/pinned_memory.hpp new file mode 100644 index 00000000000..b423eab6d38 --- /dev/null +++ b/cpp/include/cudf/utilities/pinned_memory.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace cudf { + +/** + * @brief Set the rmm resource to be used for pinned memory allocations. + * + * @param mr The rmm resource to be used for pinned allocations + * @return The previous resource that was in use + */ +rmm::host_device_async_resource_ref set_pinned_memory_resource( + rmm::host_device_async_resource_ref mr); + +/** + * @brief Get the rmm resource being used for pinned memory allocations. + * + * @return The rmm resource used for pinned allocations + */ +rmm::host_device_async_resource_ref get_pinned_memory_resource(); + +/** + * @brief Options to configure the default pinned memory resource + */ +struct pinned_mr_options { + std::optional pool_size; ///< The size of the pool to use for the default pinned memory + ///< resource. If not set, the default pool size is used. +}; + +/** + * @brief Configure the size of the default pinned memory resource. + * + * @param opts Options to configure the default pinned memory resource + * @return True if this call successfully configured the pinned memory resource, false if a + * a resource was already configured. 
+ */ +bool config_default_pinned_memory_resource(pinned_mr_options const& opts); + +} // namespace cudf diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index 25649d17c76..20ac89b4d53 100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -16,22 +16,12 @@ #include "config_utils.hpp" -#include -#include #include -#include - -#include -#include -#include -#include #include #include -namespace cudf::io { - -namespace detail { +namespace cudf::io::detail { namespace cufile_integration { @@ -90,205 +80,4 @@ bool is_stable_enabled() { return is_all_enabled() or get_env_policy() == usage_ } // namespace nvcomp_integration -} // namespace detail - -namespace { -class fixed_pinned_pool_memory_resource { - using upstream_mr = rmm::mr::pinned_host_memory_resource; - using host_pooled_mr = rmm::mr::pool_memory_resource; - - private: - upstream_mr upstream_mr_{}; - size_t pool_size_{0}; - // Raw pointer to avoid a segfault when the pool is destroyed on exit - host_pooled_mr* pool_{nullptr}; - void* pool_begin_{nullptr}; - void* pool_end_{nullptr}; - cuda::stream_ref stream_{cudf::detail::global_cuda_stream_pool().get_stream().value()}; - - public: - fixed_pinned_pool_memory_resource(size_t size) - : pool_size_{size}, pool_{new host_pooled_mr(upstream_mr_, size, size)} - { - if (pool_size_ == 0) { return; } - - // Allocate full size from the pinned pool to figure out the beginning and end address - pool_begin_ = pool_->allocate_async(pool_size_, stream_); - pool_end_ = static_cast(static_cast(pool_begin_) + pool_size_); - pool_->deallocate_async(pool_begin_, pool_size_, stream_); - } - - void* do_allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) - { - if (bytes <= pool_size_) { - try { - return pool_->allocate_async(bytes, alignment, stream); - } catch (...) 
{ - // If the pool is exhausted, fall back to the upstream memory resource - } - } - - return upstream_mr_.allocate_async(bytes, alignment, stream); - } - - void do_deallocate_async(void* ptr, - std::size_t bytes, - std::size_t alignment, - cuda::stream_ref stream) noexcept - { - if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr <= pool_end_) { - pool_->deallocate_async(ptr, bytes, alignment, stream); - } else { - upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); - } - } - - void* allocate_async(std::size_t bytes, cuda::stream_ref stream) - { - return do_allocate_async(bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - } - - void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) - { - return do_allocate_async(bytes, alignment, stream); - } - - void* allocate(std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) - { - auto const result = do_allocate_async(bytes, alignment, stream_); - stream_.wait(); - return result; - } - - void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) noexcept - { - return do_deallocate_async(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - } - - void deallocate_async(void* ptr, - std::size_t bytes, - std::size_t alignment, - cuda::stream_ref stream) noexcept - { - return do_deallocate_async(ptr, bytes, alignment, stream); - } - - void deallocate(void* ptr, - std::size_t bytes, - std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept - { - deallocate_async(ptr, bytes, alignment, stream_); - stream_.wait(); - } - - bool operator==(fixed_pinned_pool_memory_resource const& other) const - { - return pool_ == other.pool_ and stream_ == other.stream_; - } - - bool operator!=(fixed_pinned_pool_memory_resource const& other) const - { - return !operator==(other); - } - - [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, - cuda::mr::device_accessible) noexcept - { - } - - [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, - cuda::mr::host_accessible) noexcept - { - } -}; - -static_assert(cuda::mr::resource_with, - ""); - -} // namespace - -CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( - std::optional config_size) -{ - static fixed_pinned_pool_memory_resource mr = [config_size]() { - auto const size = [&config_size]() -> size_t { - if (auto const env_val = getenv("LIBCUDF_PINNED_POOL_SIZE"); env_val != nullptr) { - return std::atol(env_val); - } - - if (config_size.has_value()) { return *config_size; } - - size_t free{}, total{}; - CUDF_CUDA_TRY(cudaMemGetInfo(&free, &total)); - // 0.5% of the total device memory, capped at 100MB - return std::min(total / 200, size_t{100} * 1024 * 1024); - }(); - - // rmm requires the pool size to be a multiple of 256 bytes - auto const aligned_size = (size + 255) & ~255; - CUDF_LOG_INFO("Pinned pool size = {}", aligned_size); - - // make the pool with max size equal to the initial size - return fixed_pinned_pool_memory_resource{aligned_size}; - }(); - - static rmm::host_device_async_resource_ref mr_ref{mr}; - return mr_ref; -} - -CUDF_EXPORT std::mutex& host_mr_mutex() -{ - static std::mutex map_lock; - return map_lock; -} - -// Must be called with the host_mr_mutex mutex held -CUDF_EXPORT rmm::host_device_async_resource_ref& make_host_mr( - std::optional const& opts, bool* did_configure = nullptr) -{ - static rmm::host_device_async_resource_ref* mr_ref = nullptr; - bool configured = false; - if (mr_ref == nullptr) { - configured = true; - 
mr_ref = &make_default_pinned_mr(opts ? opts->pool_size : std::nullopt); - } - - // If the user passed an out param to detect whether this call configured a resource - // set the result - if (did_configure != nullptr) { *did_configure = configured; } - - return *mr_ref; -} - -// Must be called with the host_mr_mutex mutex held -CUDF_EXPORT rmm::host_device_async_resource_ref& host_mr() -{ - static rmm::host_device_async_resource_ref mr_ref = make_host_mr(std::nullopt); - return mr_ref; -} - -rmm::host_device_async_resource_ref set_host_memory_resource(rmm::host_device_async_resource_ref mr) -{ - std::scoped_lock lock{host_mr_mutex()}; - auto last_mr = host_mr(); - host_mr() = mr; - return last_mr; -} - -rmm::host_device_async_resource_ref get_host_memory_resource() -{ - std::scoped_lock lock{host_mr_mutex()}; - return host_mr(); -} - -bool config_default_host_memory_resource(host_mr_options const& opts) -{ - std::scoped_lock lock{host_mr_mutex()}; - auto did_configure = false; - make_host_mr(opts, &did_configure); - return did_configure; -} - -} // namespace cudf::io +} // namespace cudf::io::detail diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp new file mode 100644 index 00000000000..23d673a1382 --- /dev/null +++ b/cpp/src/utilities/pinned_memory.cpp @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cudf { + +namespace { +class fixed_pinned_pool_memory_resource { + using upstream_mr = rmm::mr::pinned_host_memory_resource; + using host_pooled_mr = rmm::mr::pool_memory_resource; + + private: + upstream_mr upstream_mr_{}; + size_t pool_size_{0}; + // Raw pointer to avoid a segfault when the pool is destroyed on exit + host_pooled_mr* pool_{nullptr}; + void* pool_begin_{nullptr}; + void* pool_end_{nullptr}; + cuda::stream_ref stream_{cudf::detail::global_cuda_stream_pool().get_stream().value()}; + + public: + fixed_pinned_pool_memory_resource(size_t size) + : pool_size_{size}, pool_{new host_pooled_mr(upstream_mr_, size, size)} + { + if (pool_size_ == 0) { return; } + + // Allocate full size from the pinned pool to figure out the beginning and end address + pool_begin_ = pool_->allocate_async(pool_size_, stream_); + pool_end_ = static_cast(static_cast(pool_begin_) + pool_size_); + pool_->deallocate_async(pool_begin_, pool_size_, stream_); + } + + void* do_allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) + { + if (bytes <= pool_size_) { + try { + return pool_->allocate_async(bytes, alignment, stream); + } catch (...) 
{ + // If the pool is exhausted, fall back to the upstream memory resource + } + } + + return upstream_mr_.allocate_async(bytes, alignment, stream); + } + + void do_deallocate_async(void* ptr, + std::size_t bytes, + std::size_t alignment, + cuda::stream_ref stream) noexcept + { + if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr <= pool_end_) { + pool_->deallocate_async(ptr, bytes, alignment, stream); + } else { + upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); + } + } + + void* allocate_async(std::size_t bytes, cuda::stream_ref stream) + { + return do_allocate_async(bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); + } + + void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) + { + return do_allocate_async(bytes, alignment, stream); + } + + void* allocate(std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) + { + auto const result = do_allocate_async(bytes, alignment, stream_); + stream_.wait(); + return result; + } + + void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) noexcept + { + return do_deallocate_async(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); + } + + void deallocate_async(void* ptr, + std::size_t bytes, + std::size_t alignment, + cuda::stream_ref stream) noexcept + { + return do_deallocate_async(ptr, bytes, alignment, stream); + } + + void deallocate(void* ptr, + std::size_t bytes, + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept + { + deallocate_async(ptr, bytes, alignment, stream_); + stream_.wait(); + } + + bool operator==(fixed_pinned_pool_memory_resource const& other) const + { + return pool_ == other.pool_ and stream_ == other.stream_; + } + + bool operator!=(fixed_pinned_pool_memory_resource const& other) const + { + return !operator==(other); + } + + [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, + cuda::mr::device_accessible) noexcept + { + } + + [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, + cuda::mr::host_accessible) noexcept + { + } +}; + +static_assert(cuda::mr::resource_with, + ""); + +} // namespace + +CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( + std::optional config_size) +{ + static fixed_pinned_pool_memory_resource mr = [config_size]() { + auto const size = [&config_size]() -> size_t { + if (auto const env_val = getenv("LIBCUDF_PINNED_POOL_SIZE"); env_val != nullptr) { + return std::atol(env_val); + } + + if (config_size.has_value()) { return *config_size; } + + size_t free{}, total{}; + CUDF_CUDA_TRY(cudaMemGetInfo(&free, &total)); + // 0.5% of the total device memory, capped at 100MB + return std::min(total / 200, size_t{100} * 1024 * 1024); + }(); + + // rmm requires the pool size to be a multiple of 256 bytes + auto const aligned_size = (size + 255) & ~255; + CUDF_LOG_INFO("Pinned pool size = {}", aligned_size); + + // make the pool with max size equal to the initial size + return fixed_pinned_pool_memory_resource{aligned_size}; + }(); + + static rmm::host_device_async_resource_ref mr_ref{mr}; + return mr_ref; +} + +CUDF_EXPORT std::mutex& host_mr_mutex() +{ + static std::mutex map_lock; + return map_lock; +} + +// Must be called with the host_mr_mutex mutex held +CUDF_EXPORT rmm::host_device_async_resource_ref& make_host_mr( + std::optional const& opts, bool* did_configure = nullptr) +{ + static rmm::host_device_async_resource_ref* mr_ref = nullptr; + bool configured = false; + if (mr_ref == nullptr) { + configured = true; + 
mr_ref = &make_default_pinned_mr(opts ? opts->pool_size : std::nullopt); + } + + // If the user passed an out param to detect whether this call configured a resource + // set the result + if (did_configure != nullptr) { *did_configure = configured; } + + return *mr_ref; +} + +// Must be called with the host_mr_mutex mutex held +CUDF_EXPORT rmm::host_device_async_resource_ref& host_mr() +{ + static rmm::host_device_async_resource_ref mr_ref = make_host_mr(std::nullopt); + return mr_ref; +} + +rmm::host_device_async_resource_ref set_pinned_memory_resource( + rmm::host_device_async_resource_ref mr) +{ + std::scoped_lock lock{host_mr_mutex()}; + auto last_mr = host_mr(); + host_mr() = mr; + return last_mr; +} + +rmm::host_device_async_resource_ref get_pinned_memory_resource() +{ + std::scoped_lock lock{host_mr_mutex()}; + return host_mr(); +} + +bool config_default_pinned_memory_resource(pinned_mr_options const& opts) +{ + std::scoped_lock lock{host_mr_mutex()}; + auto did_configure = false; + make_host_mr(opts, &did_configure); + return did_configure; +} + +} // namespace cudf diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 9d766e80094..4b3793b22f6 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -28,13 +28,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include #include @@ -2068,7 +2068,7 @@ TEST_F(JsonReaderTest, JSONLinesRecoveringSync) size_t{128} * 1024 * 1024}; // Set new resource - auto last_mr = cudf::io::set_host_memory_resource(mr); + auto last_mr = cudf::set_pinned_memory_resource(mr); /** * @brief Spark has the specific need to ignore extra characters that come after the first record @@ -2158,7 +2158,7 @@ TEST_F(JsonReaderTest, JSONLinesRecoveringSync) float64_wrapper{c_data.cbegin(), c_data.cend(), c_validity.cbegin()}); // Restore original memory source - cudf::io::set_host_memory_resource(last_mr); + cudf::set_pinned_memory_resource(last_mr); } TEST_F(JsonReaderTest, MixedTypes) diff --git a/cpp/tests/utilities_tests/io_utilities_tests.cpp b/cpp/tests/utilities_tests/io_utilities_tests.cpp index e5a153bf781..c00f1ab8863 100644 --- a/cpp/tests/utilities_tests/io_utilities_tests.cpp +++ b/cpp/tests/utilities_tests/io_utilities_tests.cpp @@ -18,8 +18,8 @@ #include #include -#include #include +#include #include #include @@ -44,8 +44,8 @@ TEST(IoUtilitiesTest, HostMemoryGetAndSet) size_t{128} * 1024 * 1024); // set new resource - auto last_mr = cudf::io::get_host_memory_resource(); - cudf::io::set_host_memory_resource(mr); + auto last_mr = cudf::get_pinned_memory_resource(); + cudf::set_pinned_memory_resource(mr); constexpr int num_rows = 32 * 1024; auto valids = @@ -66,7 +66,7 @@ TEST(IoUtilitiesTest, HostMemoryGetAndSet) CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected); // reset memory resource back - cudf::io::set_host_memory_resource(last_mr); + cudf::set_pinned_memory_resource(last_mr); } TEST(IoUtilitiesTest, Base64EncodeAndDecode) From 0048c5951171d931b676dee3ab40ca312a7eb560 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 30 May 2024 19:09:32 -0700 Subject: [PATCH 04/31] style --- cpp/benchmarks/io/text/multibyte_split.cpp | 2 +- cpp/src/io/text/data_chunk_source_factories.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index 172182c3607..67705863d41 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ 
b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 8278b2c25cb..46149db929f 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 1964523554cd86763be1a0a6b1580f7d12016270 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 31 May 2024 10:39:55 -0700 Subject: [PATCH 05/31] java update because breaking --- java/src/main/native/src/RmmJni.cpp | 32 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp index fa78f6ca4e2..e1cb7845b77 100644 --- a/java/src/main/native/src/RmmJni.cpp +++ b/java/src/main/native/src/RmmJni.cpp @@ -16,7 +16,7 @@ #include "cudf_jni_apis.hpp" -#include +#include #include #include @@ -395,15 +395,17 @@ class java_debug_event_handler_memory_resource final : public java_event_handler } }; -inline auto& prior_cuio_host_mr() +inline auto& prior_cudf_pinned_mr() { - static rmm::host_async_resource_ref _prior_cuio_host_mr = cudf::io::get_host_memory_resource(); - return _prior_cuio_host_mr; + static rmm::host_device_async_resource_ref _prior_cudf_pinned_mr = + cudf::get_pinned_memory_resource(); + return _prior_cudf_pinned_mr; } /** * This is a pinned fallback memory resource that will try to allocate `pool` - * and if that fails, attempt to allocate from the prior resource used by cuIO `prior_cuio_host_mr`. + * and if that fails, attempt to allocate from the prior resource used by cuDF + * `prior_cudf_pinned_mr`. * * We detect whether a pointer to free is inside of the pool by checking its address (see * constructor) @@ -433,7 +435,7 @@ class pinned_fallback_host_memory_resource { /** * @brief Allocates pinned host memory of size at least \p bytes bytes from either the - * _pool argument provided, or prior_cuio_host_mr. + * _pool argument provided, or prior_cudf_pinned_mr. * * @throws rmm::bad_alloc if the requested allocation could not be fulfilled due to any other * reason. @@ -450,7 +452,7 @@ class pinned_fallback_host_memory_resource { return _pool->allocate(bytes, alignment); } catch (const std::exception& unused) { // try to allocate using the underlying pinned resource - return prior_cuio_host_mr().allocate(bytes, alignment); + return prior_cudf_pinned_mr().allocate(bytes, alignment); } // we should not reached here return nullptr; @@ -459,7 +461,7 @@ class pinned_fallback_host_memory_resource { /** * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes. We attempt * to deallocate from _pool, if ptr is detected to be in the pool address range, - * otherwise we deallocate from `prior_cuio_host_mr`. + * otherwise we deallocate from `prior_cudf_pinned_mr`. * * @param ptr Pointer to be deallocated. * @param bytes Size of the allocation. 
@@ -472,7 +474,7 @@ class pinned_fallback_host_memory_resource { if (ptr >= pool_begin_ && ptr <= pool_end_) { _pool->deallocate(ptr, bytes, alignment); } else { - prior_cuio_host_mr().deallocate(ptr, bytes, alignment); + prior_cudf_pinned_mr().deallocate(ptr, bytes, alignment); } } @@ -1025,7 +1027,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Rmm_newPinnedPoolMemoryResource(JNIE CATCH_STD(env, 0) } -JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_setCuioPinnedPoolMemoryResource(JNIEnv* env, +JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_setCudfPinnedPoolMemoryResource(JNIEnv* env, jclass clazz, jlong pool_ptr) { @@ -1035,7 +1037,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_setCuioPinnedPoolMemoryResource(J // create a pinned fallback pool that will allocate pinned memory // if the regular pinned pool is exhausted pinned_fallback_mr.reset(new pinned_fallback_host_memory_resource(pool)); - prior_cuio_host_mr() = cudf::io::set_host_memory_resource(*pinned_fallback_mr); + prior_cudf_pinned_mr() = cudf::set_pinned_memory_resource(*pinned_fallback_mr); } CATCH_STD(env, ) } @@ -1048,7 +1050,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_releasePinnedPoolMemoryResource(J cudf::jni::auto_set_device(env); // set the cuio host memory resource to what it was before, or the same // if we didn't overwrite it with setCuioPinnedPoolMemoryResource - cudf::io::set_host_memory_resource(prior_cuio_host_mr()); + cudf::set_pinned_memory_resource(prior_cudf_pinned_mr()); pinned_fallback_mr.reset(); delete reinterpret_cast(pool_ptr); } @@ -1088,7 +1090,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Rmm_allocFromFallbackPinnedPool(JNIE jlong size) { cudf::jni::auto_set_device(env); - void* ret = cudf::io::get_host_memory_resource().allocate(size); + void* ret = cudf::get_pinned_memory_resource().allocate(size); return reinterpret_cast(ret); } @@ -1101,7 +1103,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_freeFromFallbackPinnedPool(JNIEnv try { cudf::jni::auto_set_device(env); void* cptr = reinterpret_cast(ptr); - cudf::io::get_host_memory_resource().deallocate(cptr, size); + cudf::get_pinned_memory_resource().deallocate(cptr, size); } CATCH_STD(env, ) } @@ -1112,7 +1114,7 @@ JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_Rmm_configureDefaultCudfPinnedPoo { try { cudf::jni::auto_set_device(env); - return cudf::io::config_default_host_memory_resource(cudf::io::host_mr_options{size}); + return cudf::config_default_pinned_memory_resource(cudf::pinned_mr_options{size}); } CATCH_STD(env, false) } From ac0ce9c0ef5c2b3c6c4dfe1e1b4ee5330100f999 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 31 May 2024 12:04:05 -0700 Subject: [PATCH 06/31] java fix --- java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java | 2 +- java/src/main/java/ai/rapids/cudf/Rmm.java | 2 +- java/src/main/native/src/RmmJni.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java index 83b801db7fb..5050834303d 100644 --- a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java +++ b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java @@ -224,7 +224,7 @@ private PinnedMemoryPool(long poolSize, int gpuId, boolean setCuioHostMemoryReso } this.poolHandle = Rmm.newPinnedPoolMemoryResource(poolSize, poolSize); if (setCuioHostMemoryResource) { - Rmm.setCuioPinnedPoolMemoryResource(this.poolHandle); + Rmm.setCudfPinnedPoolMemoryResource(this.poolHandle); } this.poolSize = poolSize; } diff 
--git a/java/src/main/java/ai/rapids/cudf/Rmm.java b/java/src/main/java/ai/rapids/cudf/Rmm.java index 4dee1b7aa24..ed029c918e4 100755 --- a/java/src/main/java/ai/rapids/cudf/Rmm.java +++ b/java/src/main/java/ai/rapids/cudf/Rmm.java @@ -597,7 +597,7 @@ static native long newEventHandlerResourceAdaptor(long handle, long trackerHandl public static native long newPinnedPoolMemoryResource(long initSize, long maxSize); - public static native long setCuioPinnedPoolMemoryResource(long poolPtr); + public static native long setCudfPinnedPoolMemoryResource(long poolPtr); public static native void releasePinnedPoolMemoryResource(long poolPtr); diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp index e1cb7845b77..8bd0f7793b4 100644 --- a/java/src/main/native/src/RmmJni.cpp +++ b/java/src/main/native/src/RmmJni.cpp @@ -1049,7 +1049,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_releasePinnedPoolMemoryResource(J try { cudf::jni::auto_set_device(env); // set the cuio host memory resource to what it was before, or the same - // if we didn't overwrite it with setCuioPinnedPoolMemoryResource + // if we didn't overwrite it with setCudfPinnedPoolMemoryResource cudf::set_pinned_memory_resource(prior_cudf_pinned_mr()); pinned_fallback_mr.reset(); delete reinterpret_cast(pool_ptr); From ab36162ac32cd0be5fe69ef3d92e421f4e5ea798 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 31 May 2024 15:48:45 -0700 Subject: [PATCH 07/31] move test out of io util --- cpp/tests/CMakeLists.txt | 5 +- .../utilities_tests/io_utilities_tests.cpp | 45 ------------- .../utilities_tests/pinned_memory_tests.cpp | 65 +++++++++++++++++++ 3 files changed, 68 insertions(+), 47 deletions(-) create mode 100644 cpp/tests/utilities_tests/pinned_memory_tests.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 2f2c12f265c..19c87facb51 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -378,15 +378,16 @@ ConfigureTest( # * utilities tests ------------------------------------------------------------------------------- ConfigureTest( UTILITIES_TEST - utilities_tests/type_list_tests.cpp utilities_tests/column_debug_tests.cpp utilities_tests/column_utilities_tests.cpp utilities_tests/column_wrapper_tests.cpp + utilities_tests/default_stream_tests.cpp utilities_tests/io_utilities_tests.cpp utilities_tests/lists_column_wrapper_tests.cpp utilities_tests/logger_tests.cpp - utilities_tests/default_stream_tests.cpp + utilities_tests/pinned_memory_tests.cpp utilities_tests/type_check_tests.cpp + utilities_tests/type_list_tests.cpp ) # ################################################################################################## diff --git a/cpp/tests/utilities_tests/io_utilities_tests.cpp b/cpp/tests/utilities_tests/io_utilities_tests.cpp index c00f1ab8863..9ed8f18f5cc 100644 --- a/cpp/tests/utilities_tests/io_utilities_tests.cpp +++ b/cpp/tests/utilities_tests/io_utilities_tests.cpp @@ -16,14 +16,6 @@ #include #include -#include - -#include -#include - -#include -#include -#include #include @@ -32,43 +24,6 @@ using cudf::io::detail::base64_encode; class IoUtilitiesTest : public cudf::test::BaseFixture {}; -TEST(IoUtilitiesTest, HostMemoryGetAndSet) -{ - // Global environment for temporary files - auto const temp_env = static_cast( - ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); - - // pinned/pooled host memory resource - using host_pooled_mr = rmm::mr::pool_memory_resource; - host_pooled_mr mr(std::make_shared().get(), - 
size_t{128} * 1024 * 1024); - - // set new resource - auto last_mr = cudf::get_pinned_memory_resource(); - cudf::set_pinned_memory_resource(mr); - - constexpr int num_rows = 32 * 1024; - auto valids = - cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 2; }); - auto values = thrust::make_counting_iterator(0); - - cudf::test::fixed_width_column_wrapper col(values, values + num_rows, valids); - - cudf::table_view expected({col}); - auto filepath = temp_env->get_temp_filepath("IoUtilsMemTest.parquet"); - cudf::io::parquet_writer_options out_args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_args); - - cudf::io::parquet_reader_options const read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto const result = cudf::io::read_parquet(read_opts); - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected); - - // reset memory resource back - cudf::set_pinned_memory_resource(last_mr); -} - TEST(IoUtilitiesTest, Base64EncodeAndDecode) { // a vector of lorem ipsum strings diff --git a/cpp/tests/utilities_tests/pinned_memory_tests.cpp b/cpp/tests/utilities_tests/pinned_memory_tests.cpp new file mode 100644 index 00000000000..df9103640f4 --- /dev/null +++ b/cpp/tests/utilities_tests/pinned_memory_tests.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include + +#include +#include +#include + +class PinnedMemoryTest : public cudf::test::BaseFixture {}; + +TEST(PinnedMemoryTest, MemoryResourceGetAndSet) +{ + // Global environment for temporary files + auto const temp_env = static_cast( + ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); + + // pinned/pooled host memory resource + using host_pooled_mr = rmm::mr::pool_memory_resource; + host_pooled_mr mr(std::make_shared().get(), + 4 * 1024 * 1024); + + // set new resource + auto last_mr = cudf::get_pinned_memory_resource(); + cudf::set_pinned_memory_resource(mr); + + constexpr int num_rows = 32 * 1024; + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 2; }); + auto values = thrust::make_counting_iterator(0); + + cudf::test::fixed_width_column_wrapper col(values, values + num_rows, valids); + + cudf::table_view expected({col}); + auto filepath = temp_env->get_temp_filepath("MemoryResourceGetAndSetTest.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_args); + + cudf::io::parquet_reader_options const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(read_opts); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected); + + // reset memory resource back + cudf::set_pinned_memory_resource(last_mr); +} From 83f665a15f4aba3040079b58306fd964621a91c8 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 3 Jun 2024 15:01:38 -0700 Subject: [PATCH 08/31] missed rename --- .../src/main/java/ai/rapids/cudf/PinnedMemoryPool.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java index 5050834303d..5ca5bc0db68 100644 --- a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java +++ b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java @@ -128,9 +128,9 @@ public static synchronized void initialize(long poolSize, int gpuId) { * * @param poolSize size of the pool to initialize. 
* @param gpuId gpu id to set to get memory pool from, -1 means to use default - * @param setCuioHostMemoryResource true if this pinned pool should be used by cuIO for host memory + * @param setCudfPinnedPoolMemoryResource true if this pinned pool should be used by cuIO for host memory */ - public static synchronized void initialize(long poolSize, int gpuId, boolean setCuioHostMemoryResource) { + public static synchronized void initialize(long poolSize, int gpuId, boolean setCudfPinnedPoolMemoryResource) { if (isInitialized()) { throw new IllegalStateException("Can only initialize the pool once."); } @@ -139,7 +139,7 @@ public static synchronized void initialize(long poolSize, int gpuId, boolean set t.setDaemon(true); return t; }); - initFuture = initService.submit(() -> new PinnedMemoryPool(poolSize, gpuId, setCuioHostMemoryResource)); + initFuture = initService.submit(() -> new PinnedMemoryPool(poolSize, gpuId, setCudfPinnedPoolMemoryResource)); initService.shutdown(); } @@ -216,14 +216,14 @@ public static long getTotalPoolSizeBytes() { return 0; } - private PinnedMemoryPool(long poolSize, int gpuId, boolean setCuioHostMemoryResource) { + private PinnedMemoryPool(long poolSize, int gpuId, boolean setCudfPinnedPoolMemoryResource) { if (gpuId > -1) { // set the gpu device to use Cuda.setDevice(gpuId); Cuda.freeZero(); } this.poolHandle = Rmm.newPinnedPoolMemoryResource(poolSize, poolSize); - if (setCuioHostMemoryResource) { + if (setCudfPinnedPoolMemoryResource) { Rmm.setCudfPinnedPoolMemoryResource(this.poolHandle); } this.poolSize = poolSize; From c1ae478c2b3ce28101e35ec3f5a5af5e03d5452c Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 3 Jun 2024 15:52:34 -0700 Subject: [PATCH 09/31] update benchmark changes --- cpp/benchmarks/io/cuio_common.cpp | 12 ++++++++++++ cpp/benchmarks/io/cuio_common.hpp | 4 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 37ced8ea703..f06938bd721 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -14,6 +14,9 @@ * limitations under the License. 
*/ +#include "rmm/mr/pinned_host_memory_resource.hpp" +#include "rmm/resource_ref.hpp" + #include #include @@ -28,6 +31,14 @@ temp_directory const cuio_source_sink_pair::tmpdir{"cudf_gbench"}; +// Don't use cudf's pinned pool for the source data +rmm::host_async_resource_ref pinned_memory_resource() +{ + static rmm::mr::pinned_host_memory_resource mr = rmm::mr::pinned_host_memory_resource{}; + + return mr; +} + std::string random_file_in_dir(std::string const& dir_path) { // `mkstemp` modifies the template in place @@ -41,6 +52,7 @@ std::string random_file_in_dir(std::string const& dir_path) cuio_source_sink_pair::cuio_source_sink_pair(io_type type) : type{type}, + pinned_buffer({pinned_memory_resource(), cudf::get_default_stream()}), d_buffer{0, cudf::get_default_stream()}, file_name{random_file_in_dir(tmpdir.path())}, void_sink{cudf::io::data_sink::create()} diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index d4f39a5f243..407805a8a1a 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -18,7 +18,7 @@ #include -#include +#include #include #include @@ -79,7 +79,7 @@ class cuio_source_sink_pair { io_type const type; std::vector h_buffer; - cudf::detail::pinned_host_vector pinned_buffer; + cudf::detail::rmm_host_vector pinned_buffer; rmm::device_uvector d_buffer; std::string const file_name; std::unique_ptr void_sink; From 1c09d0cfe15bcf6b5d2ce775129f0db7e0c662f0 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 4 Jun 2024 11:52:48 -0700 Subject: [PATCH 10/31] rename rmm_host_vector --- cpp/benchmarks/io/cuio_common.hpp | 4 ++-- .../{rmm_host_vector.hpp => host_vector.hpp} | 2 +- .../cudf/detail/utilities/vector_factories.hpp | 16 ++++++++-------- cpp/src/io/text/bgzip_data_chunk_source.cu | 10 +++++----- cpp/src/io/text/data_chunk_source_factories.cpp | 8 ++++---- cpp/src/io/utilities/hostdevice_vector.hpp | 4 ++-- 6 files changed, 22 insertions(+), 22 deletions(-) rename cpp/include/cudf/detail/utilities/{rmm_host_vector.hpp => host_vector.hpp} (98%) diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index 407805a8a1a..64d6021cf50 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -18,7 +18,7 @@ #include -#include +#include #include #include @@ -79,7 +79,7 @@ class cuio_source_sink_pair { io_type const type; std::vector h_buffer; - cudf::detail::rmm_host_vector pinned_buffer; + cudf::detail::host_vector pinned_buffer; rmm::device_uvector d_buffer; std::string const file_name; std::unique_ptr void_sink; diff --git a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp similarity index 98% rename from cpp/include/cudf/detail/utilities/rmm_host_vector.hpp rename to cpp/include/cudf/detail/utilities/host_vector.hpp index 6604020c224..e62c8017f8b 100644 --- a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/host_vector.hpp @@ -178,6 +178,6 @@ class rmm_host_allocator { * @brief A vector class with rmm host memory allocator */ template -using rmm_host_vector = thrust::host_vector>; +using host_vector = thrust::host_vector>; } // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 7343ee25c8f..6f859ded456 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -21,7 +21,7 
@@ * @file vector_factories.hpp */ -#include +#include #include #include #include @@ -442,33 +442,33 @@ thrust::host_vector make_host_vector_sync( } /** - * @brief Asynchronously construct a `rmm_host_vector` of the given size + * @brief Asynchronously construct a `host_vector` of the given size * * @note This function does not synchronize `stream`. * * @tparam T The type of the vector data * @param size The number of elements in the created vector * @param stream The stream on which to allocate memory - * @return A rmm_host_vector of the given size + * @return A host_vector of the given size */ template -rmm_host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) +host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) { - return rmm_host_vector(size, {cudf::get_pinned_memory_resource(), stream}); + return host_vector(size, {cudf::get_pinned_memory_resource(), stream}); } /** - * @brief Synchronously construct a `rmm_host_vector` of the given size + * @brief Synchronously construct a `cudf::detail::host_vector` of the given size * * @note This function synchronizes `stream`. * * @tparam T The type of the vector data * @param size The number of elements in the created vector * @param stream The stream on which to allocate memory - * @return A rmm_host_vector of the given size + * @return A host_vector of the given size */ template -rmm_host_vector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) +host_vector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) { auto result = make_pinned_vector_async(size, stream); stream.synchronize(); diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index 190015686df..896123d95a9 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -20,8 +20,8 @@ #include "io/utilities/config_utils.hpp" #include +#include #include -#include #include #include #include @@ -67,7 +67,7 @@ struct bgzip_nvcomp_transform_functor { class bgzip_data_chunk_reader : public data_chunk_reader { private: template - static void copy_to_device(cudf::detail::rmm_host_vector const& host, + static void copy_to_device(cudf::detail::host_vector const& host, rmm::device_uvector& device, rmm::cuda_stream_view stream) { @@ -85,9 +85,9 @@ class bgzip_data_chunk_reader : public data_chunk_reader { 1 << 16; // 64k offset allocation, resized on demand cudaEvent_t event; - cudf::detail::rmm_host_vector h_compressed_blocks; - cudf::detail::rmm_host_vector h_compressed_offsets; - cudf::detail::rmm_host_vector h_decompressed_offsets; + cudf::detail::host_vector h_compressed_blocks; + cudf::detail::host_vector h_compressed_offsets; + cudf::detail::host_vector h_decompressed_offsets; rmm::device_uvector d_compressed_blocks; rmm::device_uvector d_decompressed_blocks; rmm::device_uvector d_compressed_offsets; diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 46149db929f..1e1671a1545 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -18,7 +18,7 @@ #include "io/text/device_data_chunks.hpp" #include -#include +#include #include #include @@ -33,7 +33,7 @@ namespace { struct host_ticket { cudaEvent_t event; - std::unique_ptr> buffer; + std::unique_ptr> buffer; }; /** @@ -86,7 +86,7 @@ class datasource_chunk_reader : public data_chunk_reader { // resize the host buffer as necessary to contain the 
requested number of bytes if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { - h_ticket.buffer = std::make_unique>( + h_ticket.buffer = std::make_unique>( cudf::detail::make_pinned_vector_sync(read_size, stream)); } @@ -153,7 +153,7 @@ class istream_data_chunk_reader : public data_chunk_reader { // resize the host buffer as necessary to contain the requested number of bytes if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { - h_ticket.buffer = std::make_unique>( + h_ticket.buffer = std::make_unique>( cudf::detail::make_pinned_vector_sync(read_size, stream)); } diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index f6f7ff14d38..1ae27a2f4ae 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -18,7 +18,7 @@ #include "hostdevice_span.hpp" -#include +#include #include #include #include @@ -172,7 +172,7 @@ class hostdevice_vector { } private: - cudf::detail::rmm_host_vector h_data; + cudf::detail::host_vector h_data; rmm::device_uvector d_data; }; From c343c3194f48dedb10e49c8610e3e0deaacf315b Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 4 Jun 2024 13:00:10 -0700 Subject: [PATCH 11/31] remove do_xyz --- cpp/src/utilities/pinned_memory.cpp | 39 ++++++++++------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index 23d673a1382..47c09217363 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -53,7 +53,7 @@ class fixed_pinned_pool_memory_resource { pool_->deallocate_async(pool_begin_, pool_size_, stream_); } - void* do_allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) + void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) { if (bytes <= pool_size_) { try { @@ -66,46 +66,33 @@ class fixed_pinned_pool_memory_resource { return upstream_mr_.allocate_async(bytes, alignment, stream); } - void do_deallocate_async(void* ptr, - std::size_t bytes, - std::size_t alignment, - cuda::stream_ref stream) noexcept - { - if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr <= pool_end_) { - pool_->deallocate_async(ptr, bytes, alignment, stream); - } else { - upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); - } - } - void* allocate_async(std::size_t bytes, cuda::stream_ref stream) { - return do_allocate_async(bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - } - - void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) - { - return do_allocate_async(bytes, alignment, stream); + return allocate_async(bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); } void* allocate(std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) { - auto const result = do_allocate_async(bytes, alignment, stream_); + auto const result = allocate_async(bytes, alignment, stream_); stream_.wait(); return result; } - void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) noexcept - { - return do_deallocate_async(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - } - void deallocate_async(void* ptr, std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) noexcept { - return do_deallocate_async(ptr, bytes, alignment, stream); + if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr <= pool_end_) { + pool_->deallocate_async(ptr, bytes, alignment, stream); + } else { + 
upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); + } + } + + void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) noexcept + { + return deallocate_async(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); } void deallocate(void* ptr, From 50f4d3ee3d27a2a0a10a2a2cc8a7f425ab8c82e1 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 4 Jun 2024 15:14:21 -0700 Subject: [PATCH 12/31] comment --- cpp/include/cudf/detail/utilities/vector_factories.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 6f859ded456..06dfcbfc5e5 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -442,7 +442,7 @@ thrust::host_vector make_host_vector_sync( } /** - * @brief Asynchronously construct a `host_vector` of the given size + * @brief Asynchronously construct a pinned `cudf::detail::host_vector` of the given size * * @note This function does not synchronize `stream`. * @@ -458,7 +458,7 @@ host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view strea } /** - * @brief Synchronously construct a `cudf::detail::host_vector` of the given size + * @brief Synchronously construct a pinned `cudf::detail::host_vector` of the given size * * @note This function synchronizes `stream`. * From 9082ccc979383b0bcfa7181c54d097d5036f2904 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 5 Jun 2024 15:14:34 -0700 Subject: [PATCH 13/31] include style Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/benchmarks/io/cuio_common.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index f06938bd721..45dc812e247 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "rmm/mr/pinned_host_memory_resource.hpp" -#include "rmm/resource_ref.hpp" +#include +#include #include From 17b1ee0e736a0dbbbf152d99aad3a27de3bc9c3a Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 5 Jun 2024 17:05:40 -0700 Subject: [PATCH 14/31] reviews --- cpp/src/io/text/bgzip_data_chunk_source.cu | 2 +- cpp/src/io/text/data_chunk_source_factories.cpp | 2 +- java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index 896123d95a9..0e3ce779089 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -14,7 +14,6 @@ * limitations under the License. */ -#include "cudf/detail/utilities/vector_factories.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/text/device_data_chunks.hpp" #include "io/utilities/config_utils.hpp" @@ -22,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 1e1671a1545..45096b7155c 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "cudf/detail/utilities/vector_factories.hpp" #include "io/text/device_data_chunks.hpp" #include #include +#include #include #include diff --git a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java index 5ca5bc0db68..df0d9dc7c3e 100644 --- a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java +++ b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java @@ -128,7 +128,7 @@ public static synchronized void initialize(long poolSize, int gpuId) { * * @param poolSize size of the pool to initialize. * @param gpuId gpu id to set to get memory pool from, -1 means to use default - * @param setCudfPinnedPoolMemoryResource true if this pinned pool should be used by cuIO for host memory + * @param setCudfPinnedPoolMemoryResource true if this pinned pool should be used by cuDF for pinned memory */ public static synchronized void initialize(long poolSize, int gpuId, boolean setCudfPinnedPoolMemoryResource) { if (isInitialized()) { From 2dbb68f9d66e752eef4015082d2c877b8145e068 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 6 Jun 2024 09:52:44 -0700 Subject: [PATCH 15/31] available_device_memory --- cpp/src/utilities/pinned_memory.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index 47c09217363..d0709c6a40e 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -142,8 +142,7 @@ CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( if (config_size.has_value()) { return *config_size; } - size_t free{}, total{}; - CUDF_CUDA_TRY(cudaMemGetInfo(&free, &total)); + auto const total = rmm::available_device_memory().second; // 0.5% of the total device memory, capped at 100MB return std::min(total / 200, size_t{100} * 1024 * 1024); }(); From cb9cc228bdc8fa74b5a517a149b11f7e6201d71b Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 6 Jun 2024 10:58:48 -0700 Subject: [PATCH 16/31] reviews --- cpp/src/utilities/pinned_memory.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index d0709c6a40e..53e0d10c6f4 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -83,7 +83,7 @@ class fixed_pinned_pool_memory_resource { std::size_t alignment, cuda::stream_ref stream) noexcept { - if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr <= pool_end_) { + if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr < pool_end_) { pool_->deallocate_async(ptr, bytes, alignment, stream); } else { upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); @@ -113,13 +113,13 @@ class fixed_pinned_pool_memory_resource { return !operator==(other); } - [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, - cuda::mr::device_accessible) noexcept + friend void get_property(fixed_pinned_pool_memory_resource const&, + cuda::mr::device_accessible) noexcept { } - [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, - cuda::mr::host_accessible) noexcept + friend void get_property(fixed_pinned_pool_memory_resource const&, + cuda::mr::host_accessible) noexcept { } }; @@ -148,7 +148,7 @@ CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( }(); // rmm requires the pool size to be a multiple of 256 bytes - auto const aligned_size = (size + 255) & ~255; + auto const 
aligned_size = rmm::align_up(size, rmm::RMM_DEFAULT_HOST_ALIGNMENT); CUDF_LOG_INFO("Pinned pool size = {}", aligned_size); // make the pool with max size equal to the initial size From cf67a14795017eb4a0835bd727acebfff0a066f9 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 6 Jun 2024 11:01:20 -0700 Subject: [PATCH 17/31] expand anon namespace --- cpp/src/utilities/pinned_memory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index 53e0d10c6f4..9cebf980d00 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -129,8 +129,6 @@ static_assert(cuda::mr::resource_with, ""); -} // namespace - CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( std::optional config_size) { @@ -190,6 +188,8 @@ CUDF_EXPORT rmm::host_device_async_resource_ref& host_mr() return mr_ref; } +} // namespace + rmm::host_device_async_resource_ref set_pinned_memory_resource( rmm::host_device_async_resource_ref mr) { From 24c15498b9ad53ec452a99b94fb767b90f4551a0 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 7 Jun 2024 11:07:20 -0700 Subject: [PATCH 18/31] host_uvector --- cpp/benchmarks/io/cuio_common.cpp | 9 +- cpp/benchmarks/io/cuio_common.hpp | 4 +- .../cudf/detail/utilities/host_uvector.hpp | 142 ++++++++++++++ .../cudf/detail/utilities/host_vector.hpp | 183 ------------------ .../detail/utilities/vector_factories.hpp | 12 +- cpp/include/cudf/utilities/span.hpp | 6 + cpp/src/io/text/bgzip_data_chunk_source.cu | 10 +- .../io/text/data_chunk_source_factories.cpp | 8 +- cpp/src/io/utilities/hostdevice_vector.hpp | 4 +- 9 files changed, 172 insertions(+), 206 deletions(-) create mode 100644 cpp/include/cudf/detail/utilities/host_uvector.hpp delete mode 100644 cpp/include/cudf/detail/utilities/host_vector.hpp diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 45dc812e247..09d7d8a9db6 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -14,13 +14,14 @@ * limitations under the License. 
*/ -#include -#include - #include #include #include +#include + +#include +#include #include @@ -52,7 +53,7 @@ std::string random_file_in_dir(std::string const& dir_path) cuio_source_sink_pair::cuio_source_sink_pair(io_type type) : type{type}, - pinned_buffer({pinned_memory_resource(), cudf::get_default_stream()}), + pinned_buffer(0, pinned_memory_resource(), cudf::get_default_stream()), d_buffer{0, cudf::get_default_stream()}, file_name{random_file_in_dir(tmpdir.path())}, void_sink{cudf::io::data_sink::create()} diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index 64d6021cf50..020fd7e00c1 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -18,7 +18,7 @@ #include -#include +#include #include #include @@ -79,7 +79,7 @@ class cuio_source_sink_pair { io_type const type; std::vector h_buffer; - cudf::detail::host_vector pinned_buffer; + cudf::detail::host_uvector pinned_buffer; rmm::device_uvector d_buffer; std::string const file_name; std::unique_ptr void_sink; diff --git a/cpp/include/cudf/detail/utilities/host_uvector.hpp b/cpp/include/cudf/detail/utilities/host_uvector.hpp new file mode 100644 index 00000000000..39bde04e985 --- /dev/null +++ b/cpp/include/cudf/detail/utilities/host_uvector.hpp @@ -0,0 +1,142 @@ +/* + * Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +namespace cudf::detail { + +template +class host_uvector { + public: + host_uvector(std::size_t size, rmm::host_async_resource_ref mr, rmm::cuda_stream_view stream) + : _size{size}, _capacity{size}, _mr{mr}, _stream{stream} + { + if (_size != 0) { _data = static_cast(mr.allocate_async(_size * sizeof(T), _stream)); } + } + + host_uvector(host_uvector const&) = delete; + host_uvector(host_uvector&& other) + : _data{other._data}, + _size{other._size}, + _capacity{other._capacity}, + _mr{other._mr}, + _stream{other._stream} + { + other._data = nullptr; + other._size = 0; + other._capacity = 0; + } + + host_uvector& operator=(host_uvector const&) = delete; + host_uvector& operator=(host_uvector&& other) + { + if (this != &other) { + if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } + _data = other._data; + _size = other._size; + _capacity = other._capacity; + _mr = other._mr; + _stream = other._stream; + other._data = nullptr; + other._size = 0; + other._capacity = 0; + } + return *this; + } + + ~host_uvector() + { + if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } + } + + void resize(std::size_t new_size) + { + if (new_size > _capacity) { + auto new_data = static_cast(_mr.allocate_async(new_size * sizeof(T), _stream)); + _stream.synchronize(); + if (_data != nullptr) { + std::copy(_data, _data + _size, new_data); + _mr.deallocate_async(_data, _size * sizeof(T), _stream); + } + _data = new_data; + _capacity = new_size; + } + _size = new_size; + } + + void reserve(std::size_t new_capacity) + { + if (new_capacity > _capacity) { + auto new_data = static_cast(_mr.allocate_async(new_capacity * sizeof(T), _stream)); + _stream.synchronize(); + if (_data != nullptr) { + std::copy(_data, _data + _size, new_data); + _mr.deallocate_async(_data, _size * sizeof(T), _stream); + } + _data = new_data; + _capacity = new_capacity; + } + } + + void push_back(T const& value) + { + if (_size == _capacity) { reserve(_capacity == 0 ? 
2 : _capacity * 2); } + _data[_size++] = value; + } + + void clear() { _size = 0; } + + [[nodiscard]] std::size_t size() const { return _size; } + [[nodiscard]] std::int64_t ssize() const { return _size; } + [[nodiscard]] bool is_empty() const { return _size == 0; } + [[nodiscard]] std::size_t capacity() const { return _capacity; } + + [[nodiscard]] T& operator[](std::size_t idx) { return _data[idx]; } + [[nodiscard]] T const& operator[](std::size_t idx) const { return _data[idx]; } + + [[nodiscard]] T* data() { return _data; } + [[nodiscard]] T const* data() const { return _data; } + + [[nodiscard]] T& front() { return _data[0]; } + [[nodiscard]] T const& front() const { return _data[0]; } + + [[nodiscard]] T& back() { return _data[_size - 1]; } + [[nodiscard]] T const& back() const { return _data[_size - 1]; } + + [[nodiscard]] T* begin() { return _data; } + [[nodiscard]] T const* begin() const { return _data; } + + [[nodiscard]] T* end() { return _data + _size; } + [[nodiscard]] T const* end() const { return _data + _size; } + + [[nodiscard]] rmm::host_async_resource_ref memory_resource() const { return _mr; } + [[nodiscard]] rmm::cuda_stream_view stream() const { return _stream; } + + private: + T* _data{nullptr}; + std::size_t _size; + std::size_t _capacity; + rmm::host_async_resource_ref _mr; + rmm::cuda_stream_view _stream; +}; + +} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp deleted file mode 100644 index e62c8017f8b..00000000000 --- a/cpp/include/cudf/detail/utilities/host_vector.hpp +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright 2024 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -#include -#include - -#include - -#include -#include -#include // for bad_alloc - -namespace cudf::detail { - -/*! \p rmm_host_allocator is a CUDA-specific host memory allocator - * that employs \c a `rmm::host_async_resource_ref` for allocation. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template -class rmm_host_allocator; - -/*! \p rmm_host_allocator is a CUDA-specific host memory allocator - * that employs \c an `cudf::host_async_resource_ref` for allocation. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template <> -class rmm_host_allocator { - public: - using value_type = void; ///< The type of the elements in the allocator - using pointer = void*; ///< The type returned by address() / allocate() - using const_pointer = void const*; ///< The type returned by address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers - - /** - * @brief converts a `rmm_host_allocator` to `rmm_host_allocator` - */ - template - struct rebind { - using other = rmm_host_allocator; ///< The rebound type - }; -}; - -/*! 
\p rmm_host_allocator is a CUDA-specific host memory allocator - * that employs \c `rmm::host_async_resource_ref` for allocation. - * - * The \p rmm_host_allocator provides an interface for host memory allocation through the user - * provided \c `rmm::host_async_resource_ref`. The \p rmm_host_allocator does not take ownership of - * this reference and therefore it is the user's responsibility to ensure its lifetime for the - * duration of the lifetime of the \p rmm_host_allocator. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template -class rmm_host_allocator { - public: - using value_type = T; ///< The type of the elements in the allocator - using pointer = T*; ///< The type returned by address() / allocate() - using const_pointer = T const*; ///< The type returned by address() - using reference = T&; ///< The parameter type for address() - using const_reference = T const&; ///< The parameter type for address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers - - typedef cuda::std::true_type propagate_on_container_move_assignment; - - /** - * @brief converts a `rmm_host_allocator` to `rmm_host_allocator` - */ - template - struct rebind { - using other = rmm_host_allocator; ///< The rebound type - }; - - /** - * @brief Cannot declare an empty host allocator. - */ - rmm_host_allocator() = delete; - - /** - * @brief Construct from a `cudf::host_async_resource_ref` - */ - rmm_host_allocator(rmm::host_async_resource_ref _mr, rmm::cuda_stream_view _stream) - : mr(_mr), stream(_stream) - { - } - - /** - * @brief This method allocates storage for objects in host memory. - * - * @param cnt The number of objects to allocate. - * @return a \c pointer to the newly allocated objects. - * @note This method does not invoke \p value_type's constructor. - * It is the responsibility of the caller to initialize the - * objects at the returned \c pointer. - */ - inline pointer allocate(size_type cnt) - { - if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if - return static_cast( - mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream)); - } - - /** - * @brief This method deallocates host memory previously allocated - * with this \c rmm_host_allocator. - * - * @param p A \c pointer to the previously allocated memory. - * @note The second parameter is the number of objects previously allocated. - * @note This method does not invoke \p value_type's destructor. - * It is the responsibility of the caller to destroy - * the objects stored at \p p. - */ - inline void deallocate(pointer p, size_type cnt) - { - mr.deallocate_async(p, cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - } - - /** - * @brief This method returns the maximum size of the \c cnt parameter - * accepted by the \p allocate() method. - * - * @return The maximum number of objects that may be allocated - * by a single call to \p allocate(). - */ - constexpr inline size_type max_size() const - { - return (std::numeric_limits::max)() / sizeof(T); - } - - /** - * @brief This method tests this \p rmm_host_allocator for equality to - * another. - * - * @param x The other \p rmm_host_allocator of interest. - * @return This method always returns \c true. 
- */ - inline bool operator==(rmm_host_allocator const& x) const - { - return x.mr == mr && x.stream == stream; - } - - /** - * @brief This method tests this \p rmm_host_allocator for inequality - * to another. - * - * @param x The other \p rmm_host_allocator of interest. - * @return This method always returns \c false. - */ - inline bool operator!=(rmm_host_allocator const& x) const { return !operator==(x); } - - private: - rmm::host_async_resource_ref mr; - rmm::cuda_stream_view stream; -}; - -/** - * @brief A vector class with rmm host memory allocator - */ -template -using host_vector = thrust::host_vector>; - -} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 06dfcbfc5e5..f67b671c610 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -21,7 +21,7 @@ * @file vector_factories.hpp */ -#include +#include #include #include #include @@ -442,7 +442,7 @@ thrust::host_vector make_host_vector_sync( } /** - * @brief Asynchronously construct a pinned `cudf::detail::host_vector` of the given size + * @brief Asynchronously construct a pinned `cudf::detail::host_uvector` of the given size * * @note This function does not synchronize `stream`. * @@ -452,13 +452,13 @@ thrust::host_vector make_host_vector_sync( * @return A host_vector of the given size */ template -host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) +host_uvector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) { - return host_vector(size, {cudf::get_pinned_memory_resource(), stream}); + return host_uvector(size, cudf::get_pinned_memory_resource(), stream); } /** - * @brief Synchronously construct a pinned `cudf::detail::host_vector` of the given size + * @brief Synchronously construct a pinned `cudf::detail::host_uvector` of the given size * * @note This function synchronizes `stream`. 
* @@ -468,7 +468,7 @@ host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view strea * @return A host_vector of the given size */ template -host_vector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) +host_uvector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) { auto result = make_pinned_vector_async(size, stream); stream.synchronize(); diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 47e92d61a9f..873d3e56acb 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -16,6 +16,8 @@ #pragma once +#include + #include #include #include @@ -212,6 +214,10 @@ template struct is_host_span_supported_container< // thrust::host_vector> : std::true_type {}; +template +struct is_host_span_supported_container< // + cudf::detail::host_uvector> : std::true_type {}; + template struct is_host_span_supported_container< // std::basic_string, Alloc>> : std::true_type {}; diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index 0e3ce779089..b7644a6fb9f 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -19,7 +19,7 @@ #include "io/utilities/config_utils.hpp" #include -#include +#include #include #include #include @@ -67,7 +67,7 @@ struct bgzip_nvcomp_transform_functor { class bgzip_data_chunk_reader : public data_chunk_reader { private: template - static void copy_to_device(cudf::detail::host_vector const& host, + static void copy_to_device(cudf::detail::host_uvector const& host, rmm::device_uvector& device, rmm::cuda_stream_view stream) { @@ -85,9 +85,9 @@ class bgzip_data_chunk_reader : public data_chunk_reader { 1 << 16; // 64k offset allocation, resized on demand cudaEvent_t event; - cudf::detail::host_vector h_compressed_blocks; - cudf::detail::host_vector h_compressed_offsets; - cudf::detail::host_vector h_decompressed_offsets; + cudf::detail::host_uvector h_compressed_blocks; + cudf::detail::host_uvector h_compressed_offsets; + cudf::detail::host_uvector h_decompressed_offsets; rmm::device_uvector d_compressed_blocks; rmm::device_uvector d_decompressed_blocks; rmm::device_uvector d_compressed_offsets; diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 45096b7155c..2c4160e48c5 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -17,7 +17,7 @@ #include "io/text/device_data_chunks.hpp" #include -#include +#include #include #include @@ -33,7 +33,7 @@ namespace { struct host_ticket { cudaEvent_t event; - std::unique_ptr> buffer; + std::unique_ptr> buffer; }; /** @@ -86,7 +86,7 @@ class datasource_chunk_reader : public data_chunk_reader { // resize the host buffer as necessary to contain the requested number of bytes if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { - h_ticket.buffer = std::make_unique>( + h_ticket.buffer = std::make_unique>( cudf::detail::make_pinned_vector_sync(read_size, stream)); } @@ -153,7 +153,7 @@ class istream_data_chunk_reader : public data_chunk_reader { // resize the host buffer as necessary to contain the requested number of bytes if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { - h_ticket.buffer = std::make_unique>( + h_ticket.buffer = std::make_unique>( cudf::detail::make_pinned_vector_sync(read_size, stream)); } diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp 
b/cpp/src/io/utilities/hostdevice_vector.hpp index 1ae27a2f4ae..ae2ab03ded3 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -18,7 +18,7 @@ #include "hostdevice_span.hpp" -#include +#include #include #include #include @@ -172,7 +172,7 @@ class hostdevice_vector { } private: - cudf::detail::host_vector h_data; + cudf::detail::host_uvector h_data; rmm::device_uvector d_data; }; From 075deca7c87b70b62f30a5b8a266da39a3e852cb Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 7 Jun 2024 11:33:10 -0700 Subject: [PATCH 19/31] style --- cpp/include/cudf/utilities/span.hpp | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 873d3e56acb..2f622612209 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -204,23 +204,28 @@ class span_base { // ===== host_span ================================================================================= template -struct is_host_span_supported_container : std::false_type {}; +struct is_host_span_supported_container : std::false_type { +}; template struct is_host_span_supported_container< // - std::vector> : std::true_type {}; + std::vector> : std::true_type { +}; template struct is_host_span_supported_container< // - thrust::host_vector> : std::true_type {}; + thrust::host_vector> : std::true_type { +}; template struct is_host_span_supported_container< // - cudf::detail::host_uvector> : std::true_type {}; + cudf::detail::host_uvector> : std::true_type { +}; template struct is_host_span_supported_container< // - std::basic_string, Alloc>> : std::true_type {}; + std::basic_string, Alloc>> : std::true_type { +}; /** * @brief C++20 std::span with reduced feature set. @@ -275,19 +280,23 @@ struct host_span : public cudf::detail::span_base -struct is_device_span_supported_container : std::false_type {}; +struct is_device_span_supported_container : std::false_type { +}; template struct is_device_span_supported_container< // - thrust::device_vector> : std::true_type {}; + thrust::device_vector> : std::true_type { +}; template struct is_device_span_supported_container< // - rmm::device_vector> : std::true_type {}; + rmm::device_vector> : std::true_type { +}; template struct is_device_span_supported_container< // - rmm::device_uvector> : std::true_type {}; + rmm::device_uvector> : std::true_type { +}; /** * @brief Device version of C++20 std::span with reduced feature set. From 164fce20ad07632b5a9899668d9da7d23ced6b97 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 7 Jun 2024 11:53:00 -0700 Subject: [PATCH 20/31] docs; prefixes --- cpp/src/utilities/pinned_memory.cpp | 43 ++++++++++++++++------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index 9cebf980d00..85d4b7e2283 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -28,34 +28,39 @@ namespace cudf { namespace { + +// Asynchronous memory resource that allocates a fixed-size pool of pinned memory and falls back to +// additional pinned allocations if the pool is exhausted. 
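+// For example, with a 64 MiB pool, an allocate_async() of up to 64 MiB is served from the pool
+// when space is available; larger requests, or requests made while the pool is exhausted, fall
+// back to the upstream pinned_host_memory_resource. deallocate_async() routes each pointer back
+// to the resource that produced it by checking the pool's address range.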
class fixed_pinned_pool_memory_resource { using upstream_mr = rmm::mr::pinned_host_memory_resource; using host_pooled_mr = rmm::mr::pool_memory_resource; private: - upstream_mr upstream_mr_{}; - size_t pool_size_{0}; + upstream_mr _upstream_mr{}; + size_t _pool_size{0}; // Raw pointer to avoid a segfault when the pool is destroyed on exit host_pooled_mr* pool_{nullptr}; - void* pool_begin_{nullptr}; - void* pool_end_{nullptr}; - cuda::stream_ref stream_{cudf::detail::global_cuda_stream_pool().get_stream().value()}; + // The beginning and end of the pool memory range; pool is never reallocated so these are constant + // and can be used to determine if a pointer is within the pool + void* _pool_begin{nullptr}; + void* _pool_end{nullptr}; + cuda::stream_ref _stream{cudf::detail::global_cuda_stream_pool().get_stream().value()}; public: fixed_pinned_pool_memory_resource(size_t size) - : pool_size_{size}, pool_{new host_pooled_mr(upstream_mr_, size, size)} + : _pool_size{size}, pool_{new host_pooled_mr(_upstream_mr, size, size)} { - if (pool_size_ == 0) { return; } + if (_pool_size == 0) { return; } // Allocate full size from the pinned pool to figure out the beginning and end address - pool_begin_ = pool_->allocate_async(pool_size_, stream_); - pool_end_ = static_cast(static_cast(pool_begin_) + pool_size_); - pool_->deallocate_async(pool_begin_, pool_size_, stream_); + _pool_begin = pool_->allocate_async(_pool_size, _stream); + _pool_end = static_cast(static_cast(_pool_begin) + _pool_size); + pool_->deallocate_async(_pool_begin, _pool_size, _stream); } void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) { - if (bytes <= pool_size_) { + if (bytes <= _pool_size) { try { return pool_->allocate_async(bytes, alignment, stream); } catch (...) 
{ @@ -63,7 +68,7 @@ class fixed_pinned_pool_memory_resource { } } - return upstream_mr_.allocate_async(bytes, alignment, stream); + return _upstream_mr.allocate_async(bytes, alignment, stream); } void* allocate_async(std::size_t bytes, cuda::stream_ref stream) @@ -73,8 +78,8 @@ class fixed_pinned_pool_memory_resource { void* allocate(std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) { - auto const result = allocate_async(bytes, alignment, stream_); - stream_.wait(); + auto const result = allocate_async(bytes, alignment, _stream); + _stream.wait(); return result; } @@ -83,10 +88,10 @@ class fixed_pinned_pool_memory_resource { std::size_t alignment, cuda::stream_ref stream) noexcept { - if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr < pool_end_) { + if (bytes <= _pool_size && ptr >= _pool_begin && ptr < _pool_end) { pool_->deallocate_async(ptr, bytes, alignment, stream); } else { - upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); + _upstream_mr.deallocate_async(ptr, bytes, alignment, stream); } } @@ -99,13 +104,13 @@ class fixed_pinned_pool_memory_resource { std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept { - deallocate_async(ptr, bytes, alignment, stream_); - stream_.wait(); + deallocate_async(ptr, bytes, alignment, _stream); + _stream.wait(); } bool operator==(fixed_pinned_pool_memory_resource const& other) const { - return pool_ == other.pool_ and stream_ == other.stream_; + return pool_ == other.pool_ and _stream == other._stream; } bool operator!=(fixed_pinned_pool_memory_resource const& other) const From b566babb87696cf54656605ab76e9e25b5c42bed Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 7 Jun 2024 12:00:40 -0700 Subject: [PATCH 21/31] type aliases in host_uvector --- .../cudf/detail/utilities/host_uvector.hpp | 69 +++++++++++-------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/host_uvector.hpp b/cpp/include/cudf/detail/utilities/host_uvector.hpp index 39bde04e985..c8166217a73 100644 --- a/cpp/include/cudf/detail/utilities/host_uvector.hpp +++ b/cpp/include/cudf/detail/utilities/host_uvector.hpp @@ -27,10 +27,21 @@ namespace cudf::detail { template class host_uvector { public: - host_uvector(std::size_t size, rmm::host_async_resource_ref mr, rmm::cuda_stream_view stream) + using value_type = T; + using size_type = std::size_t; + using reference = value_type&; + using const_reference = value_type const&; + using pointer = value_type*; + using const_pointer = value_type const*; + using iterator = pointer; + using const_iterator = const_pointer; + + host_uvector(size_type size, rmm::host_async_resource_ref mr, rmm::cuda_stream_view stream) : _size{size}, _capacity{size}, _mr{mr}, _stream{stream} { - if (_size != 0) { _data = static_cast(mr.allocate_async(_size * sizeof(T), _stream)); } + if (_size != 0) { + _data = static_cast(mr.allocate_async(_size * sizeof(value_type), _stream)); + } } host_uvector(host_uvector const&) = delete; @@ -50,7 +61,7 @@ class host_uvector { host_uvector& operator=(host_uvector&& other) { if (this != &other) { - if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } + if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); } _data = other._data; _size = other._size; _capacity = other._capacity; @@ -65,17 +76,18 @@ class host_uvector { ~host_uvector() { - if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } + if (_data 
!= nullptr) { _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); } } - void resize(std::size_t new_size) + void resize(size_type new_size) { if (new_size > _capacity) { - auto new_data = static_cast(_mr.allocate_async(new_size * sizeof(T), _stream)); + auto new_data = + static_cast(_mr.allocate_async(new_size * sizeof(value_type), _stream)); _stream.synchronize(); if (_data != nullptr) { std::copy(_data, _data + _size, new_data); - _mr.deallocate_async(_data, _size * sizeof(T), _stream); + _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); } _data = new_data; _capacity = new_size; @@ -83,21 +95,22 @@ class host_uvector { _size = new_size; } - void reserve(std::size_t new_capacity) + void reserve(size_type new_capacity) { if (new_capacity > _capacity) { - auto new_data = static_cast(_mr.allocate_async(new_capacity * sizeof(T), _stream)); + auto new_data = + static_cast(_mr.allocate_async(new_capacity * sizeof(value_type), _stream)); _stream.synchronize(); if (_data != nullptr) { std::copy(_data, _data + _size, new_data); - _mr.deallocate_async(_data, _size * sizeof(T), _stream); + _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); } _data = new_data; _capacity = new_capacity; } } - void push_back(T const& value) + void push_back(const_reference value) { if (_size == _capacity) { reserve(_capacity == 0 ? 2 : _capacity * 2); } _data[_size++] = value; @@ -105,36 +118,36 @@ class host_uvector { void clear() { _size = 0; } - [[nodiscard]] std::size_t size() const { return _size; } + [[nodiscard]] size_type size() const { return _size; } [[nodiscard]] std::int64_t ssize() const { return _size; } [[nodiscard]] bool is_empty() const { return _size == 0; } - [[nodiscard]] std::size_t capacity() const { return _capacity; } + [[nodiscard]] size_type capacity() const { return _capacity; } - [[nodiscard]] T& operator[](std::size_t idx) { return _data[idx]; } - [[nodiscard]] T const& operator[](std::size_t idx) const { return _data[idx]; } + [[nodiscard]] reference operator[](size_type idx) { return _data[idx]; } + [[nodiscard]] const_reference operator[](size_type idx) const { return _data[idx]; } - [[nodiscard]] T* data() { return _data; } - [[nodiscard]] T const* data() const { return _data; } + [[nodiscard]] pointer data() { return _data; } + [[nodiscard]] const_pointer data() const { return _data; } - [[nodiscard]] T& front() { return _data[0]; } - [[nodiscard]] T const& front() const { return _data[0]; } + [[nodiscard]] reference front() { return _data[0]; } + [[nodiscard]] const_reference front() const { return _data[0]; } - [[nodiscard]] T& back() { return _data[_size - 1]; } - [[nodiscard]] T const& back() const { return _data[_size - 1]; } + [[nodiscard]] reference back() { return _data[_size - 1]; } + [[nodiscard]] const_reference back() const { return _data[_size - 1]; } - [[nodiscard]] T* begin() { return _data; } - [[nodiscard]] T const* begin() const { return _data; } + [[nodiscard]] iterator begin() { return _data; } + [[nodiscard]] const_iterator begin() const { return _data; } - [[nodiscard]] T* end() { return _data + _size; } - [[nodiscard]] T const* end() const { return _data + _size; } + [[nodiscard]] iterator end() { return _data + _size; } + [[nodiscard]] const_iterator end() const { return _data + _size; } [[nodiscard]] rmm::host_async_resource_ref memory_resource() const { return _mr; } [[nodiscard]] rmm::cuda_stream_view stream() const { return _stream; } private: - T* _data{nullptr}; - std::size_t _size; - std::size_t _capacity; + 
pointer _data{nullptr}; + size_type _size; + size_type _capacity; rmm::host_async_resource_ref _mr; rmm::cuda_stream_view _stream; }; From 21edb534a15c836963c116f3c9ca360cadb1844c Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 7 Jun 2024 13:24:37 -0700 Subject: [PATCH 22/31] refactor host_ticket --- .../io/text/data_chunk_source_factories.cpp | 56 +++++++------------ 1 file changed, 19 insertions(+), 37 deletions(-) diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 2c4160e48c5..39e955232e3 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "cudf/utilities/default_stream.hpp" #include "io/text/device_data_chunks.hpp" #include @@ -32,8 +33,15 @@ namespace cudf::io::text { namespace { struct host_ticket { - cudaEvent_t event; - std::unique_ptr> buffer; + cudaEvent_t event{}; // tracks the completion of the last device-to-host copy. + cudf::detail::host_uvector buffer; + + host_ticket() : buffer{cudf::detail::make_pinned_vector_sync(0, cudf::get_default_stream())} + { + cudaEventCreate(&event); + } + + ~host_ticket() { cudaEventDestroy(event); } }; /** @@ -44,20 +52,7 @@ class datasource_chunk_reader : public data_chunk_reader { constexpr static int num_tickets = 2; public: - datasource_chunk_reader(datasource* source) : _source(source) - { - // create an event to track the completion of the last device-to-host copy. - for (auto& ticket : _tickets) { - CUDF_CUDA_TRY(cudaEventCreate(&(ticket.event))); - } - } - - ~datasource_chunk_reader() override - { - for (auto& ticket : _tickets) { - CUDF_CUDA_TRY(cudaEventDestroy(ticket.event)); - } - } + datasource_chunk_reader(datasource* source) : _source(source) {} void skip_bytes(std::size_t size) override { @@ -85,16 +80,15 @@ class datasource_chunk_reader : public data_chunk_reader { CUDF_CUDA_TRY(cudaEventSynchronize(h_ticket.event)); // resize the host buffer as necessary to contain the requested number of bytes - if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { - h_ticket.buffer = std::make_unique>( - cudf::detail::make_pinned_vector_sync(read_size, stream)); + if (h_ticket.buffer.size() < read_size) { + h_ticket.buffer = cudf::detail::make_pinned_vector_sync(read_size, stream); } - _source->host_read(_offset, read_size, reinterpret_cast(h_ticket.buffer->data())); + _source->host_read(_offset, read_size, reinterpret_cast(h_ticket.buffer.data())); // copy the host-pinned data on to device CUDF_CUDA_TRY(cudaMemcpyAsync( - chunk.data(), h_ticket.buffer->data(), read_size, cudaMemcpyDefault, stream.value())); + chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyDefault, stream.value())); // record the host-to-device copy. CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); @@ -124,17 +118,6 @@ class istream_data_chunk_reader : public data_chunk_reader { istream_data_chunk_reader(std::unique_ptr datastream) : _datastream(std::move(datastream)) { - // create an event to track the completion of the last device-to-host copy. 
- for (auto& ticket : _tickets) { - CUDF_CUDA_TRY(cudaEventCreate(&(ticket.event))); - } - } - - ~istream_data_chunk_reader() override - { - for (auto& ticket : _tickets) { - CUDF_CUDA_TRY(cudaEventDestroy(ticket.event)); - } } void skip_bytes(std::size_t size) override { _datastream->ignore(size); }; @@ -152,13 +135,12 @@ class istream_data_chunk_reader : public data_chunk_reader { CUDF_CUDA_TRY(cudaEventSynchronize(h_ticket.event)); // resize the host buffer as necessary to contain the requested number of bytes - if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { - h_ticket.buffer = std::make_unique>( - cudf::detail::make_pinned_vector_sync(read_size, stream)); + if (h_ticket.buffer.size() < read_size) { + h_ticket.buffer = cudf::detail::make_pinned_vector_sync(read_size, stream); } // read data from the host istream in to the pinned host memory buffer - _datastream->read(h_ticket.buffer->data(), read_size); + _datastream->read(h_ticket.buffer.data(), read_size); // adjust the read size to reflect how many bytes were actually read from the data stream read_size = _datastream->gcount(); @@ -168,7 +150,7 @@ class istream_data_chunk_reader : public data_chunk_reader { // copy the host-pinned data on to device CUDF_CUDA_TRY(cudaMemcpyAsync( - chunk.data(), h_ticket.buffer->data(), read_size, cudaMemcpyDefault, stream.value())); + chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyDefault, stream.value())); // record the host-to-device copy. CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); From 3814797d5b2d3478901e14f9ecbb733d2168a06a Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 7 Jun 2024 13:33:50 -0700 Subject: [PATCH 23/31] style --- cpp/include/cudf/utilities/span.hpp | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 2f622612209..6deef974c0e 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -204,28 +204,23 @@ class span_base { // ===== host_span ================================================================================= template -struct is_host_span_supported_container : std::false_type { -}; +struct is_host_span_supported_container : std::false_type {}; template struct is_host_span_supported_container< // - std::vector> : std::true_type { -}; + std::vector> : std::true_type {}; template struct is_host_span_supported_container< // - thrust::host_vector> : std::true_type { -}; + thrust::host_vector> : std::true_type {}; template struct is_host_span_supported_container< // - cudf::detail::host_uvector> : std::true_type { -}; + cudf::detail::host_uvector> : std::true_type {}; template struct is_host_span_supported_container< // - std::basic_string, Alloc>> : std::true_type { -}; + std::basic_string, Alloc>> : std::true_type {}; /** * @brief C++20 std::span with reduced feature set. 
@@ -280,23 +275,19 @@ struct host_span : public cudf::detail::span_base -struct is_device_span_supported_container : std::false_type { -}; +struct is_device_span_supported_container : std::false_type {}; template struct is_device_span_supported_container< // - thrust::device_vector> : std::true_type { -}; + thrust::device_vector> : std::true_type {}; template struct is_device_span_supported_container< // - rmm::device_vector> : std::true_type { -}; + rmm::device_vector> : std::true_type {}; template struct is_device_span_supported_container< // - rmm::device_uvector> : std::true_type { -}; + rmm::device_uvector> : std::true_type {}; /** * @brief Device version of C++20 std::span with reduced feature set. From f7999aae606269e187de88279f96d5034ad48753 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 16:58:00 -0700 Subject: [PATCH 24/31] Revert "type aliases in host_uvector" This reverts commit b566babb87696cf54656605ab76e9e25b5c42bed. --- .../cudf/detail/utilities/host_uvector.hpp | 69 ++++++++----------- 1 file changed, 28 insertions(+), 41 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/host_uvector.hpp b/cpp/include/cudf/detail/utilities/host_uvector.hpp index c8166217a73..39bde04e985 100644 --- a/cpp/include/cudf/detail/utilities/host_uvector.hpp +++ b/cpp/include/cudf/detail/utilities/host_uvector.hpp @@ -27,21 +27,10 @@ namespace cudf::detail { template class host_uvector { public: - using value_type = T; - using size_type = std::size_t; - using reference = value_type&; - using const_reference = value_type const&; - using pointer = value_type*; - using const_pointer = value_type const*; - using iterator = pointer; - using const_iterator = const_pointer; - - host_uvector(size_type size, rmm::host_async_resource_ref mr, rmm::cuda_stream_view stream) + host_uvector(std::size_t size, rmm::host_async_resource_ref mr, rmm::cuda_stream_view stream) : _size{size}, _capacity{size}, _mr{mr}, _stream{stream} { - if (_size != 0) { - _data = static_cast(mr.allocate_async(_size * sizeof(value_type), _stream)); - } + if (_size != 0) { _data = static_cast(mr.allocate_async(_size * sizeof(T), _stream)); } } host_uvector(host_uvector const&) = delete; @@ -61,7 +50,7 @@ class host_uvector { host_uvector& operator=(host_uvector&& other) { if (this != &other) { - if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); } + if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } _data = other._data; _size = other._size; _capacity = other._capacity; @@ -76,18 +65,17 @@ class host_uvector { ~host_uvector() { - if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); } + if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } } - void resize(size_type new_size) + void resize(std::size_t new_size) { if (new_size > _capacity) { - auto new_data = - static_cast(_mr.allocate_async(new_size * sizeof(value_type), _stream)); + auto new_data = static_cast(_mr.allocate_async(new_size * sizeof(T), _stream)); _stream.synchronize(); if (_data != nullptr) { std::copy(_data, _data + _size, new_data); - _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); + _mr.deallocate_async(_data, _size * sizeof(T), _stream); } _data = new_data; _capacity = new_size; @@ -95,22 +83,21 @@ class host_uvector { _size = new_size; } - void reserve(size_type new_capacity) + void reserve(std::size_t new_capacity) { if (new_capacity > _capacity) { - auto new_data = - 
static_cast(_mr.allocate_async(new_capacity * sizeof(value_type), _stream)); + auto new_data = static_cast(_mr.allocate_async(new_capacity * sizeof(T), _stream)); _stream.synchronize(); if (_data != nullptr) { std::copy(_data, _data + _size, new_data); - _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); + _mr.deallocate_async(_data, _size * sizeof(T), _stream); } _data = new_data; _capacity = new_capacity; } } - void push_back(const_reference value) + void push_back(T const& value) { if (_size == _capacity) { reserve(_capacity == 0 ? 2 : _capacity * 2); } _data[_size++] = value; @@ -118,36 +105,36 @@ class host_uvector { void clear() { _size = 0; } - [[nodiscard]] size_type size() const { return _size; } + [[nodiscard]] std::size_t size() const { return _size; } [[nodiscard]] std::int64_t ssize() const { return _size; } [[nodiscard]] bool is_empty() const { return _size == 0; } - [[nodiscard]] size_type capacity() const { return _capacity; } + [[nodiscard]] std::size_t capacity() const { return _capacity; } - [[nodiscard]] reference operator[](size_type idx) { return _data[idx]; } - [[nodiscard]] const_reference operator[](size_type idx) const { return _data[idx]; } + [[nodiscard]] T& operator[](std::size_t idx) { return _data[idx]; } + [[nodiscard]] T const& operator[](std::size_t idx) const { return _data[idx]; } - [[nodiscard]] pointer data() { return _data; } - [[nodiscard]] const_pointer data() const { return _data; } + [[nodiscard]] T* data() { return _data; } + [[nodiscard]] T const* data() const { return _data; } - [[nodiscard]] reference front() { return _data[0]; } - [[nodiscard]] const_reference front() const { return _data[0]; } + [[nodiscard]] T& front() { return _data[0]; } + [[nodiscard]] T const& front() const { return _data[0]; } - [[nodiscard]] reference back() { return _data[_size - 1]; } - [[nodiscard]] const_reference back() const { return _data[_size - 1]; } + [[nodiscard]] T& back() { return _data[_size - 1]; } + [[nodiscard]] T const& back() const { return _data[_size - 1]; } - [[nodiscard]] iterator begin() { return _data; } - [[nodiscard]] const_iterator begin() const { return _data; } + [[nodiscard]] T* begin() { return _data; } + [[nodiscard]] T const* begin() const { return _data; } - [[nodiscard]] iterator end() { return _data + _size; } - [[nodiscard]] const_iterator end() const { return _data + _size; } + [[nodiscard]] T* end() { return _data + _size; } + [[nodiscard]] T const* end() const { return _data + _size; } [[nodiscard]] rmm::host_async_resource_ref memory_resource() const { return _mr; } [[nodiscard]] rmm::cuda_stream_view stream() const { return _stream; } private: - pointer _data{nullptr}; - size_type _size; - size_type _capacity; + T* _data{nullptr}; + std::size_t _size; + std::size_t _capacity; rmm::host_async_resource_ref _mr; rmm::cuda_stream_view _stream; }; From c9a82d010a997d4c1f4afad94b36709e859d98fe Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 16:58:14 -0700 Subject: [PATCH 25/31] Revert "docs; prefixes" This reverts commit 164fce20ad07632b5a9899668d9da7d23ced6b97. 
--- cpp/src/utilities/pinned_memory.cpp | 43 +++++++++++++---------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index 85d4b7e2283..9cebf980d00 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -28,39 +28,34 @@ namespace cudf { namespace { - -// Asynchronous memory resource that allocates a fixed-size pool of pinned memory and falls back to -// additional pinned allocations if the pool is exhausted. class fixed_pinned_pool_memory_resource { using upstream_mr = rmm::mr::pinned_host_memory_resource; using host_pooled_mr = rmm::mr::pool_memory_resource; private: - upstream_mr _upstream_mr{}; - size_t _pool_size{0}; + upstream_mr upstream_mr_{}; + size_t pool_size_{0}; // Raw pointer to avoid a segfault when the pool is destroyed on exit host_pooled_mr* pool_{nullptr}; - // The beginning and end of the pool memory range; pool is never reallocated so these are constant - // and can be used to determine if a pointer is within the pool - void* _pool_begin{nullptr}; - void* _pool_end{nullptr}; - cuda::stream_ref _stream{cudf::detail::global_cuda_stream_pool().get_stream().value()}; + void* pool_begin_{nullptr}; + void* pool_end_{nullptr}; + cuda::stream_ref stream_{cudf::detail::global_cuda_stream_pool().get_stream().value()}; public: fixed_pinned_pool_memory_resource(size_t size) - : _pool_size{size}, pool_{new host_pooled_mr(_upstream_mr, size, size)} + : pool_size_{size}, pool_{new host_pooled_mr(upstream_mr_, size, size)} { - if (_pool_size == 0) { return; } + if (pool_size_ == 0) { return; } // Allocate full size from the pinned pool to figure out the beginning and end address - _pool_begin = pool_->allocate_async(_pool_size, _stream); - _pool_end = static_cast(static_cast(_pool_begin) + _pool_size); - pool_->deallocate_async(_pool_begin, _pool_size, _stream); + pool_begin_ = pool_->allocate_async(pool_size_, stream_); + pool_end_ = static_cast(static_cast(pool_begin_) + pool_size_); + pool_->deallocate_async(pool_begin_, pool_size_, stream_); } void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) { - if (bytes <= _pool_size) { + if (bytes <= pool_size_) { try { return pool_->allocate_async(bytes, alignment, stream); } catch (...) 
{ @@ -68,7 +63,7 @@ class fixed_pinned_pool_memory_resource { } } - return _upstream_mr.allocate_async(bytes, alignment, stream); + return upstream_mr_.allocate_async(bytes, alignment, stream); } void* allocate_async(std::size_t bytes, cuda::stream_ref stream) @@ -78,8 +73,8 @@ class fixed_pinned_pool_memory_resource { void* allocate(std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) { - auto const result = allocate_async(bytes, alignment, _stream); - _stream.wait(); + auto const result = allocate_async(bytes, alignment, stream_); + stream_.wait(); return result; } @@ -88,10 +83,10 @@ class fixed_pinned_pool_memory_resource { std::size_t alignment, cuda::stream_ref stream) noexcept { - if (bytes <= _pool_size && ptr >= _pool_begin && ptr < _pool_end) { + if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr < pool_end_) { pool_->deallocate_async(ptr, bytes, alignment, stream); } else { - _upstream_mr.deallocate_async(ptr, bytes, alignment, stream); + upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); } } @@ -104,13 +99,13 @@ class fixed_pinned_pool_memory_resource { std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept { - deallocate_async(ptr, bytes, alignment, _stream); - _stream.wait(); + deallocate_async(ptr, bytes, alignment, stream_); + stream_.wait(); } bool operator==(fixed_pinned_pool_memory_resource const& other) const { - return pool_ == other.pool_ and _stream == other._stream; + return pool_ == other.pool_ and stream_ == other.stream_; } bool operator!=(fixed_pinned_pool_memory_resource const& other) const From 930efef8fcec62a4ac87a1f8faebab9783ccabd4 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 16:58:24 -0700 Subject: [PATCH 26/31] Revert "style" This reverts commit 075deca7c87b70b62f30a5b8a266da39a3e852cb. --- cpp/include/cudf/utilities/span.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 6deef974c0e..873d3e56acb 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 046694978dbe65ea515ad46b079ccbdcd9bc1206 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 16:59:50 -0700 Subject: [PATCH 27/31] Revert "host_uvector" This reverts commit 24c15498b9ad53ec452a99b94fb767b90f4551a0. --- cpp/benchmarks/io/cuio_common.cpp | 9 +- cpp/benchmarks/io/cuio_common.hpp | 4 +- .../cudf/detail/utilities/host_uvector.hpp | 142 -------------- .../cudf/detail/utilities/host_vector.hpp | 183 ++++++++++++++++++ .../detail/utilities/vector_factories.hpp | 12 +- cpp/include/cudf/utilities/span.hpp | 6 - cpp/src/io/text/bgzip_data_chunk_source.cu | 10 +- .../io/text/data_chunk_source_factories.cpp | 4 +- cpp/src/io/utilities/hostdevice_vector.hpp | 4 +- 9 files changed, 204 insertions(+), 170 deletions(-) delete mode 100644 cpp/include/cudf/detail/utilities/host_uvector.hpp create mode 100644 cpp/include/cudf/detail/utilities/host_vector.hpp diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 09d7d8a9db6..45dc812e247 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -14,14 +14,13 @@ * limitations under the License. 
*/ +#include +#include + #include #include #include -#include - -#include -#include #include @@ -53,7 +52,7 @@ std::string random_file_in_dir(std::string const& dir_path) cuio_source_sink_pair::cuio_source_sink_pair(io_type type) : type{type}, - pinned_buffer(0, pinned_memory_resource(), cudf::get_default_stream()), + pinned_buffer({pinned_memory_resource(), cudf::get_default_stream()}), d_buffer{0, cudf::get_default_stream()}, file_name{random_file_in_dir(tmpdir.path())}, void_sink{cudf::io::data_sink::create()} diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index 020fd7e00c1..64d6021cf50 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -18,7 +18,7 @@ #include -#include +#include #include #include @@ -79,7 +79,7 @@ class cuio_source_sink_pair { io_type const type; std::vector h_buffer; - cudf::detail::host_uvector pinned_buffer; + cudf::detail::host_vector pinned_buffer; rmm::device_uvector d_buffer; std::string const file_name; std::unique_ptr void_sink; diff --git a/cpp/include/cudf/detail/utilities/host_uvector.hpp b/cpp/include/cudf/detail/utilities/host_uvector.hpp deleted file mode 100644 index 39bde04e985..00000000000 --- a/cpp/include/cudf/detail/utilities/host_uvector.hpp +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright 2024 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -#include -#include - -namespace cudf::detail { - -template -class host_uvector { - public: - host_uvector(std::size_t size, rmm::host_async_resource_ref mr, rmm::cuda_stream_view stream) - : _size{size}, _capacity{size}, _mr{mr}, _stream{stream} - { - if (_size != 0) { _data = static_cast(mr.allocate_async(_size * sizeof(T), _stream)); } - } - - host_uvector(host_uvector const&) = delete; - host_uvector(host_uvector&& other) - : _data{other._data}, - _size{other._size}, - _capacity{other._capacity}, - _mr{other._mr}, - _stream{other._stream} - { - other._data = nullptr; - other._size = 0; - other._capacity = 0; - } - - host_uvector& operator=(host_uvector const&) = delete; - host_uvector& operator=(host_uvector&& other) - { - if (this != &other) { - if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } - _data = other._data; - _size = other._size; - _capacity = other._capacity; - _mr = other._mr; - _stream = other._stream; - other._data = nullptr; - other._size = 0; - other._capacity = 0; - } - return *this; - } - - ~host_uvector() - { - if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } - } - - void resize(std::size_t new_size) - { - if (new_size > _capacity) { - auto new_data = static_cast(_mr.allocate_async(new_size * sizeof(T), _stream)); - _stream.synchronize(); - if (_data != nullptr) { - std::copy(_data, _data + _size, new_data); - _mr.deallocate_async(_data, _size * sizeof(T), _stream); - } - _data = new_data; - _capacity = new_size; - } - _size = new_size; - } - - void reserve(std::size_t new_capacity) - { - if (new_capacity > _capacity) { - auto new_data = static_cast(_mr.allocate_async(new_capacity * sizeof(T), _stream)); - _stream.synchronize(); - if (_data != nullptr) { - std::copy(_data, _data + _size, new_data); - _mr.deallocate_async(_data, _size * sizeof(T), _stream); - } - _data = new_data; - _capacity = new_capacity; - } - } - - void push_back(T const& value) - { - if (_size == _capacity) { reserve(_capacity == 0 ? 
2 : _capacity * 2); } - _data[_size++] = value; - } - - void clear() { _size = 0; } - - [[nodiscard]] std::size_t size() const { return _size; } - [[nodiscard]] std::int64_t ssize() const { return _size; } - [[nodiscard]] bool is_empty() const { return _size == 0; } - [[nodiscard]] std::size_t capacity() const { return _capacity; } - - [[nodiscard]] T& operator[](std::size_t idx) { return _data[idx]; } - [[nodiscard]] T const& operator[](std::size_t idx) const { return _data[idx]; } - - [[nodiscard]] T* data() { return _data; } - [[nodiscard]] T const* data() const { return _data; } - - [[nodiscard]] T& front() { return _data[0]; } - [[nodiscard]] T const& front() const { return _data[0]; } - - [[nodiscard]] T& back() { return _data[_size - 1]; } - [[nodiscard]] T const& back() const { return _data[_size - 1]; } - - [[nodiscard]] T* begin() { return _data; } - [[nodiscard]] T const* begin() const { return _data; } - - [[nodiscard]] T* end() { return _data + _size; } - [[nodiscard]] T const* end() const { return _data + _size; } - - [[nodiscard]] rmm::host_async_resource_ref memory_resource() const { return _mr; } - [[nodiscard]] rmm::cuda_stream_view stream() const { return _stream; } - - private: - T* _data{nullptr}; - std::size_t _size; - std::size_t _capacity; - rmm::host_async_resource_ref _mr; - rmm::cuda_stream_view _stream; -}; - -} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp new file mode 100644 index 00000000000..e62c8017f8b --- /dev/null +++ b/cpp/include/cudf/detail/utilities/host_vector.hpp @@ -0,0 +1,183 @@ +/* + * Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +#include + +#include +#include +#include // for bad_alloc + +namespace cudf::detail { + +/*! \p rmm_host_allocator is a CUDA-specific host memory allocator + * that employs \c a `rmm::host_async_resource_ref` for allocation. + * + * \see https://en.cppreference.com/w/cpp/memory/allocator + */ +template +class rmm_host_allocator; + +/*! \p rmm_host_allocator is a CUDA-specific host memory allocator + * that employs \c an `cudf::host_async_resource_ref` for allocation. + * + * \see https://en.cppreference.com/w/cpp/memory/allocator + */ +template <> +class rmm_host_allocator { + public: + using value_type = void; ///< The type of the elements in the allocator + using pointer = void*; ///< The type returned by address() / allocate() + using const_pointer = void const*; ///< The type returned by address() + using size_type = std::size_t; ///< The type used for the size of the allocation + using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers + + /** + * @brief converts a `rmm_host_allocator` to `rmm_host_allocator` + */ + template + struct rebind { + using other = rmm_host_allocator; ///< The rebound type + }; +}; + +/*! 
\p rmm_host_allocator is a CUDA-specific host memory allocator + * that employs \c `rmm::host_async_resource_ref` for allocation. + * + * The \p rmm_host_allocator provides an interface for host memory allocation through the user + * provided \c `rmm::host_async_resource_ref`. The \p rmm_host_allocator does not take ownership of + * this reference and therefore it is the user's responsibility to ensure its lifetime for the + * duration of the lifetime of the \p rmm_host_allocator. + * + * \see https://en.cppreference.com/w/cpp/memory/allocator + */ +template +class rmm_host_allocator { + public: + using value_type = T; ///< The type of the elements in the allocator + using pointer = T*; ///< The type returned by address() / allocate() + using const_pointer = T const*; ///< The type returned by address() + using reference = T&; ///< The parameter type for address() + using const_reference = T const&; ///< The parameter type for address() + using size_type = std::size_t; ///< The type used for the size of the allocation + using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers + + typedef cuda::std::true_type propagate_on_container_move_assignment; + + /** + * @brief converts a `rmm_host_allocator` to `rmm_host_allocator` + */ + template + struct rebind { + using other = rmm_host_allocator; ///< The rebound type + }; + + /** + * @brief Cannot declare an empty host allocator. + */ + rmm_host_allocator() = delete; + + /** + * @brief Construct from a `cudf::host_async_resource_ref` + */ + rmm_host_allocator(rmm::host_async_resource_ref _mr, rmm::cuda_stream_view _stream) + : mr(_mr), stream(_stream) + { + } + + /** + * @brief This method allocates storage for objects in host memory. + * + * @param cnt The number of objects to allocate. + * @return a \c pointer to the newly allocated objects. + * @note This method does not invoke \p value_type's constructor. + * It is the responsibility of the caller to initialize the + * objects at the returned \c pointer. + */ + inline pointer allocate(size_type cnt) + { + if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if + return static_cast( + mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream)); + } + + /** + * @brief This method deallocates host memory previously allocated + * with this \c rmm_host_allocator. + * + * @param p A \c pointer to the previously allocated memory. + * @note The second parameter is the number of objects previously allocated. + * @note This method does not invoke \p value_type's destructor. + * It is the responsibility of the caller to destroy + * the objects stored at \p p. + */ + inline void deallocate(pointer p, size_type cnt) + { + mr.deallocate_async(p, cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); + } + + /** + * @brief This method returns the maximum size of the \c cnt parameter + * accepted by the \p allocate() method. + * + * @return The maximum number of objects that may be allocated + * by a single call to \p allocate(). + */ + constexpr inline size_type max_size() const + { + return (std::numeric_limits::max)() / sizeof(T); + } + + /** + * @brief This method tests this \p rmm_host_allocator for equality to + * another. + * + * @param x The other \p rmm_host_allocator of interest. + * @return This method always returns \c true. 
+ */ + inline bool operator==(rmm_host_allocator const& x) const + { + return x.mr == mr && x.stream == stream; + } + + /** + * @brief This method tests this \p rmm_host_allocator for inequality + * to another. + * + * @param x The other \p rmm_host_allocator of interest. + * @return This method always returns \c false. + */ + inline bool operator!=(rmm_host_allocator const& x) const { return !operator==(x); } + + private: + rmm::host_async_resource_ref mr; + rmm::cuda_stream_view stream; +}; + +/** + * @brief A vector class with rmm host memory allocator + */ +template +using host_vector = thrust::host_vector>; + +} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index f67b671c610..06dfcbfc5e5 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -21,7 +21,7 @@ * @file vector_factories.hpp */ -#include +#include #include #include #include @@ -442,7 +442,7 @@ thrust::host_vector make_host_vector_sync( } /** - * @brief Asynchronously construct a pinned `cudf::detail::host_uvector` of the given size + * @brief Asynchronously construct a pinned `cudf::detail::host_vector` of the given size * * @note This function does not synchronize `stream`. * @@ -452,13 +452,13 @@ thrust::host_vector make_host_vector_sync( * @return A host_vector of the given size */ template -host_uvector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) +host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) { - return host_uvector(size, cudf::get_pinned_memory_resource(), stream); + return host_vector(size, {cudf::get_pinned_memory_resource(), stream}); } /** - * @brief Synchronously construct a pinned `cudf::detail::host_uvector` of the given size + * @brief Synchronously construct a pinned `cudf::detail::host_vector` of the given size * * @note This function synchronizes `stream`. 
* @@ -468,7 +468,7 @@ host_uvector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stre * @return A host_vector of the given size */ template -host_uvector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) +host_vector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) { auto result = make_pinned_vector_async(size, stream); stream.synchronize(); diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 873d3e56acb..47e92d61a9f 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -16,8 +16,6 @@ #pragma once -#include - #include #include #include @@ -214,10 +212,6 @@ template struct is_host_span_supported_container< // thrust::host_vector> : std::true_type {}; -template -struct is_host_span_supported_container< // - cudf::detail::host_uvector> : std::true_type {}; - template struct is_host_span_supported_container< // std::basic_string, Alloc>> : std::true_type {}; diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index b7644a6fb9f..0e3ce779089 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -19,7 +19,7 @@ #include "io/utilities/config_utils.hpp" #include -#include +#include #include #include #include @@ -67,7 +67,7 @@ struct bgzip_nvcomp_transform_functor { class bgzip_data_chunk_reader : public data_chunk_reader { private: template - static void copy_to_device(cudf::detail::host_uvector const& host, + static void copy_to_device(cudf::detail::host_vector const& host, rmm::device_uvector& device, rmm::cuda_stream_view stream) { @@ -85,9 +85,9 @@ class bgzip_data_chunk_reader : public data_chunk_reader { 1 << 16; // 64k offset allocation, resized on demand cudaEvent_t event; - cudf::detail::host_uvector h_compressed_blocks; - cudf::detail::host_uvector h_compressed_offsets; - cudf::detail::host_uvector h_decompressed_offsets; + cudf::detail::host_vector h_compressed_blocks; + cudf::detail::host_vector h_compressed_offsets; + cudf::detail::host_vector h_decompressed_offsets; rmm::device_uvector d_compressed_blocks; rmm::device_uvector d_decompressed_blocks; rmm::device_uvector d_compressed_offsets; diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 39e955232e3..596ca3458c8 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -18,7 +18,7 @@ #include "io/text/device_data_chunks.hpp" #include -#include +#include #include #include @@ -34,7 +34,7 @@ namespace { struct host_ticket { cudaEvent_t event{}; // tracks the completion of the last device-to-host copy. 
- cudf::detail::host_uvector buffer; + cudf::detail::host_vector buffer; host_ticket() : buffer{cudf::detail::make_pinned_vector_sync(0, cudf::get_default_stream())} { diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index ae2ab03ded3..1ae27a2f4ae 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -18,7 +18,7 @@ #include "hostdevice_span.hpp" -#include +#include #include #include #include @@ -172,7 +172,7 @@ class hostdevice_vector { } private: - cudf::detail::host_uvector h_data; + cudf::detail::host_vector h_data; rmm::device_uvector d_data; }; From f31221901aab0712b7e4e416c7454d4ef03a7019 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 17:38:31 -0700 Subject: [PATCH 28/31] make do without host_uvector --- cpp/include/cudf/detail/utilities/host_vector.hpp | 8 ++++++-- cpp/include/cudf/detail/utilities/vector_factories.hpp | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp index e62c8017f8b..756fdab177a 100644 --- a/cpp/include/cudf/detail/utilities/host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/host_vector.hpp @@ -117,8 +117,12 @@ class rmm_host_allocator { inline pointer allocate(size_type cnt) { if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if - return static_cast( - mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream)); + auto const result = + mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); + // synchronize to ensure the memory is allocated before thrust::host_vector initialization + // TODO: replace thrust::host_vector with a type that does not require synchronization + stream.synchronize(); + return static_cast(result); } /** diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 06dfcbfc5e5..20cb55bb1c7 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -444,7 +444,7 @@ thrust::host_vector make_host_vector_sync( /** * @brief Asynchronously construct a pinned `cudf::detail::host_vector` of the given size * - * @note This function does not synchronize `stream`. + * @note This function may not synchronize `stream`. 
* * @tparam T The type of the vector data * @param size The number of elements in the created vector From 7cfee0ab2d3bfc3b261edce2340555a69840ebcc Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 17:41:16 -0700 Subject: [PATCH 29/31] missed change --- cpp/include/cudf/detail/utilities/host_vector.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/utilities/host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp index 756fdab177a..6a115177ab5 100644 --- a/cpp/include/cudf/detail/utilities/host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/host_vector.hpp @@ -119,7 +119,7 @@ class rmm_host_allocator { if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if auto const result = mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - // synchronize to ensure the memory is allocated before thrust::host_vector initialization + // Synchronize to ensure the memory is allocated before thrust::host_vector initialization // TODO: replace thrust::host_vector with a type that does not require synchronization stream.synchronize(); return static_cast(result); From fe4d668fb4e6dddf0a019e5443acf2ecc34ff0e8 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 17:54:30 -0700 Subject: [PATCH 30/31] style --- cpp/benchmarks/io/cuio_common.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 45dc812e247..645994f3f0d 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -14,14 +14,14 @@ * limitations under the License. */ -#include -#include - #include #include #include +#include +#include + #include #include From 2d63f5ab3826b3db7e83b99cac46d2aa15312eb6 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 11 Jun 2024 11:53:36 -0700 Subject: [PATCH 31/31] static assert message --- cpp/src/utilities/pinned_memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index 9cebf980d00..5d2e3ac332a 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -127,7 +127,7 @@ class fixed_pinned_pool_memory_resource { static_assert(cuda::mr::resource_with, - ""); + "Pinned pool mr must be accessible from both host and device"); CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( std::optional config_size)