From eb39019d55615160766af37ff6b36d51c23522a6 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 30 May 2024 16:24:30 -0700 Subject: [PATCH 01/31] remove pinned_host_vector --- cpp/benchmarks/io/text/multibyte_split.cpp | 8 +- .../detail/utilities/pinned_host_vector.hpp | 216 ------------------ .../cudf/detail/utilities/rmm_host_vector.hpp | 8 +- .../detail/utilities/vector_factories.hpp | 38 ++- cpp/src/io/csv/reader_impl.cu | 1 + cpp/src/io/orc/reader_impl_chunking.cu | 1 + cpp/src/io/orc/writer_impl.cu | 5 +- cpp/src/io/parquet/reader_impl_helpers.cpp | 2 + cpp/src/io/parquet/writer_impl.cu | 3 +- cpp/src/io/text/bgzip_data_chunk_source.cu | 16 +- .../io/text/data_chunk_source_factories.cpp | 23 +- cpp/src/io/utilities/hostdevice_vector.hpp | 4 +- 12 files changed, 77 insertions(+), 248 deletions(-) delete mode 100644 cpp/include/cudf/detail/utilities/pinned_host_vector.hpp diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index b5d855d8881..172182c3607 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -132,9 +131,10 @@ static void bench_multibyte_split(nvbench::state& state, auto const delim_factor = static_cast(delim_percent) / 100; std::unique_ptr datasource; - auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim); - auto host_input = std::vector{}; - auto host_pinned_input = cudf::detail::pinned_host_vector{}; + auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim); + auto host_input = std::vector{}; + auto host_pinned_input = + cudf::detail::make_pinned_vector_async(0, cudf::get_default_stream()); if (source_type != data_chunk_source_type::device && source_type != data_chunk_source_type::host_pinned) { diff --git a/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp b/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp deleted file mode 100644 index c22b6a6ba15..00000000000 --- a/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) 2008-2024, NVIDIA CORPORATION - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -#include -#include -#include // for bad_alloc - -namespace cudf::detail { - -/*! \p pinned_allocator is a CUDA-specific host memory allocator - * that employs \c cudaMallocHost for allocation. - * - * This implementation is ported from the experimental/pinned_allocator - * that Thrust used to provide. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template -class pinned_allocator; - -/*! \p pinned_allocator is a CUDA-specific host memory allocator - * that employs \c cudaMallocHost for allocation. - * - * This implementation is ported from the experimental/pinned_allocator - * that Thrust used to provide. 
- * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template <> -class pinned_allocator { - public: - using value_type = void; ///< The type of the elements in the allocator - using pointer = void*; ///< The type returned by address() / allocate() - using const_pointer = void const*; ///< The type returned by address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers - - /** - * @brief converts a `pinned_allocator` to `pinned_allocator` - */ - template - struct rebind { - using other = pinned_allocator; ///< The rebound type - }; -}; - -/*! \p pinned_allocator is a CUDA-specific host memory allocator - * that employs \c cudaMallocHost for allocation. - * - * This implementation is ported from the experimental/pinned_allocator - * that Thrust used to provide. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template -class pinned_allocator { - public: - using value_type = T; ///< The type of the elements in the allocator - using pointer = T*; ///< The type returned by address() / allocate() - using const_pointer = T const*; ///< The type returned by address() - using reference = T&; ///< The parameter type for address() - using const_reference = T const&; ///< The parameter type for address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers - - /** - * @brief converts a `pinned_allocator` to `pinned_allocator` - */ - template - struct rebind { - using other = pinned_allocator; ///< The rebound type - }; - - /** - * @brief pinned_allocator's null constructor does nothing. - */ - __host__ __device__ inline pinned_allocator() {} - - /** - * @brief pinned_allocator's null destructor does nothing. - */ - __host__ __device__ inline ~pinned_allocator() {} - - /** - * @brief pinned_allocator's copy constructor does nothing. - */ - __host__ __device__ inline pinned_allocator(pinned_allocator const&) {} - - /** - * @brief pinned_allocator's copy constructor does nothing. - * - * This version of pinned_allocator's copy constructor - * is templated on the \c value_type of the pinned_allocator - * to copy from. It is provided merely for convenience; it - * does nothing. - */ - template - __host__ __device__ inline pinned_allocator(pinned_allocator const&) - { - } - - /** - * @brief This method returns the address of a \c reference of - * interest. - * - * @param r The \c reference of interest. - * @return \c r's address. - */ - __host__ __device__ inline pointer address(reference r) { return &r; } - - /** - * @brief This method returns the address of a \c const_reference - * of interest. - * - * @param r The \c const_reference of interest. - * @return \c r's address. - */ - __host__ __device__ inline const_pointer address(const_reference r) { return &r; } - - /** - * @brief This method allocates storage for objects in pinned host - * memory. - * - * @param cnt The number of objects to allocate. - * @return a \c pointer to the newly allocated objects. - * @note The second parameter to this function is meant as a - * hint pointer to a nearby memory location, but is - * not used by this allocator. - * @note This method does not invoke \p value_type's constructor. - * It is the responsibility of the caller to initialize the - * objects at the returned \c pointer. 
- */ - __host__ inline pointer allocate(size_type cnt, const_pointer /*hint*/ = 0) - { - if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if - - pointer result(0); - CUDF_CUDA_TRY(cudaMallocHost(reinterpret_cast(&result), cnt * sizeof(value_type))); - return result; - } - - /** - * @brief This method deallocates pinned host memory previously allocated - * with this \c pinned_allocator. - * - * @param p A \c pointer to the previously allocated memory. - * @note The second parameter is the number of objects previously allocated - * but is ignored by this allocator. - * @note This method does not invoke \p value_type's destructor. - * It is the responsibility of the caller to destroy - * the objects stored at \p p. - */ - __host__ inline void deallocate(pointer p, size_type /*cnt*/) - { - auto dealloc_worked = cudaFreeHost(p); - (void)dealloc_worked; - assert(dealloc_worked == cudaSuccess); - } - - /** - * @brief This method returns the maximum size of the \c cnt parameter - * accepted by the \p allocate() method. - * - * @return The maximum number of objects that may be allocated - * by a single call to \p allocate(). - */ - inline size_type max_size() const { return (std::numeric_limits::max)() / sizeof(T); } - - /** - * @brief This method tests this \p pinned_allocator for equality to - * another. - * - * @param x The other \p pinned_allocator of interest. - * @return This method always returns \c true. - */ - __host__ __device__ inline bool operator==(pinned_allocator const& x) const { return true; } - - /** - * @brief This method tests this \p pinned_allocator for inequality - * to another. - * - * @param x The other \p pinned_allocator of interest. - * @return This method always returns \c false. - */ - __host__ __device__ inline bool operator!=(pinned_allocator const& x) const - { - return !operator==(x); - } -}; - -/** - * @brief A vector class with pinned host memory allocator - */ -template -using pinned_host_vector = thrust::host_vector>; - -} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp b/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp index 6901a19473e..6604020c224 100644 --- a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -32,8 +33,6 @@ namespace cudf::detail { /*! \p rmm_host_allocator is a CUDA-specific host memory allocator * that employs \c a `rmm::host_async_resource_ref` for allocation. * - * This implementation is ported from pinned_host_vector in cudf. - * * \see https://en.cppreference.com/w/cpp/memory/allocator */ template @@ -42,8 +41,6 @@ class rmm_host_allocator; /*! \p rmm_host_allocator is a CUDA-specific host memory allocator * that employs \c an `cudf::host_async_resource_ref` for allocation. * - * This implementation is ported from pinned_host_vector in cudf. - * * \see https://en.cppreference.com/w/cpp/memory/allocator */ template <> @@ -70,8 +67,7 @@ class rmm_host_allocator { * The \p rmm_host_allocator provides an interface for host memory allocation through the user * provided \c `rmm::host_async_resource_ref`. The \p rmm_host_allocator does not take ownership of * this reference and therefore it is the user's responsibility to ensure its lifetime for the - * duration of the lifetime of the \p rmm_host_allocator. This implementation is ported from - * pinned_host_vector in cudf. + * duration of the lifetime of the \p rmm_host_allocator. 
* * \see https://en.cppreference.com/w/cpp/memory/allocator */ diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 293a4096c57..6f2287fc1c8 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -21,6 +21,8 @@ * @file vector_factories.hpp */ +#include +#include #include #include #include @@ -380,7 +382,7 @@ thrust::host_vector make_host_vector_async(device_span v, rmm::cuda_ * @brief Asynchronously construct a `std::vector` containing a copy of data from a device * container * - * @note This function synchronizes `stream`. + * @note This function does not synchronize `stream`. * * @tparam Container The type of the container to copy from * @tparam T The type of the data to copy @@ -439,6 +441,40 @@ thrust::host_vector make_host_vector_sync( return make_host_vector_sync(device_span{c}, stream); } +/** + * @brief Asynchronously construct a `rmm_host_vector` of the given size + * + * @note This function does not synchronize `stream`. + * + * @tparam T The type of the vector data + * @param size The number of elements in the created vector + * @param stream The stream on which to allocate memory + * @return A rmm_host_vector of the given size + */ +template +rmm_host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) +{ + return rmm_host_vector(size, {cudf::io::get_host_memory_resource(), stream}); +} + +/** + * @brief Synchronously construct a `rmm_host_vector` of the given size + * + * @note This function synchronizes `stream`. + * + * @tparam T The type of the vector data + * @param size The number of elements in the created vector + * @param stream The stream on which to allocate memory + * @return A rmm_host_vector of the given size + */ +template +rmm_host_vector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) +{ + auto result = make_pinned_vector_async(size, stream); + stream.synchronize(); + return result; +} + } // namespace detail } // namespace cudf diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 5dee0c17a33..05faded651d 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -27,6 +27,7 @@ #include "io/utilities/parsing_utils.cuh" #include +#include #include #include #include diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 5034aa14a95..43301826003 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -22,6 +22,7 @@ #include #include +#include #include #include diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 344e216cdc8..e9e031a407a 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -2339,7 +2338,7 @@ auto convert_table_to_orc_data(table_view const& input, std::move(streams), std::move(stripes), std::move(stripe_dicts.views), - cudf::detail::pinned_host_vector()}; + cudf::detail::make_pinned_vector_async(0, stream)}; } // Allocate intermediate output stream buffer @@ -2407,7 +2406,7 @@ auto convert_table_to_orc_data(table_view const& input, return max_stream_size; }(); - cudf::detail::pinned_host_vector bounce_buffer(max_out_stream_size); + auto bounce_buffer = cudf::detail::make_pinned_vector_async(max_out_stream_size, stream); auto intermediate_stats = gather_statistic_blobs(stats_freq, 
orc_table, segmentation, stream); diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index eb653c6b9ac..9de8a9e2719 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -23,6 +23,8 @@ #include "ipc/Message_generated.h" #include "ipc/Schema_generated.h" +#include + #include #include diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 1dfced94f5b..6d466748c17 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -2278,7 +2277,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, } auto bounce_buffer = - cudf::detail::pinned_host_vector(all_device_write ? 0 : max_write_size); + cudf::detail::make_pinned_vector_async(all_device_write ? 0 : max_write_size, stream); return std::tuple{std::move(agg_meta), std::move(pages), diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index faa09e586ab..190015686df 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -14,13 +14,14 @@ * limitations under the License. */ +#include "cudf/detail/utilities/vector_factories.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/text/device_data_chunks.hpp" #include "io/utilities/config_utils.hpp" #include #include -#include +#include #include #include #include @@ -66,7 +67,7 @@ struct bgzip_nvcomp_transform_functor { class bgzip_data_chunk_reader : public data_chunk_reader { private: template - static void copy_to_device(cudf::detail::pinned_host_vector const& host, + static void copy_to_device(cudf::detail::rmm_host_vector const& host, rmm::device_uvector& device, rmm::cuda_stream_view stream) { @@ -84,9 +85,9 @@ class bgzip_data_chunk_reader : public data_chunk_reader { 1 << 16; // 64k offset allocation, resized on demand cudaEvent_t event; - cudf::detail::pinned_host_vector h_compressed_blocks; - cudf::detail::pinned_host_vector h_compressed_offsets; - cudf::detail::pinned_host_vector h_decompressed_offsets; + cudf::detail::rmm_host_vector h_compressed_blocks; + cudf::detail::rmm_host_vector h_compressed_offsets; + cudf::detail::rmm_host_vector h_decompressed_offsets; rmm::device_uvector d_compressed_blocks; rmm::device_uvector d_decompressed_blocks; rmm::device_uvector d_compressed_offsets; @@ -103,7 +104,10 @@ class bgzip_data_chunk_reader : public data_chunk_reader { bool is_decompressed{}; decompression_blocks(rmm::cuda_stream_view init_stream) - : d_compressed_blocks(0, init_stream), + : h_compressed_blocks{cudf::detail::make_pinned_vector_async(0, init_stream)}, + h_compressed_offsets{cudf::detail::make_pinned_vector_async(0, init_stream)}, + h_decompressed_offsets{cudf::detail::make_pinned_vector_async(0, init_stream)}, + d_compressed_blocks(0, init_stream), d_decompressed_blocks(0, init_stream), d_compressed_offsets(0, init_stream), d_decompressed_offsets(0, init_stream), diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 9d1d0498ace..8278b2c25cb 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -14,10 +14,11 @@ * limitations under the License. 
*/ +#include "cudf/detail/utilities/vector_factories.hpp" #include "io/text/device_data_chunks.hpp" #include -#include +#include #include #include @@ -32,7 +33,7 @@ namespace { struct host_ticket { cudaEvent_t event; - cudf::detail::pinned_host_vector buffer; + std::unique_ptr> buffer; }; /** @@ -84,13 +85,16 @@ class datasource_chunk_reader : public data_chunk_reader { CUDF_CUDA_TRY(cudaEventSynchronize(h_ticket.event)); // resize the host buffer as necessary to contain the requested number of bytes - if (h_ticket.buffer.size() < read_size) { h_ticket.buffer.resize(read_size); } + if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { + h_ticket.buffer = std::make_unique>( + cudf::detail::make_pinned_vector_sync(read_size, stream)); + } - _source->host_read(_offset, read_size, reinterpret_cast(h_ticket.buffer.data())); + _source->host_read(_offset, read_size, reinterpret_cast(h_ticket.buffer->data())); // copy the host-pinned data on to device CUDF_CUDA_TRY(cudaMemcpyAsync( - chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyDefault, stream.value())); + chunk.data(), h_ticket.buffer->data(), read_size, cudaMemcpyDefault, stream.value())); // record the host-to-device copy. CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); @@ -148,10 +152,13 @@ class istream_data_chunk_reader : public data_chunk_reader { CUDF_CUDA_TRY(cudaEventSynchronize(h_ticket.event)); // resize the host buffer as necessary to contain the requested number of bytes - if (h_ticket.buffer.size() < read_size) { h_ticket.buffer.resize(read_size); } + if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { + h_ticket.buffer = std::make_unique>( + cudf::detail::make_pinned_vector_sync(read_size, stream)); + } // read data from the host istream in to the pinned host memory buffer - _datastream->read(h_ticket.buffer.data(), read_size); + _datastream->read(h_ticket.buffer->data(), read_size); // adjust the read size to reflect how many bytes were actually read from the data stream read_size = _datastream->gcount(); @@ -161,7 +168,7 @@ class istream_data_chunk_reader : public data_chunk_reader { // copy the host-pinned data on to device CUDF_CUDA_TRY(cudaMemcpyAsync( - chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyDefault, stream.value())); + chunk.data(), h_ticket.buffer->data(), read_size, cudaMemcpyDefault, stream.value())); // record the host-to-device copy. 
CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index 0883ac3609f..492084bd5bc 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -16,10 +16,10 @@ #pragma once -#include "config_utils.hpp" #include "hostdevice_span.hpp" #include +#include #include #include #include @@ -53,7 +53,7 @@ class hostdevice_vector { } explicit hostdevice_vector(size_t initial_size, size_t max_size, rmm::cuda_stream_view stream) - : h_data({cudf::io::get_host_memory_resource(), stream}), d_data(max_size, stream) + : h_data{make_pinned_vector_async(0, stream)}, d_data(max_size, stream) { CUDF_EXPECTS(initial_size <= max_size, "initial_size cannot be larger than max_size"); From 24b12451ffea31766d01c9cbe8d4e10bbe3734be Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 30 May 2024 16:54:36 -0700 Subject: [PATCH 02/31] switch to host_device resource ref --- cpp/benchmarks/fixture/nvbench_fixture.hpp | 5 +++-- cpp/include/cudf/io/memory_resource.hpp | 7 +++---- cpp/src/io/utilities/config_utils.cpp | 21 +++++++++++---------- cpp/src/io/utilities/hostdevice_vector.hpp | 1 - 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp index ebcbcb17e98..3a5a9bfd2fa 100644 --- a/cpp/benchmarks/fixture/nvbench_fixture.hpp +++ b/cpp/benchmarks/fixture/nvbench_fixture.hpp @@ -81,14 +81,15 @@ struct nvbench_base_fixture { "\nExpecting: cuda, pool, async, arena, managed, or managed_pool"); } - inline rmm::host_async_resource_ref make_cuio_host_pinned() + inline rmm::host_device_async_resource_ref make_cuio_host_pinned() { static std::shared_ptr mr = std::make_shared(); return *mr; } - inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode) + inline rmm::host_device_async_resource_ref create_cuio_host_memory_resource( + std::string const& mode) { if (mode == "pinned") return make_cuio_host_pinned(); if (mode == "pinned_pool") return cudf::io::get_host_memory_resource(); diff --git a/cpp/include/cudf/io/memory_resource.hpp b/cpp/include/cudf/io/memory_resource.hpp index a36e220ae7b..2af5755f824 100644 --- a/cpp/include/cudf/io/memory_resource.hpp +++ b/cpp/include/cudf/io/memory_resource.hpp @@ -33,7 +33,8 @@ namespace cudf::io { * @param mr The rmm resource to be used for host-side allocations * @return The previous resource that was in use */ -rmm::host_async_resource_ref set_host_memory_resource(rmm::host_async_resource_ref mr); +rmm::host_device_async_resource_ref set_host_memory_resource( + rmm::host_device_async_resource_ref mr); /** * @brief Get the rmm resource being used for host memory allocations by @@ -41,7 +42,7 @@ rmm::host_async_resource_ref set_host_memory_resource(rmm::host_async_resource_r * * @return The rmm resource used for host-side allocations */ -rmm::host_async_resource_ref get_host_memory_resource(); +rmm::host_device_async_resource_ref get_host_memory_resource(); /** * @brief Options to configure the default host memory resource @@ -54,8 +55,6 @@ struct host_mr_options { /** * @brief Configure the size of the default host memory resource. 
* - * @throws cudf::logic_error if called after the default host memory resource has been created - * * @param opts Options to configure the default host memory resource * @return True if this call successfully configured the host memory resource, false if a * a resource was already configured. diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index dad1135e766..25649d17c76 100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -209,7 +209,8 @@ static_assert(cuda::mr::resource_with config_size) +CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( + std::optional config_size) { static fixed_pinned_pool_memory_resource mr = [config_size]() { auto const size = [&config_size]() -> size_t { @@ -233,7 +234,7 @@ CUDF_EXPORT rmm::host_async_resource_ref& make_default_pinned_mr(std::optional const& opts, - bool* did_configure = nullptr) +CUDF_EXPORT rmm::host_device_async_resource_ref& make_host_mr( + std::optional const& opts, bool* did_configure = nullptr) { - static rmm::host_async_resource_ref* mr_ref = nullptr; - bool configured = false; + static rmm::host_device_async_resource_ref* mr_ref = nullptr; + bool configured = false; if (mr_ref == nullptr) { configured = true; mr_ref = &make_default_pinned_mr(opts ? opts->pool_size : std::nullopt); @@ -262,13 +263,13 @@ CUDF_EXPORT rmm::host_async_resource_ref& make_host_mr(std::optional #include -#include #include #include #include From 6c896f6ebf2d6177f62903dd719cebf88da08565 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 30 May 2024 18:07:00 -0700 Subject: [PATCH 03/31] rebrand host memory resource --- cpp/CMakeLists.txt | 1 + cpp/benchmarks/fixture/nvbench_fixture.hpp | 8 +- .../io/parquet/parquet_reader_multithread.cpp | 2 +- .../detail/utilities/vector_factories.hpp | 4 +- cpp/include/cudf/io/memory_resource.hpp | 64 ----- cpp/include/cudf/utilities/pinned_memory.hpp | 58 +++++ cpp/src/io/utilities/config_utils.cpp | 215 +--------------- cpp/src/utilities/pinned_memory.cpp | 230 ++++++++++++++++++ cpp/tests/io/json_test.cpp | 6 +- .../utilities_tests/io_utilities_tests.cpp | 8 +- 10 files changed, 305 insertions(+), 291 deletions(-) delete mode 100644 cpp/include/cudf/io/memory_resource.hpp create mode 100644 cpp/include/cudf/utilities/pinned_memory.hpp create mode 100644 cpp/src/utilities/pinned_memory.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f637db66c2c..8a0f56aec53 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -665,6 +665,7 @@ add_library( src/utilities/default_stream.cpp src/utilities/linked_column.cpp src/utilities/logger.cpp + src/utilities/pinned_memory.cpp src/utilities/stacktrace.cpp src/utilities/stream_pool.cpp src/utilities/traits.cpp diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp index 3a5a9bfd2fa..df1492690bb 100644 --- a/cpp/benchmarks/fixture/nvbench_fixture.hpp +++ b/cpp/benchmarks/fixture/nvbench_fixture.hpp @@ -15,8 +15,8 @@ */ #pragma once -#include #include +#include #include #include @@ -92,7 +92,7 @@ struct nvbench_base_fixture { std::string const& mode) { if (mode == "pinned") return make_cuio_host_pinned(); - if (mode == "pinned_pool") return cudf::io::get_host_memory_resource(); + if (mode == "pinned_pool") return cudf::get_pinned_memory_resource(); CUDF_FAIL("Unknown cuio_host_mem parameter: " + mode + "\nExpecting: pinned or pinned_pool"); } @@ -113,14 +113,14 @@ struct nvbench_base_fixture { 
rmm::mr::set_current_device_resource(mr.get()); std::cout << "RMM memory resource = " << rmm_mode << "\n"; - cudf::io::set_host_memory_resource(create_cuio_host_memory_resource(cuio_host_mode)); + cudf::set_pinned_memory_resource(create_cuio_host_memory_resource(cuio_host_mode)); std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n"; } ~nvbench_base_fixture() { // Ensure the the pool is freed before the CUDA context is destroyed: - cudf::io::set_host_memory_resource(this->make_cuio_host_pinned()); + cudf::set_pinned_memory_resource(this->make_cuio_host_pinned()); } std::shared_ptr mr; diff --git a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp index bd80c4e0e88..f340b4aeb7a 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp @@ -20,9 +20,9 @@ #include #include -#include #include #include +#include #include #include diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 6f2287fc1c8..7343ee25c8f 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -22,9 +22,9 @@ */ #include -#include #include #include +#include #include #include @@ -454,7 +454,7 @@ thrust::host_vector make_host_vector_sync( template rmm_host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) { - return rmm_host_vector(size, {cudf::io::get_host_memory_resource(), stream}); + return rmm_host_vector(size, {cudf::get_pinned_memory_resource(), stream}); } /** diff --git a/cpp/include/cudf/io/memory_resource.hpp b/cpp/include/cudf/io/memory_resource.hpp deleted file mode 100644 index 2af5755f824..00000000000 --- a/cpp/include/cudf/io/memory_resource.hpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -namespace cudf::io { - -/** - * @brief Set the rmm resource to be used for host memory allocations by - * cudf::detail::hostdevice_vector - * - * hostdevice_vector is a utility class that uses a pair of host and device-side buffers for - * bouncing state between the cpu and the gpu. The resource set with this function (typically a - * pinned memory allocator) is what it uses to allocate space for it's host-side buffer. 
- * - * @param mr The rmm resource to be used for host-side allocations - * @return The previous resource that was in use - */ -rmm::host_device_async_resource_ref set_host_memory_resource( - rmm::host_device_async_resource_ref mr); - -/** - * @brief Get the rmm resource being used for host memory allocations by - * cudf::detail::hostdevice_vector - * - * @return The rmm resource used for host-side allocations - */ -rmm::host_device_async_resource_ref get_host_memory_resource(); - -/** - * @brief Options to configure the default host memory resource - */ -struct host_mr_options { - std::optional pool_size; ///< The size of the pool to use for the default host memory - ///< resource. If not set, the default pool size is used. -}; - -/** - * @brief Configure the size of the default host memory resource. - * - * @param opts Options to configure the default host memory resource - * @return True if this call successfully configured the host memory resource, false if a - * a resource was already configured. - */ -bool config_default_host_memory_resource(host_mr_options const& opts); - -} // namespace cudf::io diff --git a/cpp/include/cudf/utilities/pinned_memory.hpp b/cpp/include/cudf/utilities/pinned_memory.hpp new file mode 100644 index 00000000000..b423eab6d38 --- /dev/null +++ b/cpp/include/cudf/utilities/pinned_memory.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace cudf { + +/** + * @brief Set the rmm resource to be used for pinned memory allocations. + * + * @param mr The rmm resource to be used for pinned allocations + * @return The previous resource that was in use + */ +rmm::host_device_async_resource_ref set_pinned_memory_resource( + rmm::host_device_async_resource_ref mr); + +/** + * @brief Get the rmm resource being used for pinned memory allocations. + * + * @return The rmm resource used for pinned allocations + */ +rmm::host_device_async_resource_ref get_pinned_memory_resource(); + +/** + * @brief Options to configure the default pinned memory resource + */ +struct pinned_mr_options { + std::optional pool_size; ///< The size of the pool to use for the default pinned memory + ///< resource. If not set, the default pool size is used. +}; + +/** + * @brief Configure the size of the default pinned memory resource. + * + * @param opts Options to configure the default pinned memory resource + * @return True if this call successfully configured the pinned memory resource, false if a + * a resource was already configured. 
+ */ +bool config_default_pinned_memory_resource(pinned_mr_options const& opts); + +} // namespace cudf diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index 25649d17c76..20ac89b4d53 100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -16,22 +16,12 @@ #include "config_utils.hpp" -#include -#include #include -#include - -#include -#include -#include -#include #include #include -namespace cudf::io { - -namespace detail { +namespace cudf::io::detail { namespace cufile_integration { @@ -90,205 +80,4 @@ bool is_stable_enabled() { return is_all_enabled() or get_env_policy() == usage_ } // namespace nvcomp_integration -} // namespace detail - -namespace { -class fixed_pinned_pool_memory_resource { - using upstream_mr = rmm::mr::pinned_host_memory_resource; - using host_pooled_mr = rmm::mr::pool_memory_resource; - - private: - upstream_mr upstream_mr_{}; - size_t pool_size_{0}; - // Raw pointer to avoid a segfault when the pool is destroyed on exit - host_pooled_mr* pool_{nullptr}; - void* pool_begin_{nullptr}; - void* pool_end_{nullptr}; - cuda::stream_ref stream_{cudf::detail::global_cuda_stream_pool().get_stream().value()}; - - public: - fixed_pinned_pool_memory_resource(size_t size) - : pool_size_{size}, pool_{new host_pooled_mr(upstream_mr_, size, size)} - { - if (pool_size_ == 0) { return; } - - // Allocate full size from the pinned pool to figure out the beginning and end address - pool_begin_ = pool_->allocate_async(pool_size_, stream_); - pool_end_ = static_cast(static_cast(pool_begin_) + pool_size_); - pool_->deallocate_async(pool_begin_, pool_size_, stream_); - } - - void* do_allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) - { - if (bytes <= pool_size_) { - try { - return pool_->allocate_async(bytes, alignment, stream); - } catch (...) 
{ - // If the pool is exhausted, fall back to the upstream memory resource - } - } - - return upstream_mr_.allocate_async(bytes, alignment, stream); - } - - void do_deallocate_async(void* ptr, - std::size_t bytes, - std::size_t alignment, - cuda::stream_ref stream) noexcept - { - if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr <= pool_end_) { - pool_->deallocate_async(ptr, bytes, alignment, stream); - } else { - upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); - } - } - - void* allocate_async(std::size_t bytes, cuda::stream_ref stream) - { - return do_allocate_async(bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - } - - void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) - { - return do_allocate_async(bytes, alignment, stream); - } - - void* allocate(std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) - { - auto const result = do_allocate_async(bytes, alignment, stream_); - stream_.wait(); - return result; - } - - void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) noexcept - { - return do_deallocate_async(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - } - - void deallocate_async(void* ptr, - std::size_t bytes, - std::size_t alignment, - cuda::stream_ref stream) noexcept - { - return do_deallocate_async(ptr, bytes, alignment, stream); - } - - void deallocate(void* ptr, - std::size_t bytes, - std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept - { - deallocate_async(ptr, bytes, alignment, stream_); - stream_.wait(); - } - - bool operator==(fixed_pinned_pool_memory_resource const& other) const - { - return pool_ == other.pool_ and stream_ == other.stream_; - } - - bool operator!=(fixed_pinned_pool_memory_resource const& other) const - { - return !operator==(other); - } - - [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, - cuda::mr::device_accessible) noexcept - { - } - - [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, - cuda::mr::host_accessible) noexcept - { - } -}; - -static_assert(cuda::mr::resource_with, - ""); - -} // namespace - -CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( - std::optional config_size) -{ - static fixed_pinned_pool_memory_resource mr = [config_size]() { - auto const size = [&config_size]() -> size_t { - if (auto const env_val = getenv("LIBCUDF_PINNED_POOL_SIZE"); env_val != nullptr) { - return std::atol(env_val); - } - - if (config_size.has_value()) { return *config_size; } - - size_t free{}, total{}; - CUDF_CUDA_TRY(cudaMemGetInfo(&free, &total)); - // 0.5% of the total device memory, capped at 100MB - return std::min(total / 200, size_t{100} * 1024 * 1024); - }(); - - // rmm requires the pool size to be a multiple of 256 bytes - auto const aligned_size = (size + 255) & ~255; - CUDF_LOG_INFO("Pinned pool size = {}", aligned_size); - - // make the pool with max size equal to the initial size - return fixed_pinned_pool_memory_resource{aligned_size}; - }(); - - static rmm::host_device_async_resource_ref mr_ref{mr}; - return mr_ref; -} - -CUDF_EXPORT std::mutex& host_mr_mutex() -{ - static std::mutex map_lock; - return map_lock; -} - -// Must be called with the host_mr_mutex mutex held -CUDF_EXPORT rmm::host_device_async_resource_ref& make_host_mr( - std::optional const& opts, bool* did_configure = nullptr) -{ - static rmm::host_device_async_resource_ref* mr_ref = nullptr; - bool configured = false; - if (mr_ref == nullptr) { - configured = true; - 
mr_ref = &make_default_pinned_mr(opts ? opts->pool_size : std::nullopt); - } - - // If the user passed an out param to detect whether this call configured a resource - // set the result - if (did_configure != nullptr) { *did_configure = configured; } - - return *mr_ref; -} - -// Must be called with the host_mr_mutex mutex held -CUDF_EXPORT rmm::host_device_async_resource_ref& host_mr() -{ - static rmm::host_device_async_resource_ref mr_ref = make_host_mr(std::nullopt); - return mr_ref; -} - -rmm::host_device_async_resource_ref set_host_memory_resource(rmm::host_device_async_resource_ref mr) -{ - std::scoped_lock lock{host_mr_mutex()}; - auto last_mr = host_mr(); - host_mr() = mr; - return last_mr; -} - -rmm::host_device_async_resource_ref get_host_memory_resource() -{ - std::scoped_lock lock{host_mr_mutex()}; - return host_mr(); -} - -bool config_default_host_memory_resource(host_mr_options const& opts) -{ - std::scoped_lock lock{host_mr_mutex()}; - auto did_configure = false; - make_host_mr(opts, &did_configure); - return did_configure; -} - -} // namespace cudf::io +} // namespace cudf::io::detail diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp new file mode 100644 index 00000000000..23d673a1382 --- /dev/null +++ b/cpp/src/utilities/pinned_memory.cpp @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cudf { + +namespace { +class fixed_pinned_pool_memory_resource { + using upstream_mr = rmm::mr::pinned_host_memory_resource; + using host_pooled_mr = rmm::mr::pool_memory_resource; + + private: + upstream_mr upstream_mr_{}; + size_t pool_size_{0}; + // Raw pointer to avoid a segfault when the pool is destroyed on exit + host_pooled_mr* pool_{nullptr}; + void* pool_begin_{nullptr}; + void* pool_end_{nullptr}; + cuda::stream_ref stream_{cudf::detail::global_cuda_stream_pool().get_stream().value()}; + + public: + fixed_pinned_pool_memory_resource(size_t size) + : pool_size_{size}, pool_{new host_pooled_mr(upstream_mr_, size, size)} + { + if (pool_size_ == 0) { return; } + + // Allocate full size from the pinned pool to figure out the beginning and end address + pool_begin_ = pool_->allocate_async(pool_size_, stream_); + pool_end_ = static_cast(static_cast(pool_begin_) + pool_size_); + pool_->deallocate_async(pool_begin_, pool_size_, stream_); + } + + void* do_allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) + { + if (bytes <= pool_size_) { + try { + return pool_->allocate_async(bytes, alignment, stream); + } catch (...) 
{ + // If the pool is exhausted, fall back to the upstream memory resource + } + } + + return upstream_mr_.allocate_async(bytes, alignment, stream); + } + + void do_deallocate_async(void* ptr, + std::size_t bytes, + std::size_t alignment, + cuda::stream_ref stream) noexcept + { + if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr <= pool_end_) { + pool_->deallocate_async(ptr, bytes, alignment, stream); + } else { + upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); + } + } + + void* allocate_async(std::size_t bytes, cuda::stream_ref stream) + { + return do_allocate_async(bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); + } + + void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) + { + return do_allocate_async(bytes, alignment, stream); + } + + void* allocate(std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) + { + auto const result = do_allocate_async(bytes, alignment, stream_); + stream_.wait(); + return result; + } + + void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) noexcept + { + return do_deallocate_async(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); + } + + void deallocate_async(void* ptr, + std::size_t bytes, + std::size_t alignment, + cuda::stream_ref stream) noexcept + { + return do_deallocate_async(ptr, bytes, alignment, stream); + } + + void deallocate(void* ptr, + std::size_t bytes, + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept + { + deallocate_async(ptr, bytes, alignment, stream_); + stream_.wait(); + } + + bool operator==(fixed_pinned_pool_memory_resource const& other) const + { + return pool_ == other.pool_ and stream_ == other.stream_; + } + + bool operator!=(fixed_pinned_pool_memory_resource const& other) const + { + return !operator==(other); + } + + [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, + cuda::mr::device_accessible) noexcept + { + } + + [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, + cuda::mr::host_accessible) noexcept + { + } +}; + +static_assert(cuda::mr::resource_with, + ""); + +} // namespace + +CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( + std::optional config_size) +{ + static fixed_pinned_pool_memory_resource mr = [config_size]() { + auto const size = [&config_size]() -> size_t { + if (auto const env_val = getenv("LIBCUDF_PINNED_POOL_SIZE"); env_val != nullptr) { + return std::atol(env_val); + } + + if (config_size.has_value()) { return *config_size; } + + size_t free{}, total{}; + CUDF_CUDA_TRY(cudaMemGetInfo(&free, &total)); + // 0.5% of the total device memory, capped at 100MB + return std::min(total / 200, size_t{100} * 1024 * 1024); + }(); + + // rmm requires the pool size to be a multiple of 256 bytes + auto const aligned_size = (size + 255) & ~255; + CUDF_LOG_INFO("Pinned pool size = {}", aligned_size); + + // make the pool with max size equal to the initial size + return fixed_pinned_pool_memory_resource{aligned_size}; + }(); + + static rmm::host_device_async_resource_ref mr_ref{mr}; + return mr_ref; +} + +CUDF_EXPORT std::mutex& host_mr_mutex() +{ + static std::mutex map_lock; + return map_lock; +} + +// Must be called with the host_mr_mutex mutex held +CUDF_EXPORT rmm::host_device_async_resource_ref& make_host_mr( + std::optional const& opts, bool* did_configure = nullptr) +{ + static rmm::host_device_async_resource_ref* mr_ref = nullptr; + bool configured = false; + if (mr_ref == nullptr) { + configured = true; + 
mr_ref = &make_default_pinned_mr(opts ? opts->pool_size : std::nullopt); + } + + // If the user passed an out param to detect whether this call configured a resource + // set the result + if (did_configure != nullptr) { *did_configure = configured; } + + return *mr_ref; +} + +// Must be called with the host_mr_mutex mutex held +CUDF_EXPORT rmm::host_device_async_resource_ref& host_mr() +{ + static rmm::host_device_async_resource_ref mr_ref = make_host_mr(std::nullopt); + return mr_ref; +} + +rmm::host_device_async_resource_ref set_pinned_memory_resource( + rmm::host_device_async_resource_ref mr) +{ + std::scoped_lock lock{host_mr_mutex()}; + auto last_mr = host_mr(); + host_mr() = mr; + return last_mr; +} + +rmm::host_device_async_resource_ref get_pinned_memory_resource() +{ + std::scoped_lock lock{host_mr_mutex()}; + return host_mr(); +} + +bool config_default_pinned_memory_resource(pinned_mr_options const& opts) +{ + std::scoped_lock lock{host_mr_mutex()}; + auto did_configure = false; + make_host_mr(opts, &did_configure); + return did_configure; +} + +} // namespace cudf diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 9d766e80094..4b3793b22f6 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -28,13 +28,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include #include @@ -2068,7 +2068,7 @@ TEST_F(JsonReaderTest, JSONLinesRecoveringSync) size_t{128} * 1024 * 1024}; // Set new resource - auto last_mr = cudf::io::set_host_memory_resource(mr); + auto last_mr = cudf::set_pinned_memory_resource(mr); /** * @brief Spark has the specific need to ignore extra characters that come after the first record @@ -2158,7 +2158,7 @@ TEST_F(JsonReaderTest, JSONLinesRecoveringSync) float64_wrapper{c_data.cbegin(), c_data.cend(), c_validity.cbegin()}); // Restore original memory source - cudf::io::set_host_memory_resource(last_mr); + cudf::set_pinned_memory_resource(last_mr); } TEST_F(JsonReaderTest, MixedTypes) diff --git a/cpp/tests/utilities_tests/io_utilities_tests.cpp b/cpp/tests/utilities_tests/io_utilities_tests.cpp index e5a153bf781..c00f1ab8863 100644 --- a/cpp/tests/utilities_tests/io_utilities_tests.cpp +++ b/cpp/tests/utilities_tests/io_utilities_tests.cpp @@ -18,8 +18,8 @@ #include #include -#include #include +#include #include #include @@ -44,8 +44,8 @@ TEST(IoUtilitiesTest, HostMemoryGetAndSet) size_t{128} * 1024 * 1024); // set new resource - auto last_mr = cudf::io::get_host_memory_resource(); - cudf::io::set_host_memory_resource(mr); + auto last_mr = cudf::get_pinned_memory_resource(); + cudf::set_pinned_memory_resource(mr); constexpr int num_rows = 32 * 1024; auto valids = @@ -66,7 +66,7 @@ TEST(IoUtilitiesTest, HostMemoryGetAndSet) CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected); // reset memory resource back - cudf::io::set_host_memory_resource(last_mr); + cudf::set_pinned_memory_resource(last_mr); } TEST(IoUtilitiesTest, Base64EncodeAndDecode) From 0048c5951171d931b676dee3ab40ca312a7eb560 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 30 May 2024 19:09:32 -0700 Subject: [PATCH 04/31] style --- cpp/benchmarks/io/text/multibyte_split.cpp | 2 +- cpp/src/io/text/data_chunk_source_factories.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index 172182c3607..67705863d41 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ 
b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 8278b2c25cb..46149db929f 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 1964523554cd86763be1a0a6b1580f7d12016270 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 31 May 2024 10:39:55 -0700 Subject: [PATCH 05/31] java update because breaking --- java/src/main/native/src/RmmJni.cpp | 32 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp index fa78f6ca4e2..e1cb7845b77 100644 --- a/java/src/main/native/src/RmmJni.cpp +++ b/java/src/main/native/src/RmmJni.cpp @@ -16,7 +16,7 @@ #include "cudf_jni_apis.hpp" -#include +#include #include #include @@ -395,15 +395,17 @@ class java_debug_event_handler_memory_resource final : public java_event_handler } }; -inline auto& prior_cuio_host_mr() +inline auto& prior_cudf_pinned_mr() { - static rmm::host_async_resource_ref _prior_cuio_host_mr = cudf::io::get_host_memory_resource(); - return _prior_cuio_host_mr; + static rmm::host_device_async_resource_ref _prior_cudf_pinned_mr = + cudf::get_pinned_memory_resource(); + return _prior_cudf_pinned_mr; } /** * This is a pinned fallback memory resource that will try to allocate `pool` - * and if that fails, attempt to allocate from the prior resource used by cuIO `prior_cuio_host_mr`. + * and if that fails, attempt to allocate from the prior resource used by cuDF + * `prior_cudf_pinned_mr`. * * We detect whether a pointer to free is inside of the pool by checking its address (see * constructor) @@ -433,7 +435,7 @@ class pinned_fallback_host_memory_resource { /** * @brief Allocates pinned host memory of size at least \p bytes bytes from either the - * _pool argument provided, or prior_cuio_host_mr. + * _pool argument provided, or prior_cudf_pinned_mr. * * @throws rmm::bad_alloc if the requested allocation could not be fulfilled due to any other * reason. @@ -450,7 +452,7 @@ class pinned_fallback_host_memory_resource { return _pool->allocate(bytes, alignment); } catch (const std::exception& unused) { // try to allocate using the underlying pinned resource - return prior_cuio_host_mr().allocate(bytes, alignment); + return prior_cudf_pinned_mr().allocate(bytes, alignment); } // we should not reached here return nullptr; @@ -459,7 +461,7 @@ class pinned_fallback_host_memory_resource { /** * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes. We attempt * to deallocate from _pool, if ptr is detected to be in the pool address range, - * otherwise we deallocate from `prior_cuio_host_mr`. + * otherwise we deallocate from `prior_cudf_pinned_mr`. * * @param ptr Pointer to be deallocated. * @param bytes Size of the allocation. 
@@ -472,7 +474,7 @@ class pinned_fallback_host_memory_resource { if (ptr >= pool_begin_ && ptr <= pool_end_) { _pool->deallocate(ptr, bytes, alignment); } else { - prior_cuio_host_mr().deallocate(ptr, bytes, alignment); + prior_cudf_pinned_mr().deallocate(ptr, bytes, alignment); } } @@ -1025,7 +1027,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Rmm_newPinnedPoolMemoryResource(JNIE CATCH_STD(env, 0) } -JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_setCuioPinnedPoolMemoryResource(JNIEnv* env, +JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_setCudfPinnedPoolMemoryResource(JNIEnv* env, jclass clazz, jlong pool_ptr) { @@ -1035,7 +1037,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_setCuioPinnedPoolMemoryResource(J // create a pinned fallback pool that will allocate pinned memory // if the regular pinned pool is exhausted pinned_fallback_mr.reset(new pinned_fallback_host_memory_resource(pool)); - prior_cuio_host_mr() = cudf::io::set_host_memory_resource(*pinned_fallback_mr); + prior_cudf_pinned_mr() = cudf::set_pinned_memory_resource(*pinned_fallback_mr); } CATCH_STD(env, ) } @@ -1048,7 +1050,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_releasePinnedPoolMemoryResource(J cudf::jni::auto_set_device(env); // set the cuio host memory resource to what it was before, or the same // if we didn't overwrite it with setCuioPinnedPoolMemoryResource - cudf::io::set_host_memory_resource(prior_cuio_host_mr()); + cudf::set_pinned_memory_resource(prior_cudf_pinned_mr()); pinned_fallback_mr.reset(); delete reinterpret_cast(pool_ptr); } @@ -1088,7 +1090,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Rmm_allocFromFallbackPinnedPool(JNIE jlong size) { cudf::jni::auto_set_device(env); - void* ret = cudf::io::get_host_memory_resource().allocate(size); + void* ret = cudf::get_pinned_memory_resource().allocate(size); return reinterpret_cast(ret); } @@ -1101,7 +1103,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_freeFromFallbackPinnedPool(JNIEnv try { cudf::jni::auto_set_device(env); void* cptr = reinterpret_cast(ptr); - cudf::io::get_host_memory_resource().deallocate(cptr, size); + cudf::get_pinned_memory_resource().deallocate(cptr, size); } CATCH_STD(env, ) } @@ -1112,7 +1114,7 @@ JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_Rmm_configureDefaultCudfPinnedPoo { try { cudf::jni::auto_set_device(env); - return cudf::io::config_default_host_memory_resource(cudf::io::host_mr_options{size}); + return cudf::config_default_pinned_memory_resource(cudf::pinned_mr_options{size}); } CATCH_STD(env, false) } From ac0ce9c0ef5c2b3c6c4dfe1e1b4ee5330100f999 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 31 May 2024 12:04:05 -0700 Subject: [PATCH 06/31] java fix --- java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java | 2 +- java/src/main/java/ai/rapids/cudf/Rmm.java | 2 +- java/src/main/native/src/RmmJni.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java index 83b801db7fb..5050834303d 100644 --- a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java +++ b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java @@ -224,7 +224,7 @@ private PinnedMemoryPool(long poolSize, int gpuId, boolean setCuioHostMemoryReso } this.poolHandle = Rmm.newPinnedPoolMemoryResource(poolSize, poolSize); if (setCuioHostMemoryResource) { - Rmm.setCuioPinnedPoolMemoryResource(this.poolHandle); + Rmm.setCudfPinnedPoolMemoryResource(this.poolHandle); } this.poolSize = poolSize; } diff 
--git a/java/src/main/java/ai/rapids/cudf/Rmm.java b/java/src/main/java/ai/rapids/cudf/Rmm.java index 4dee1b7aa24..ed029c918e4 100755 --- a/java/src/main/java/ai/rapids/cudf/Rmm.java +++ b/java/src/main/java/ai/rapids/cudf/Rmm.java @@ -597,7 +597,7 @@ static native long newEventHandlerResourceAdaptor(long handle, long trackerHandl public static native long newPinnedPoolMemoryResource(long initSize, long maxSize); - public static native long setCuioPinnedPoolMemoryResource(long poolPtr); + public static native long setCudfPinnedPoolMemoryResource(long poolPtr); public static native void releasePinnedPoolMemoryResource(long poolPtr); diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp index e1cb7845b77..8bd0f7793b4 100644 --- a/java/src/main/native/src/RmmJni.cpp +++ b/java/src/main/native/src/RmmJni.cpp @@ -1049,7 +1049,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_releasePinnedPoolMemoryResource(J try { cudf::jni::auto_set_device(env); // set the cuio host memory resource to what it was before, or the same - // if we didn't overwrite it with setCuioPinnedPoolMemoryResource + // if we didn't overwrite it with setCudfPinnedPoolMemoryResource cudf::set_pinned_memory_resource(prior_cudf_pinned_mr()); pinned_fallback_mr.reset(); delete reinterpret_cast(pool_ptr); From ab36162ac32cd0be5fe69ef3d92e421f4e5ea798 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 31 May 2024 15:48:45 -0700 Subject: [PATCH 07/31] move test out of io util --- cpp/tests/CMakeLists.txt | 5 +- .../utilities_tests/io_utilities_tests.cpp | 45 ------------- .../utilities_tests/pinned_memory_tests.cpp | 65 +++++++++++++++++++ 3 files changed, 68 insertions(+), 47 deletions(-) create mode 100644 cpp/tests/utilities_tests/pinned_memory_tests.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 2f2c12f265c..19c87facb51 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -378,15 +378,16 @@ ConfigureTest( # * utilities tests ------------------------------------------------------------------------------- ConfigureTest( UTILITIES_TEST - utilities_tests/type_list_tests.cpp utilities_tests/column_debug_tests.cpp utilities_tests/column_utilities_tests.cpp utilities_tests/column_wrapper_tests.cpp + utilities_tests/default_stream_tests.cpp utilities_tests/io_utilities_tests.cpp utilities_tests/lists_column_wrapper_tests.cpp utilities_tests/logger_tests.cpp - utilities_tests/default_stream_tests.cpp + utilities_tests/pinned_memory_tests.cpp utilities_tests/type_check_tests.cpp + utilities_tests/type_list_tests.cpp ) # ################################################################################################## diff --git a/cpp/tests/utilities_tests/io_utilities_tests.cpp b/cpp/tests/utilities_tests/io_utilities_tests.cpp index c00f1ab8863..9ed8f18f5cc 100644 --- a/cpp/tests/utilities_tests/io_utilities_tests.cpp +++ b/cpp/tests/utilities_tests/io_utilities_tests.cpp @@ -16,14 +16,6 @@ #include #include -#include - -#include -#include - -#include -#include -#include #include @@ -32,43 +24,6 @@ using cudf::io::detail::base64_encode; class IoUtilitiesTest : public cudf::test::BaseFixture {}; -TEST(IoUtilitiesTest, HostMemoryGetAndSet) -{ - // Global environment for temporary files - auto const temp_env = static_cast( - ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); - - // pinned/pooled host memory resource - using host_pooled_mr = rmm::mr::pool_memory_resource; - host_pooled_mr mr(std::make_shared().get(), - 
size_t{128} * 1024 * 1024); - - // set new resource - auto last_mr = cudf::get_pinned_memory_resource(); - cudf::set_pinned_memory_resource(mr); - - constexpr int num_rows = 32 * 1024; - auto valids = - cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 2; }); - auto values = thrust::make_counting_iterator(0); - - cudf::test::fixed_width_column_wrapper col(values, values + num_rows, valids); - - cudf::table_view expected({col}); - auto filepath = temp_env->get_temp_filepath("IoUtilsMemTest.parquet"); - cudf::io::parquet_writer_options out_args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_args); - - cudf::io::parquet_reader_options const read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto const result = cudf::io::read_parquet(read_opts); - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected); - - // reset memory resource back - cudf::set_pinned_memory_resource(last_mr); -} - TEST(IoUtilitiesTest, Base64EncodeAndDecode) { // a vector of lorem ipsum strings diff --git a/cpp/tests/utilities_tests/pinned_memory_tests.cpp b/cpp/tests/utilities_tests/pinned_memory_tests.cpp new file mode 100644 index 00000000000..df9103640f4 --- /dev/null +++ b/cpp/tests/utilities_tests/pinned_memory_tests.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include + +#include +#include +#include + +class PinnedMemoryTest : public cudf::test::BaseFixture {}; + +TEST(PinnedMemoryTest, MemoryResourceGetAndSet) +{ + // Global environment for temporary files + auto const temp_env = static_cast( + ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); + + // pinned/pooled host memory resource + using host_pooled_mr = rmm::mr::pool_memory_resource; + host_pooled_mr mr(std::make_shared().get(), + 4 * 1024 * 1024); + + // set new resource + auto last_mr = cudf::get_pinned_memory_resource(); + cudf::set_pinned_memory_resource(mr); + + constexpr int num_rows = 32 * 1024; + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 2; }); + auto values = thrust::make_counting_iterator(0); + + cudf::test::fixed_width_column_wrapper col(values, values + num_rows, valids); + + cudf::table_view expected({col}); + auto filepath = temp_env->get_temp_filepath("MemoryResourceGetAndSetTest.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_args); + + cudf::io::parquet_reader_options const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(read_opts); + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected); + + // reset memory resource back + cudf::set_pinned_memory_resource(last_mr); +} From 83f665a15f4aba3040079b58306fd964621a91c8 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 3 Jun 2024 15:01:38 -0700 Subject: [PATCH 08/31] missed rename --- .../src/main/java/ai/rapids/cudf/PinnedMemoryPool.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java index 5050834303d..5ca5bc0db68 100644 --- a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java +++ b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java @@ -128,9 +128,9 @@ public static synchronized void initialize(long poolSize, int gpuId) { * * @param poolSize size of the pool to initialize. 
* @param gpuId gpu id to set to get memory pool from, -1 means to use default - * @param setCuioHostMemoryResource true if this pinned pool should be used by cuIO for host memory + * @param setCudfPinnedPoolMemoryResource true if this pinned pool should be used by cuIO for host memory */ - public static synchronized void initialize(long poolSize, int gpuId, boolean setCuioHostMemoryResource) { + public static synchronized void initialize(long poolSize, int gpuId, boolean setCudfPinnedPoolMemoryResource) { if (isInitialized()) { throw new IllegalStateException("Can only initialize the pool once."); } @@ -139,7 +139,7 @@ public static synchronized void initialize(long poolSize, int gpuId, boolean set t.setDaemon(true); return t; }); - initFuture = initService.submit(() -> new PinnedMemoryPool(poolSize, gpuId, setCuioHostMemoryResource)); + initFuture = initService.submit(() -> new PinnedMemoryPool(poolSize, gpuId, setCudfPinnedPoolMemoryResource)); initService.shutdown(); } @@ -216,14 +216,14 @@ public static long getTotalPoolSizeBytes() { return 0; } - private PinnedMemoryPool(long poolSize, int gpuId, boolean setCuioHostMemoryResource) { + private PinnedMemoryPool(long poolSize, int gpuId, boolean setCudfPinnedPoolMemoryResource) { if (gpuId > -1) { // set the gpu device to use Cuda.setDevice(gpuId); Cuda.freeZero(); } this.poolHandle = Rmm.newPinnedPoolMemoryResource(poolSize, poolSize); - if (setCuioHostMemoryResource) { + if (setCudfPinnedPoolMemoryResource) { Rmm.setCudfPinnedPoolMemoryResource(this.poolHandle); } this.poolSize = poolSize; From c1ae478c2b3ce28101e35ec3f5a5af5e03d5452c Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 3 Jun 2024 15:52:34 -0700 Subject: [PATCH 09/31] update benchmark changes --- cpp/benchmarks/io/cuio_common.cpp | 12 ++++++++++++ cpp/benchmarks/io/cuio_common.hpp | 4 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 37ced8ea703..f06938bd721 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -14,6 +14,9 @@ * limitations under the License. 
*/ +#include "rmm/mr/pinned_host_memory_resource.hpp" +#include "rmm/resource_ref.hpp" + #include #include @@ -28,6 +31,14 @@ temp_directory const cuio_source_sink_pair::tmpdir{"cudf_gbench"}; +// Don't use cudf's pinned pool for the source data +rmm::host_async_resource_ref pinned_memory_resource() +{ + static rmm::mr::pinned_host_memory_resource mr = rmm::mr::pinned_host_memory_resource{}; + + return mr; +} + std::string random_file_in_dir(std::string const& dir_path) { // `mkstemp` modifies the template in place @@ -41,6 +52,7 @@ std::string random_file_in_dir(std::string const& dir_path) cuio_source_sink_pair::cuio_source_sink_pair(io_type type) : type{type}, + pinned_buffer({pinned_memory_resource(), cudf::get_default_stream()}), d_buffer{0, cudf::get_default_stream()}, file_name{random_file_in_dir(tmpdir.path())}, void_sink{cudf::io::data_sink::create()} diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index d4f39a5f243..407805a8a1a 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -18,7 +18,7 @@ #include -#include +#include #include #include @@ -79,7 +79,7 @@ class cuio_source_sink_pair { io_type const type; std::vector h_buffer; - cudf::detail::pinned_host_vector pinned_buffer; + cudf::detail::rmm_host_vector pinned_buffer; rmm::device_uvector d_buffer; std::string const file_name; std::unique_ptr void_sink; From 1c09d0cfe15bcf6b5d2ce775129f0db7e0c662f0 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 4 Jun 2024 11:52:48 -0700 Subject: [PATCH 10/31] rename rmm_host_vector --- cpp/benchmarks/io/cuio_common.hpp | 4 ++-- .../{rmm_host_vector.hpp => host_vector.hpp} | 2 +- .../cudf/detail/utilities/vector_factories.hpp | 16 ++++++++-------- cpp/src/io/text/bgzip_data_chunk_source.cu | 10 +++++----- cpp/src/io/text/data_chunk_source_factories.cpp | 8 ++++---- cpp/src/io/utilities/hostdevice_vector.hpp | 4 ++-- 6 files changed, 22 insertions(+), 22 deletions(-) rename cpp/include/cudf/detail/utilities/{rmm_host_vector.hpp => host_vector.hpp} (98%) diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index 407805a8a1a..64d6021cf50 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -18,7 +18,7 @@ #include -#include +#include #include #include @@ -79,7 +79,7 @@ class cuio_source_sink_pair { io_type const type; std::vector h_buffer; - cudf::detail::rmm_host_vector pinned_buffer; + cudf::detail::host_vector pinned_buffer; rmm::device_uvector d_buffer; std::string const file_name; std::unique_ptr void_sink; diff --git a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp similarity index 98% rename from cpp/include/cudf/detail/utilities/rmm_host_vector.hpp rename to cpp/include/cudf/detail/utilities/host_vector.hpp index 6604020c224..e62c8017f8b 100644 --- a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/host_vector.hpp @@ -178,6 +178,6 @@ class rmm_host_allocator { * @brief A vector class with rmm host memory allocator */ template -using rmm_host_vector = thrust::host_vector>; +using host_vector = thrust::host_vector>; } // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 7343ee25c8f..6f859ded456 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -21,7 +21,7 
@@ * @file vector_factories.hpp */ -#include +#include #include #include #include @@ -442,33 +442,33 @@ thrust::host_vector make_host_vector_sync( } /** - * @brief Asynchronously construct a `rmm_host_vector` of the given size + * @brief Asynchronously construct a `host_vector` of the given size * * @note This function does not synchronize `stream`. * * @tparam T The type of the vector data * @param size The number of elements in the created vector * @param stream The stream on which to allocate memory - * @return A rmm_host_vector of the given size + * @return A host_vector of the given size */ template -rmm_host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) +host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) { - return rmm_host_vector(size, {cudf::get_pinned_memory_resource(), stream}); + return host_vector(size, {cudf::get_pinned_memory_resource(), stream}); } /** - * @brief Synchronously construct a `rmm_host_vector` of the given size + * @brief Synchronously construct a `cudf::detail::host_vector` of the given size * * @note This function synchronizes `stream`. * * @tparam T The type of the vector data * @param size The number of elements in the created vector * @param stream The stream on which to allocate memory - * @return A rmm_host_vector of the given size + * @return A host_vector of the given size */ template -rmm_host_vector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) +host_vector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) { auto result = make_pinned_vector_async(size, stream); stream.synchronize(); diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index 190015686df..896123d95a9 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -20,8 +20,8 @@ #include "io/utilities/config_utils.hpp" #include +#include #include -#include #include #include #include @@ -67,7 +67,7 @@ struct bgzip_nvcomp_transform_functor { class bgzip_data_chunk_reader : public data_chunk_reader { private: template - static void copy_to_device(cudf::detail::rmm_host_vector const& host, + static void copy_to_device(cudf::detail::host_vector const& host, rmm::device_uvector& device, rmm::cuda_stream_view stream) { @@ -85,9 +85,9 @@ class bgzip_data_chunk_reader : public data_chunk_reader { 1 << 16; // 64k offset allocation, resized on demand cudaEvent_t event; - cudf::detail::rmm_host_vector h_compressed_blocks; - cudf::detail::rmm_host_vector h_compressed_offsets; - cudf::detail::rmm_host_vector h_decompressed_offsets; + cudf::detail::host_vector h_compressed_blocks; + cudf::detail::host_vector h_compressed_offsets; + cudf::detail::host_vector h_decompressed_offsets; rmm::device_uvector d_compressed_blocks; rmm::device_uvector d_decompressed_blocks; rmm::device_uvector d_compressed_offsets; diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 46149db929f..1e1671a1545 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -18,7 +18,7 @@ #include "io/text/device_data_chunks.hpp" #include -#include +#include #include #include @@ -33,7 +33,7 @@ namespace { struct host_ticket { cudaEvent_t event; - std::unique_ptr> buffer; + std::unique_ptr> buffer; }; /** @@ -86,7 +86,7 @@ class datasource_chunk_reader : public data_chunk_reader { // resize the host buffer as necessary to contain the 
requested number of bytes if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { - h_ticket.buffer = std::make_unique>( + h_ticket.buffer = std::make_unique>( cudf::detail::make_pinned_vector_sync(read_size, stream)); } @@ -153,7 +153,7 @@ class istream_data_chunk_reader : public data_chunk_reader { // resize the host buffer as necessary to contain the requested number of bytes if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { - h_ticket.buffer = std::make_unique>( + h_ticket.buffer = std::make_unique>( cudf::detail::make_pinned_vector_sync(read_size, stream)); } diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index f6f7ff14d38..1ae27a2f4ae 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -18,7 +18,7 @@ #include "hostdevice_span.hpp" -#include +#include #include #include #include @@ -172,7 +172,7 @@ class hostdevice_vector { } private: - cudf::detail::rmm_host_vector h_data; + cudf::detail::host_vector h_data; rmm::device_uvector d_data; }; From c343c3194f48dedb10e49c8610e3e0deaacf315b Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 4 Jun 2024 13:00:10 -0700 Subject: [PATCH 11/31] remove do_xyz --- cpp/src/utilities/pinned_memory.cpp | 39 ++++++++++------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index 23d673a1382..47c09217363 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -53,7 +53,7 @@ class fixed_pinned_pool_memory_resource { pool_->deallocate_async(pool_begin_, pool_size_, stream_); } - void* do_allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) + void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) { if (bytes <= pool_size_) { try { @@ -66,46 +66,33 @@ class fixed_pinned_pool_memory_resource { return upstream_mr_.allocate_async(bytes, alignment, stream); } - void do_deallocate_async(void* ptr, - std::size_t bytes, - std::size_t alignment, - cuda::stream_ref stream) noexcept - { - if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr <= pool_end_) { - pool_->deallocate_async(ptr, bytes, alignment, stream); - } else { - upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); - } - } - void* allocate_async(std::size_t bytes, cuda::stream_ref stream) { - return do_allocate_async(bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - } - - void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) - { - return do_allocate_async(bytes, alignment, stream); + return allocate_async(bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); } void* allocate(std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) { - auto const result = do_allocate_async(bytes, alignment, stream_); + auto const result = allocate_async(bytes, alignment, stream_); stream_.wait(); return result; } - void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) noexcept - { - return do_deallocate_async(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - } - void deallocate_async(void* ptr, std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) noexcept { - return do_deallocate_async(ptr, bytes, alignment, stream); + if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr <= pool_end_) { + pool_->deallocate_async(ptr, bytes, alignment, stream); + } else { + 
upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); + } + } + + void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) noexcept + { + return deallocate_async(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); } void deallocate(void* ptr, From 50f4d3ee3d27a2a0a10a2a2cc8a7f425ab8c82e1 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 4 Jun 2024 15:14:21 -0700 Subject: [PATCH 12/31] comment --- cpp/include/cudf/detail/utilities/vector_factories.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 6f859ded456..06dfcbfc5e5 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -442,7 +442,7 @@ thrust::host_vector make_host_vector_sync( } /** - * @brief Asynchronously construct a `host_vector` of the given size + * @brief Asynchronously construct a pinned `cudf::detail::host_vector` of the given size * * @note This function does not synchronize `stream`. * @@ -458,7 +458,7 @@ host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view strea } /** - * @brief Synchronously construct a `cudf::detail::host_vector` of the given size + * @brief Synchronously construct a pinned `cudf::detail::host_vector` of the given size * * @note This function synchronizes `stream`. * From 9082ccc979383b0bcfa7181c54d097d5036f2904 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 5 Jun 2024 15:14:34 -0700 Subject: [PATCH 13/31] include style Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/benchmarks/io/cuio_common.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index f06938bd721..45dc812e247 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "rmm/mr/pinned_host_memory_resource.hpp" -#include "rmm/resource_ref.hpp" +#include +#include #include From 17b1ee0e736a0dbbbf152d99aad3a27de3bc9c3a Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 5 Jun 2024 17:05:40 -0700 Subject: [PATCH 14/31] reviews --- cpp/src/io/text/bgzip_data_chunk_source.cu | 2 +- cpp/src/io/text/data_chunk_source_factories.cpp | 2 +- java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index 896123d95a9..0e3ce779089 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -14,7 +14,6 @@ * limitations under the License. */ -#include "cudf/detail/utilities/vector_factories.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/text/device_data_chunks.hpp" #include "io/utilities/config_utils.hpp" @@ -22,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 1e1671a1545..45096b7155c 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "cudf/detail/utilities/vector_factories.hpp" #include "io/text/device_data_chunks.hpp" #include #include +#include #include #include diff --git a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java index 5ca5bc0db68..df0d9dc7c3e 100644 --- a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java +++ b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java @@ -128,7 +128,7 @@ public static synchronized void initialize(long poolSize, int gpuId) { * * @param poolSize size of the pool to initialize. * @param gpuId gpu id to set to get memory pool from, -1 means to use default - * @param setCudfPinnedPoolMemoryResource true if this pinned pool should be used by cuIO for host memory + * @param setCudfPinnedPoolMemoryResource true if this pinned pool should be used by cuDF for pinned memory */ public static synchronized void initialize(long poolSize, int gpuId, boolean setCudfPinnedPoolMemoryResource) { if (isInitialized()) { From 2dbb68f9d66e752eef4015082d2c877b8145e068 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 6 Jun 2024 09:52:44 -0700 Subject: [PATCH 15/31] available_device_memory --- cpp/src/utilities/pinned_memory.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index 47c09217363..d0709c6a40e 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -142,8 +142,7 @@ CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( if (config_size.has_value()) { return *config_size; } - size_t free{}, total{}; - CUDF_CUDA_TRY(cudaMemGetInfo(&free, &total)); + auto const total = rmm::available_device_memory().second; // 0.5% of the total device memory, capped at 100MB return std::min(total / 200, size_t{100} * 1024 * 1024); }(); From cb9cc228bdc8fa74b5a517a149b11f7e6201d71b Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 6 Jun 2024 10:58:48 -0700 Subject: [PATCH 16/31] reviews --- cpp/src/utilities/pinned_memory.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index d0709c6a40e..53e0d10c6f4 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -83,7 +83,7 @@ class fixed_pinned_pool_memory_resource { std::size_t alignment, cuda::stream_ref stream) noexcept { - if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr <= pool_end_) { + if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr < pool_end_) { pool_->deallocate_async(ptr, bytes, alignment, stream); } else { upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); @@ -113,13 +113,13 @@ class fixed_pinned_pool_memory_resource { return !operator==(other); } - [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, - cuda::mr::device_accessible) noexcept + friend void get_property(fixed_pinned_pool_memory_resource const&, + cuda::mr::device_accessible) noexcept { } - [[maybe_unused]] friend void get_property(fixed_pinned_pool_memory_resource const&, - cuda::mr::host_accessible) noexcept + friend void get_property(fixed_pinned_pool_memory_resource const&, + cuda::mr::host_accessible) noexcept { } }; @@ -148,7 +148,7 @@ CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( }(); // rmm requires the pool size to be a multiple of 256 bytes - auto const aligned_size = (size + 255) & ~255; + auto const 
aligned_size = rmm::align_up(size, rmm::RMM_DEFAULT_HOST_ALIGNMENT); CUDF_LOG_INFO("Pinned pool size = {}", aligned_size); // make the pool with max size equal to the initial size From cf67a14795017eb4a0835bd727acebfff0a066f9 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 6 Jun 2024 11:01:20 -0700 Subject: [PATCH 17/31] expand anon namespace --- cpp/src/utilities/pinned_memory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index 53e0d10c6f4..9cebf980d00 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -129,8 +129,6 @@ static_assert(cuda::mr::resource_with, ""); -} // namespace - CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( std::optional config_size) { @@ -190,6 +188,8 @@ CUDF_EXPORT rmm::host_device_async_resource_ref& host_mr() return mr_ref; } +} // namespace + rmm::host_device_async_resource_ref set_pinned_memory_resource( rmm::host_device_async_resource_ref mr) { From 24c15498b9ad53ec452a99b94fb767b90f4551a0 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 7 Jun 2024 11:07:20 -0700 Subject: [PATCH 18/31] host_uvector --- cpp/benchmarks/io/cuio_common.cpp | 9 +- cpp/benchmarks/io/cuio_common.hpp | 4 +- .../cudf/detail/utilities/host_uvector.hpp | 142 ++++++++++++++ .../cudf/detail/utilities/host_vector.hpp | 183 ------------------ .../detail/utilities/vector_factories.hpp | 12 +- cpp/include/cudf/utilities/span.hpp | 6 + cpp/src/io/text/bgzip_data_chunk_source.cu | 10 +- .../io/text/data_chunk_source_factories.cpp | 8 +- cpp/src/io/utilities/hostdevice_vector.hpp | 4 +- 9 files changed, 172 insertions(+), 206 deletions(-) create mode 100644 cpp/include/cudf/detail/utilities/host_uvector.hpp delete mode 100644 cpp/include/cudf/detail/utilities/host_vector.hpp diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 45dc812e247..09d7d8a9db6 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -14,13 +14,14 @@ * limitations under the License. 
*/ -#include -#include - #include #include #include +#include + +#include +#include #include @@ -52,7 +53,7 @@ std::string random_file_in_dir(std::string const& dir_path) cuio_source_sink_pair::cuio_source_sink_pair(io_type type) : type{type}, - pinned_buffer({pinned_memory_resource(), cudf::get_default_stream()}), + pinned_buffer(0, pinned_memory_resource(), cudf::get_default_stream()), d_buffer{0, cudf::get_default_stream()}, file_name{random_file_in_dir(tmpdir.path())}, void_sink{cudf::io::data_sink::create()} diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index 64d6021cf50..020fd7e00c1 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -18,7 +18,7 @@ #include -#include +#include #include #include @@ -79,7 +79,7 @@ class cuio_source_sink_pair { io_type const type; std::vector h_buffer; - cudf::detail::host_vector pinned_buffer; + cudf::detail::host_uvector pinned_buffer; rmm::device_uvector d_buffer; std::string const file_name; std::unique_ptr void_sink; diff --git a/cpp/include/cudf/detail/utilities/host_uvector.hpp b/cpp/include/cudf/detail/utilities/host_uvector.hpp new file mode 100644 index 00000000000..39bde04e985 --- /dev/null +++ b/cpp/include/cudf/detail/utilities/host_uvector.hpp @@ -0,0 +1,142 @@ +/* + * Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +namespace cudf::detail { + +template +class host_uvector { + public: + host_uvector(std::size_t size, rmm::host_async_resource_ref mr, rmm::cuda_stream_view stream) + : _size{size}, _capacity{size}, _mr{mr}, _stream{stream} + { + if (_size != 0) { _data = static_cast(mr.allocate_async(_size * sizeof(T), _stream)); } + } + + host_uvector(host_uvector const&) = delete; + host_uvector(host_uvector&& other) + : _data{other._data}, + _size{other._size}, + _capacity{other._capacity}, + _mr{other._mr}, + _stream{other._stream} + { + other._data = nullptr; + other._size = 0; + other._capacity = 0; + } + + host_uvector& operator=(host_uvector const&) = delete; + host_uvector& operator=(host_uvector&& other) + { + if (this != &other) { + if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } + _data = other._data; + _size = other._size; + _capacity = other._capacity; + _mr = other._mr; + _stream = other._stream; + other._data = nullptr; + other._size = 0; + other._capacity = 0; + } + return *this; + } + + ~host_uvector() + { + if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } + } + + void resize(std::size_t new_size) + { + if (new_size > _capacity) { + auto new_data = static_cast(_mr.allocate_async(new_size * sizeof(T), _stream)); + _stream.synchronize(); + if (_data != nullptr) { + std::copy(_data, _data + _size, new_data); + _mr.deallocate_async(_data, _size * sizeof(T), _stream); + } + _data = new_data; + _capacity = new_size; + } + _size = new_size; + } + + void reserve(std::size_t new_capacity) + { + if (new_capacity > _capacity) { + auto new_data = static_cast(_mr.allocate_async(new_capacity * sizeof(T), _stream)); + _stream.synchronize(); + if (_data != nullptr) { + std::copy(_data, _data + _size, new_data); + _mr.deallocate_async(_data, _size * sizeof(T), _stream); + } + _data = new_data; + _capacity = new_capacity; + } + } + + void push_back(T const& value) + { + if (_size == _capacity) { reserve(_capacity == 0 ? 
2 : _capacity * 2); } + _data[_size++] = value; + } + + void clear() { _size = 0; } + + [[nodiscard]] std::size_t size() const { return _size; } + [[nodiscard]] std::int64_t ssize() const { return _size; } + [[nodiscard]] bool is_empty() const { return _size == 0; } + [[nodiscard]] std::size_t capacity() const { return _capacity; } + + [[nodiscard]] T& operator[](std::size_t idx) { return _data[idx]; } + [[nodiscard]] T const& operator[](std::size_t idx) const { return _data[idx]; } + + [[nodiscard]] T* data() { return _data; } + [[nodiscard]] T const* data() const { return _data; } + + [[nodiscard]] T& front() { return _data[0]; } + [[nodiscard]] T const& front() const { return _data[0]; } + + [[nodiscard]] T& back() { return _data[_size - 1]; } + [[nodiscard]] T const& back() const { return _data[_size - 1]; } + + [[nodiscard]] T* begin() { return _data; } + [[nodiscard]] T const* begin() const { return _data; } + + [[nodiscard]] T* end() { return _data + _size; } + [[nodiscard]] T const* end() const { return _data + _size; } + + [[nodiscard]] rmm::host_async_resource_ref memory_resource() const { return _mr; } + [[nodiscard]] rmm::cuda_stream_view stream() const { return _stream; } + + private: + T* _data{nullptr}; + std::size_t _size; + std::size_t _capacity; + rmm::host_async_resource_ref _mr; + rmm::cuda_stream_view _stream; +}; + +} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp deleted file mode 100644 index e62c8017f8b..00000000000 --- a/cpp/include/cudf/detail/utilities/host_vector.hpp +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright 2024 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -#include -#include - -#include - -#include -#include -#include // for bad_alloc - -namespace cudf::detail { - -/*! \p rmm_host_allocator is a CUDA-specific host memory allocator - * that employs \c a `rmm::host_async_resource_ref` for allocation. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template -class rmm_host_allocator; - -/*! \p rmm_host_allocator is a CUDA-specific host memory allocator - * that employs \c an `cudf::host_async_resource_ref` for allocation. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template <> -class rmm_host_allocator { - public: - using value_type = void; ///< The type of the elements in the allocator - using pointer = void*; ///< The type returned by address() / allocate() - using const_pointer = void const*; ///< The type returned by address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers - - /** - * @brief converts a `rmm_host_allocator` to `rmm_host_allocator` - */ - template - struct rebind { - using other = rmm_host_allocator; ///< The rebound type - }; -}; - -/*! 
\p rmm_host_allocator is a CUDA-specific host memory allocator - * that employs \c `rmm::host_async_resource_ref` for allocation. - * - * The \p rmm_host_allocator provides an interface for host memory allocation through the user - * provided \c `rmm::host_async_resource_ref`. The \p rmm_host_allocator does not take ownership of - * this reference and therefore it is the user's responsibility to ensure its lifetime for the - * duration of the lifetime of the \p rmm_host_allocator. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template -class rmm_host_allocator { - public: - using value_type = T; ///< The type of the elements in the allocator - using pointer = T*; ///< The type returned by address() / allocate() - using const_pointer = T const*; ///< The type returned by address() - using reference = T&; ///< The parameter type for address() - using const_reference = T const&; ///< The parameter type for address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers - - typedef cuda::std::true_type propagate_on_container_move_assignment; - - /** - * @brief converts a `rmm_host_allocator` to `rmm_host_allocator` - */ - template - struct rebind { - using other = rmm_host_allocator; ///< The rebound type - }; - - /** - * @brief Cannot declare an empty host allocator. - */ - rmm_host_allocator() = delete; - - /** - * @brief Construct from a `cudf::host_async_resource_ref` - */ - rmm_host_allocator(rmm::host_async_resource_ref _mr, rmm::cuda_stream_view _stream) - : mr(_mr), stream(_stream) - { - } - - /** - * @brief This method allocates storage for objects in host memory. - * - * @param cnt The number of objects to allocate. - * @return a \c pointer to the newly allocated objects. - * @note This method does not invoke \p value_type's constructor. - * It is the responsibility of the caller to initialize the - * objects at the returned \c pointer. - */ - inline pointer allocate(size_type cnt) - { - if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if - return static_cast( - mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream)); - } - - /** - * @brief This method deallocates host memory previously allocated - * with this \c rmm_host_allocator. - * - * @param p A \c pointer to the previously allocated memory. - * @note The second parameter is the number of objects previously allocated. - * @note This method does not invoke \p value_type's destructor. - * It is the responsibility of the caller to destroy - * the objects stored at \p p. - */ - inline void deallocate(pointer p, size_type cnt) - { - mr.deallocate_async(p, cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - } - - /** - * @brief This method returns the maximum size of the \c cnt parameter - * accepted by the \p allocate() method. - * - * @return The maximum number of objects that may be allocated - * by a single call to \p allocate(). - */ - constexpr inline size_type max_size() const - { - return (std::numeric_limits::max)() / sizeof(T); - } - - /** - * @brief This method tests this \p rmm_host_allocator for equality to - * another. - * - * @param x The other \p rmm_host_allocator of interest. - * @return This method always returns \c true. 
- */ - inline bool operator==(rmm_host_allocator const& x) const - { - return x.mr == mr && x.stream == stream; - } - - /** - * @brief This method tests this \p rmm_host_allocator for inequality - * to another. - * - * @param x The other \p rmm_host_allocator of interest. - * @return This method always returns \c false. - */ - inline bool operator!=(rmm_host_allocator const& x) const { return !operator==(x); } - - private: - rmm::host_async_resource_ref mr; - rmm::cuda_stream_view stream; -}; - -/** - * @brief A vector class with rmm host memory allocator - */ -template -using host_vector = thrust::host_vector>; - -} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 06dfcbfc5e5..f67b671c610 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -21,7 +21,7 @@ * @file vector_factories.hpp */ -#include +#include #include #include #include @@ -442,7 +442,7 @@ thrust::host_vector make_host_vector_sync( } /** - * @brief Asynchronously construct a pinned `cudf::detail::host_vector` of the given size + * @brief Asynchronously construct a pinned `cudf::detail::host_uvector` of the given size * * @note This function does not synchronize `stream`. * @@ -452,13 +452,13 @@ thrust::host_vector make_host_vector_sync( * @return A host_vector of the given size */ template -host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) +host_uvector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) { - return host_vector(size, {cudf::get_pinned_memory_resource(), stream}); + return host_uvector(size, cudf::get_pinned_memory_resource(), stream); } /** - * @brief Synchronously construct a pinned `cudf::detail::host_vector` of the given size + * @brief Synchronously construct a pinned `cudf::detail::host_uvector` of the given size * * @note This function synchronizes `stream`. 
* @@ -468,7 +468,7 @@ host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view strea * @return A host_vector of the given size */ template -host_vector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) +host_uvector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) { auto result = make_pinned_vector_async(size, stream); stream.synchronize(); diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 47e92d61a9f..873d3e56acb 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -16,6 +16,8 @@ #pragma once +#include + #include #include #include @@ -212,6 +214,10 @@ template struct is_host_span_supported_container< // thrust::host_vector> : std::true_type {}; +template +struct is_host_span_supported_container< // + cudf::detail::host_uvector> : std::true_type {}; + template struct is_host_span_supported_container< // std::basic_string, Alloc>> : std::true_type {}; diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index 0e3ce779089..b7644a6fb9f 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -19,7 +19,7 @@ #include "io/utilities/config_utils.hpp" #include -#include +#include #include #include #include @@ -67,7 +67,7 @@ struct bgzip_nvcomp_transform_functor { class bgzip_data_chunk_reader : public data_chunk_reader { private: template - static void copy_to_device(cudf::detail::host_vector const& host, + static void copy_to_device(cudf::detail::host_uvector const& host, rmm::device_uvector& device, rmm::cuda_stream_view stream) { @@ -85,9 +85,9 @@ class bgzip_data_chunk_reader : public data_chunk_reader { 1 << 16; // 64k offset allocation, resized on demand cudaEvent_t event; - cudf::detail::host_vector h_compressed_blocks; - cudf::detail::host_vector h_compressed_offsets; - cudf::detail::host_vector h_decompressed_offsets; + cudf::detail::host_uvector h_compressed_blocks; + cudf::detail::host_uvector h_compressed_offsets; + cudf::detail::host_uvector h_decompressed_offsets; rmm::device_uvector d_compressed_blocks; rmm::device_uvector d_decompressed_blocks; rmm::device_uvector d_compressed_offsets; diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 45096b7155c..2c4160e48c5 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -17,7 +17,7 @@ #include "io/text/device_data_chunks.hpp" #include -#include +#include #include #include @@ -33,7 +33,7 @@ namespace { struct host_ticket { cudaEvent_t event; - std::unique_ptr> buffer; + std::unique_ptr> buffer; }; /** @@ -86,7 +86,7 @@ class datasource_chunk_reader : public data_chunk_reader { // resize the host buffer as necessary to contain the requested number of bytes if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { - h_ticket.buffer = std::make_unique>( + h_ticket.buffer = std::make_unique>( cudf::detail::make_pinned_vector_sync(read_size, stream)); } @@ -153,7 +153,7 @@ class istream_data_chunk_reader : public data_chunk_reader { // resize the host buffer as necessary to contain the requested number of bytes if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { - h_ticket.buffer = std::make_unique>( + h_ticket.buffer = std::make_unique>( cudf::detail::make_pinned_vector_sync(read_size, stream)); } diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp 
b/cpp/src/io/utilities/hostdevice_vector.hpp index 1ae27a2f4ae..ae2ab03ded3 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -18,7 +18,7 @@ #include "hostdevice_span.hpp" -#include +#include #include #include #include @@ -172,7 +172,7 @@ class hostdevice_vector { } private: - cudf::detail::host_vector h_data; + cudf::detail::host_uvector h_data; rmm::device_uvector d_data; }; From 075deca7c87b70b62f30a5b8a266da39a3e852cb Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 7 Jun 2024 11:33:10 -0700 Subject: [PATCH 19/31] style --- cpp/include/cudf/utilities/span.hpp | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 873d3e56acb..2f622612209 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -204,23 +204,28 @@ class span_base { // ===== host_span ================================================================================= template -struct is_host_span_supported_container : std::false_type {}; +struct is_host_span_supported_container : std::false_type { +}; template struct is_host_span_supported_container< // - std::vector> : std::true_type {}; + std::vector> : std::true_type { +}; template struct is_host_span_supported_container< // - thrust::host_vector> : std::true_type {}; + thrust::host_vector> : std::true_type { +}; template struct is_host_span_supported_container< // - cudf::detail::host_uvector> : std::true_type {}; + cudf::detail::host_uvector> : std::true_type { +}; template struct is_host_span_supported_container< // - std::basic_string, Alloc>> : std::true_type {}; + std::basic_string, Alloc>> : std::true_type { +}; /** * @brief C++20 std::span with reduced feature set. @@ -275,19 +280,23 @@ struct host_span : public cudf::detail::span_base -struct is_device_span_supported_container : std::false_type {}; +struct is_device_span_supported_container : std::false_type { +}; template struct is_device_span_supported_container< // - thrust::device_vector> : std::true_type {}; + thrust::device_vector> : std::true_type { +}; template struct is_device_span_supported_container< // - rmm::device_vector> : std::true_type {}; + rmm::device_vector> : std::true_type { +}; template struct is_device_span_supported_container< // - rmm::device_uvector> : std::true_type {}; + rmm::device_uvector> : std::true_type { +}; /** * @brief Device version of C++20 std::span with reduced feature set. From 164fce20ad07632b5a9899668d9da7d23ced6b97 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 7 Jun 2024 11:53:00 -0700 Subject: [PATCH 20/31] docs; prefixes --- cpp/src/utilities/pinned_memory.cpp | 43 ++++++++++++++++------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index 9cebf980d00..85d4b7e2283 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -28,34 +28,39 @@ namespace cudf { namespace { + +// Asynchronous memory resource that allocates a fixed-size pool of pinned memory and falls back to +// additional pinned allocations if the pool is exhausted. 
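+// For example, with a 64 MiB pool, an allocate_async() of up to 64 MiB is served from the pool
+// when space is available; larger requests, or requests made while the pool is exhausted, fall
+// back to the upstream pinned_host_memory_resource. deallocate_async() routes each pointer back
+// to the resource that produced it by checking the pool's address range.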
class fixed_pinned_pool_memory_resource { using upstream_mr = rmm::mr::pinned_host_memory_resource; using host_pooled_mr = rmm::mr::pool_memory_resource; private: - upstream_mr upstream_mr_{}; - size_t pool_size_{0}; + upstream_mr _upstream_mr{}; + size_t _pool_size{0}; // Raw pointer to avoid a segfault when the pool is destroyed on exit host_pooled_mr* pool_{nullptr}; - void* pool_begin_{nullptr}; - void* pool_end_{nullptr}; - cuda::stream_ref stream_{cudf::detail::global_cuda_stream_pool().get_stream().value()}; + // The beginning and end of the pool memory range; pool is never reallocated so these are constant + // and can be used to determine if a pointer is within the pool + void* _pool_begin{nullptr}; + void* _pool_end{nullptr}; + cuda::stream_ref _stream{cudf::detail::global_cuda_stream_pool().get_stream().value()}; public: fixed_pinned_pool_memory_resource(size_t size) - : pool_size_{size}, pool_{new host_pooled_mr(upstream_mr_, size, size)} + : _pool_size{size}, pool_{new host_pooled_mr(_upstream_mr, size, size)} { - if (pool_size_ == 0) { return; } + if (_pool_size == 0) { return; } // Allocate full size from the pinned pool to figure out the beginning and end address - pool_begin_ = pool_->allocate_async(pool_size_, stream_); - pool_end_ = static_cast(static_cast(pool_begin_) + pool_size_); - pool_->deallocate_async(pool_begin_, pool_size_, stream_); + _pool_begin = pool_->allocate_async(_pool_size, _stream); + _pool_end = static_cast(static_cast(_pool_begin) + _pool_size); + pool_->deallocate_async(_pool_begin, _pool_size, _stream); } void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) { - if (bytes <= pool_size_) { + if (bytes <= _pool_size) { try { return pool_->allocate_async(bytes, alignment, stream); } catch (...) 
{ @@ -63,7 +68,7 @@ class fixed_pinned_pool_memory_resource { } } - return upstream_mr_.allocate_async(bytes, alignment, stream); + return _upstream_mr.allocate_async(bytes, alignment, stream); } void* allocate_async(std::size_t bytes, cuda::stream_ref stream) @@ -73,8 +78,8 @@ class fixed_pinned_pool_memory_resource { void* allocate(std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) { - auto const result = allocate_async(bytes, alignment, stream_); - stream_.wait(); + auto const result = allocate_async(bytes, alignment, _stream); + _stream.wait(); return result; } @@ -83,10 +88,10 @@ class fixed_pinned_pool_memory_resource { std::size_t alignment, cuda::stream_ref stream) noexcept { - if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr < pool_end_) { + if (bytes <= _pool_size && ptr >= _pool_begin && ptr < _pool_end) { pool_->deallocate_async(ptr, bytes, alignment, stream); } else { - upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); + _upstream_mr.deallocate_async(ptr, bytes, alignment, stream); } } @@ -99,13 +104,13 @@ class fixed_pinned_pool_memory_resource { std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept { - deallocate_async(ptr, bytes, alignment, stream_); - stream_.wait(); + deallocate_async(ptr, bytes, alignment, _stream); + _stream.wait(); } bool operator==(fixed_pinned_pool_memory_resource const& other) const { - return pool_ == other.pool_ and stream_ == other.stream_; + return pool_ == other.pool_ and _stream == other._stream; } bool operator!=(fixed_pinned_pool_memory_resource const& other) const From b566babb87696cf54656605ab76e9e25b5c42bed Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 7 Jun 2024 12:00:40 -0700 Subject: [PATCH 21/31] type aliases in host_uvector --- .../cudf/detail/utilities/host_uvector.hpp | 69 +++++++++++-------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/host_uvector.hpp b/cpp/include/cudf/detail/utilities/host_uvector.hpp index 39bde04e985..c8166217a73 100644 --- a/cpp/include/cudf/detail/utilities/host_uvector.hpp +++ b/cpp/include/cudf/detail/utilities/host_uvector.hpp @@ -27,10 +27,21 @@ namespace cudf::detail { template class host_uvector { public: - host_uvector(std::size_t size, rmm::host_async_resource_ref mr, rmm::cuda_stream_view stream) + using value_type = T; + using size_type = std::size_t; + using reference = value_type&; + using const_reference = value_type const&; + using pointer = value_type*; + using const_pointer = value_type const*; + using iterator = pointer; + using const_iterator = const_pointer; + + host_uvector(size_type size, rmm::host_async_resource_ref mr, rmm::cuda_stream_view stream) : _size{size}, _capacity{size}, _mr{mr}, _stream{stream} { - if (_size != 0) { _data = static_cast(mr.allocate_async(_size * sizeof(T), _stream)); } + if (_size != 0) { + _data = static_cast(mr.allocate_async(_size * sizeof(value_type), _stream)); + } } host_uvector(host_uvector const&) = delete; @@ -50,7 +61,7 @@ class host_uvector { host_uvector& operator=(host_uvector&& other) { if (this != &other) { - if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } + if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); } _data = other._data; _size = other._size; _capacity = other._capacity; @@ -65,17 +76,18 @@ class host_uvector { ~host_uvector() { - if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } + if (_data 
!= nullptr) { _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); } } - void resize(std::size_t new_size) + void resize(size_type new_size) { if (new_size > _capacity) { - auto new_data = static_cast(_mr.allocate_async(new_size * sizeof(T), _stream)); + auto new_data = + static_cast(_mr.allocate_async(new_size * sizeof(value_type), _stream)); _stream.synchronize(); if (_data != nullptr) { std::copy(_data, _data + _size, new_data); - _mr.deallocate_async(_data, _size * sizeof(T), _stream); + _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); } _data = new_data; _capacity = new_size; @@ -83,21 +95,22 @@ class host_uvector { _size = new_size; } - void reserve(std::size_t new_capacity) + void reserve(size_type new_capacity) { if (new_capacity > _capacity) { - auto new_data = static_cast(_mr.allocate_async(new_capacity * sizeof(T), _stream)); + auto new_data = + static_cast(_mr.allocate_async(new_capacity * sizeof(value_type), _stream)); _stream.synchronize(); if (_data != nullptr) { std::copy(_data, _data + _size, new_data); - _mr.deallocate_async(_data, _size * sizeof(T), _stream); + _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); } _data = new_data; _capacity = new_capacity; } } - void push_back(T const& value) + void push_back(const_reference value) { if (_size == _capacity) { reserve(_capacity == 0 ? 2 : _capacity * 2); } _data[_size++] = value; @@ -105,36 +118,36 @@ class host_uvector { void clear() { _size = 0; } - [[nodiscard]] std::size_t size() const { return _size; } + [[nodiscard]] size_type size() const { return _size; } [[nodiscard]] std::int64_t ssize() const { return _size; } [[nodiscard]] bool is_empty() const { return _size == 0; } - [[nodiscard]] std::size_t capacity() const { return _capacity; } + [[nodiscard]] size_type capacity() const { return _capacity; } - [[nodiscard]] T& operator[](std::size_t idx) { return _data[idx]; } - [[nodiscard]] T const& operator[](std::size_t idx) const { return _data[idx]; } + [[nodiscard]] reference operator[](size_type idx) { return _data[idx]; } + [[nodiscard]] const_reference operator[](size_type idx) const { return _data[idx]; } - [[nodiscard]] T* data() { return _data; } - [[nodiscard]] T const* data() const { return _data; } + [[nodiscard]] pointer data() { return _data; } + [[nodiscard]] const_pointer data() const { return _data; } - [[nodiscard]] T& front() { return _data[0]; } - [[nodiscard]] T const& front() const { return _data[0]; } + [[nodiscard]] reference front() { return _data[0]; } + [[nodiscard]] const_reference front() const { return _data[0]; } - [[nodiscard]] T& back() { return _data[_size - 1]; } - [[nodiscard]] T const& back() const { return _data[_size - 1]; } + [[nodiscard]] reference back() { return _data[_size - 1]; } + [[nodiscard]] const_reference back() const { return _data[_size - 1]; } - [[nodiscard]] T* begin() { return _data; } - [[nodiscard]] T const* begin() const { return _data; } + [[nodiscard]] iterator begin() { return _data; } + [[nodiscard]] const_iterator begin() const { return _data; } - [[nodiscard]] T* end() { return _data + _size; } - [[nodiscard]] T const* end() const { return _data + _size; } + [[nodiscard]] iterator end() { return _data + _size; } + [[nodiscard]] const_iterator end() const { return _data + _size; } [[nodiscard]] rmm::host_async_resource_ref memory_resource() const { return _mr; } [[nodiscard]] rmm::cuda_stream_view stream() const { return _stream; } private: - T* _data{nullptr}; - std::size_t _size; - std::size_t _capacity; + 
pointer _data{nullptr}; + size_type _size; + size_type _capacity; rmm::host_async_resource_ref _mr; rmm::cuda_stream_view _stream; }; From 21edb534a15c836963c116f3c9ca360cadb1844c Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 7 Jun 2024 13:24:37 -0700 Subject: [PATCH 22/31] refactor host_ticket --- .../io/text/data_chunk_source_factories.cpp | 56 +++++++------------ 1 file changed, 19 insertions(+), 37 deletions(-) diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 2c4160e48c5..39e955232e3 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "cudf/utilities/default_stream.hpp" #include "io/text/device_data_chunks.hpp" #include @@ -32,8 +33,15 @@ namespace cudf::io::text { namespace { struct host_ticket { - cudaEvent_t event; - std::unique_ptr> buffer; + cudaEvent_t event{}; // tracks the completion of the last device-to-host copy. + cudf::detail::host_uvector buffer; + + host_ticket() : buffer{cudf::detail::make_pinned_vector_sync(0, cudf::get_default_stream())} + { + cudaEventCreate(&event); + } + + ~host_ticket() { cudaEventDestroy(event); } }; /** @@ -44,20 +52,7 @@ class datasource_chunk_reader : public data_chunk_reader { constexpr static int num_tickets = 2; public: - datasource_chunk_reader(datasource* source) : _source(source) - { - // create an event to track the completion of the last device-to-host copy. - for (auto& ticket : _tickets) { - CUDF_CUDA_TRY(cudaEventCreate(&(ticket.event))); - } - } - - ~datasource_chunk_reader() override - { - for (auto& ticket : _tickets) { - CUDF_CUDA_TRY(cudaEventDestroy(ticket.event)); - } - } + datasource_chunk_reader(datasource* source) : _source(source) {} void skip_bytes(std::size_t size) override { @@ -85,16 +80,15 @@ class datasource_chunk_reader : public data_chunk_reader { CUDF_CUDA_TRY(cudaEventSynchronize(h_ticket.event)); // resize the host buffer as necessary to contain the requested number of bytes - if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { - h_ticket.buffer = std::make_unique>( - cudf::detail::make_pinned_vector_sync(read_size, stream)); + if (h_ticket.buffer.size() < read_size) { + h_ticket.buffer = cudf::detail::make_pinned_vector_sync(read_size, stream); } - _source->host_read(_offset, read_size, reinterpret_cast(h_ticket.buffer->data())); + _source->host_read(_offset, read_size, reinterpret_cast(h_ticket.buffer.data())); // copy the host-pinned data on to device CUDF_CUDA_TRY(cudaMemcpyAsync( - chunk.data(), h_ticket.buffer->data(), read_size, cudaMemcpyDefault, stream.value())); + chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyDefault, stream.value())); // record the host-to-device copy. CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); @@ -124,17 +118,6 @@ class istream_data_chunk_reader : public data_chunk_reader { istream_data_chunk_reader(std::unique_ptr datastream) : _datastream(std::move(datastream)) { - // create an event to track the completion of the last device-to-host copy. 
- for (auto& ticket : _tickets) { - CUDF_CUDA_TRY(cudaEventCreate(&(ticket.event))); - } - } - - ~istream_data_chunk_reader() override - { - for (auto& ticket : _tickets) { - CUDF_CUDA_TRY(cudaEventDestroy(ticket.event)); - } } void skip_bytes(std::size_t size) override { _datastream->ignore(size); }; @@ -152,13 +135,12 @@ class istream_data_chunk_reader : public data_chunk_reader { CUDF_CUDA_TRY(cudaEventSynchronize(h_ticket.event)); // resize the host buffer as necessary to contain the requested number of bytes - if (h_ticket.buffer == nullptr or h_ticket.buffer->size() < read_size) { - h_ticket.buffer = std::make_unique>( - cudf::detail::make_pinned_vector_sync(read_size, stream)); + if (h_ticket.buffer.size() < read_size) { + h_ticket.buffer = cudf::detail::make_pinned_vector_sync(read_size, stream); } // read data from the host istream in to the pinned host memory buffer - _datastream->read(h_ticket.buffer->data(), read_size); + _datastream->read(h_ticket.buffer.data(), read_size); // adjust the read size to reflect how many bytes were actually read from the data stream read_size = _datastream->gcount(); @@ -168,7 +150,7 @@ class istream_data_chunk_reader : public data_chunk_reader { // copy the host-pinned data on to device CUDF_CUDA_TRY(cudaMemcpyAsync( - chunk.data(), h_ticket.buffer->data(), read_size, cudaMemcpyDefault, stream.value())); + chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyDefault, stream.value())); // record the host-to-device copy. CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); From 3814797d5b2d3478901e14f9ecbb733d2168a06a Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 7 Jun 2024 13:33:50 -0700 Subject: [PATCH 23/31] style --- cpp/include/cudf/utilities/span.hpp | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 2f622612209..6deef974c0e 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -204,28 +204,23 @@ class span_base { // ===== host_span ================================================================================= template -struct is_host_span_supported_container : std::false_type { -}; +struct is_host_span_supported_container : std::false_type {}; template struct is_host_span_supported_container< // - std::vector> : std::true_type { -}; + std::vector> : std::true_type {}; template struct is_host_span_supported_container< // - thrust::host_vector> : std::true_type { -}; + thrust::host_vector> : std::true_type {}; template struct is_host_span_supported_container< // - cudf::detail::host_uvector> : std::true_type { -}; + cudf::detail::host_uvector> : std::true_type {}; template struct is_host_span_supported_container< // - std::basic_string, Alloc>> : std::true_type { -}; + std::basic_string, Alloc>> : std::true_type {}; /** * @brief C++20 std::span with reduced feature set. 
@@ -280,23 +275,19 @@ struct host_span : public cudf::detail::span_base -struct is_device_span_supported_container : std::false_type { -}; +struct is_device_span_supported_container : std::false_type {}; template struct is_device_span_supported_container< // - thrust::device_vector> : std::true_type { -}; + thrust::device_vector> : std::true_type {}; template struct is_device_span_supported_container< // - rmm::device_vector> : std::true_type { -}; + rmm::device_vector> : std::true_type {}; template struct is_device_span_supported_container< // - rmm::device_uvector> : std::true_type { -}; + rmm::device_uvector> : std::true_type {}; /** * @brief Device version of C++20 std::span with reduced feature set. From f7999aae606269e187de88279f96d5034ad48753 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 16:58:00 -0700 Subject: [PATCH 24/31] Revert "type aliases in host_uvector" This reverts commit b566babb87696cf54656605ab76e9e25b5c42bed. --- .../cudf/detail/utilities/host_uvector.hpp | 69 ++++++++----------- 1 file changed, 28 insertions(+), 41 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/host_uvector.hpp b/cpp/include/cudf/detail/utilities/host_uvector.hpp index c8166217a73..39bde04e985 100644 --- a/cpp/include/cudf/detail/utilities/host_uvector.hpp +++ b/cpp/include/cudf/detail/utilities/host_uvector.hpp @@ -27,21 +27,10 @@ namespace cudf::detail { template class host_uvector { public: - using value_type = T; - using size_type = std::size_t; - using reference = value_type&; - using const_reference = value_type const&; - using pointer = value_type*; - using const_pointer = value_type const*; - using iterator = pointer; - using const_iterator = const_pointer; - - host_uvector(size_type size, rmm::host_async_resource_ref mr, rmm::cuda_stream_view stream) + host_uvector(std::size_t size, rmm::host_async_resource_ref mr, rmm::cuda_stream_view stream) : _size{size}, _capacity{size}, _mr{mr}, _stream{stream} { - if (_size != 0) { - _data = static_cast(mr.allocate_async(_size * sizeof(value_type), _stream)); - } + if (_size != 0) { _data = static_cast(mr.allocate_async(_size * sizeof(T), _stream)); } } host_uvector(host_uvector const&) = delete; @@ -61,7 +50,7 @@ class host_uvector { host_uvector& operator=(host_uvector&& other) { if (this != &other) { - if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); } + if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } _data = other._data; _size = other._size; _capacity = other._capacity; @@ -76,18 +65,17 @@ class host_uvector { ~host_uvector() { - if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); } + if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } } - void resize(size_type new_size) + void resize(std::size_t new_size) { if (new_size > _capacity) { - auto new_data = - static_cast(_mr.allocate_async(new_size * sizeof(value_type), _stream)); + auto new_data = static_cast(_mr.allocate_async(new_size * sizeof(T), _stream)); _stream.synchronize(); if (_data != nullptr) { std::copy(_data, _data + _size, new_data); - _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); + _mr.deallocate_async(_data, _size * sizeof(T), _stream); } _data = new_data; _capacity = new_size; @@ -95,22 +83,21 @@ class host_uvector { _size = new_size; } - void reserve(size_type new_capacity) + void reserve(std::size_t new_capacity) { if (new_capacity > _capacity) { - auto new_data = - 
static_cast(_mr.allocate_async(new_capacity * sizeof(value_type), _stream)); + auto new_data = static_cast(_mr.allocate_async(new_capacity * sizeof(T), _stream)); _stream.synchronize(); if (_data != nullptr) { std::copy(_data, _data + _size, new_data); - _mr.deallocate_async(_data, _size * sizeof(value_type), _stream); + _mr.deallocate_async(_data, _size * sizeof(T), _stream); } _data = new_data; _capacity = new_capacity; } } - void push_back(const_reference value) + void push_back(T const& value) { if (_size == _capacity) { reserve(_capacity == 0 ? 2 : _capacity * 2); } _data[_size++] = value; @@ -118,36 +105,36 @@ class host_uvector { void clear() { _size = 0; } - [[nodiscard]] size_type size() const { return _size; } + [[nodiscard]] std::size_t size() const { return _size; } [[nodiscard]] std::int64_t ssize() const { return _size; } [[nodiscard]] bool is_empty() const { return _size == 0; } - [[nodiscard]] size_type capacity() const { return _capacity; } + [[nodiscard]] std::size_t capacity() const { return _capacity; } - [[nodiscard]] reference operator[](size_type idx) { return _data[idx]; } - [[nodiscard]] const_reference operator[](size_type idx) const { return _data[idx]; } + [[nodiscard]] T& operator[](std::size_t idx) { return _data[idx]; } + [[nodiscard]] T const& operator[](std::size_t idx) const { return _data[idx]; } - [[nodiscard]] pointer data() { return _data; } - [[nodiscard]] const_pointer data() const { return _data; } + [[nodiscard]] T* data() { return _data; } + [[nodiscard]] T const* data() const { return _data; } - [[nodiscard]] reference front() { return _data[0]; } - [[nodiscard]] const_reference front() const { return _data[0]; } + [[nodiscard]] T& front() { return _data[0]; } + [[nodiscard]] T const& front() const { return _data[0]; } - [[nodiscard]] reference back() { return _data[_size - 1]; } - [[nodiscard]] const_reference back() const { return _data[_size - 1]; } + [[nodiscard]] T& back() { return _data[_size - 1]; } + [[nodiscard]] T const& back() const { return _data[_size - 1]; } - [[nodiscard]] iterator begin() { return _data; } - [[nodiscard]] const_iterator begin() const { return _data; } + [[nodiscard]] T* begin() { return _data; } + [[nodiscard]] T const* begin() const { return _data; } - [[nodiscard]] iterator end() { return _data + _size; } - [[nodiscard]] const_iterator end() const { return _data + _size; } + [[nodiscard]] T* end() { return _data + _size; } + [[nodiscard]] T const* end() const { return _data + _size; } [[nodiscard]] rmm::host_async_resource_ref memory_resource() const { return _mr; } [[nodiscard]] rmm::cuda_stream_view stream() const { return _stream; } private: - pointer _data{nullptr}; - size_type _size; - size_type _capacity; + T* _data{nullptr}; + std::size_t _size; + std::size_t _capacity; rmm::host_async_resource_ref _mr; rmm::cuda_stream_view _stream; }; From c9a82d010a997d4c1f4afad94b36709e859d98fe Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 16:58:14 -0700 Subject: [PATCH 25/31] Revert "docs; prefixes" This reverts commit 164fce20ad07632b5a9899668d9da7d23ced6b97. 
--- cpp/src/utilities/pinned_memory.cpp | 43 +++++++++++++---------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index 85d4b7e2283..9cebf980d00 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -28,39 +28,34 @@ namespace cudf { namespace { - -// Asynchronous memory resource that allocates a fixed-size pool of pinned memory and falls back to -// additional pinned allocations if the pool is exhausted. class fixed_pinned_pool_memory_resource { using upstream_mr = rmm::mr::pinned_host_memory_resource; using host_pooled_mr = rmm::mr::pool_memory_resource; private: - upstream_mr _upstream_mr{}; - size_t _pool_size{0}; + upstream_mr upstream_mr_{}; + size_t pool_size_{0}; // Raw pointer to avoid a segfault when the pool is destroyed on exit host_pooled_mr* pool_{nullptr}; - // The beginning and end of the pool memory range; pool is never reallocated so these are constant - // and can be used to determine if a pointer is within the pool - void* _pool_begin{nullptr}; - void* _pool_end{nullptr}; - cuda::stream_ref _stream{cudf::detail::global_cuda_stream_pool().get_stream().value()}; + void* pool_begin_{nullptr}; + void* pool_end_{nullptr}; + cuda::stream_ref stream_{cudf::detail::global_cuda_stream_pool().get_stream().value()}; public: fixed_pinned_pool_memory_resource(size_t size) - : _pool_size{size}, pool_{new host_pooled_mr(_upstream_mr, size, size)} + : pool_size_{size}, pool_{new host_pooled_mr(upstream_mr_, size, size)} { - if (_pool_size == 0) { return; } + if (pool_size_ == 0) { return; } // Allocate full size from the pinned pool to figure out the beginning and end address - _pool_begin = pool_->allocate_async(_pool_size, _stream); - _pool_end = static_cast(static_cast(_pool_begin) + _pool_size); - pool_->deallocate_async(_pool_begin, _pool_size, _stream); + pool_begin_ = pool_->allocate_async(pool_size_, stream_); + pool_end_ = static_cast(static_cast(pool_begin_) + pool_size_); + pool_->deallocate_async(pool_begin_, pool_size_, stream_); } void* allocate_async(std::size_t bytes, std::size_t alignment, cuda::stream_ref stream) { - if (bytes <= _pool_size) { + if (bytes <= pool_size_) { try { return pool_->allocate_async(bytes, alignment, stream); } catch (...) 
{ @@ -68,7 +63,7 @@ class fixed_pinned_pool_memory_resource { } } - return _upstream_mr.allocate_async(bytes, alignment, stream); + return upstream_mr_.allocate_async(bytes, alignment, stream); } void* allocate_async(std::size_t bytes, cuda::stream_ref stream) @@ -78,8 +73,8 @@ class fixed_pinned_pool_memory_resource { void* allocate(std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) { - auto const result = allocate_async(bytes, alignment, _stream); - _stream.wait(); + auto const result = allocate_async(bytes, alignment, stream_); + stream_.wait(); return result; } @@ -88,10 +83,10 @@ class fixed_pinned_pool_memory_resource { std::size_t alignment, cuda::stream_ref stream) noexcept { - if (bytes <= _pool_size && ptr >= _pool_begin && ptr < _pool_end) { + if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr < pool_end_) { pool_->deallocate_async(ptr, bytes, alignment, stream); } else { - _upstream_mr.deallocate_async(ptr, bytes, alignment, stream); + upstream_mr_.deallocate_async(ptr, bytes, alignment, stream); } } @@ -104,13 +99,13 @@ class fixed_pinned_pool_memory_resource { std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept { - deallocate_async(ptr, bytes, alignment, _stream); - _stream.wait(); + deallocate_async(ptr, bytes, alignment, stream_); + stream_.wait(); } bool operator==(fixed_pinned_pool_memory_resource const& other) const { - return pool_ == other.pool_ and _stream == other._stream; + return pool_ == other.pool_ and stream_ == other.stream_; } bool operator!=(fixed_pinned_pool_memory_resource const& other) const From 930efef8fcec62a4ac87a1f8faebab9783ccabd4 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 16:58:24 -0700 Subject: [PATCH 26/31] Revert "style" This reverts commit 075deca7c87b70b62f30a5b8a266da39a3e852cb. --- cpp/include/cudf/utilities/span.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 6deef974c0e..873d3e56acb 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 046694978dbe65ea515ad46b079ccbdcd9bc1206 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 16:59:50 -0700 Subject: [PATCH 27/31] Revert "host_uvector" This reverts commit 24c15498b9ad53ec452a99b94fb767b90f4551a0. --- cpp/benchmarks/io/cuio_common.cpp | 9 +- cpp/benchmarks/io/cuio_common.hpp | 4 +- .../cudf/detail/utilities/host_uvector.hpp | 142 -------------- .../cudf/detail/utilities/host_vector.hpp | 183 ++++++++++++++++++ .../detail/utilities/vector_factories.hpp | 12 +- cpp/include/cudf/utilities/span.hpp | 6 - cpp/src/io/text/bgzip_data_chunk_source.cu | 10 +- .../io/text/data_chunk_source_factories.cpp | 4 +- cpp/src/io/utilities/hostdevice_vector.hpp | 4 +- 9 files changed, 204 insertions(+), 170 deletions(-) delete mode 100644 cpp/include/cudf/detail/utilities/host_uvector.hpp create mode 100644 cpp/include/cudf/detail/utilities/host_vector.hpp diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 09d7d8a9db6..45dc812e247 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -14,14 +14,13 @@ * limitations under the License. 
*/ +#include +#include + #include #include #include -#include - -#include -#include #include @@ -53,7 +52,7 @@ std::string random_file_in_dir(std::string const& dir_path) cuio_source_sink_pair::cuio_source_sink_pair(io_type type) : type{type}, - pinned_buffer(0, pinned_memory_resource(), cudf::get_default_stream()), + pinned_buffer({pinned_memory_resource(), cudf::get_default_stream()}), d_buffer{0, cudf::get_default_stream()}, file_name{random_file_in_dir(tmpdir.path())}, void_sink{cudf::io::data_sink::create()} diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index 020fd7e00c1..64d6021cf50 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -18,7 +18,7 @@ #include -#include +#include #include #include @@ -79,7 +79,7 @@ class cuio_source_sink_pair { io_type const type; std::vector h_buffer; - cudf::detail::host_uvector pinned_buffer; + cudf::detail::host_vector pinned_buffer; rmm::device_uvector d_buffer; std::string const file_name; std::unique_ptr void_sink; diff --git a/cpp/include/cudf/detail/utilities/host_uvector.hpp b/cpp/include/cudf/detail/utilities/host_uvector.hpp deleted file mode 100644 index 39bde04e985..00000000000 --- a/cpp/include/cudf/detail/utilities/host_uvector.hpp +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright 2024 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -#include -#include - -namespace cudf::detail { - -template -class host_uvector { - public: - host_uvector(std::size_t size, rmm::host_async_resource_ref mr, rmm::cuda_stream_view stream) - : _size{size}, _capacity{size}, _mr{mr}, _stream{stream} - { - if (_size != 0) { _data = static_cast(mr.allocate_async(_size * sizeof(T), _stream)); } - } - - host_uvector(host_uvector const&) = delete; - host_uvector(host_uvector&& other) - : _data{other._data}, - _size{other._size}, - _capacity{other._capacity}, - _mr{other._mr}, - _stream{other._stream} - { - other._data = nullptr; - other._size = 0; - other._capacity = 0; - } - - host_uvector& operator=(host_uvector const&) = delete; - host_uvector& operator=(host_uvector&& other) - { - if (this != &other) { - if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } - _data = other._data; - _size = other._size; - _capacity = other._capacity; - _mr = other._mr; - _stream = other._stream; - other._data = nullptr; - other._size = 0; - other._capacity = 0; - } - return *this; - } - - ~host_uvector() - { - if (_data != nullptr) { _mr.deallocate_async(_data, _size * sizeof(T), _stream); } - } - - void resize(std::size_t new_size) - { - if (new_size > _capacity) { - auto new_data = static_cast(_mr.allocate_async(new_size * sizeof(T), _stream)); - _stream.synchronize(); - if (_data != nullptr) { - std::copy(_data, _data + _size, new_data); - _mr.deallocate_async(_data, _size * sizeof(T), _stream); - } - _data = new_data; - _capacity = new_size; - } - _size = new_size; - } - - void reserve(std::size_t new_capacity) - { - if (new_capacity > _capacity) { - auto new_data = static_cast(_mr.allocate_async(new_capacity * sizeof(T), _stream)); - _stream.synchronize(); - if (_data != nullptr) { - std::copy(_data, _data + _size, new_data); - _mr.deallocate_async(_data, _size * sizeof(T), _stream); - } - _data = new_data; - _capacity = new_capacity; - } - } - - void push_back(T const& value) - { - if (_size == _capacity) { reserve(_capacity == 0 ? 
2 : _capacity * 2); } - _data[_size++] = value; - } - - void clear() { _size = 0; } - - [[nodiscard]] std::size_t size() const { return _size; } - [[nodiscard]] std::int64_t ssize() const { return _size; } - [[nodiscard]] bool is_empty() const { return _size == 0; } - [[nodiscard]] std::size_t capacity() const { return _capacity; } - - [[nodiscard]] T& operator[](std::size_t idx) { return _data[idx]; } - [[nodiscard]] T const& operator[](std::size_t idx) const { return _data[idx]; } - - [[nodiscard]] T* data() { return _data; } - [[nodiscard]] T const* data() const { return _data; } - - [[nodiscard]] T& front() { return _data[0]; } - [[nodiscard]] T const& front() const { return _data[0]; } - - [[nodiscard]] T& back() { return _data[_size - 1]; } - [[nodiscard]] T const& back() const { return _data[_size - 1]; } - - [[nodiscard]] T* begin() { return _data; } - [[nodiscard]] T const* begin() const { return _data; } - - [[nodiscard]] T* end() { return _data + _size; } - [[nodiscard]] T const* end() const { return _data + _size; } - - [[nodiscard]] rmm::host_async_resource_ref memory_resource() const { return _mr; } - [[nodiscard]] rmm::cuda_stream_view stream() const { return _stream; } - - private: - T* _data{nullptr}; - std::size_t _size; - std::size_t _capacity; - rmm::host_async_resource_ref _mr; - rmm::cuda_stream_view _stream; -}; - -} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp new file mode 100644 index 00000000000..e62c8017f8b --- /dev/null +++ b/cpp/include/cudf/detail/utilities/host_vector.hpp @@ -0,0 +1,183 @@ +/* + * Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +#include + +#include +#include +#include // for bad_alloc + +namespace cudf::detail { + +/*! \p rmm_host_allocator is a CUDA-specific host memory allocator + * that employs \c a `rmm::host_async_resource_ref` for allocation. + * + * \see https://en.cppreference.com/w/cpp/memory/allocator + */ +template +class rmm_host_allocator; + +/*! \p rmm_host_allocator is a CUDA-specific host memory allocator + * that employs \c an `cudf::host_async_resource_ref` for allocation. + * + * \see https://en.cppreference.com/w/cpp/memory/allocator + */ +template <> +class rmm_host_allocator { + public: + using value_type = void; ///< The type of the elements in the allocator + using pointer = void*; ///< The type returned by address() / allocate() + using const_pointer = void const*; ///< The type returned by address() + using size_type = std::size_t; ///< The type used for the size of the allocation + using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers + + /** + * @brief converts a `rmm_host_allocator` to `rmm_host_allocator` + */ + template + struct rebind { + using other = rmm_host_allocator; ///< The rebound type + }; +}; + +/*! 
\p rmm_host_allocator is a CUDA-specific host memory allocator + * that employs \c `rmm::host_async_resource_ref` for allocation. + * + * The \p rmm_host_allocator provides an interface for host memory allocation through the user + * provided \c `rmm::host_async_resource_ref`. The \p rmm_host_allocator does not take ownership of + * this reference and therefore it is the user's responsibility to ensure its lifetime for the + * duration of the lifetime of the \p rmm_host_allocator. + * + * \see https://en.cppreference.com/w/cpp/memory/allocator + */ +template +class rmm_host_allocator { + public: + using value_type = T; ///< The type of the elements in the allocator + using pointer = T*; ///< The type returned by address() / allocate() + using const_pointer = T const*; ///< The type returned by address() + using reference = T&; ///< The parameter type for address() + using const_reference = T const&; ///< The parameter type for address() + using size_type = std::size_t; ///< The type used for the size of the allocation + using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers + + typedef cuda::std::true_type propagate_on_container_move_assignment; + + /** + * @brief converts a `rmm_host_allocator` to `rmm_host_allocator` + */ + template + struct rebind { + using other = rmm_host_allocator; ///< The rebound type + }; + + /** + * @brief Cannot declare an empty host allocator. + */ + rmm_host_allocator() = delete; + + /** + * @brief Construct from a `cudf::host_async_resource_ref` + */ + rmm_host_allocator(rmm::host_async_resource_ref _mr, rmm::cuda_stream_view _stream) + : mr(_mr), stream(_stream) + { + } + + /** + * @brief This method allocates storage for objects in host memory. + * + * @param cnt The number of objects to allocate. + * @return a \c pointer to the newly allocated objects. + * @note This method does not invoke \p value_type's constructor. + * It is the responsibility of the caller to initialize the + * objects at the returned \c pointer. + */ + inline pointer allocate(size_type cnt) + { + if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if + return static_cast( + mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream)); + } + + /** + * @brief This method deallocates host memory previously allocated + * with this \c rmm_host_allocator. + * + * @param p A \c pointer to the previously allocated memory. + * @note The second parameter is the number of objects previously allocated. + * @note This method does not invoke \p value_type's destructor. + * It is the responsibility of the caller to destroy + * the objects stored at \p p. + */ + inline void deallocate(pointer p, size_type cnt) + { + mr.deallocate_async(p, cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); + } + + /** + * @brief This method returns the maximum size of the \c cnt parameter + * accepted by the \p allocate() method. + * + * @return The maximum number of objects that may be allocated + * by a single call to \p allocate(). + */ + constexpr inline size_type max_size() const + { + return (std::numeric_limits::max)() / sizeof(T); + } + + /** + * @brief This method tests this \p rmm_host_allocator for equality to + * another. + * + * @param x The other \p rmm_host_allocator of interest. + * @return This method always returns \c true. 
+ */ + inline bool operator==(rmm_host_allocator const& x) const + { + return x.mr == mr && x.stream == stream; + } + + /** + * @brief This method tests this \p rmm_host_allocator for inequality + * to another. + * + * @param x The other \p rmm_host_allocator of interest. + * @return This method always returns \c false. + */ + inline bool operator!=(rmm_host_allocator const& x) const { return !operator==(x); } + + private: + rmm::host_async_resource_ref mr; + rmm::cuda_stream_view stream; +}; + +/** + * @brief A vector class with rmm host memory allocator + */ +template +using host_vector = thrust::host_vector>; + +} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index f67b671c610..06dfcbfc5e5 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -21,7 +21,7 @@ * @file vector_factories.hpp */ -#include +#include #include #include #include @@ -442,7 +442,7 @@ thrust::host_vector make_host_vector_sync( } /** - * @brief Asynchronously construct a pinned `cudf::detail::host_uvector` of the given size + * @brief Asynchronously construct a pinned `cudf::detail::host_vector` of the given size * * @note This function does not synchronize `stream`. * @@ -452,13 +452,13 @@ thrust::host_vector make_host_vector_sync( * @return A host_vector of the given size */ template -host_uvector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) +host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) { - return host_uvector(size, cudf::get_pinned_memory_resource(), stream); + return host_vector(size, {cudf::get_pinned_memory_resource(), stream}); } /** - * @brief Synchronously construct a pinned `cudf::detail::host_uvector` of the given size + * @brief Synchronously construct a pinned `cudf::detail::host_vector` of the given size * * @note This function synchronizes `stream`. 
* @@ -468,7 +468,7 @@ host_uvector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stre * @return A host_vector of the given size */ template -host_uvector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) +host_vector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) { auto result = make_pinned_vector_async(size, stream); stream.synchronize(); diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 873d3e56acb..47e92d61a9f 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -16,8 +16,6 @@ #pragma once -#include - #include #include #include @@ -214,10 +212,6 @@ template struct is_host_span_supported_container< // thrust::host_vector> : std::true_type {}; -template -struct is_host_span_supported_container< // - cudf::detail::host_uvector> : std::true_type {}; - template struct is_host_span_supported_container< // std::basic_string, Alloc>> : std::true_type {}; diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index b7644a6fb9f..0e3ce779089 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -19,7 +19,7 @@ #include "io/utilities/config_utils.hpp" #include -#include +#include #include #include #include @@ -67,7 +67,7 @@ struct bgzip_nvcomp_transform_functor { class bgzip_data_chunk_reader : public data_chunk_reader { private: template - static void copy_to_device(cudf::detail::host_uvector const& host, + static void copy_to_device(cudf::detail::host_vector const& host, rmm::device_uvector& device, rmm::cuda_stream_view stream) { @@ -85,9 +85,9 @@ class bgzip_data_chunk_reader : public data_chunk_reader { 1 << 16; // 64k offset allocation, resized on demand cudaEvent_t event; - cudf::detail::host_uvector h_compressed_blocks; - cudf::detail::host_uvector h_compressed_offsets; - cudf::detail::host_uvector h_decompressed_offsets; + cudf::detail::host_vector h_compressed_blocks; + cudf::detail::host_vector h_compressed_offsets; + cudf::detail::host_vector h_decompressed_offsets; rmm::device_uvector d_compressed_blocks; rmm::device_uvector d_decompressed_blocks; rmm::device_uvector d_compressed_offsets; diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 39e955232e3..596ca3458c8 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -18,7 +18,7 @@ #include "io/text/device_data_chunks.hpp" #include -#include +#include #include #include @@ -34,7 +34,7 @@ namespace { struct host_ticket { cudaEvent_t event{}; // tracks the completion of the last device-to-host copy. 
- cudf::detail::host_uvector buffer; + cudf::detail::host_vector buffer; host_ticket() : buffer{cudf::detail::make_pinned_vector_sync(0, cudf::get_default_stream())} { diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index ae2ab03ded3..1ae27a2f4ae 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -18,7 +18,7 @@ #include "hostdevice_span.hpp" -#include +#include #include #include #include @@ -172,7 +172,7 @@ class hostdevice_vector { } private: - cudf::detail::host_uvector h_data; + cudf::detail::host_vector h_data; rmm::device_uvector d_data; }; From f31221901aab0712b7e4e416c7454d4ef03a7019 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 17:38:31 -0700 Subject: [PATCH 28/31] make do without host_uvector --- cpp/include/cudf/detail/utilities/host_vector.hpp | 8 ++++++-- cpp/include/cudf/detail/utilities/vector_factories.hpp | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp index e62c8017f8b..756fdab177a 100644 --- a/cpp/include/cudf/detail/utilities/host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/host_vector.hpp @@ -117,8 +117,12 @@ class rmm_host_allocator { inline pointer allocate(size_type cnt) { if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if - return static_cast( - mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream)); + auto const result = + mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); + // synchronize to ensure the memory is allocated before thrust::host_vector initialization + // TODO: replace thrust::host_vector with a type that does not require synchronization + stream.synchronize(); + return static_cast(result); } /** diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 06dfcbfc5e5..20cb55bb1c7 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -444,7 +444,7 @@ thrust::host_vector make_host_vector_sync( /** * @brief Asynchronously construct a pinned `cudf::detail::host_vector` of the given size * - * @note This function does not synchronize `stream`. + * @note This function may not synchronize `stream`. 
* * @tparam T The type of the vector data * @param size The number of elements in the created vector From 7cfee0ab2d3bfc3b261edce2340555a69840ebcc Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 17:41:16 -0700 Subject: [PATCH 29/31] missed change --- cpp/include/cudf/detail/utilities/host_vector.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/utilities/host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp index 756fdab177a..6a115177ab5 100644 --- a/cpp/include/cudf/detail/utilities/host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/host_vector.hpp @@ -119,7 +119,7 @@ class rmm_host_allocator { if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if auto const result = mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); - // synchronize to ensure the memory is allocated before thrust::host_vector initialization + // Synchronize to ensure the memory is allocated before thrust::host_vector initialization // TODO: replace thrust::host_vector with a type that does not require synchronization stream.synchronize(); return static_cast(result); From fe4d668fb4e6dddf0a019e5443acf2ecc34ff0e8 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Jun 2024 17:54:30 -0700 Subject: [PATCH 30/31] style --- cpp/benchmarks/io/cuio_common.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 45dc812e247..645994f3f0d 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -14,14 +14,14 @@ * limitations under the License. */ -#include -#include - #include #include #include +#include +#include + #include #include From 2d63f5ab3826b3db7e83b99cac46d2aa15312eb6 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 11 Jun 2024 11:53:36 -0700 Subject: [PATCH 31/31] static assert message --- cpp/src/utilities/pinned_memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp index 9cebf980d00..5d2e3ac332a 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/pinned_memory.cpp @@ -127,7 +127,7 @@ class fixed_pinned_pool_memory_resource { static_assert(cuda::mr::resource_with, - ""); + "Pinned pool mr must be accessible from both host and device"); CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr( std::optional config_size)