remove ConcateSlice class
irexyc committed Sep 3, 2024
1 parent df2c846 commit f8a1289
Showing 2 changed files with 72 additions and 234 deletions.
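
In short: this commit deletes the ConcateSlice struct and the slice-based partial-loading branch from memory_utils, so loadWeightFromBinHelper, loadWeightFromBinFunc, loadArrayFromBin and loadWeightFromBin now always read a whole (at most 2-D) tensor from a binary file. A minimal caller sketch against the simplified interface after this commit — the shape, file path, include path and namespace qualification are illustrative assumptions, not taken from the diff:

#include <string>
#include <vector>

#include "src/turbomind/utils/memory_utils.h"  // assumed include path

void load_whole_tensor_example(float* d_weight /* device buffer, dim0 * dim1 elements */)
{
    // Whole-tensor load only: there is no ConcateSlice / slices argument any more.
    const std::vector<size_t> shape{4096, 4096};                  // hypothetical 2-D weight
    const std::string         path = "weights/layer0.weight.bin"; // hypothetical file

    // Host-side load into std::vector<float>.
    std::vector<float> host = turbomind::loadArrayFromBin(shape, path);

    // Device-side load, converting from the on-disk dtype (FP16 here) to float.
    turbomind::loadWeightFromBin<float>(d_weight, shape, path, turbomind::FtCudaDataType::FP16);
}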
289 changes: 67 additions & 222 deletions src/turbomind/utils/memory_utils.cu
@@ -302,8 +302,7 @@ template void cudaRandomUniform(__nv_fp8_e4m3* buffer, const size_t size);
// loads data from binary file. If it succeeds, returns a non-empty vector. If loading fails or
// the product of the elements in shape is 0, this function will return an empty vector.
template<typename T>
std::vector<T>
loadWeightFromBinHelper(std::vector<size_t> shape, std::string filename, std::vector<ConcateSlice> slices = {})
std::vector<T> loadWeightFromBinHelper(std::vector<size_t> shape, std::string filename)
{
if (shape.size() > 2) {
printf("[ERROR] shape should have less than two dims \n");
@@ -315,145 +314,48 @@ loadWeightFromBinHelper(std::vector<size_t> shape, std::string filename, std::ve
dim1 = shape[1];
}

if (slices.size() == 0) {
size_t size = dim0 * dim1;
if (size == 0) {
TM_LOG_WARNING("shape is zero, skip loading weight from file %s \n", filename.c_str());
return std::vector<T>();
}

std::vector<T> host_array(size);
std::ifstream in(filename, std::ios::in | std::ios::binary);
if (!in.is_open()) {
TM_LOG_WARNING("file %s cannot be opened, loading model fails! \n", filename.c_str());
return std::vector<T>();
}

size_t loaded_data_size = sizeof(T) * size;
in.seekg(0, in.end);
const auto file_size_in_bytes = (size_t)in.tellg();
in.seekg(0, in.beg);

TM_LOG_DEBUG("Read " + std::to_string(loaded_data_size) + " bytes from " + filename);
in.read((char*)host_array.data(), loaded_data_size);

if (file_size_in_bytes != loaded_data_size) {
TM_LOG_WARNING("file %s has %ld, but request %ld, loading model fails!",
filename.c_str(),
file_size_in_bytes,
loaded_data_size);
return std::vector<T>();
}
in.close();
// If we succeed, return an array with values.
return host_array;
size_t size = dim0 * dim1;
if (size == 0) {
TM_LOG_WARNING("shape is zero, skip loading weight from file %s \n", filename.c_str());
return std::vector<T>();
}
else {
// concate all slices on the same dims

if (slices.size() != shape.size()) {
printf("[ERROR] slices should have same dims as shape \n");
return std::vector<T>();
}

// get slices
ConcateSlice slice0{{{0, dim0}}};
ConcateSlice slice1{{{0, dim1}}};
if (slices.size() > 0 && slices[0].slices.size() > 0) {
slice0 = slices[0];
}
if (shape.size() == 2 && slices[1].slices.size() > 0) {
slice1 = slices[1];
}

size_t w0 = 0;
for (auto& s : slice0.slices) {
if (s.second > dim0) {
s.second = dim0;
}
if (s.second < s.first) {
printf("[ERROR] slice0: end < start \n");
return std::vector<T>();
}
w0 += s.second - s.first;
}

size_t w1 = 0;
for (auto& s : slice1.slices) {
if (s.second > dim1) {
s.second = dim1;
}
if (s.second < s.first) {
printf("[ERROR] slice1: end < start \n");
return std::vector<T>();
}
w1 += s.second - s.first;
}

size_t size = w0 * w1;
size_t loaded_data_size = size * sizeof(T);

TM_LOG_DEBUG("Read " + std::to_string(loaded_data_size) + " bytes from " + filename + " with slice.");
if (size == 0) {
TM_LOG_WARNING("shape is zero, skip loading weight from file %s \n", filename.c_str());
return std::vector<T>();
}
std::vector<T> host_array(size);
std::ifstream in(filename, std::ios::in | std::ios::binary);
if (!in.is_open()) {
TM_LOG_WARNING("file %s cannot be opened, loading model fails! \n", filename.c_str());
return std::vector<T>();
}

std::vector<T> host_array(size);
std::ifstream in(filename, std::ios::in | std::ios::binary);
if (!in.is_open()) {
TM_LOG_WARNING("file %s cannot be opened, loading model fails! \n", filename.c_str());
return std::vector<T>();
}
size_t loaded_data_size = sizeof(T) * size;
in.seekg(0, in.end);
const auto file_size_in_bytes = (size_t)in.tellg();
in.seekg(0, in.beg);

char* host_ptr = (char*)host_array.data();
if (slice1.slices.size() == 0
|| (slice1.slices.size() == 1 && slice1.slices[0].second - slice1.slices[0].first == dim1)) {
for (auto& s : slice0.slices) {
size_t read_size = (s.second - s.first) * dim1 * sizeof(T);
size_t pos = s.first * dim1;
in.seekg(pos * sizeof(T));
in.read((char*)host_ptr, read_size);
host_ptr += read_size;
}
in.close();
return host_array;
}
TM_LOG_DEBUG("Read " + std::to_string(loaded_data_size) + " bytes from " + filename);
in.read((char*)host_array.data(), loaded_data_size);

{
for (auto& s0 : slice0.slices) {
// loop over outer slice
for (size_t line_id = s0.first; line_id < s0.second; ++line_id) {
// loop over lines
size_t pos0 = line_id * dim1;
for (auto& s1 : slice1.slices) {
// loop over inner slice
size_t pos = pos0 + s1.first;
size_t read_size = (s1.second - s1.first) * sizeof(T);
in.seekg(pos * sizeof(T));
in.read(host_ptr, read_size);
host_ptr += read_size;
}
}
}
in.close();
}
return host_array;
if (file_size_in_bytes != loaded_data_size) {
TM_LOG_WARNING("file %s has %ld, but request %ld, loading model fails!",
filename.c_str(),
file_size_in_bytes,
loaded_data_size);
return std::vector<T>();
}
in.close();
// If we succeed, return an array with values.
return host_array;
}

std::vector<float> loadArrayFromBin(std::vector<size_t> shape, std::string filename, std::vector<ConcateSlice> slices)
std::vector<float> loadArrayFromBin(std::vector<size_t> shape, std::string filename)
{
return loadWeightFromBinHelper<float>(shape, filename, slices);
return loadWeightFromBinHelper<float>(shape, filename);
}

template<typename T, typename T_IN>
int loadWeightFromBinFunc(T* ptr,
std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices = std::vector<ConcateSlice>())
int loadWeightFromBinFunc(T* ptr, std::vector<size_t> shape, std::string filename)
{
std::vector<T_IN> host_array = loadWeightFromBinHelper<T_IN>(shape, filename, slices);
std::vector<T_IN> host_array = loadWeightFromBinHelper<T_IN>(shape, filename);

if (host_array.empty()) {
return 0;
@@ -472,84 +374,49 @@ int loadWeightFromBinFunc(T* ptr,
return 0;
}

template int loadWeightFromBinFunc<float, float>(float* ptr,
std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices);
template int loadWeightFromBinFunc<half, float>(half* ptr,
std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices);
template int loadWeightFromBinFunc<float, half>(float* ptr,
std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices);
template int loadWeightFromBinFunc<half, half>(half* ptr,
std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices);
template int loadWeightFromBinFunc<int8_t, int8_t>(int8_t* ptr,
std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices);
template int loadWeightFromBinFunc<float, float>(float* ptr, std::vector<size_t> shape, std::string filename);
template int loadWeightFromBinFunc<half, float>(half* ptr, std::vector<size_t> shape, std::string filename);
template int loadWeightFromBinFunc<float, half>(float* ptr, std::vector<size_t> shape, std::string filename);
template int loadWeightFromBinFunc<half, half>(half* ptr, std::vector<size_t> shape, std::string filename);
template int loadWeightFromBinFunc<int8_t, int8_t>(int8_t* ptr, std::vector<size_t> shape, std::string filename);
#ifdef ENABLE_BF16
template int loadWeightFromBinFunc<__nv_bfloat16, float>(__nv_bfloat16* ptr,
std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices);
template int loadWeightFromBinFunc<__nv_bfloat16, half>(__nv_bfloat16* ptr,
std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices);
template int loadWeightFromBinFunc<float, __nv_bfloat16>(float* ptr,
std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices);
template int loadWeightFromBinFunc<half, __nv_bfloat16>(half* ptr,
std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices);
template int loadWeightFromBinFunc<__nv_bfloat16, __nv_bfloat16>(__nv_bfloat16* ptr,
std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices);
template int
loadWeightFromBinFunc<__nv_bfloat16, float>(__nv_bfloat16* ptr, std::vector<size_t> shape, std::string filename);
template int
loadWeightFromBinFunc<__nv_bfloat16, half>(__nv_bfloat16* ptr, std::vector<size_t> shape, std::string filename);
template int loadWeightFromBinFunc<float, __nv_bfloat16>(float* ptr, std::vector<size_t> shape, std::string filename);
template int loadWeightFromBinFunc<half, __nv_bfloat16>(half* ptr, std::vector<size_t> shape, std::string filename);
template int loadWeightFromBinFunc<__nv_bfloat16, __nv_bfloat16>(__nv_bfloat16* ptr,
std::vector<size_t> shape,
std::string filename);
#endif // ENABLE_BF16
template int loadWeightFromBinFunc<int, int>(int* ptr,
std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices);
template int loadWeightFromBinFunc<int, int>(int* ptr, std::vector<size_t> shape, std::string filename);
#ifdef ENABLE_FP8
template int loadWeightFromBinFunc<__nv_fp8_e4m3, float>(__nv_fp8_e4m3* ptr,
std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices);
template int
loadWeightFromBinFunc<__nv_fp8_e4m3, float>(__nv_fp8_e4m3* ptr, std::vector<size_t> shape, std::string filename);
#endif // ENABLE_FP8

template<typename T>
int loadWeightFromBin(T* ptr,
std::vector<size_t> shape,
std::string filename,
FtCudaDataType model_file_type,
std::vector<ConcateSlice> slices)
int loadWeightFromBin(T* ptr, std::vector<size_t> shape, std::string filename, FtCudaDataType model_file_type)
{
switch (model_file_type) {
case FtCudaDataType::FP32:
loadWeightFromBinFunc<T, float>(ptr, shape, filename, slices);
loadWeightFromBinFunc<T, float>(ptr, shape, filename);
break;
case FtCudaDataType::FP16:
loadWeightFromBinFunc<T, half>(ptr, shape, filename, slices);
loadWeightFromBinFunc<T, half>(ptr, shape, filename);
break;
case FtCudaDataType::INT8:
loadWeightFromBinFunc<T, int8_t>(ptr, shape, filename, slices);
loadWeightFromBinFunc<T, int8_t>(ptr, shape, filename);
break;
#ifdef ENABLE_BF16
case FtCudaDataType::BF16:
loadWeightFromBinFunc<T, __nv_bfloat16>(ptr, shape, filename, slices);
loadWeightFromBinFunc<T, __nv_bfloat16>(ptr, shape, filename);
break;
#endif
#ifdef ENABLE_FP8
case FtCudaDataType::FP8:
loadWeightFromBinFunc<T, float>(ptr, shape, filename, slices);
loadWeightFromBinFunc<T, float>(ptr, shape, filename);
break;
#endif
default:
@@ -560,50 +427,28 @@ int loadWeightFromBin(T* ptr,
}

template<>
int loadWeightFromBin(int* ptr,
std::vector<size_t> shape,
std::string filename,
FtCudaDataType model_file_type,
std::vector<ConcateSlice> slices)
int loadWeightFromBin(int* ptr, std::vector<size_t> shape, std::string filename, FtCudaDataType model_file_type)
{
loadWeightFromBinFunc<int, int>(ptr, shape, filename, slices);
loadWeightFromBinFunc<int, int>(ptr, shape, filename);
return 0;
}

template int loadWeightFromBin(float* ptr,
std::vector<size_t> shape,
std::string filename,
FtCudaDataType model_file_type,
std::vector<ConcateSlice> slices);
template int loadWeightFromBin(half* ptr,
std::vector<size_t> shape,
std::string filename,
FtCudaDataType model_file_type,
std::vector<ConcateSlice> slices);
template int loadWeightFromBin(int8_t* ptr,
std::vector<size_t> shape,
std::string filename,
FtCudaDataType model_file_type,
std::vector<ConcateSlice> slices);
template int
loadWeightFromBin(float* ptr, std::vector<size_t> shape, std::string filename, FtCudaDataType model_file_type);
template int
loadWeightFromBin(half* ptr, std::vector<size_t> shape, std::string filename, FtCudaDataType model_file_type);
template int
loadWeightFromBin(int8_t* ptr, std::vector<size_t> shape, std::string filename, FtCudaDataType model_file_type);
#ifdef ENABLE_BF16
template int loadWeightFromBin(__nv_bfloat16* ptr,
std::vector<size_t> shape,
std::string filename,
FtCudaDataType model_file_type,
std::vector<ConcateSlice> slices);
template int
loadWeightFromBin(__nv_bfloat16* ptr, std::vector<size_t> shape, std::string filename, FtCudaDataType model_file_type);
#endif
#ifdef ENABLE_FP8
template int loadWeightFromBin(__nv_fp8_e4m3* ptr,
std::vector<size_t> shape,
std::string filename,
FtCudaDataType model_file_type,
std::vector<ConcateSlice> slices);
template int
loadWeightFromBin(__nv_fp8_e4m3* ptr, std::vector<size_t> shape, std::string filename, FtCudaDataType model_file_type);
#endif
template int loadWeightFromBin(int* ptr,
std::vector<size_t> shape,
std::string filename,
FtCudaDataType model_file_type,
std::vector<ConcateSlice> slices);
template int
loadWeightFromBin(int* ptr, std::vector<size_t> shape, std::string filename, FtCudaDataType model_file_type);

template<typename T_IN, typename T_OUT>
__global__ void cudaD2DcpyConvert(T_OUT* dst, const T_IN* src, const size_t size)
17 changes: 5 additions & 12 deletions src/turbomind/utils/memory_utils.h
@@ -49,20 +49,13 @@ void cudaAutoCpy(T* tgt, const T* src, const size_t size, cudaStream_t stream =
template<typename T>
void cudaRandomUniform(T* buffer, const size_t size);

struct ConcateSlice {
std::vector<std::pair<size_t, size_t>> slices;
};

template<typename T>
int loadWeightFromBin(T* ptr,
std::vector<size_t> shape,
std::string filename,
FtCudaDataType model_file_type = FtCudaDataType::FP32,
std::vector<ConcateSlice> slices = std::vector<ConcateSlice>());
int loadWeightFromBin(T* ptr,
std::vector<size_t> shape,
std::string filename,
FtCudaDataType model_file_type = FtCudaDataType::FP32);

std::vector<float> loadArrayFromBin(std::vector<size_t> shape,
std::string filename,
std::vector<ConcateSlice> slices = std::vector<ConcateSlice>());
std::vector<float> loadArrayFromBin(std::vector<size_t> shape, std::string filename);

// template<typename T>
// int loadWeightFromBinAndQuantizeForWeightOnly(int8_t* quantized_weight_ptr,
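
For reference, callers that still need the behaviour of the removed row-slice path (reading only selected row ranges of a row-major 2-D tensor and concatenating them) can reproduce it outside the loader with plain std::ifstream seeks, mirroring the read loop deleted above. A standalone sketch, not part of this commit; the helper name and the float element type are hypothetical:

#include <cstdio>
#include <fstream>
#include <string>
#include <utility>
#include <vector>

// Reads the row ranges [first, second) of a row-major dim0 x dim1 float tensor stored
// as a raw binary file and concatenates them into one host buffer, similar to the
// row-only slice branch removed from loadWeightFromBinHelper.
std::vector<float> loadRowSlices(const std::string&                            filename,
                                 size_t                                        dim0,
                                 size_t                                        dim1,
                                 const std::vector<std::pair<size_t, size_t>>& row_ranges)
{
    size_t rows = 0;
    for (const auto& r : row_ranges) {
        if (r.first > r.second || r.second > dim0) {
            std::printf("[ERROR] invalid row range\n");
            return {};
        }
        rows += r.second - r.first;
    }

    std::vector<float> host(rows * dim1);
    std::ifstream      in(filename, std::ios::in | std::ios::binary);
    if (!in.is_open()) {
        std::printf("[ERROR] cannot open %s\n", filename.c_str());
        return {};
    }

    char* dst = reinterpret_cast<char*>(host.data());
    for (const auto& r : row_ranges) {
        const size_t bytes = (r.second - r.first) * dim1 * sizeof(float);
        in.seekg(static_cast<std::streamoff>(r.first * dim1 * sizeof(float)));
        in.read(dst, static_cast<std::streamsize>(bytes));
        dst += bytes;
    }
    return host;
}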
