Commit
use tp in config.yaml
irexyc committed Sep 4, 2024
1 parent c187077 commit a0503ae
Showing 3 changed files with 11 additions and 22 deletions.
13 changes: 7 additions & 6 deletions lmdeploy/turbomind/turbomind.py
@@ -240,12 +240,13 @@ def _from_workspace(self, model_path: str,
                 _cfg = yaml.safe_load(f)
         cfg = TurbomindModelConfig.from_dict(_cfg)
 
-        # check whether input tp is valid
-        self.gpu_count = engine_config.tp
-        if cfg.tensor_para_size != 1 and \
-                self.gpu_count != cfg.tensor_para_size:
-            logger.info(f'found tp={cfg.tensor_para_size} in config.yaml.')
-            self.gpu_count = cfg.tensor_para_size
+        # always use tp in converted model (config.yaml)
+        if cfg.tensor_para_size != engine_config.tp:
+            logger.warning(
+                'tp in engine_config is different from in config.yaml'
+                f'({config_path}), {engine_config.tp} vs '
+                f'{cfg.tensor_para_size}, using tp={cfg.tensor_para_size}')
+        self.gpu_count = cfg.tensor_para_size
         engine_config.tp = self.gpu_count
 
         self._postprocess_config(cfg, engine_config)
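From the user's side, the tensor-parallel size recorded in the converted workspace's config.yaml now takes precedence over the tp requested in the engine config. A minimal sketch of the resulting behavior, assuming a workspace already converted with lmdeploy convert using tp=2 at ./workspace (path and values are illustrative, not from this commit):

    # Illustrative only: assumes a workspace converted with tp=2 at ./workspace.
    from lmdeploy import pipeline, TurbomindEngineConfig

    # Requesting a different tp here now only triggers the warning above;
    # the engine falls back to tensor_para_size from config.yaml (2 here).
    engine_config = TurbomindEngineConfig(tp=4)
    pipe = pipeline('./workspace', backend_config=engine_config)

    # Per the change above, engine_config.tp is overwritten after loading.
    print(engine_config.tp)  # expected: 2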
12 changes: 4 additions & 8 deletions src/turbomind/models/llama/LlamaDecoderLayerWeight.cc
@@ -284,14 +284,10 @@ template<typename T>
 void loadWeights(
     LlamaDenseWeight<T>& w, std::string prefix, int rank, FtCudaDataType model_file_type, size_t tensor_para_size)
 {
-    // check converted file with tp
-    auto check_exist = [&](size_t max_index) {
-        auto weight_file  = prefix + "." + std::to_string(max_index) + ".weight";
-        auto qweight_file = prefix + "." + std::to_string(max_index) + ".qweight";
-        return std::filesystem::exists(weight_file) || std::filesystem::exists(qweight_file);
-    };
-    if (!check_exist(tensor_para_size - 1) || check_exist(tensor_para_size)) {
-        TM_LOG_ERROR("please make sure the tp parameter is same when you convert the model.");
+    auto weight_file  = prefix + "." + std::to_string(tensor_para_size - 1) + ".weight";
+    auto qweight_file = prefix + "." + std::to_string(tensor_para_size - 1) + ".qweight";
+    if (!std::filesystem::exists(weight_file) && !std::filesystem::exists(qweight_file)) {
+        TM_LOG_ERROR("%s and %s does not exist", weight_file.c_str(), qweight_file.c_str());
         FT_CHECK(false);
     }

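The simplified check only verifies that the shard for the last tensor-parallel rank (tensor_para_size - 1) exists for the given prefix; it no longer rejects a workspace converted with a larger tp. A rough Python sketch of the per-rank naming scheme the check relies on (the prefix and tp value are hypothetical, chosen only to illustrate the prefix.{rank}.weight pattern from the code above):

    # Illustrative sketch of the per-rank shard names loadWeights() looks for.
    # The prefix is hypothetical; real prefixes come from the converted workspace.
    import os

    def expected_shards(prefix: str, tp: int):
        # One .weight (or .qweight for quantized layers) file per tensor-parallel rank.
        return [f'{prefix}.{rank}.weight' for rank in range(tp)]

    shards = expected_shards('layers.0.attention.w_qkv', tp=2)
    missing = [f for f in shards if not os.path.exists(f)]
    print(shards)   # ['layers.0.attention.w_qkv.0.weight', 'layers.0.attention.w_qkv.1.weight']
    print(missing)  # whichever shards are absent in the current directory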
8 changes: 0 additions & 8 deletions src/turbomind/models/llama/LlamaWeight.cc
@@ -121,14 +121,6 @@ void loadLinearWeights(T* weights,
     // the weight could be split along split_dim
     std::vector<std::reference_wrapper<size_t>> dims = {dim0, dim1};
     if (dims[split_dim] % tensor_para_size == 0) {
-        // check converted file with tp
-        auto should_exist     = prefix + "." + std::to_string(tensor_para_size - 1) + ".weight";
-        auto should_not_exist = prefix + "." + std::to_string(tensor_para_size) + ".weight";
-        if (!std::filesystem::exists(should_exist) || std::filesystem::exists(should_not_exist)) {
-            TM_LOG_ERROR("please make sure the tp parameter is same when you convert the model.");
-            FT_CHECK(false);
-        }
-
         dims[split_dim] /= tensor_para_size;
         prefix += "." + std::to_string(rank);
     }
