Commit
use tp in config.yaml
irexyc committed Sep 4, 2024
1 parent c187077 commit a0503ae
Showing 3 changed files with 11 additions and 22 deletions.
13 changes: 7 additions & 6 deletions lmdeploy/turbomind/turbomind.py
@@ -240,12 +240,13 @@ def _from_workspace(self, model_path: str,
                 _cfg = yaml.safe_load(f)
         cfg = TurbomindModelConfig.from_dict(_cfg)
 
-        # check whether input tp is valid
-        self.gpu_count = engine_config.tp
-        if cfg.tensor_para_size != 1 and \
-                self.gpu_count != cfg.tensor_para_size:
-            logger.info(f'found tp={cfg.tensor_para_size} in config.yaml.')
-            self.gpu_count = cfg.tensor_para_size
+        # always use tp in converted model (config.yaml)
+        if cfg.tensor_para_size != engine_config.tp:
+            logger.warning(
+                'tp in engine_config is different from in config.yaml'
+                f'({config_path}), {engine_config.tp} vs '
+                f'{cfg.tensor_para_size}, using tp={cfg.tensor_para_size}')
+        self.gpu_count = cfg.tensor_para_size
         engine_config.tp = self.gpu_count
 
         self._postprocess_config(cfg, engine_config)
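From the user's side, the tensor-parallel size recorded in the converted workspace's config.yaml now takes precedence over the tp requested in the engine config. A minimal sketch of the resulting behavior, assuming a workspace already converted with lmdeploy convert using tp=2 at ./workspace (path and values are illustrative, not from this commit):

    # Illustrative only: assumes a workspace converted with tp=2 at ./workspace.
    from lmdeploy import pipeline, TurbomindEngineConfig

    # Requesting a different tp here now only triggers the warning above;
    # the engine falls back to tensor_para_size from config.yaml (2 here).
    engine_config = TurbomindEngineConfig(tp=4)
    pipe = pipeline('./workspace', backend_config=engine_config)

    # Per the change above, engine_config.tp is overwritten after loading.
    print(engine_config.tp)  # expected: 2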
12 changes: 4 additions & 8 deletions src/turbomind/models/llama/LlamaDecoderLayerWeight.cc
@@ -284,14 +284,10 @@ template<typename T>
 void loadWeights(
     LlamaDenseWeight<T>& w, std::string prefix, int rank, FtCudaDataType model_file_type, size_t tensor_para_size)
 {
-    // check converted file with tp
-    auto check_exist = [&](size_t max_index) {
-        auto weight_file  = prefix + "." + std::to_string(max_index) + ".weight";
-        auto qweight_file = prefix + "." + std::to_string(max_index) + ".qweight";
-        return std::filesystem::exists(weight_file) || std::filesystem::exists(qweight_file);
-    };
-    if (!check_exist(tensor_para_size - 1) || check_exist(tensor_para_size)) {
-        TM_LOG_ERROR("please make sure the tp parameter is same when you convert the model.");
+    auto weight_file  = prefix + "." + std::to_string(tensor_para_size - 1) + ".weight";
+    auto qweight_file = prefix + "." + std::to_string(tensor_para_size - 1) + ".qweight";
+    if (!std::filesystem::exists(weight_file) && !std::filesystem::exists(qweight_file)) {
+        TM_LOG_ERROR("%s and %s does not exist", weight_file.c_str(), qweight_file.c_str());
         FT_CHECK(false);
     }

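The simplified check only verifies that the shard for the last tensor-parallel rank (tensor_para_size - 1) exists for the given prefix; it no longer rejects a workspace converted with a larger tp. A rough Python sketch of the per-rank naming scheme the check relies on (the prefix and tp value are hypothetical, chosen only to illustrate the prefix.{rank}.weight pattern from the code above):

    # Illustrative sketch of the per-rank shard names loadWeights() looks for.
    # The prefix is hypothetical; real prefixes come from the converted workspace.
    import os

    def expected_shards(prefix: str, tp: int):
        # One .weight (or .qweight for quantized layers) file per tensor-parallel rank.
        return [f'{prefix}.{rank}.weight' for rank in range(tp)]

    shards = expected_shards('layers.0.attention.w_qkv', tp=2)
    missing = [f for f in shards if not os.path.exists(f)]
    print(shards)   # ['layers.0.attention.w_qkv.0.weight', 'layers.0.attention.w_qkv.1.weight']
    print(missing)  # whichever shards are absent in the current directory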
8 changes: 0 additions & 8 deletions src/turbomind/models/llama/LlamaWeight.cc
@@ -121,14 +121,6 @@ void loadLinearWeights(T* weights,
     // the weight could be split along split_dim
     std::vector<std::reference_wrapper<size_t>> dims = {dim0, dim1};
     if (dims[split_dim] % tensor_para_size == 0) {
-        // check converted file with tp
-        auto should_exist     = prefix + "." + std::to_string(tensor_para_size - 1) + ".weight";
-        auto should_not_exist = prefix + "." + std::to_string(tensor_para_size) + ".weight";
-        if (!std::filesystem::exists(should_exist) || std::filesystem::exists(should_not_exist)) {
-            TM_LOG_ERROR("please make sure the tp parameter is same when you convert the model.");
-            FT_CHECK(false);
-        }
-
         dims[split_dim] /= tensor_para_size;
         prefix += "." + std::to_string(rank);
     }
