Skip to content

Commit

Permalink
fix: 重启 job-backup 导致正在执行的归档任务未无损终止 #3359
Browse files Browse the repository at this point in the history
  • Loading branch information
wangyu096 committed Dec 26, 2024
1 parent 0029953 commit 15bd20c
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,12 @@ public void schedule() {
watch.stop();
int taskConcurrent = archiveProperties.getTasks().getJobInstance().getConcurrent();
if (highestPriorityDbNodeTasksInfo.getRunningTaskCount() >= taskConcurrent) {
- // 休眠5分钟,等待并行任务减少
- log.info("Running archive task count exceed concurrent limit : {}, wait 300s", taskConcurrent);
+ // 休眠1分钟,等待并行任务减少
+ log.info("Running archive task count exceed concurrent limit : {}, wait 60s", taskConcurrent);
// 释放锁
jobInstanceArchiveTaskScheduleLock.unlock();
locked = false;
- ThreadUtils.sleep(1000 * 300L);
+ ThreadUtils.sleep(1000 * 60L);
continue;
}

Expand Down Expand Up @@ -279,8 +279,8 @@ private void stopTasksGraceful() {
}
try {
if (taskCountDownLatch != null) {
- // 等待任务结束,最多等待 2min
- boolean isAllTaskStopped = taskCountDownLatch.waitingForAllTasksDone(120);
+ // 等待任务结束,最多等待 10s(等待时间太长进程会被k8s kill掉)
+ boolean isAllTaskStopped = taskCountDownLatch.waitingForAllTasksDone(10);
if (!isAllTaskStopped) {
for (JobInstanceArchiveTask task : scheduledTasks.values()) {
task.forceStopAtOnce();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ spec:
annotations:
{{ include "annotations.sha256sum.configmap" ( dict "service" "job-execute" "context" . ) | nindent 8 }}
spec:
- {{- include "job.podTerminationGracePeriodSeconds" . | nindent 6 }}
{{- include "job.imagePullSecrets" . | nindent 6 }}
hostAliases: {{- include "common.tplvalues.render" (dict "value" .Values.hostAliases "context" $) | nindent 8 }}
{{- if .Values.executeConfig.affinity }}
Expand Down
2 changes: 1 addition & 1 deletion support-files/kubernetes/charts/bk-job/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1807,4 +1807,4 @@ assembleConfig:
readTimeout: 300000

# pod删除时等待优雅关闭的最大时间,单位为秒(超出后强制删除)
- podTerminationGracePeriodSeconds: 40
+ podTerminationGracePeriodSeconds: 60

0 comments on commit 15bd20c

Please sign in to comment.