diff --git a/charts/lorax/Chart.yaml b/charts/lorax/Chart.yaml index 4128dd279..5262aef36 100644 --- a/charts/lorax/Chart.yaml +++ b/charts/lorax/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v2 name: lorax -description: LoRAX is the open-source framework for serving +description: LoRAX is the open-source framework for serving hundreds of fine-tuned LLMs in production for the price of one. -version: 0.3.0 +version: 0.4.0 appVersion: 0.3.0 home: https://github.com/predibase/lorax @@ -11,15 +11,14 @@ annotations: artifacthub.io/category: ai-machine-learning keywords: -- lorax -- llama -- llm -- predibase + - lorax + - llama + - llm + - predibase maintainers: -- email: maintainers@predibase.com - name: Predibase + - email: maintainers@predibase.com + name: Predibase sources: -- https://github.com/predibase/lorax - + - https://github.com/predibase/lorax diff --git a/charts/lorax/templates/deployment.yaml b/charts/lorax/templates/deployment.yaml index 1646e6d56..53c90afbe 100644 --- a/charts/lorax/templates/deployment.yaml +++ b/charts/lorax/templates/deployment.yaml @@ -33,25 +33,16 @@ spec: {{- end }} containers: - args: - - --model-id - - {{ .Values.deployment.args.modelId }} - - --max-input-length - - {{ .Values.deployment.args.maxInputLength | quote }} - - --max-total-tokens - - {{ .Values.deployment.args.maxTotalTokens | quote }} - - --max-batch-total-tokens - - {{ .Values.deployment.args.maxBatchTotalTokens | quote }} - - --max-batch-prefill-tokens - - {{ .Values.deployment.args.maxBatchPrefillTokens | quote }} - - --sharded - - {{ .Values.deployment.args.sharded | quote }} - - --eager-prefill - - {{ .Values.deployment.args.eagerPrefill | quote }} + {{- range .Values.deployment.args }} + - {{ .name }} + {{- if .value }} + - {{ .value | quote }} + {{- end }} + {{- end }} env: - name: PORT value: "8000" - - name: HUGGING_FACE_HUB_TOKEN - value: {{ .Values.deployment.env.huggingFaceHubToken | quote }} +{{- toYaml .Values.deployment.env | nindent 8 }} image: {{ 
.Values.deployment.image.repository }}:{{ .Values.deployment.image.tag }} imagePullPolicy: IfNotPresent livenessProbe: {{ toYaml .Values.deployment.livenessProbe | nindent 10 }} @@ -68,7 +59,7 @@ spec: - mountPath: /dev/shm name: shm {{- if .Values.deployment.tolerations }} - tolerations: + tolerations: {{- toYaml .Values.deployment.tolerations | nindent 6 }} {{- end }} nodeSelector: {{ toYaml .Values.deployment.nodeSelector | nindent 8 }} diff --git a/charts/lorax/values.yaml b/charts/lorax/values.yaml index c9769484e..0d5da6b36 100644 --- a/charts/lorax/values.yaml +++ b/charts/lorax/values.yaml @@ -7,54 +7,55 @@ deployment: tag: "latest" args: - modelId: "mistralai/Mistral-7B-Instruct-v0.1" - maxInputLength: 512 - maxTotalTokens: 1024 - maxBatchTotalTokens: 4096 - maxBatchPrefillTokens: 2048 - sharded: false - eagerPrefill: false + - name: "--model-id" + value: "mistralai/Mistral-7B-Instruct-v0.1" + - name: "--max-input-length" + value: "512" + - name: "--max-total-tokens" + value: "1024" + - name: "--max-batch-total-tokens" + value: "4096" + - name: "--max-batch-prefill-tokens" + value: "2048" + - name: "--eager-prefill" + value: "false" + - name: "--compile" + value: "" # --compile does not take a second argument env: # Your huggingface hub token. Required for some models such as the llama-2 family.
- huggingFaceHubToken: "" - - # Model types that support dynamic adapter loading - loraxEnabledModelTypes: "llama,mistral" + - name: "HUGGING_FACE_HUB_TOKEN" + value: "" resources: limits: - cpu: "8" - ephemeral-storage: 100Gi - memory: 27041Mi nvidia.com/gpu: "1" requests: - cpu: "8" - ephemeral-storage: 100Gi - memory: 27041Mi nvidia.com/gpu: "1" - livenessProbe: - failureThreshold: 240 - httpGet: - path: /health - port: http - scheme: HTTP - initialDelaySeconds: 5 - periodSeconds: 5 - successThreshold: 1 - timeoutSeconds: 1 + livenessProbe: + {} + # failureThreshold: 240 + # httpGet: + # path: /health + # port: http + # scheme: HTTP + # initialDelaySeconds: 5 + # periodSeconds: 5 + # successThreshold: 1 + # timeoutSeconds: 1 - readinessProbe: - failureThreshold: 600 - httpGet: - path: /health - port: http - scheme: HTTP - initialDelaySeconds: 5 - periodSeconds: 5 - successThreshold: 1 - timeoutSeconds: 1 + readinessProbe: + {} + # failureThreshold: 600 + # httpGet: + # path: /health + # port: http + # scheme: HTTP + # initialDelaySeconds: 5 + # periodSeconds: 5 + # successThreshold: 1 + # timeoutSeconds: 1 nodeSelector: {} tolerations: [] @@ -72,4 +73,3 @@ service: serviceType: ClusterIP port: 80 additionalLabels: {} -