Last active
January 31, 2025 12:02
-
-
Save chenkovsky/1c74b7eaea9f8db77476f1a97dd4f696 to your computer and use it in GitHub Desktop.
Revisions
-
chenkovsky revised this gist
Jan 22, 2025 . 1 changed file with 52 additions and 51 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -61,53 +61,63 @@ kind load docker-image alpine:3.18 ```diff diff --git a/charts/celeborn/templates/master/statefulset.yaml b/charts/celeborn/templates/master/statefulset.yaml index 7d3fe6e..0f9c38b 100644 --- a/charts/celeborn/templates/master/statefulset.yaml +++ b/charts/celeborn/templates/master/statefulset.yaml @@ -102,6 +102,10 @@ spec: - name: {{ $.Release.Name }}-master-vol-{{ $index }} mountPath: {{ .mountPath }} {{- end }} + {{- range $index, $volume := .Values.volumes.hot_loads }} + - name: {{ $.Release.Name }}-master-hot-load-vol-{{ $index }} + mountPath: {{ .mountPath }} + {{- end }} {{- with .Values.resources.master }} resources: {{- toYaml . | nindent 10 }} @@ -127,6 +131,12 @@ spec: {{ fail "For now Celeborn Helm only support emptyDir or hostPath volume types" }} {{- end }} {{- end }} + {{- range $index, $volume := .Values.volumes.hot_loads }} + - name: {{ $.Release.Name }}-master-hot-load-vol-{{ $index }} + hostPath: + path: {{ $volume.hostPath | default $volume.mountPath }} + type: DirectoryOrCreate + {{- end }} {{- with .Values.nodeSelector }} nodeSelector: {{- toYaml . 
| nindent 8 }} diff --git a/charts/celeborn/templates/worker/statefulset.yaml b/charts/celeborn/templates/worker/statefulset.yaml index f8d1023..a3290ef 100644 --- a/charts/celeborn/templates/worker/statefulset.yaml +++ b/charts/celeborn/templates/worker/statefulset.yaml @@ -105,6 +105,10 @@ spec: - name: {{ $.Release.Name }}-worker-vol-{{ $index }} mountPath: {{ .mountPath }} {{- end }} + {{- range $index, $volume := .Values.volumes.hot_loads }} + - name: {{ $.Release.Name }}-worker-hot-load-vol-{{ $index }} + mountPath: {{ .mountPath }} + {{- end }} {{- with .Values.resources.worker }} resources: {{- toYaml . | nindent 10 }} @@ -130,6 +134,12 @@ spec: {{ fail "Currently, Celeborn chart only supports 'emptyDir' and 'hostPath' volume types" }} {{- end }} {{- end }} + {{- range $index, $volume := .Values.volumes.hot_loads }} + - name: {{ $.Release.Name }}-worker-hot-load-vol-{{ $index }} + hostPath: + path: {{ $volume.hostPath | default $volume.mountPath }} + type: DirectoryOrCreate + {{- end }} {{- with .Values.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} diff --git a/charts/celeborn/values.yaml b/charts/celeborn/values.yaml index 37c6962..9459859 100644 --- a/charts/celeborn/values.yaml +++ b/charts/celeborn/values.yaml @@ -28,11 +28,11 @@ fullnameOverride: "" @@ -124,33 +134,24 @@ index 8847707be..0d80b4057 100644 + pullPolicy: IfNotPresent # -- Image name for init containter. (your-private-repo/alpine:3.18) initContainerImage: alpine:3.18 @@ -78,6 +78,16 @@ volumes: hostPath: /mnt/celeborn_ratis type: hostPath capacity: 100Gi + hot_loads: + - mountPath: /opt/celeborn/master-jars + hostPath: /opt/celeborn/master-jars + type: hostPath + - mountPath: /opt/celeborn/worker-jars + hostPath: /opt/celeborn/worker-jars + type: hostPath + - mountPath: /opt/celeborn/jars + hostPath: /opt/celeborn/jars + type: hostPath # -- Specifies volumes for Celeborn worker pods worker: - mountPath: /mnt/disk1 ``` ## 在 kind 集群中安装修改过后的 Celeborn -
chenkovsky revised this gist
Jan 22, 2025 . 1 changed file with 0 additions and 9 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -92,15 +92,6 @@ index bdcb65783..d4dc7a8d5 100644 {{- end}} volumeMounts: {{- range $index, $dir := $dirs }} @@ -122,8 +124,13 @@ spec: sizeLimit: {{ $volume.capacity }} {{- else if eq "hostPath" $volume.type }} -
chenkovsky revised this gist
Jan 21, 2025 . No changes. There are no files selected for viewing
-
chenkovsky created this gist
Jan 21, 2025 . There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters Original file line number Diff line number Diff line change @@ -0,0 +1,186 @@ # Celeborn 本地调试 在开发 celeborn 的时候,往往需要打包部署验证。这样的流程比较长。而如果只是使用 celeborn 内部自带的单测,又很难模拟真实环境。 所以在此分享一下本机调试分布式系统的经验。 ## Docker 环境准备 笔者使用 MacBook,所以安装了 [Colima](https://github.com/abiosoft/colima)。其他系统只要安装了 docker 环境即可。 首先启动 docker 环境。celeborn 对于内存还是有一定要求的,所以需要显式设置 cpu 和 memory。 ```bash colima start --cpu 6 --memory 12 ``` ## 部署 Kind 集群 首先安装 [Kind](https://kind.sigs.k8s.io/docs/user/quick-start/#installation)。 在本地新建一个 kind 集群的配置文件(例如 `config.yaml`),将下面的内容复制进去,并将 `{workspace}` 替换成 celeborn 代码所在的目录。 ```yaml apiVersion: kind.x-k8s.io/v1alpha4 kind: Cluster nodes: - role: control-plane - role: worker extraMounts: - hostPath: {workspace}/celeborn/dist containerPath: /opt/celeborn - role: worker extraMounts: - hostPath: {workspace}/celeborn/dist containerPath: /opt/celeborn - role: worker extraMounts: - hostPath: {workspace}/celeborn/dist containerPath: /opt/celeborn - role: worker extraMounts: - hostPath: {workspace}/celeborn/dist containerPath: /opt/celeborn ``` 用上面的配置创建 kind 集群: ```bash kind create cluster --config config.yaml ``` 第一次使用时,我们可能需要先打包镜像,并将 celeborn 和 alpine 的镜像加载到 kind 集群中: ```bash kind load docker-image celeborn kind load docker-image alpine:3.18 ``` ## 修改 Celeborn Helm chart 的定义 修改定义的原因是,我希望将 celeborn 编译结果所在的 dist 目录挂载到 container 中。一旦 maven 编译完成,重启 pod 就能实时更新,无需再打包镜像,重新部署,从而加快开发流程。 ```diff diff --git a/charts/celeborn/templates/master/statefulset.yaml b/charts/celeborn/templates/master/statefulset.yaml index f3f3e651a..35e4c2a33 100644 --- a/charts/celeborn/templates/master/statefulset.yaml +++ b/charts/celeborn/templates/master/statefulset.yaml @@ -119,8 +119,13 @@ spec: sizeLimit: {{ $volume.capacity }} {{- else if eq "hostPath" $volume.type }} hostPath: + {{- if 
hasPrefix "/mnt" $volume.mountPath}} path: {{ $volume.hostPath | default $volume.mountPath }}/master type: DirectoryOrCreate + {{- else }} + path: {{ $volume.hostPath | default $volume.mountPath }} + type: DirectoryOrCreate + {{- end }} {{- else }} {{ fail "For now Celeborn Helm only support emptyDir or hostPath volume types" }} {{- end }} diff --git a/charts/celeborn/templates/worker/statefulset.yaml b/charts/celeborn/templates/worker/statefulset.yaml index bdcb65783..d4dc7a8d5 100644 --- a/charts/celeborn/templates/worker/statefulset.yaml +++ b/charts/celeborn/templates/worker/statefulset.yaml @@ -52,7 +52,9 @@ spec: - chown - {{ .Values.securityContext.runAsUser | default 10006 }}:{{ .Values.securityContext.runAsGroup | default 10006 }} {{- range $dir := $dirs }} + {{- if hasPrefix "/mnt/" $dir.mountPath }} - {{ $dir.mountPath }} + {{- end }} {{- end}} volumeMounts: {{- range $index, $dir := $dirs }} @@ -80,7 +82,7 @@ spec: {{- $namespace := .Release.Namespace }} - > until {{ range until (.Values.masterReplicas | int) }} - nslookup {{ include "celeborn.masterStatefulSetName" $ }}-{{ . }}.{{ include "celeborn.masterServiceName" $ }}.{{ $namespace }}.svc.{{ $.Values.cluster.name }}.local && + nslookup {{ include "celeborn.masterStatefulSetName" $ }}-{{ . 
}}.{{ include "celeborn.masterServiceName" $ }}.{{ $namespace }}.svc.{{ $.Values.cluster.name }}.local && {{- end }} true; do echo "waiting for master"; @@ -122,8 +124,13 @@ spec: sizeLimit: {{ $volume.capacity }} {{- else if eq "hostPath" $volume.type }} hostPath: + {{- if hasPrefix "/mnt" $volume.mountPath}} path: {{ $volume.hostPath | default $volume.mountPath }}/worker type: DirectoryOrCreate + {{- else }} + path: {{ $volume.hostPath | default $volume.mountPath }} + type: DirectoryOrCreate + {{- end }} {{- else }} {{ fail "Currently, Celeborn chart only supports 'emptyDir' and 'hostPath' volume types" }} {{- end }} diff --git a/charts/celeborn/values.yaml b/charts/celeborn/values.yaml index 8847707be..0d80b4057 100644 --- a/charts/celeborn/values.yaml +++ b/charts/celeborn/values.yaml @@ -28,11 +28,11 @@ fullnameOverride: "" # Specifies the Celeborn image to use image: # -- Image repository - repository: aliyunemr/remote-shuffle-service + repository: celeborn # -- Image tag - tag: 0.1.1-6badd20 + tag: latest # -- Image pull policy - pullPolicy: Always + pullPolicy: IfNotPresent # -- Image name for init containter. (your-private-repo/alpine:3.18) initContainerImage: alpine:3.18 @@ -78,6 +78,12 @@ volumes: hostPath: /mnt/celeborn_ratis type: hostPath capacity: 100Gi + - mountPath: /opt/celeborn/master-jars + hostPath: /opt/celeborn/master-jars + type: hostPath + - mountPath: /opt/celeborn/jars + hostPath: /opt/celeborn/jars + type: hostPath # -- Specifies volumes for Celeborn worker pods worker: - mountPath: /mnt/disk1 @@ -100,6 +106,12 @@ volumes: type: hostPath diskType: SSD capacity: 100Gi + - mountPath: /opt/celeborn/worker-jars + hostPath: /opt/celeborn/worker-jars + type: hostPath + - mountPath: /opt/celeborn/jars + hostPath: /opt/celeborn/jars + type: hostPath # -- Celeborn configurations celeborn: ``` ## 在 kind 集群中安装修改过后的 Celeborn ```bash helm upgrade --install celeborn . 
--namespace celeborn -f values.yaml --create-namespace ``` ## 调试 进入 pod 查看运行状况。 ```bash kubectl exec -it celeborn-master-0 -n celeborn -- bash ``` 代码更新并重新编译完成后,只需要删除 pod,重建后的 pod 就会使用新的代码。 ```bash kubectl delete po celeborn-worker-0 -n celeborn ```