Skip to content

Instantly share code, notes, and snippets.

@chenkovsky
Last active January 31, 2025 12:02
Show Gist options
  • Select an option

  • Save chenkovsky/1c74b7eaea9f8db77476f1a97dd4f696 to your computer and use it in GitHub Desktop.

Select an option

Save chenkovsky/1c74b7eaea9f8db77476f1a97dd4f696 to your computer and use it in GitHub Desktop.

Revisions

  1. chenkovsky revised this gist Jan 22, 2025. 1 changed file with 52 additions and 51 deletions.
    103 changes: 52 additions & 51 deletions Celeborn 本地测试.md
    Original file line number Diff line number Diff line change
    @@ -61,53 +61,63 @@ kind load docker-image alpine:3.18

    ```diff
    diff --git a/charts/celeborn/templates/master/statefulset.yaml b/charts/celeborn/templates/master/statefulset.yaml
    index f3f3e651a..35e4c2a33 100644
    index 7d3fe6e..0f9c38b 100644
    --- a/charts/celeborn/templates/master/statefulset.yaml
    +++ b/charts/celeborn/templates/master/statefulset.yaml
    @@ -119,8 +119,13 @@ spec:
    sizeLimit: {{ $volume.capacity }}
    {{- else if eq "hostPath" $volume.type }}
    hostPath:
    + {{- if hasPrefix "/mnt" $volume.mountPath}}
    path: {{ $volume.hostPath | default $volume.mountPath }}/master
    type: DirectoryOrCreate
    + {{- else }}
    + path: {{ $volume.hostPath | default $volume.mountPath }}
    + type: DirectoryOrCreate
    + {{- end }}
    {{- else }}
    @@ -102,6 +102,10 @@ spec:
    - name: {{ $.Release.Name }}-master-vol-{{ $index }}
    mountPath: {{ .mountPath }}
    {{- end }}
    + {{- range $index, $volume := .Values.volumes.hot_loads }}
    + - name: {{ $.Release.Name }}-master-hot-load-vol-{{ $index }}
    + mountPath: {{ .mountPath }}
    + {{- end }}
    {{- with .Values.resources.master }}
    resources:
    {{- toYaml . | nindent 10 }}
    @@ -127,6 +131,12 @@ spec:
    {{ fail "For now Celeborn Helm only support emptyDir or hostPath volume types" }}
    {{- end }}
    {{- end }}
    + {{- range $index, $volume := .Values.volumes.hot_loads }}
    + - name: {{ $.Release.Name }}-master-hot-load-vol-{{ $index }}
    + hostPath:
    + path: {{ $volume.hostPath | default $volume.mountPath }}
    + type: DirectoryOrCreate
    + {{- end }}
    {{- with .Values.nodeSelector }}
    nodeSelector:
    {{- toYaml . | nindent 8 }}
    diff --git a/charts/celeborn/templates/worker/statefulset.yaml b/charts/celeborn/templates/worker/statefulset.yaml
    index bdcb65783..d4dc7a8d5 100644
    index f8d1023..a3290ef 100644
    --- a/charts/celeborn/templates/worker/statefulset.yaml
    +++ b/charts/celeborn/templates/worker/statefulset.yaml
    @@ -52,7 +52,9 @@ spec:
    - chown
    - {{ .Values.securityContext.runAsUser | default 10006 }}:{{ .Values.securityContext.runAsGroup | default 10006 }}
    {{- range $dir := $dirs }}
    + {{- if hasPrefix "/mnt/" $dir.mountPath }}
    - {{ $dir.mountPath }}
    @@ -105,6 +105,10 @@ spec:
    - name: {{ $.Release.Name }}-worker-vol-{{ $index }}
    mountPath: {{ .mountPath }}
    {{- end }}
    + {{- range $index, $volume := .Values.volumes.hot_loads }}
    + - name: {{ $.Release.Name }}-worker-hot-load-vol-{{ $index }}
    + mountPath: {{ .mountPath }}
    + {{- end }}
    {{- end}}
    volumeMounts:
    {{- range $index, $dir := $dirs }}
    @@ -122,8 +124,13 @@ spec:
    sizeLimit: {{ $volume.capacity }}
    {{- else if eq "hostPath" $volume.type }}
    hostPath:
    + {{- if hasPrefix "/mnt" $volume.mountPath}}
    path: {{ $volume.hostPath | default $volume.mountPath }}/worker
    type: DirectoryOrCreate
    + {{- else }}
    + path: {{ $volume.hostPath | default $volume.mountPath }}
    + type: DirectoryOrCreate
    + {{- end }}
    {{- else }}
    {{- with .Values.resources.worker }}
    resources:
    {{- toYaml . | nindent 10 }}
    @@ -130,6 +134,12 @@ spec:
    {{ fail "Currently, Celeborn chart only supports 'emptyDir' and 'hostPath' volume types" }}
    {{- end }}
    {{- end }}
    + {{- range $index, $volume := .Values.volumes.hot_loads }}
    + - name: {{ $.Release.Name }}-worker-hot-load-vol-{{ $index }}
    + hostPath:
    + path: {{ $volume.hostPath | default $volume.mountPath }}
    + type: DirectoryOrCreate
    + {{- end }}
    {{- with .Values.nodeSelector }}
    nodeSelector:
    {{- toYaml . | nindent 8 }}
    diff --git a/charts/celeborn/values.yaml b/charts/celeborn/values.yaml
    index 8847707be..0d80b4057 100644
    index 37c6962..9459859 100644
    --- a/charts/celeborn/values.yaml
    +++ b/charts/celeborn/values.yaml
    @@ -28,11 +28,11 @@ fullnameOverride: ""
    @@ -124,33 +134,24 @@ index 8847707be..0d80b4057 100644
    + pullPolicy: IfNotPresent
    # -- Image name for init containter. (your-private-repo/alpine:3.18)
    initContainerImage: alpine:3.18

    @@ -78,6 +78,12 @@ volumes:
    @@ -78,6 +78,16 @@ volumes:
    hostPath: /mnt/celeborn_ratis
    type: hostPath
    capacity: 100Gi
    + hot_loads:
    + - mountPath: /opt/celeborn/master-jars
    + hostPath: /opt/celeborn/master-jars
    + type: hostPath
    + - mountPath: /opt/celeborn/jars
    + hostPath: /opt/celeborn/jars
    + type: hostPath
    # -- Specifies volumes for Celeborn worker pods
    worker:
    - mountPath: /mnt/disk1
    @@ -100,6 +106,12 @@ volumes:
    type: hostPath
    diskType: SSD
    capacity: 100Gi
    + - mountPath: /opt/celeborn/worker-jars
    + hostPath: /opt/celeborn/worker-jars
    + type: hostPath
    + - mountPath: /opt/celeborn/jars
    + hostPath: /opt/celeborn/jars
    + type: hostPath

    # -- Celeborn configurations
    celeborn:
    # -- Specifies volumes for Celeborn worker pods
    worker:
    - mountPath: /mnt/disk1
    ```

    ## 在 kind 集群中安装修改过后的 Celeborn
  2. chenkovsky revised this gist Jan 22, 2025. 1 changed file with 0 additions and 9 deletions.
    9 changes: 0 additions & 9 deletions Celeborn 本地测试.md
    Original file line number Diff line number Diff line change
    @@ -92,15 +92,6 @@ index bdcb65783..d4dc7a8d5 100644
    {{- end}}
    volumeMounts:
    {{- range $index, $dir := $dirs }}
    @@ -80,7 +82,7 @@ spec:
    {{- $namespace := .Release.Namespace }}
    - >
    until {{ range until (.Values.masterReplicas | int) }}
    - nslookup {{ include "celeborn.masterStatefulSetName" $ }}-{{ . }}.{{ include "celeborn.masterServiceName" $ }}.{{ $namespace }}.svc.{{ $.Values.cluster.name }}.local &&
    + nslookup {{ include "celeborn.masterStatefulSetName" $ }}-{{ . }}.{{ include "celeborn.masterServiceName" $ }}.{{ $namespace }}.svc.{{ $.Values.cluster.name }}.local &&
    {{- end }}
    true; do
    echo "waiting for master";
    @@ -122,8 +124,13 @@ spec:
    sizeLimit: {{ $volume.capacity }}
    {{- else if eq "hostPath" $volume.type }}
  3. chenkovsky revised this gist Jan 21, 2025. No changes.
  4. chenkovsky created this gist Jan 21, 2025.
    186 changes: 186 additions & 0 deletions Celeborn 本地测试.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,186 @@
    # Celeborn 本地调试

    在开发 celeborn 的时候,往往需要打包部署验证。这样的流程比较长。而如果只是使用 celeborn 内部自带的单测,又很难模拟真实环境。
    所以在此分享一下本机调试分布式系统的经验。

    ## Docker 环境准备

    笔者使用 Macbook, 所以安装了 [Colima](https://github.com/abiosoft/colima). 其他系统只要安装了 docker 环境即可。

    首先启动 docker 环境。celeborn 对于内存还是有一定要求的,所以启动时需要显式设置 cpu 和 memory (下面的命令分配 6 个 CPU 核和 12 GiB 内存)。

    ```bash
    colima start --cpu 6 --memory 12
    ```

    ## 部署 Kind 集群

    首先安装 [Kind](https://kind.sigs.k8s.io/docs/user/quick-start/#installation)。

    在本地新建一个 kind 集群的配置文件 (例如 `config.yaml`),将下面的内容复制进去,并将其中的 `{workspace}` 替换成 celeborn 代码所在的目录。

    ```yaml
    apiVersion: kind.x-k8s.io/v1alpha4
    kind: Cluster
    nodes:
    - role: control-plane
    - role: worker
    extraMounts:
    - hostPath: {workspace}/celeborn/dist
    containerPath: /opt/celeborn
    - role: worker
    extraMounts:
    - hostPath: {workspace}/celeborn/dist
    containerPath: /opt/celeborn
    - role: worker
    extraMounts:
    - hostPath: {workspace}/celeborn/dist
    containerPath: /opt/celeborn
    - role: worker
    extraMounts:
    - hostPath: {workspace}/celeborn/dist
    containerPath: /opt/celeborn
    ```
    用上面的配置创建 kind 集群:
    ```bash
    kind create cluster --config config.yaml
    ```

    第一次使用时,我们可能需要先打包镜像,并将 celeborn 和 alpine 的镜像加载到 kind 集群的节点中:

    ```bash
    kind load docker-image celeborn
    kind load docker-image alpine:3.18
    ```

    ## 修改 Celeborn Helm chart 的定义

    修改定义的原因是,我希望将 celeborn 编译结果所在的 dist 目录挂载到 container 中。一旦 maven 编译完成,重启 pod 就能实时更新,无需再打包镜像,重新部署,从而加快开发流程。

    ```diff
    diff --git a/charts/celeborn/templates/master/statefulset.yaml b/charts/celeborn/templates/master/statefulset.yaml
    index f3f3e651a..35e4c2a33 100644
    --- a/charts/celeborn/templates/master/statefulset.yaml
    +++ b/charts/celeborn/templates/master/statefulset.yaml
    @@ -119,8 +119,13 @@ spec:
    sizeLimit: {{ $volume.capacity }}
    {{- else if eq "hostPath" $volume.type }}
    hostPath:
    + {{- if hasPrefix "/mnt" $volume.mountPath}}
    path: {{ $volume.hostPath | default $volume.mountPath }}/master
    type: DirectoryOrCreate
    + {{- else }}
    + path: {{ $volume.hostPath | default $volume.mountPath }}
    + type: DirectoryOrCreate
    + {{- end }}
    {{- else }}
    {{ fail "For now Celeborn Helm only support emptyDir or hostPath volume types" }}
    {{- end }}
    diff --git a/charts/celeborn/templates/worker/statefulset.yaml b/charts/celeborn/templates/worker/statefulset.yaml
    index bdcb65783..d4dc7a8d5 100644
    --- a/charts/celeborn/templates/worker/statefulset.yaml
    +++ b/charts/celeborn/templates/worker/statefulset.yaml
    @@ -52,7 +52,9 @@ spec:
    - chown
    - {{ .Values.securityContext.runAsUser | default 10006 }}:{{ .Values.securityContext.runAsGroup | default 10006 }}
    {{- range $dir := $dirs }}
    + {{- if hasPrefix "/mnt/" $dir.mountPath }}
    - {{ $dir.mountPath }}
    + {{- end }}
    {{- end}}
    volumeMounts:
    {{- range $index, $dir := $dirs }}
    @@ -80,7 +82,7 @@ spec:
    {{- $namespace := .Release.Namespace }}
    - >
    until {{ range until (.Values.masterReplicas | int) }}
    - nslookup {{ include "celeborn.masterStatefulSetName" $ }}-{{ . }}.{{ include "celeborn.masterServiceName" $ }}.{{ $namespace }}.svc.{{ $.Values.cluster.name }}.local &&
    + nslookup {{ include "celeborn.masterStatefulSetName" $ }}-{{ . }}.{{ include "celeborn.masterServiceName" $ }}.{{ $namespace }}.svc.{{ $.Values.cluster.name }}.local &&
    {{- end }}
    true; do
    echo "waiting for master";
    @@ -122,8 +124,13 @@ spec:
    sizeLimit: {{ $volume.capacity }}
    {{- else if eq "hostPath" $volume.type }}
    hostPath:
    + {{- if hasPrefix "/mnt" $volume.mountPath}}
    path: {{ $volume.hostPath | default $volume.mountPath }}/worker
    type: DirectoryOrCreate
    + {{- else }}
    + path: {{ $volume.hostPath | default $volume.mountPath }}
    + type: DirectoryOrCreate
    + {{- end }}
    {{- else }}
    {{ fail "Currently, Celeborn chart only supports 'emptyDir' and 'hostPath' volume types" }}
    {{- end }}
    diff --git a/charts/celeborn/values.yaml b/charts/celeborn/values.yaml
    index 8847707be..0d80b4057 100644
    --- a/charts/celeborn/values.yaml
    +++ b/charts/celeborn/values.yaml
    @@ -28,11 +28,11 @@ fullnameOverride: ""
    # Specifies the Celeborn image to use
    image:
    # -- Image repository
    - repository: aliyunemr/remote-shuffle-service
    + repository: celeborn
    # -- Image tag
    - tag: 0.1.1-6badd20
    + tag: latest
    # -- Image pull policy
    - pullPolicy: Always
    + pullPolicy: IfNotPresent
    # -- Image name for init containter. (your-private-repo/alpine:3.18)
    initContainerImage: alpine:3.18

    @@ -78,6 +78,12 @@ volumes:
    hostPath: /mnt/celeborn_ratis
    type: hostPath
    capacity: 100Gi
    + - mountPath: /opt/celeborn/master-jars
    + hostPath: /opt/celeborn/master-jars
    + type: hostPath
    + - mountPath: /opt/celeborn/jars
    + hostPath: /opt/celeborn/jars
    + type: hostPath
    # -- Specifies volumes for Celeborn worker pods
    worker:
    - mountPath: /mnt/disk1
    @@ -100,6 +106,12 @@ volumes:
    type: hostPath
    diskType: SSD
    capacity: 100Gi
    + - mountPath: /opt/celeborn/worker-jars
    + hostPath: /opt/celeborn/worker-jars
    + type: hostPath
    + - mountPath: /opt/celeborn/jars
    + hostPath: /opt/celeborn/jars
    + type: hostPath

    # -- Celeborn configurations
    celeborn:
    ```

    ## 在 kind 集群中安装修改过后的 Celeborn


    ```bash
    helm upgrade --install celeborn . --namespace celeborn -f values.yaml --create-namespace
    ```

    ## 调试

    进入 pod 查看运行状况。

    ```bash
    kubectl exec -it celeborn-master-0 -n celeborn -- bash
    ```

    代码更新。

    编译完成后,只需要删除 pod,重新启动的 pod 就会使用新的代码。

    ```bash
    kubectl delete po celeborn-worker-0 -n celeborn
    ```