#cloud-config

users:
- name: datalab
  uid: 2000
  groups: docker
- name: tunnelbe
  uid: 2001
  groups: docker
- name: kmp_user
  uid: 2002
  groups: docker

write_files:
- path: /etc/systemd/system/startup.sh
  permissions: 0755
  owner: root
  content: |
    #!/bin/bash -ex
    truncate -s 190G /var/lib/docker/colab-vmdisk
    # NB: Corrupted filesystems are not supported. fsck will return a non-zero
    # exit status even if it fixed issues, and we exit on errors (-e above).
    # The reason for the fsck is solely to satisfy a resize2fs requirement.
    e2fsck -pf /var/lib/docker/colab-vmdisk
    resize2fs /var/lib/docker/colab-vmdisk
    mkdir -p /sys/fs/cgroup/jupyter-children
    bytes=$(free -b | awk '/^Mem:/{print $4}')
    adj_bytes=$(expr "${bytes}" - 2000000000)
    echo "${adj_bytes}" > /sys/fs/cgroup/jupyter-children/memory.max
    SHMSZ=$(expr "${adj_bytes}" / 2000000)
    echo -e "SHMSZ=${SHMSZ}m\n" >| /var/kernel-docker-disk-limits.env
    /etc/systemd/system/configure-docker.sh
    /etc/systemd/system/prep-var-lib-docker-mount.sh
    sysctl -w vm.dirty_ratio=2 vm.dirty_background_ratio=1
    apparmor_parser --remove /etc/apparmor.d/kernel || true
    apparmor_parser -av /etc/apparmor.d/kernel
    /sbin/iptables-restore < /etc/iptables.cfg
    mkdir -p /tmp/var-colab && /bin/hostname > /tmp/var-colab/hostname && date --rfc-3339=ns >> /tmp/colab-docker-starts
    # Check for the existence of all Colab containers.
    k_default_exists=$(docker ps -q -a -f name=k_default)
    kmp_default_exists=$(docker ps -q -a -f name=kmp_default)
    tunnelbevm_exists=$(docker ps -q -a -f name=tunnelbevm)
    systemctl daemon-reload
    if [[ -z "${k_default_exists}" ]]; then
      systemctl start k_default.service
    else
      docker restart k_default
    fi
    systemctl start colab-kmsg-dumper.service
    systemctl start move-jupyter-children-to-memory-cgroup.service
    if [[ -z "${tunnelbevm_exists}" ]]; then
      systemctl start tunnelbe.service
    else
      docker restart tunnelbevm
    fi
    if [[ -z "${kmp_default_exists}" ]]; then
      systemctl start kmp_default.service
    else
      docker restart kmp_default
    fi
    systemctl start node-problem-detector.service
- path: /etc/systemd/system/move-jupyter-children-to-memory-cgroup.sh
  permissions: 0755
  owner: root
  content: |
    #!/bin/bash -eu
    # Relocate user-run processes into a memory cgroup so that if oom-killer is
    # triggered by one of them, only they are eligible for killing. This
    # increases the chances that jupyter-notebook survives the event,
    # maintaining connectivity to the front-end.
    # NOTE: It would be nicer to poll on a notification mechanism for new
    # processes (e.g. forkstat) but none of the tools that use the Process
    # Events Connector (https://lwn.net/Articles/157150/) appear to be present
    # on COS, and the events aren't made available via an FS mount like sysfs
    # or procfs. The body of the loop below takes ~7ms to run, though, so not
    # worrying about it.
    CGROUPPROCS=/sys/fs/cgroup/jupyter-children/cgroup.procs
    OOM_SCORE_ADJ_MAX=1000
    while :; do
      # Suppress unnecessary noise by ignoring already-moved PIDs.
      # This needs to be done in each iteration to deal with PID reuse.
      declare -A handledPIDs
      while read pid; do
        handledPIDs[$pid]=1
      done < ${CGROUPPROCS}
      # This outer loop will have at most one iteration but do it this way to
      # avoid special-casing zero iterations.
      for ppid in $(pidof -x jupyter-notebook); do
        for pid in $(ps -o pid= --ppid $ppid); do
          [[ -v handledPIDs[$pid] ]] && continue
          echo "$pid"
        done
      done | tee ${CGROUPPROCS}
      # LSP receives special treatment per b/203467697.
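      # Beyond being moved into the cgroup like other children, the language
      # server also gets oom_score_adj raised to the kernel maximum (1000),
      # making it the oom-killer's preferred victim among the kernel's children.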
      for pid in $(pgrep -f pyright-langserver.js); do
        [[ -v handledPIDs[$pid] ]] && continue
        echo "$pid" | tee ${CGROUPPROCS}
        echo "$OOM_SCORE_ADJ_MAX" > /proc/${pid}/oom_score_adj || true
      done
      unset handledPIDs
      sleep 1
    done
- path: /etc/systemd/system/load-nvidia-modules.sh
  permissions: 0755
  owner: root
  content: |
    #!/bin/bash -ex
    bash /var/lib/nvidia/setup_gpu.sh
- path: /etc/systemd/system/move-jupyter-children-to-memory-cgroup.service
  permissions: 0644
  owner: root
  content: |
    [Unit]
    Description=Jupyter children cgroup mover

    [Service]
    ExecStart=/etc/systemd/system/move-jupyter-children-to-memory-cgroup.sh
    Restart=always
    RestartSec=1
- path: /etc/apparmor.d/kernel
  permissions: 0755
  owner: root
  content: |
    #include <tunables/global>
    profile kernel flags=(attach_disconnected) {
      #include <abstractions/base>
      capability,
      dbus,
      file,
      mount,
      network,
      pivot_root,
      ptrace,
      remount,
      signal,
      umount,
      unix,
      deny mount fstype = cgroup,
      deny mount options in (bind) /sys/**,
      deny /sys/fs/cgroup/**/release_agent w,
    }
- path: /etc/systemd/system/configure-docker.sh
  permissions: 0755
  owner: root
  content: |
    #!/bin/bash -ex
    /usr/bin/docker network create -d bridge \
      --subnet=172.28.0.0/16 \
      --gateway=172.28.0.1 \
      --ip-range=172.28.0.0/24 \
      -o "com.docker.network.bridge.name"="br0" \
      br0 || true
- path: /etc/systemd/system/prep-var-lib-docker-mount.sh
  permissions: 0755
  owner: root
  content: |
    #!/bin/bash -ex
    FILE=/var/lib/docker/colab-vmdisk
    OVERLAY2=/var/lib/docker/overlay2
    # Stop docker before unmounting its overlay2 directory to avoid confusing
    # it.
    systemctl stop docker
    # Be robust to overlay2 already being remounted by COS startup scripts.
    umount "$OVERLAY2" || true
    mount -vo nosuid,nodev "$FILE" "$OVERLAY2"
    # Update this setting while dockerd is stopped to have it take effect on
    # restart.
    /bin/sed -i -e '/"storage-driver/i"iptables": false,\n"log-driver": "none",' /etc/docker/daemon.json
    # Start docker after mounting its overlay2 directory to have it create
    # needed subdirectories (e.g. 'l').
    systemctl start docker
- path: /etc/iptables.cfg
  permissions: 0755
  owner: root
  content: |
    *nat
    :PREROUTING ACCEPT
    :INPUT ACCEPT
    :OUTPUT ACCEPT
    :POSTROUTING ACCEPT
    :DOCKER -
    -A PREROUTING -m addrtype --dst-type LOCAL -j DOCKER
    -A OUTPUT ! -d 127.0.0.0/8 -m addrtype --dst-type LOCAL -j DOCKER
    -A POSTROUTING -s 172.28.0.0/16 ! -o br0 -j MASQUERADE
    -A DOCKER -i br0 -j RETURN
    COMMIT
    *mangle
    :PREROUTING ACCEPT
    :INPUT ACCEPT
    :FORWARD ACCEPT
    :OUTPUT ACCEPT
    :POSTROUTING ACCEPT
    COMMIT
    *filter
    :INPUT ACCEPT
    :FORWARD DROP
    :OUTPUT ACCEPT
    :DOCKER -
    -A INPUT -m state --state RELATED,ESTABLISHED -j ACCEPT
    -A INPUT -i lo -j ACCEPT
    -A INPUT -p icmp -j ACCEPT
    -A INPUT -p tcp -m tcp --dport 22 -j ACCEPT
    -A INPUT -d 172.28.0.1 -i br0 -p tcp -m tcp --dport 8008 -j ACCEPT
    -A FORWARD -s 172.28.0.3 -d 169.254.0.0/16 -i br0 -p tcp -m tcp --dport 80 -j ACCEPT
    -A FORWARD -s 172.28.0.13 -d 169.254.0.0/16 -i br0 -p tcp -m tcp --dport 80 -j ACCEPT
    -A FORWARD -o br0 -j DOCKER
    -A FORWARD -o br0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
    -A FORWARD -i br0 ! -o br0 -j ACCEPT
    -A FORWARD -i br0 -o br0 -j ACCEPT
    -A OUTPUT -m state --state NEW,RELATED,ESTABLISHED -j ACCEPT
    -A OUTPUT -o lo -j ACCEPT
    COMMIT
- path: /etc/systemd/system/colab-metadata-monitor.sh
  permissions: 0700
  owner: root
  content: |
    #!/bin/bash -eu
    # Why? b/78509777.
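    # Fetch the full metadata tree along with its ETag, print it, then issue a
    # hanging GET that returns only once the metadata differs from that ETag's
    # snapshot (see the links below).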
    # https://cloud.google.com/compute/docs/storing-retrieving-metadata#aggcontents
    ORIGINAL="$(curl -isS -m 2 -H "Metadata-Flavor: Google" "http://metadata/computeMetadata/v1/?recursive=true" | tr -d '\015')"
    ETAG="$(echo "$ORIGINAL" | sed -n -e 's/^ETag: \(.*\)/\1/p')"
    echo "$ORIGINAL"
    # https://cloud.google.com/compute/docs/storing-retrieving-metadata#etags
    curl -isS -H "Metadata-Flavor: Google" "http://metadata/computeMetadata/v1/?recursive=true&wait_for_change=true&last_etag=$ETAG"
- path: /etc/systemd/system/colab-kmsg-dumper.sh
  permissions: 0700
  owner: root
  content: |
    #!/bin/bash -eu
    echo >>/tmp/var-colab/ooms
    grep --line-buffered -e " invoked oom-killer" -e "segfault at" /dev/kmsg | awk -W interactive '{ print systime() "," $0 }' >>/tmp/var-colab/ooms
- path: /etc/systemd/system/colab-kmsg-dumper.service
  permissions: 0644
  owner: root
  content: |
    [Unit]
    Description=Kernel Log Dumper

    [Service]
    ExecStart=/etc/systemd/system/colab-kmsg-dumper.sh
    Restart=always
    RestartSec=1
- path: /etc/systemd/system/tunnelbe.service
  permissions: 0644
  owner: root
  content: |
    [Unit]
    Description=tunnelbe docker container
    Requires=network-online.target
    After=network-online.target
    Requires=docker.service
    After=docker.service

    [Service]
    Environment="HOME=/home/tunnelbe"
    # This ensures that credentials are in place for docker pull commands.
    ExecStartPre=/usr/bin/docker-credential-gcr configure-docker
    ExecStop=/usr/bin/docker stop tunnelbevm
    # Pass -t below to prevent unfortunate buffering in log-watching through
    # journalctl.
    ExecStart=/usr/bin/docker -D run --net=host -t -u 0 \
      --pid=host \
      --privileged \
      --device=/dev/loop0 \
      --name=tunnelbevm \
      -v /tmp/colab-vm:/tmp/colab-vm \
      -v /var/lib/docker/colab-vmdisk:/var/lib/docker/colab-vmdisk \
      gcr.io/colab-datalab/tunnelbackend_binary:baked \
      --backend_url_map='{"default":{"local_target":"http://172.28.0.2:8080","kernel_manager":"http://172.28.0.2:9000","kernel_manager_proxy_debug_vars":"http://172.28.0.3:6000/debug/vars"}}' \
      --enable_jwt \
      --debug_port=4000 \
      --min_pending_requests=2 \
      --tunnel_url=https://colab.research.google.com/tun/u \
      --request_rate_limit=0 \
      --manual_tunneled_request_port=0
    # The docker pull 403's sometimes, as if the ExecStartPre command above
    # didn't run, even though it did, and even though a subsequent interactive
    # [sudo systemctl start tunnelbe.service] works just fine. Work around
    # this by restarting on apparent failure.
    Restart=always
    RestartSec=1
- path: /etc/systemd/system/kmp_default.service
  permissions: 0644
  owner: root
  content: |
    [Unit]
    Description=kernel_manager_proxy default docker container
    Requires=network-online.target
    After=network-online.target
    Requires=docker.service
    After=docker.service

    [Service]
    Environment="HOME=/home/kmp_user"
    # This ensures that credentials are in place for docker pull commands.
    ExecStartPre=/usr/bin/docker-credential-gcr configure-docker
    ExecStop=/usr/bin/docker stop kmp_default
    # Pass -t below to prevent unfortunate buffering in log-watching through
    # journalctl.
    ExecStart=/usr/bin/docker -D run -t -u 0 \
      --net br0 \
      --ip 172.28.0.3 \
      --name=kmp_default \
      gcr.io/colab-datalab/kernel_manager_proxy:baked \
      --listen_host=172.28.0.3 \
      --target_host=172.28.0.2 \
      --listen_port=6000 \
      --target_port=9000 \
      --enable_output_coalescing=true \
      --output_coalescing_required=true
    # Mirror the docker pull 403 work-around applied to the TBE.
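    # (Restart=always also re-runs ExecStartPre on each attempt, so the
    # credential setup gets another chance before every retry.)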
    Restart=always
    RestartSec=1
- path: /etc/systemd/system/k_default.service
  permissions: 0644
  owner: root
  content: |
    [Unit]
    Description=kernel default docker container
    Requires=network-online.target
    After=network-online.target
    Requires=docker.service
    After=docker.service

    [Service]
    Environment="HOME=/home/datalab"
    EnvironmentFile=/var/kernel-docker-disk-limits.env
    # This ensures that credentials are in place for docker pull commands.
    ExecStartPre=/usr/bin/docker-credential-gcr configure-docker
    # Add the marker for the marketplace image.
    ExecStartPre=/bin/bash -c 'mkdir -p /tmp/var-colab && touch /tmp/var-colab/mp'
    ExecStop=/usr/bin/docker stop k_default
    ExecStart=/usr/bin/docker run -u 0 \
      --net br0 \
      --ip 172.28.0.2 \
      --name=k_default \
      --cap-add SYS_ADMIN \
      --cap-add SYS_PTRACE \
      --device /dev/fuse \
      --security-opt apparmor=kernel \
      --volume /tmp/var-colab:/var/colab \
      --volume /tmp/colab-shared:/var/colab/shared \
      --shm-size=${SHMSZ} \
      --env='GCS_READ_CACHE_BLOCK_SIZE_MB=16' \
      --env='GCE_METADATA_TIMEOUT=0' \
      --env='PYTHONWARNINGS=ignore:::pip._internal.cli.base_command' \
      --env='NO_GCE_CHECK=True' \
      --env='DATALAB_SETTINGS_OVERRIDES={"kernelManagerProxyPort":6000,"kernelManagerProxyHost":"172.28.0.3","jupyterArgs":["--ip=\\\"172.28.0.2\\\""],"debugAdapterMultiplexerPath":"/usr/local/bin/dap_multiplexer","enableLsp":true}' \
      gcr.io/colab-datalab/datalab:baked
    # If a user causes the container to exit (say by killing the node app), we
    # prefer to restart rather than make the user wait for the VM to go
    # unhealthy.
    Restart=always
    RestartSec=1
- path: /etc/motd
  permissions: 0644
  owner: root
  content: |
    To use this VM in the Colab UI, navigate to the URL:
    https://colab.research.google.com/

runcmd:
- /etc/systemd/system/startup.sh
# See go/iwsdy.
- systemctl stop update-engine.service
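# (update-engine is the COS auto-updater; stopping it keeps automatic OS
# updates from landing underneath the running containers.)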