# --- Configuration ---
MON_IP ?= 192.168.0.2                    # Primary monitor node IP
# Use the CephFS data pool if it already exists; fall back to "default_pool" otherwise
POOL_NAME ?= $(shell sudo ceph osd pool ls 2>/dev/null | grep "cephfs.cephfs.data" || echo "default_pool")
POOL_SIZE ?= 3
OSD_DEVICES ?= /dev/sdb                  # Modify as per your system (unused while OSDs are created with --all-available-devices)
FS_NAME ?= cephfs
CEPH_IMAGE ?= quay.io/ceph/ceph:v19.2.1

# Cluster Nodes
MON_NODES = mon1 mon2 mon3
OSD_NODES = osd1 osd2 osd3
MGR_NODES = mgr1 mgr2
MDS_NODES = mds1 mds2
RGW_NODES = rgw1

# SSH Credentials
SSH_USER ?= ceph-admin

# --- High-Level Targets ---
.PHONY: setup-cluster expand-cluster recover-cluster recover-daemon recover-dashboard monitor-cluster upgrade-cluster status clean list-pools mount-cephfs

## Full Cluster Setup (One Command)
setup-cluster: install bootstrap add-mon add-osd add-mgr add-mds add-rgw create-pool enable-dashboard status
	@echo "✅ Ceph Cluster Setup Completed!"

## Expand the Cluster with New Storage Nodes
expand-cluster: add-osd
	@echo "✅ Ceph Cluster Expanded!"

## Detect and Recover Failed OSDs
recover-cluster:
	@echo "🔍 Checking for failed OSDs..."
	@FAILED_OSDS=$$(sudo ceph health detail | grep -oP 'osd\.\d+' | grep -oP '\d+' | sort -u); \
	if [ -n "$$FAILED_OSDS" ]; then \
		for osd in $$FAILED_OSDS; do \
			echo "🔄 Replacing OSD $$osd"; \
			sudo ceph osd out $$osd; \
			sudo ceph osd purge $$osd --yes-i-really-mean-it; \
			sudo ceph orch apply osd --all-available-devices; \
		done; \
		echo "✅ OSD Recovery Completed!"; \
	else \
		echo "✅ No failed OSDs detected."; \
	fi

## Recover a failed Ceph daemon
recover-daemon:
	@echo "🔧 Checking for failed Ceph daemons..."
	@FAILED_DAEMONS=$$(sudo ceph health detail | grep -A 20 'CEPHADM_FAILED_DAEMON' | grep -oP 'daemon \S+ on \S+' | awk '{print $$2}'); \
	if [ -n "$$FAILED_DAEMONS" ]; then \
		for daemon in $$FAILED_DAEMONS; do \
			echo "🔄 Attempting to recover $$daemon..."; \
			sudo ceph orch daemon restart $$daemon || sudo ceph orch daemon redeploy $$daemon; \
			echo "✅ Recovery attempted for $$daemon."; \
		done; \
	else \
		echo "✅ No failed daemons detected."; \
	fi

## Recover Ceph Dashboard (Restart MGR and Re-enable Dashboard)
recover-dashboard:
	@echo "🔍 Checking Ceph Manager and Dashboard..."
	@MGR_STATUS=$$(sudo ceph mgr dump | grep "active_name"); \
	if [ -z "$$MGR_STATUS" ]; then \
		echo "⚠️ No active Ceph Manager detected! Restarting..."; \
		sudo ceph orch restart mgr; \
	fi
	@echo "🔄 Restarting Ceph Dashboard..."
	sudo ceph mgr module disable dashboard || true
	sudo ceph mgr module enable dashboard
	@echo "✅ Ceph Dashboard restarted successfully!"
	@echo "📌 You can access it at:"
	@sudo ceph mgr services | grep dashboard

## Enable Monitoring (Prometheus & Grafana)
monitor-cluster:
	@echo "📊 Setting up monitoring..."
	sudo ceph mgr module enable prometheus
	sudo ceph mgr module enable dashboard
	sudo ceph dashboard create-self-signed-cert
	@echo "✅ Monitoring enabled! Access Grafana via the Ceph dashboard."

## Upgrade the Ceph Cluster
upgrade-cluster:
	@echo "⬆️ Upgrading Ceph to $(CEPH_IMAGE)..."
	sudo ceph orch upgrade start --image $(CEPH_IMAGE)
	@echo "✅ Ceph upgrade initiated!"

## Check Cluster Status
status:
	@echo "🔍 Checking Ceph Cluster Status..."
	sudo ceph status

## Clean up (Reset Cluster)
clean:
	@echo "🗑️ WARNING: This will remove all Ceph data!"
	sudo cephadm rm-cluster --fsid $$(sudo ceph fsid) --force --zap-osds
	@echo "✅ Ceph Cluster Removed!"

# --- Core Tasks ---
.PHONY: install bootstrap add-mon add-osd add-mgr add-mds add-rgw create-pool enable-dashboard

install:
	@echo "📥 Installing Ceph dependencies..."
	sudo apt update && sudo apt install -y ceph cephadm ceph-common

bootstrap:
	@echo "🚀 Bootstrapping Ceph cluster..."
	sudo cephadm bootstrap --mon-ip $(MON_IP)
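## Register cluster hosts with the cephadm orchestrator (illustrative sketch, not
## part of the original target list). cephadm can only place daemons on hosts it
## knows about, so the usual sequence is: bootstrap, copy /etc/ceph/ceph.pub to
## each host, then `ceph orch host add`. Assumptions: /etc/ceph/ceph.pub exists
## (created by `cephadm bootstrap`), $(SSH_USER) exists on every node with
## passwordless sudo, and the *_NODES hostnames resolve from the admin node.
## If cephadm should not connect as root, you may also need:
##   sudo ceph cephadm set-user $(SSH_USER)
.PHONY: add-hosts
add-hosts:
	@echo "🔑 Distributing the cluster SSH key and registering hosts..."
	for node in $(MON_NODES) $(OSD_NODES) $(MGR_NODES) $(MDS_NODES) $(RGW_NODES); do \
		ssh-copy-id -f -i /etc/ceph/ceph.pub $(SSH_USER)@$$node; \
		sudo ceph orch host add $$node; \
	done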
add-mon:
	@echo "🔗 Adding Monitor nodes..."
	sudo ceph orch apply mon --placement="$(MON_NODES)"

add-osd:
	@echo "📦 Adding OSD nodes..."
	sudo ceph orch apply osd --all-available-devices

add-mgr:
	@echo "🛠️ Adding Manager nodes..."
	sudo ceph orch apply mgr --placement="$(MGR_NODES)"

add-mds:
	@echo "📂 Adding Metadata Servers..."
	sudo ceph orch apply mds $(FS_NAME) --placement="$(MDS_NODES)"

add-rgw:
	@echo "🌍 Adding RADOS Gateway (S3-Compatible Object Storage)..."
	sudo ceph orch apply rgw default --placement="$(RGW_NODES)"

create-pool:
	@echo "💾 Creating Ceph Pool: $(POOL_NAME)..."
	sudo ceph osd pool create $(POOL_NAME) 128 128 replicated
	sudo ceph osd pool set $(POOL_NAME) size $(POOL_SIZE)

enable-dashboard:
	@echo "🌍 Enabling Ceph Dashboard..."
	sudo ceph mgr module enable dashboard
	sudo ceph dashboard create-self-signed-cert

## List all Ceph Pools
list-pools:
	@echo "📌 Listing Ceph Pools..."
	@echo "------------------------------------------------------------------"
	@echo "Name                      | Data Protection    | Application | PGs"
	@echo "--------------------------|--------------------|-------------|-----"
	@sudo ceph osd pool ls detail | awk '/^pool/ {name=$$3; size=""; pg=""; app=""; for (i=1; i<NF; i++) {if ($$i=="size") size=$$(i+1); if ($$i=="pg_num") pg=$$(i+1); if ($$i=="application") app=$$(i+1)}; printf "%-25s | Replication: x%-4s | %-11s | %s\n", name, size, app, pg}'
	@echo "------------------------------------------------------------------"

## Mount CephFS on a local directory
mount-cephfs:
	@echo "📂 Mounting CephFS..."
	sudo mkdir -p /mnt/cephfs
	sudo mount -t ceph :/ /mnt/cephfs -o name=admin,secret=$$(sudo ceph auth get-key client.admin)
	@echo "✅ CephFS Mounted at /mnt/cephfs"
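## Create the CephFS filesystem and an unmount helper (illustrative sketch, not
## part of the original target list). `mount-cephfs` needs an existing filesystem;
## `ceph fs volume create` asks the orchestrator to create the data/metadata pools
## and MDS daemons for $(FS_NAME). Run it once after the cluster is up if the
## filesystem does not exist yet.
.PHONY: create-fs unmount-cephfs
create-fs:
	@echo "🗄️ Creating CephFS volume $(FS_NAME)..."
	sudo ceph fs volume create $(FS_NAME)
	@echo "✅ CephFS volume $(FS_NAME) created!"

unmount-cephfs:
	@echo "📂 Unmounting CephFS..."
	sudo umount /mnt/cephfs
	@echo "✅ CephFS unmounted from /mnt/cephfs"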