====== Replace an SSD disk ======
==== Replace an SSD disk used as a journal for filestore ====
Let's suppose that we need to replace /dev/sdb.
This device hosts the journals for osd.15 .. osd.19:
[root@ceph-osd-02 ~]# ceph-disk list | grep sdb
/dev/sdb :
/dev/sdb1 ceph journal
/dev/sdb2 ceph journal, for /dev/sdh1
/dev/sdb3 ceph journal, for /dev/sdi1
/dev/sdb4 ceph journal, for /dev/sdj1
/dev/sdb5 ceph journal, for /dev/sdk1
/dev/sdb6 ceph journal, for /dev/sdl1
/dev/sdh1 ceph data, active, cluster ceph, osd.15, journal /dev/sdb2
/dev/sdi1 ceph data, active, cluster ceph, osd.16, journal /dev/sdb3
/dev/sdj1 ceph data, active, cluster ceph, osd.17, journal /dev/sdb4
/dev/sdk1 ceph data, active, cluster ceph, osd.18, journal /dev/sdb5
/dev/sdl1 ceph data, active, cluster ceph, osd.19, journal /dev/sdb6
Let's tell Ceph not to rebalance the cluster while we stop these OSDs for maintenance:
ceph osd set noout
Let's stop the affected OSDs:
systemctl stop ceph-osd@15.service
systemctl stop ceph-osd@16.service
systemctl stop ceph-osd@17.service
systemctl stop ceph-osd@18.service
systemctl stop ceph-osd@19.service
Let's flush the journals for these OSDs:
ceph-osd -i 15 --flush-journal
ceph-osd -i 16 --flush-journal
ceph-osd -i 17 --flush-journal
ceph-osd -i 18 --flush-journal
ceph-osd -i 19 --flush-journal
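The stop and flush steps above can also be run as a small loop; a minimal sketch, using the same OSD IDs:
# stop each affected OSD, then flush its journal to the data disk
for osd_id in 15 16 17 18 19; do
    systemctl stop ceph-osd@${osd_id}.service
    ceph-osd -i ${osd_id} --flush-journal
done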
Let's replace the device sdb.
If needed, zap it first:
ceph-disk zap /dev/sdb
Let's partition the new disk, using this script:
#!/bin/bash
# Recreate one 30 GiB journal partition per OSD on the new SSD, reusing the
# journal UUID saved in each OSD data directory and tagging the partition
# with the standard 'ceph journal' GPT type code.
osds="15 16 17 18 19"
journal_disk=/dev/sdb
part_number=0
for osd_id in $osds; do
    part_number=$((part_number+1))
    journal_uuid=$(cat /var/lib/ceph/osd/ceph-$osd_id/journal_uuid)
    echo "journal_uuid: ${journal_uuid}"
    echo "part_number: ${part_number}"
    sgdisk --new=${part_number}:0:+30720M --change-name=${part_number}:'ceph journal' --partition-guid=${part_number}:$journal_uuid --typecode=${part_number}:45b0969e-9b03-4f30-b4c6-b4b80ceff106 --mbrtogpt -- $journal_disk
done
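Optionally, verify the new partition layout before recreating the journals (a quick sanity check, not strictly required by the procedure):
# print the GPT table of the new journal disk and reload the partition table
sgdisk -p /dev/sdb
partprobe /dev/sdb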
Then recreate the journals on the new partitions:
ceph-osd --mkjournal -i 15
ceph-osd --mkjournal -i 16
ceph-osd --mkjournal -i 17
ceph-osd --mkjournal -i 18
ceph-osd --mkjournal -i 19
Let's restart the OSDs:
systemctl restart ceph-osd@15.service
systemctl restart ceph-osd@16.service
systemctl restart ceph-osd@17.service
systemctl restart ceph-osd@18.service
systemctl restart ceph-osd@19.service
Finally, re-enable rebalancing:
ceph osd unset noout
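Optionally, check that the OSDs came back with their journals on the new partitions and that the cluster returns to HEALTH_OK, reusing the commands from the beginning of this section:
ceph-disk list | grep sdb
ceph -s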
==== Replace an SSD disk used as db for bluestore ====
Let's suppose that we need to replace /dev/vdk.
Using:
vgdisplay -v
we find the volume group that uses this physical device. In our case it is ceph-db-16-19:
[root@c-osd-5 /]# vgdisplay -v ceph-db-16-19
--- Volume group ---
VG Name ceph-db-16-19
System ID
Format lvm2
Metadata Areas 1
Metadata Sequence No 57
VG Access read/write
VG Status resizable
MAX LV 0
Cur LV 4
Open LV 4
Max PV 0
Cur PV 1
Act PV 1
VG Size <200.00 GiB
PE Size 4.00 MiB
Total PE 51199
Alloc PE / Size 50944 / 199.00 GiB
Free PE / Size 255 / 1020.00 MiB
VG UUID zWl23g-hd9y-gMf6-xroV-V25z-Yte7-Yl2znG
--- Logical volume ---
LV Path /dev/ceph-db-16-19/db-16
LV Name db-16
VG Name ceph-db-16-19
LV UUID YVoGSB-vBIl-sorZ-wLNR-8bX2-XxIc-sYHbkj
LV Write Access read/write
LV Creation host, time c-osd-5.novalocal, 2019-01-30 11:38:27 +0100
LV Status available
# open 2
LV Size 50.00 GiB
Current LE 12800
Segments 1
Allocation inherit
Read ahead sectors auto
- currently set to 8192
Block device 252:1
--- Logical volume ---
LV Path /dev/ceph-db-16-19/db-17
LV Name db-17
VG Name ceph-db-16-19
LV UUID 4kdL1r-RMPt-MxXj-Ve7y-Czxc-eJRX-476pzt
LV Write Access read/write
LV Creation host, time c-osd-5.novalocal, 2019-01-30 11:38:31 +0100
LV Status available
# open 2
LV Size 50.00 GiB
Current LE 12800
Segments 1
Allocation inherit
Read ahead sectors auto
- currently set to 8192
Block device 252:6
--- Logical volume ---
LV Path /dev/ceph-db-16-19/db-18
LV Name db-18
VG Name ceph-db-16-19
LV UUID 3378B5-5jFJ-d3jZ-nuSV-zhB9-fEhc-efNezZ
LV Write Access read/write
LV Creation host, time c-osd-5.novalocal, 2019-01-30 11:38:34 +0100
LV Status available
# open 2
LV Size 50.00 GiB
Current LE 12800
Segments 1
Allocation inherit
Read ahead sectors auto
- currently set to 8192
Block device 252:8
--- Logical volume ---
LV Path /dev/ceph-db-16-19/db-19
LV Name db-19
VG Name ceph-db-16-19
LV UUID fDAejd-teoj-MkTb-AaOb-O4Gy-tztQ-1oXjIL
LV Write Access read/write
LV Creation host, time c-osd-5.novalocal, 2019-01-30 11:38:50 +0100
LV Status available
# open 2
LV Size 49.00 GiB
Current LE 12544
Segments 1
Allocation inherit
Read ahead sectors auto
- currently set to 8192
Block device 252:9
--- Physical volumes ---
PV Name /dev/vdk
PV UUID pULAJn-DZDy-yy9a-3YY7-Zyfu-rmZD-AKTd4z
PV Status allocatable
Total PE / Free PE 51199 / 255
[root@c-osd-5 /]#
I.e., this disk is used as the DB device for osd.16 .. osd.19.
Let's 'disable' these OSDs by setting their CRUSH weight to 0:
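A more compact way to confirm which logical volumes sit on the failing device is to query LVM directly; a minimal sketch using the same device and volume group names:
# show the physical volume and the LVs allocated in its volume group
pvs /dev/vdk
lvs -o lv_name,vg_name,devices ceph-db-16-19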
[root@c-osd-5 /]# ceph osd crush reweight osd.16 0
reweighted item id 16 name 'osd.16' to 0 in crush map
[root@c-osd-5 /]# ceph osd crush reweight osd.17 0
reweighted item id 17 name 'osd.17' to 0 in crush map
[root@c-osd-5 /]# ceph osd crush reweight osd.18 0
reweighted item id 18 name 'osd.18' to 0 in crush map
[root@c-osd-5 /]# ceph osd crush reweight osd.19 0
reweighted item id 19 name 'osd.19' to 0 in crush map
[root@c-osd-5 /]#
Wait until the cluster status is HEALTH_OK. Then:
TBC
ceph osd out osd.16
ceph osd out osd.17
ceph osd out osd.18
ceph osd out osd.19
ceph osd crush remove osd.16
ceph osd crush remove osd.17
ceph osd crush remove osd.18
ceph osd crush remove osd.19
systemctl stop ceph-osd@16.service
systemctl stop ceph-osd@17.service
systemctl stop ceph-osd@18.service
systemctl stop ceph-osd@19.service
ceph auth del osd.16
ceph auth del osd.17
ceph auth del osd.18
ceph auth del osd.19
ceph osd rm osd.16
ceph osd rm osd.17
ceph osd rm osd.18
ceph osd rm osd.19
umount /var/lib/ceph/osd/ceph-16
umount /var/lib/ceph/osd/ceph-17
umount /var/lib/ceph/osd/ceph-18
umount /var/lib/ceph/osd/ceph-19
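Since these removal steps are identical for each OSD, they can also be run as a loop; a minimal sketch equivalent to the commands above:
for osd_id in 16 17 18 19; do
    ceph osd out osd.${osd_id}
    ceph osd crush remove osd.${osd_id}
    systemctl stop ceph-osd@${osd_id}.service
    ceph auth del osd.${osd_id}
    ceph osd rm osd.${osd_id}
    umount /var/lib/ceph/osd/ceph-${osd_id}
done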
Destroy the volume group created on this SSD disk (be sure you have saved the output of 'vgdisplay -v ceph-db-16-19' first!):
[root@c-osd-5 /]# vgremove ceph-db-16-19
Do you really want to remove volume group "ceph-db-16-19" containing 4 logical volumes? [y/n]: y
Do you really want to remove active logical volume ceph-db-16-19/db-16? [y/n]: y
Logical volume "db-16" successfully removed
Do you really want to remove active logical volume ceph-db-16-19/db-17? [y/n]: y
Logical volume "db-17" successfully removed
Do you really want to remove active logical volume ceph-db-16-19/db-18? [y/n]: y
Logical volume "db-18" successfully removed
Do you really want to remove active logical volume ceph-db-16-19/db-19? [y/n]: y
Logical volume "db-19" successfully removed
Volume group "ceph-db-16-19" successfully removed
[root@c-osd-5 /]#
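Optionally, check that the volume group is really gone before pulling the disk; a quick sanity check:
# ceph-db-16-19 should no longer appear in vgs; /dev/vdk will still show up
# in pvs (with an empty VG column) until the disk is physically replaced
vgs
pvs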
Let's replace the SSD disk. Let's suppose the new one is still called vdk.
Recreate the volume group and the logical volumes (refer to the previous vgdisplay output):
[root@c-osd-5 /]# vgcreate ceph-db-16-19 /dev/vdk
Physical volume "/dev/vdk" successfully created.
Volume group "ceph-db-16-19" successfully created
[root@c-osd-5 /]# lvcreate -L 50GB -n db-16 ceph-db-16-19
Logical volume "db-16" created.
[root@c-osd-5 /]# lvcreate -L 50GB -n db-17 ceph-db-16-19
Logical volume "db-17" created.
[root@c-osd-5 /]# lvcreate -L 50GB -n db-18 ceph-db-16-19
Logical volume "db-18" created.
[root@c-osd-5 /]# lvcreate -L 50GB -n db-19 ceph-db-16-19
Volume group "ceph-db-16-19" has insufficient free space (12799 extents): 12800 required.
[root@c-osd-5 /]# lvcreate -L 49GB -n db-19 ceph-db-16-19
Logical volume "db-19" created.
[root@c-osd-5 /]#
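Note that the last lvcreate fails at 50 GB because LVM metadata consumes a few extents. Instead of guessing a smaller size, the remaining free space can be assigned directly; a hedged alternative for the last LV:
lvcreate -l 100%FREE -n db-19 ceph-db-16-19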
Let's do an lvm zap (see https://tracker.ceph.com/issues/24099):
[root@c-osd-5 /]# ceph-volume lvm zap /var/lib/ceph/osd/ceph-16/block
Running command: /usr/sbin/cryptsetup status /dev/mapper/
--> Zapping: /var/lib/ceph/osd/ceph-16/block
Running command: wipefs --all /var/lib/ceph/osd/ceph-16/block
Running command: dd if=/dev/zero of=/var/lib/ceph/osd/ceph-16/block bs=1M count=10
stderr: 10+0 records in
10+0 records out
10485760 bytes (10 MB) copied
stderr: , 0.0110834 s, 946 MB/s
--> Zapping successful for: /var/lib/ceph/osd/ceph-16/block
Let's create the OSD:
[root@c-osd-5 /]# ceph-volume lvm create --bluestore --data ceph-block-16/block-16 --block.db ceph-db-16-19/db-16
Running command: /bin/ceph-authtool --gen-print-key
Running command: /bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring -i - osd new 86f9420c-4d3f-4eee-ade5-622efbe12fec
Running command: /bin/ceph-authtool --gen-print-key
Running command: mount -t tmpfs tmpfs /var/lib/ceph/osd/ceph-16
Running command: restorecon /var/lib/ceph/osd/ceph-16
Running command: chown -h ceph:ceph /dev/ceph-block-16/block-16
Running command: chown -R ceph:ceph /dev/dm-13
Running command: ln -s /dev/ceph-block-16/block-16 /var/lib/ceph/osd/ceph-16/block
Running command: ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring mon getmap -o /var/lib/ceph/osd/ceph-16/activate.monmap
stderr: got monmap epoch 4
Running command: ceph-authtool /var/lib/ceph/osd/ceph-16/keyring --create-keyring --name osd.16 --add-key AQCiytpcTgRcNhAAOK69zFKcnPvLR0QiVUusGA==
stdout: creating /var/lib/ceph/osd/ceph-16/keyring
added entity osd.16 auth auth(auid = 18446744073709551615 key=AQCiytpcTgRcNhAAOK69zFKcnPvLR0QiVUusGA== with 0 caps)
Running command: chown -R ceph:ceph /var/lib/ceph/osd/ceph-16/keyring
Running command: chown -R ceph:ceph /var/lib/ceph/osd/ceph-16/
Running command: chown -h ceph:ceph /dev/ceph-db-16-19/db-16
Running command: chown -R ceph:ceph /dev/dm-1
Running command: /bin/ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 16 --monmap /var/lib/ceph/osd/ceph-16/activate.monmap --keyfile - --bluestore-block-db-path /dev/ceph-db-16-19/db-16 --osd-data /var/lib/ceph/osd/ceph-16/ --osd-uuid 86f9420c-4d3f-4eee-ade5-622efbe12fec --setuser ceph --setgroup ceph
--> ceph-volume lvm prepare successful for: ceph-block-16/block-16
Running command: ceph-bluestore-tool --cluster=ceph prime-osd-dir --dev /dev/ceph-block-16/block-16 --path /var/lib/ceph/osd/ceph-16
Running command: ln -snf /dev/ceph-block-16/block-16 /var/lib/ceph/osd/ceph-16/block
Running command: chown -h ceph:ceph /var/lib/ceph/osd/ceph-16/block
Running command: chown -R ceph:ceph /dev/dm-13
Running command: chown -R ceph:ceph /var/lib/ceph/osd/ceph-16
Running command: ln -snf /dev/ceph-db-16-19/db-16 /var/lib/ceph/osd/ceph-16/block.db
Running command: chown -h ceph:ceph /dev/ceph-db-16-19/db-16
Running command: chown -R ceph:ceph /dev/dm-1
Running command: chown -h ceph:ceph /var/lib/ceph/osd/ceph-16/block.db
Running command: chown -R ceph:ceph /dev/dm-1
Running command: systemctl enable ceph-volume@lvm-16-86f9420c-4d3f-4eee-ade5-622efbe12fec
stderr: Created symlink from /etc/systemd/system/multi-user.target.wants/ceph-volume@lvm-16-86f9420c-4d3f-4eee-ade5-622efbe12fec.service to /usr/lib/systemd/system/ceph-volume@.service.
Running command: systemctl enable --runtime ceph-osd@16
Running command: systemctl start ceph-osd@16
--> ceph-volume lvm activate successful for osd ID: 16
--> ceph-volume lvm create successful for: ceph-block-16/block-16
[root@c-osd-5 /]#
NB: the 'ceph-volume lvm create' will probably fail. In that case, run the zap again and then repeat the create.
Same for the other OSDs:
ceph-volume lvm create --bluestore --data ceph-block-17/block-17 --block.db ceph-db-16-19/db-17
ceph-volume lvm zap /var/lib/ceph/osd/ceph-17/block
ceph-volume lvm create --bluestore --data ceph-block-17/block-17 --block.db ceph-db-16-19/db-17
ceph-volume lvm create --bluestore --data ceph-block-18/block-18 --block.db ceph-db-16-19/db-18
ceph-volume lvm zap /var/lib/ceph/osd/ceph-18/block
ceph-volume lvm create --bluestore --data ceph-block-18/block-18 --block.db ceph-db-16-19/db-18
ceph-volume lvm create --bluestore --data ceph-block-19/block-19 --block.db ceph-db-16-19/db-19
ceph-volume lvm zap /var/lib/ceph/osd/ceph-19/block
ceph-volume lvm create --bluestore --data ceph-block-19/block-19 --block.db ceph-db-16-19/db-19
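The create / zap / create-again pattern above can also be wrapped in a small retry loop; a minimal sketch, assuming the same ceph-block-NN/block-NN and ceph-db-16-19/db-NN naming:
for osd_id in 17 18 19; do
    if ! ceph-volume lvm create --bluestore --data ceph-block-${osd_id}/block-${osd_id} --block.db ceph-db-16-19/db-${osd_id}; then
        # the first attempt often fails (see https://tracker.ceph.com/issues/24099): zap and retry
        ceph-volume lvm zap /var/lib/ceph/osd/ceph-${osd_id}/block
        ceph-volume lvm create --bluestore --data ceph-block-${osd_id}/block-${osd_id} --block.db ceph-db-16-19/db-${osd_id}
    fi
done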