====== Replace an SSD disk ======

==== Replace an SSD disk used as journal for filestore ====

Let's suppose that we need to replace /dev/sdb.
This device holds the journals for osd.15 .. osd.19:

<code bash>
[root@ceph-osd-02 ~]# ceph-disk list | grep sdb
/dev/sdb :
 /dev/sdb1 ceph journal
 /dev/sdb2 ceph journal, for /dev/sdh1
 /dev/sdb3 ceph journal, for /dev/sdi1
 /dev/sdb4 ceph journal, for /dev/sdj1
 /dev/sdb5 ceph journal, for /dev/sdk1
 /dev/sdb6 ceph journal, for /dev/sdl1
 /dev/sdh1 ceph data, active, cluster ceph, osd.15, journal /dev/sdb2
 /dev/sdi1 ceph data, active, cluster ceph, osd.16, journal /dev/sdb3
 /dev/sdj1 ceph data, active, cluster ceph, osd.17, journal /dev/sdb4
 /dev/sdk1 ceph data, active, cluster ceph, osd.18, journal /dev/sdb5
 /dev/sdl1 ceph data, active, cluster ceph, osd.19, journal /dev/sdb6
</code>


Let's tell Ceph not to rebalance the cluster while these OSDs are down for maintenance:

<code bash>
ceph osd set noout
</code>
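
If you want to verify that the flag was set:

<code bash>
# the cluster flags should now include "noout"
ceph osd dump | grep flags
</code>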

Let's stop the affected OSDs:

<code bash>
systemctl stop ceph-osd@15.service
systemctl stop ceph-osd@16.service
systemctl stop ceph-osd@17.service
systemctl stop ceph-osd@18.service
systemctl stop ceph-osd@19.service
</code>

Let's flush the journals for these OSDs:

<code bash>
ceph-osd -i 15 --flush-journal
ceph-osd -i 16 --flush-journal
ceph-osd -i 17 --flush-journal
ceph-osd -i 18 --flush-journal
ceph-osd -i 19 --flush-journal
</code>

Now replace the sdb device.
If necessary, zap it:

<code bash>
ceph-disk zap /dev/sdb
</code>

Let's partition the new disk, using this script:

<code bash>
#!/bin/bash
# create one 30 GiB 'ceph journal' partition per OSD, reusing each OSD's saved journal_uuid

osds="15 16 17 18 19"
journal_disk=/dev/sdb
part_number=0
for osd_id in $osds; do
  part_number=$((part_number+1))
  journal_uuid=$(cat /var/lib/ceph/osd/ceph-$osd_id/journal_uuid)
  echo "journal_uuid: ${journal_uuid}"
  echo "part_number: ${part_number}"
  sgdisk --new=${part_number}:0:+30720M --change-name=${part_number}:'ceph journal' --partition-guid=${part_number}:$journal_uuid --typecode=${part_number}:45b0969e-9b03-4f30-b4c6-b4b80ceff106 --mbrtogpt -- $journal_disk
done
</code>
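
Before recreating the journals it can be useful to check the resulting partition table, e.g.:

<code bash>
# print the GPT layout of the new journal disk
sgdisk -p /dev/sdb
</code>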

Then recreate the journals:

<code bash>
ceph-osd --mkjournal -i 15
ceph-osd --mkjournal -i 16
ceph-osd --mkjournal -i 17
ceph-osd --mkjournal -i 18
ceph-osd --mkjournal -i 19
</code>

Let's restart the OSDs:

<code bash>
systemctl restart ceph-osd@15.service
systemctl restart ceph-osd@16.service
systemctl restart ceph-osd@17.service
systemctl restart ceph-osd@18.service
systemctl restart ceph-osd@19.service
</code>

Finally, re-enable rebalancing:

<code bash>
ceph osd unset noout
</code>

==== Replace an SSD disk used as db for bluestore ====

Let's suppose that we need to replace /dev/vdk.

Using:

<code bash>
vgdisplay -v
</code>

we find which volume group uses this physical device. In our case it is ceph-db-16-19.
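
Alternatively, the device-to-VG mapping can be listed directly (a quick sketch, assuming the device is /dev/vdk):

<code bash>
# show which volume group sits on /dev/vdk
pvs -o pv_name,vg_name | grep vdk
</code>

The full layout of this volume group (worth saving, since we will recreate the same LVs later) is: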
<code bash>

[root@c-osd-5 /]# vgdisplay -v ceph-db-16-19
  --- Volume group ---
  VG Name               ceph-db-16-19
  System ID
  Format                lvm2
  Metadata Areas        1
  Metadata Sequence No  57
  VG Access             read/write
  VG Status             resizable
  MAX LV                0
  Cur LV                4
  Open LV               4
  Max PV                0
  Cur PV                1
  Act PV                1
  VG Size               <200.00 GiB
  PE Size               4.00 MiB
  Total PE              51199
  Alloc PE / Size       50944 / 199.00 GiB
  Free  PE / Size       255 / 1020.00 MiB
  VG UUID               zWl23g-hd9y-gMf6-xroV-V25z-Yte7-Yl2znG

  --- Logical volume ---
  LV Path                /dev/ceph-db-16-19/db-16
  LV Name                db-16
  VG Name                ceph-db-16-19
  LV UUID                YVoGSB-vBIl-sorZ-wLNR-8bX2-XxIc-sYHbkj
  LV Write Access        read/write
  LV Creation host, time c-osd-5.novalocal, 2019-01-30 11:38:27 +0100
  LV Status              available
  # open                 2
  LV Size                50.00 GiB
  Current LE             12800
  Segments               1
  Allocation             inherit
  Read ahead sectors     auto
  - currently set to     8192
  Block device           252:1

  --- Logical volume ---
  LV Path                /dev/ceph-db-16-19/db-17
  LV Name                db-17
  VG Name                ceph-db-16-19
  LV UUID                4kdL1r-RMPt-MxXj-Ve7y-Czxc-eJRX-476pzt
  LV Write Access        read/write
  LV Creation host, time c-osd-5.novalocal, 2019-01-30 11:38:31 +0100
  LV Status              available
  # open                 2
  LV Size                50.00 GiB
  Current LE             12800
  Segments               1
  Allocation             inherit
  Read ahead sectors     auto
  - currently set to     8192
  Block device           252:6

  --- Logical volume ---
  LV Path                /dev/ceph-db-16-19/db-18
  LV Name                db-18
  VG Name                ceph-db-16-19
  LV UUID                3378B5-5jFJ-d3jZ-nuSV-zhB9-fEhc-efNezZ
  LV Write Access        read/write
  LV Creation host, time c-osd-5.novalocal, 2019-01-30 11:38:34 +0100
  LV Status              available
  # open                 2
  LV Size                50.00 GiB
  Current LE             12800
  Segments               1
  Allocation             inherit
  Read ahead sectors     auto
  - currently set to     8192
  Block device           252:8

  --- Logical volume ---
  LV Path                /dev/ceph-db-16-19/db-19
  LV Name                db-19
  VG Name                ceph-db-16-19
  LV UUID                fDAejd-teoj-MkTb-AaOb-O4Gy-tztQ-1oXjIL
  LV Write Access        read/write
  LV Creation host, time c-osd-5.novalocal, 2019-01-30 11:38:50 +0100
  LV Status              available
  # open                 2
  LV Size                49.00 GiB
  Current LE             12544
  Segments               1
  Allocation             inherit
  Read ahead sectors     auto
  - currently set to     8192
  Block device           252:9

  --- Physical volumes ---
  PV Name               /dev/vdk
  PV UUID               pULAJn-DZDy-yy9a-3YY7-Zyfu-rmZD-AKTd4z
  PV Status             allocatable
  Total PE / Free PE    51199 / 255

[root@c-osd-5 /]#
</code>

That is, it is used as the db device for OSDs 16..19.

Let's "disable" these OSDs by setting their CRUSH weight to 0, so that their data is drained:

<code bash>
[root@c-osd-5 /]# ceph osd crush reweight osd.16 0
reweighted item id 16 name 'osd.16' to 0 in crush map
[root@c-osd-5 /]# ceph osd crush reweight osd.17 0
reweighted item id 17 name 'osd.17' to 0 in crush map
[root@c-osd-5 /]# ceph osd crush reweight osd.18 0
reweighted item id 18 name 'osd.18' to 0 in crush map
[root@c-osd-5 /]# ceph osd crush reweight osd.19 0
reweighted item id 19 name 'osd.19' to 0 in crush map
[root@c-osd-5 /]#
</code>

Wait until the cluster status is back to HEALTH_OK before removing the OSDs.
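
One simple way to wait is to poll the cluster health (a minimal sketch; the polling interval is arbitrary):

<code bash>
# block until the cluster reports HEALTH_OK again
until ceph health | grep -q HEALTH_OK; do sleep 60; done
ceph -s
</code>

Once the cluster is healthy, remove the OSDs (this part of the procedure is still to be confirmed, TBC):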

<code bash>
ceph osd out osd.16
ceph osd out osd.17
ceph osd out osd.18
ceph osd out osd.19

ceph osd crush remove osd.16
ceph osd crush remove osd.17
ceph osd crush remove osd.18
ceph osd crush remove osd.19

systemctl stop ceph-osd@16.service
systemctl stop ceph-osd@17.service
systemctl stop ceph-osd@18.service
systemctl stop ceph-osd@19.service

ceph auth del osd.16
ceph auth del osd.17
ceph auth del osd.18
ceph auth del osd.19

ceph osd rm osd.16
ceph osd rm osd.17
ceph osd rm osd.18
ceph osd rm osd.19

umount /var/lib/ceph/osd/ceph-16
umount /var/lib/ceph/osd/ceph-17
umount /var/lib/ceph/osd/ceph-18
umount /var/lib/ceph/osd/ceph-19
</code>
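
The same sequence can also be written as a loop over the four OSD ids (just a compact equivalent of the commands above):

<code bash>
# drop OSDs 16-19 from the cluster and unmount their data directories
for id in 16 17 18 19; do
  ceph osd out osd.$id
  ceph osd crush remove osd.$id
  systemctl stop ceph-osd@$id.service
  ceph auth del osd.$id
  ceph osd rm osd.$id
  umount /var/lib/ceph/osd/ceph-$id
done
</code>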


Now destroy the volume group created on this SSD disk (be sure to have saved the output of 'vgdisplay -v ceph-db-16-19' first!).
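
For example (the destination file is arbitrary):

<code bash>
# keep a copy of the VG/LV layout so it can be recreated later
vgdisplay -v ceph-db-16-19 > /root/ceph-db-16-19.vgdisplay.txt
</code>

Then remove the volume group: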

<code bash>
[root@c-osd-5 /]# vgremove ceph-db-16-19
Do you really want to remove volume group "ceph-db-16-19" containing 4 logical volumes? [y/n]: y
Do you really want to remove active logical volume ceph-db-16-19/db-16? [y/n]: y
  Logical volume "db-16" successfully removed
Do you really want to remove active logical volume ceph-db-16-19/db-17? [y/n]: y
  Logical volume "db-17" successfully removed
Do you really want to remove active logical volume ceph-db-16-19/db-18? [y/n]: y
  Logical volume "db-18" successfully removed
Do you really want to remove active logical volume ceph-db-16-19/db-19? [y/n]: y
  Logical volume "db-19" successfully removed
  Volume group "ceph-db-16-19" successfully removed
[root@c-osd-5 /]#
</code>

Now replace the SSD disk. Let's suppose the new one is still called vdk.

Recreate the volume group and the logical volumes (refer to the previous vgdisplay output):

<code bash>
[root@c-osd-5 /]# vgcreate ceph-db-16-19 /dev/vdk
  Physical volume "/dev/vdk" successfully created.
  Volume group "ceph-db-16-19" successfully created
[root@c-osd-5 /]# lvcreate -L 50GB -n db-16 ceph-db-16-19
  Logical volume "db-16" created.
[root@c-osd-5 /]# lvcreate -L 50GB -n db-17 ceph-db-16-19
  Logical volume "db-17" created.
[root@c-osd-5 /]# lvcreate -L 50GB -n db-18 ceph-db-16-19
  Logical volume "db-18" created.
[root@c-osd-5 /]# lvcreate -L 50GB -n db-19 ceph-db-16-19
  Volume group "ceph-db-16-19" has insufficient free space (12799 extents): 12800 required.
[root@c-osd-5 /]# lvcreate -L 49GB -n db-19 ceph-db-16-19
  Logical volume "db-19" created.
[root@c-osd-5 /]#
</code>

Let's do an lvm zap (see https://tracker.ceph.com/issues/24099):

<code bash>
[root@c-osd-5 /]# ceph-volume lvm zap /var/lib/ceph/osd/ceph-16/block
Running command: /usr/sbin/cryptsetup status /dev/mapper/
--> Zapping: /var/lib/ceph/osd/ceph-16/block
Running command: wipefs --all /var/lib/ceph/osd/ceph-16/block
Running command: dd if=/dev/zero of=/var/lib/ceph/osd/ceph-16/block bs=1M count=10
 stderr: 10+0 records in
10+0 records out
10485760 bytes (10 MB) copied
 stderr: , 0.0110834 s, 946 MB/s
--> Zapping successful for: /var/lib/ceph/osd/ceph-16/block
</code>

Let's create the OSD:

<code bash>
[root@c-osd-5 /]# ceph-volume lvm create --bluestore --data ceph-block-16/block-16 --block.db ceph-db-16-19/db-16
Running command: /bin/ceph-authtool --gen-print-key
Running command: /bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring -i - osd new 86f9420c-4d3f-4eee-ade5-622efbe12fec
Running command: /bin/ceph-authtool --gen-print-key
Running command: mount -t tmpfs tmpfs /var/lib/ceph/osd/ceph-16
Running command: restorecon /var/lib/ceph/osd/ceph-16
Running command: chown -h ceph:ceph /dev/ceph-block-16/block-16
Running command: chown -R ceph:ceph /dev/dm-13
Running command: ln -s /dev/ceph-block-16/block-16 /var/lib/ceph/osd/ceph-16/block
Running command: ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring mon getmap -o /var/lib/ceph/osd/ceph-16/activate.monmap
 stderr: got monmap epoch 4
Running command: ceph-authtool /var/lib/ceph/osd/ceph-16/keyring --create-keyring --name osd.16 --add-key AQCiytpcTgRcNhAAOK69zFKcnPvLR0QiVUusGA==
 stdout: creating /var/lib/ceph/osd/ceph-16/keyring
added entity osd.16 auth auth(auid = 18446744073709551615 key=AQCiytpcTgRcNhAAOK69zFKcnPvLR0QiVUusGA== with 0 caps)
Running command: chown -R ceph:ceph /var/lib/ceph/osd/ceph-16/keyring
Running command: chown -R ceph:ceph /var/lib/ceph/osd/ceph-16/
Running command: chown -h ceph:ceph /dev/ceph-db-16-19/db-16
Running command: chown -R ceph:ceph /dev/dm-1
Running command: /bin/ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 16 --monmap /var/lib/ceph/osd/ceph-16/activate.monmap --keyfile - --bluestore-block-db-path /dev/ceph-db-16-19/db-16 --osd-data /var/lib/ceph/osd/ceph-16/ --osd-uuid 86f9420c-4d3f-4eee-ade5-622efbe12fec --setuser ceph --setgroup ceph
--> ceph-volume lvm prepare successful for: ceph-block-16/block-16
Running command: ceph-bluestore-tool --cluster=ceph prime-osd-dir --dev /dev/ceph-block-16/block-16 --path /var/lib/ceph/osd/ceph-16
Running command: ln -snf /dev/ceph-block-16/block-16 /var/lib/ceph/osd/ceph-16/block
Running command: chown -h ceph:ceph /var/lib/ceph/osd/ceph-16/block
Running command: chown -R ceph:ceph /dev/dm-13
Running command: chown -R ceph:ceph /var/lib/ceph/osd/ceph-16
Running command: ln -snf /dev/ceph-db-16-19/db-16 /var/lib/ceph/osd/ceph-16/block.db
Running command: chown -h ceph:ceph /dev/ceph-db-16-19/db-16
Running command: chown -R ceph:ceph /dev/dm-1
Running command: chown -h ceph:ceph /var/lib/ceph/osd/ceph-16/block.db
Running command: chown -R ceph:ceph /dev/dm-1
Running command: systemctl enable ceph-volume@lvm-16-86f9420c-4d3f-4eee-ade5-622efbe12fec
 stderr: Created symlink from /etc/systemd/system/multi-user.target.wants/ceph-volume@lvm-16-86f9420c-4d3f-4eee-ade5-622efbe12fec.service to /usr/lib/systemd/system/ceph-volume@.service.
Running command: systemctl enable --runtime ceph-osd@16
Running command: systemctl start ceph-osd@16
--> ceph-volume lvm activate successful for osd ID: 16
--> ceph-volume lvm create successful for: ceph-block-16/block-16
[root@c-osd-5 /]#
</code>


NB: the 'ceph-volume lvm create' will probably fail at the first attempt. In that case, run the zap again and retry.


Do the same for the other OSDs:

<code bash>
ceph-volume lvm create --bluestore --data ceph-block-17/block-17 --block.db ceph-db-16-19/db-17
ceph-volume lvm zap /var/lib/ceph/osd/ceph-17/block
ceph-volume lvm create --bluestore --data ceph-block-17/block-17 --block.db ceph-db-16-19/db-17

ceph-volume lvm create --bluestore --data ceph-block-18/block-18 --block.db ceph-db-16-19/db-18
ceph-volume lvm zap /var/lib/ceph/osd/ceph-18/block
ceph-volume lvm create --bluestore --data ceph-block-18/block-18 --block.db ceph-db-16-19/db-18

ceph-volume lvm create --bluestore --data ceph-block-19/block-19 --block.db ceph-db-16-19/db-19
ceph-volume lvm zap /var/lib/ceph/osd/ceph-19/block
ceph-volume lvm create --bluestore --data ceph-block-19/block-19 --block.db ceph-db-16-19/db-19
</code>

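As a final check, it can be useful to verify that the recreated OSDs are up and in, and that the cluster recovers to HEALTH_OK:

<code bash>
# the four OSDs should reappear in the tree and the cluster should recover
ceph osd tree | grep -E 'osd\.(16|17|18|19)'
ceph -s
</code>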
  