commit 1d81e11179b1a0878cb742a3789c58b7b07710db Author: tim holloway Date: Sat Jul 20 14:29:29 2024 -0400 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d92c19c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*~ +\#*\# +.\#* diff --git a/README.md b/README.md new file mode 100644 index 0000000..53785a8 --- /dev/null +++ b/README.md @@ -0,0 +1,123 @@ +# "Instant" OSD for Ceph. Create a VM and spin it up as an OSD easily. + +If you read the Ceph documentation, you're likely to be presented with +the legacy "manual" creation of a host and OSD. That still works, +but it's really not the way to go. + +Speaking of "not the way to go", ideally you don't run Ceph in a Virtual +Machine and your OSD should be 1TB or more. But for the less-demanding user +and for basic experimentation, the procedures provided here are a great +start and it runs fine on a 4GB VM and 50GB of OSD storage. + +This process is designed to work on a KVM host using libvirt, which is what +the standard Virtualization Host meta-package provides. I expect that the +Debian/Ubuntu equivalent can also be used, although I haven't tested it. + +Some advanced features are needed, so a recent CentOS clone, RHEL host, or Fedora platform is expected. At the moment, the oldest CentOS clones under active support are based on RHEL 9, and I used AlmaLinux 9. + +In addition to libvirt support, you need to have LVM installed on your machine, since that's where the OSD data storage will be located. You need an LVM volume group capable of holding the expected amount of disk space for the OSD, plus a few GB more for overhead, as a minimum. + +Also, install the "virt-install" utility as it's not part of the core +Virtualization Host meta-package, but it's used by the VM creation script. + +Finally, pull the cloud image that you want to serve as your base for the VM. As supplied, I'm using AlmaLinux-9-GenericCloud-9.4-20240507.x86_64.qcow2. 
This **MUST** be a cloud image supporting cloud-init in order to properly pre-configure the VM! + +## Stages + +First up, we need to prep the host environment. Secondly, we spin up the VM, using the provided script as a model. Then, we activate the new VM as a Ceph host using Ansible. Finally, we use "ceph orch host add" to add the host to the ceph inventory. Ceph will see the LVM disk we provided and auto-magically turn it into an OSD, and at that point, the new VM is a full-fledged Ceph node. + +# Host Environment Preparation + +This is actually two tasks. One is general site prep. The supplied files are for my own particular environment. The second is done for each new OSD VM we want to create. + +## Site preparation + +The Instant OSD archive has 2 main directories. One is files that you will put in your VM host's /var/lib/libvirt/images directory. The other goes into your Ansible playbook directory and roles directory. + +When you download my Instant OSD bundle, it's tailored for my local environment. +So you need to tailor it to yours. The most important step is to unarchive the ``roles/files/ceph.cfg.tgz`` file and change the ceph config and keyfiles as needed. Or you could simply archive up a copy of your current /etc/ceph directory from your admin machine and replace mine with yours. If you edit the supplied archive, re-archive it and replace the original archive, as it will be copied into the OSD. + +You'll also want to go to your ceph admin host console, issue the "ceph cephadm get-pub-key" command, and paste the resulting key into the "vars/main.yml" file of your ceph_osd_host role; Ansible will copy it to the new VM's authorized keys for its root account. + +The other important thing to do is to customize the cloud-init.data file on the VM host. As supplied it has an SSH key. You may wish to replace the supplied key with the public key of your ceph admin host. + +Now you're ready to start creating VMs. 
+ +## VM preparation + +## Network + +Ceph likes its hostnames all nice and consistent, so I recommend adding your +new hostname/IP Address/MAC address as needed to your DHCP server, DNS server (including reverse DNS!) and optionally /etc/hosts for machines that might be +interested, especially the Ansible host. + +Also don't forget to add the new VM's hostname to your ansible inventory file (default /etc/ansible/hosts)! + +### VM Customization + +Now the fun begins. The cloud-init.data file contains information common to all +VMs you'll create. There should properly be a meta-data file for the VM-specific +stuff, but I haven't been able to get that to work and thus I dynamically create +a temporary composite cloud-init for the actual VM creation. + +Clone the make_ceph_osdx.sh file to make a custom VM. Edit the variables that +define the hostname, MAC address and LVM Logical Volume that will hold the OSD data. + +Note that the default MAC address for libvirt is randomly generated, so I manually supply my own to make DHCP assign a predictable IP address. + +Use LVM's "lvcreate" command to create the Logical Volume you'll reference here, +and edit the script to reference it. As presently configured, the VM will present the +LVM logical volume as device "/dev/sda" - the OS lives on /dev/vda. The device ID +will vary if you use a different VM bus type than "scsi", but since I don't know +the optimal bus type for an OSD, that's what I picked. + +Once you've customized the script, just execute it. Assuming you've got everything +right, it will create a new VM disk based on your cloud image and boot up a VM. + +Of course, if you are as error-prone as I am, this may require a few tweaks. Fear not. The process is idempotent, so you can re-run it as often as you like. +If you're extra paranoid, you can delete the VM disk and (if it got created) the VM itself. + +Once everything is happy, the boot process will run and log to your command-line +console. 
At its end, you'll be presented with a login prompt. + +***Caution*** It's best to wait a minute or 2, as some setup may still be running even after the login prompt comes up! + +As supplied, the login is userid "almalinux" and password "redhat". These are defined in the cloud-init.data file and if you like, you can change them. + +Now you're ready to run the Ansible stage. Use ctrl-] to return to your VM's original shell (disconnect from the VM console). You don't need it anymore. + +## Ansible provisioning + +The cloud-init process takes care of some of the most essential functions, but +after a certain point, it's better to use something more flexible, and Ansible is the easiest option for that. So go to your ansible console and do the following +prep work: + +1. Ensure your hostname is in the Ansible inventory. +2. Customize the cephxx.yml playbook to point to that host. +3. Use "ssh-copy-id almalinux@mynewosd" to ensure that Ansible can run the playbook automatically. Remember that for the default account (almalinux), the +password is "redhat". "mynewosd" is, of course, the hostname you gave to the new +OSD VM. + +Use the ansible-playbook command to run the ceph OSD playbook. This playbook provisions +using the "ceph_osd_host" role you installed. + +It does the following: +1. Install the ceph repository into ``yum.repos.d``. +1. Install the cephadm utility from the ceph repository. +1. Copy in the ``/etc/ceph`` configuration information files. +1. Do an initial run of cephadm to cause it to pull the container(s) needed to run cephadm and the ceph daemons. + +Note that if you like, you can also install the "ceph-common" package and be able to run ceph commands without needing "cephadm shell" to run them. + +## Rejoice! + +Congratulations! You have just created a new ceph host. 
You can confirm if you like, by using ssh to login to "almalinux@mynewcephosd" and issuing the "sudo cephadm shell" command to enter the cephadm shell and then type "ceph orch ps" to +list the running daemons in your system. + +Note that if the above fails, the most likely cause will be that your /etc/ceph config files are wrong. You did replace mine with your own in the ansible role file, didn't you? + +## Going live, You're now a full-fledged ceph node and you only need to issue the +"ceph orch host add" command to add this new VM to the Ceph host list. Ceph will +automatically see the unused OSD data device (/dev/sdb) and make an OSD out of it. + +As a final note, the new OSD may be created with a low CRUSH weight so it won't be too eager to fill up with data. Use the "ceph osd tree" command to see how it relates to the other OSDs and use the ceph osd set crush weight command to bumo it, if you need to. diff --git a/ansible_role/cephXX.yml b/ansible_role/cephXX.yml new file mode 100644 index 0000000..c449fa2 --- /dev/null +++ b/ansible_role/cephXX.yml @@ -0,0 +1,8 @@ +--- +- hosts: cephXX.ceph.mousetech.com + remote_user: almalinux + become: yes + +# new ceph node + roles: + - { role: ceph_osd_host } diff --git a/ansible_role/roles/ceph_osd_host/README.md b/ansible_role/roles/ceph_osd_host/README.md new file mode 100644 index 0000000..a47c8c6 --- /dev/null +++ b/ansible_role/roles/ceph_osd_host/README.md @@ -0,0 +1,20 @@ +# Prep a cloud-init'ed Ceph OSD host +Preliminary requirements: +1. My standard VM make_ceph script which assumes I've created an LVM + on the containing host for the data +1. Clone and customize the make_ceph script to set the hostname, MAC + address and location of the data LVM partition (logical volume) +1. Add the intended IP and MAC address to my master hosts list for + Ansible deployment, my DHCP servers and my primary DNS server. +1. Run the make_ceph script to spin up the VM. 
in conjunction with the + generic cloud-init user-data, this should install an SSH key for my + ansible server +1. Add the new machine to the ansible hosts file and run ansible. +1. This role installs the ceph repo, cephadm, a root ssh key for Ceph + administration. +1. At this point you should be able to log into the ceph admin node + and do a "ceph orch host add" with the new hostname and IP address. + +Note that Cephadm will see the allocated LVM partition as /dev/sda and +make an OSD out of it automatically. An OSD crush reweight will be required +to make it an active participant, since the initial crush weight is very low. \ No newline at end of file diff --git a/ansible_role/roles/ceph_osd_host/files/ceph.cfg.tgz b/ansible_role/roles/ceph_osd_host/files/ceph.cfg.tgz new file mode 100644 index 0000000..fbff815 Binary files /dev/null and b/ansible_role/roles/ceph_osd_host/files/ceph.cfg.tgz differ diff --git a/ansible_role/roles/ceph_osd_host/tasks/main.yml b/ansible_role/roles/ceph_osd_host/tasks/main.yml new file mode 100644 index 0000000..c0b5833 --- /dev/null +++ b/ansible_role/roles/ceph_osd_host/tasks/main.yml @@ -0,0 +1,31 @@ +--- +# Prep a cloud-init'ed VM to act as a ceph OSD node. +# See the README file at the root of this role. +# 2024-07019, Tim Holloway + + - name: Install ceph RPM repo + dnf: + name: centos-release-ceph-pacific + state: latest + + - name: Install cephadm + dnf: + name: cephadm + state: latest + + - name: Install Ceph admin node's ssh key + ansible.posix.authorized_key: + user: root + state: present + key: "{{ admin_server_key }}" + # ideally, key is in vault. 
+ + - name: Copy in ceph config files + unarchive: + src: ceph.cfg.tgz + dest: /etc + + - name: Run cephadm to make it do initial container pull and setup + shell: + cmd: /usr/sbin/cephadm pull + diff --git a/ansible_role/roles/ceph_osd_host/vars/main.yml b/ansible_role/roles/ceph_osd_host/vars/main.yml new file mode 100644 index 0000000..f9fb4ed --- /dev/null +++ b/ansible_role/roles/ceph_osd_host/vars/main.yml @@ -0,0 +1,3 @@ +--- +# use ceph cephadm get-pub-key to obtain your system's key and paste it here. +admin_server_key: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDEbRZP9ZSH++sAZyewGUZhZ1hxfjYCRSDB56wJu1+Iy4A24z8H1bhocjN+mSzXN5DI7tvuBnYqeZv1xOuJfxGXbsBZKKgvNe3gZi4llQ2vfmwowIqX4ZzmHmwg63VB7ZI28/O/l7bHdFkGs+CmJ/9Ct0s7PaUWrsxTAS38MGTX+RynzArv2EYvF62HLOVMr0SE8anM8q6/hvZzBivDYKhwnDe+Yo1QBQlhany/YkLrhVgxqMEqvgLvAHVGsXwhhMISRqyDxQNyjAbydcpHl6cLatja1SJKh0AZSNiC7NBlifo+7r65JcDVclfeQNvQxdC4savT3rkPEiIQOXC+t3Wqg0F8S8PNf5DfSVOw/se04dgw9fqng6bajHc7699TSngHdJ4LoDvo2mMpGU3bnBBTAh/pp0b16QB+fP22RLquovU/IKI/F9jcWUvO6J3v7BLvWgaQv7kBa9aQL967lkSCG2Hhk7ICNuDxnhac9fVb5gMZMzIYGkqco6iE6/f1IB8= root@control.ceph.mousetech.com" diff --git a/vm_host/cloud-init.data b/vm_host/cloud-init.data new file mode 100644 index 0000000..c02e0f0 --- /dev/null +++ b/vm_host/cloud-init.data @@ -0,0 +1,9 @@ +#cloud-config +password: redhat +chpasswd: { expire: False } +ssh_pwauth: True +ssh_authorized_keys: + - ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAoyyqUd5hT9ka+F7iE/TAhCxIqO9YTx9g7Kr5Aje+cuVmg7A+26AowQfGHUIf53g1uI6IO+a96tY+r2XWhiIS2PdrhXLKA4dOuLGbMEF1qq1I854kOy2wrTuqNg39AjNbeMg0kE2xfpe/Pnb+0OqzNLKLa3BGlQzcPoLcOT8C6Vog0lCjUZ7jyHMnM4+43ClxT/a6oWGoOem+ctK0hH37lC7h6/ZX/hBO26d1RsajO6sv++YrfBcKIl9iW8VJ5jT8N2p4A9MWizBd5OnC7eW9+E8Q30irHE/JZdPPxYyWzOxK7kSSGhhYEsTVj6184QPwwn9CUzna2joNBRU6FxuVDQ== root@control.ceph.mousetech.com +# Stock init gives almalinux home to root!? 
+runcmd: + - chown almalinux:almalinux /home/almalinux diff --git a/vm_host/make_cephxx.sh b/vm_host/make_cephxx.sh new file mode 100644 index 0000000..50c285c --- /dev/null +++ b/vm_host/make_cephxx.sh @@ -0,0 +1,56 @@ +#!/usr/bin/bash +# VM Creator for a Ceph OSD host +# 2024-07-19, Tim Holloway +# +# Customize variables (and script!) as needed: +vm_host='cephxx' +vm_domain='ceph.mousetech.com' +vm_name="01-$vm_host" +vm_memory='4096' +vm_cpus='2' +lvm_data='/dev/vg_data/lv_cephxx' +vm_disk="$vm_name-VDA.qcow2" +vm_mac_address="04:00:00:00:01:08" +#RANDOM for host randomly assigned MAC + +ci_user_data='cloud-init.data' +base_disk='/var/lib/libvirt/images/AlmaLinux-9-GenericCloud-9.4-20240507.x86_64.qcow2' +ci_meta_data=$(mktemp) + +# virt-install won't process the following as "meta-data": +cat >$ci_meta_data <>$ci_meta_data + +echo building host $hostname as $vm_name +echo Ensure $vm_disk exists +if [ ! -f "$vm_disk" ]; then + qemu-img create -b $base_disk -F qcow2 -f qcow2 $vm_disk 17G +fi + +virt-install \ + --connect qemu:///system \ + --name "$vm_name" \ + --memory "$vm_memory" \ + --machine q35 \ + --vcpus "$vm_cpus" \ + --cpu host-passthrough \ + --import \ + --cloud-init user-data="$ci_meta_data" \ + --osinfo name=almalinux9 \ + --disk "$vm_disk" \ + --disk "$lvm_data",bus=scsi \ + --network mac="$vm_mac_address",bridge=br0 \ + --virt-type kvm + +#echo "METADATA FILE: $ci_meta_data" +rm $ci_meta_data