From 27027a17d395e39e9c2ae1c44e439ffef7089f6b Mon Sep 17 00:00:00 2001
From: Neha Ojha
Date: Mon, 10 Sep 2018 17:23:20 +0000
Subject: [PATCH] osd: add osd memory target option

BlueStore's cache is sized conservatively by default, so that it does not
overwhelm under-provisioned servers. The default is 1G for HDD, and 3G for
SSD.

To replace the page cache, as much memory as possible should be given to
BlueStore. This is required for good performance. Since ceph-ansible knows
how much memory a host has, it can set

`bluestore cache size = max(total host memory / num OSDs on this host * safety factor, 1G)`

Due to fragmentation and other memory use not included in bluestore's cache,
a safety factor of 0.5 for dedicated nodes and 0.2 for hyperconverged nodes
is recommended.

Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1595003
Signed-off-by: Neha Ojha
Co-Authored-by: Guillaume Abrioux
---
 group_vars/all.yml.sample                |  4 +++
 group_vars/rhcs.yml.sample               |  4 +++
 roles/ceph-config/tasks/main.yml         | 31 ++++++++++++++++++++++++
 roles/ceph-config/templates/ceph.conf.j2 | 14 +++++++++++
 roles/ceph-defaults/defaults/main.yml    |  4 +++
 roles/ceph-defaults/tasks/facts.yml      |  2 +-
 6 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/group_vars/all.yml.sample b/group_vars/all.yml.sample
index ea8aab475..1eaade9b9 100644
--- a/group_vars/all.yml.sample
+++ b/group_vars/all.yml.sample
@@ -360,6 +360,10 @@ dummy:
 
 ## OSD options
 #
+#is_hci: false
+#hci_safety_factor: 0.2
+#non_hci_safety_factor: 0.7
+#osd_memory_target: 4000000000
 #journal_size: 5120 # OSD journal size in MB
 #public_network: 0.0.0.0/0
 #cluster_network: "{{ public_network | regex_replace(' ', '') }}"
diff --git a/group_vars/rhcs.yml.sample b/group_vars/rhcs.yml.sample
index 8e418b269..d7696123c 100644
--- a/group_vars/rhcs.yml.sample
+++ b/group_vars/rhcs.yml.sample
@@ -360,6 +360,10 @@ ceph_rhcs_version: 3
 
 ## OSD options
 #
+#is_hci: false
+#hci_safety_factor: 0.2
+#non_hci_safety_factor: 0.7
+#osd_memory_target: 4000000000
 #journal_size: 5120 # OSD journal size in MB
 #public_network: 0.0.0.0/0
 #cluster_network: "{{ public_network | regex_replace(' ', '') }}"
diff --git a/roles/ceph-config/tasks/main.yml b/roles/ceph-config/tasks/main.yml
index 429a11bbe..a6239ec75 100644
--- a/roles/ceph-config/tasks/main.yml
+++ b/roles/ceph-config/tasks/main.yml
@@ -9,6 +9,37 @@
     group: "ceph"
     mode: "0755"
 
+- block:
+    - name: count number of osds for non-lvm scenario
+      set_fact:
+        num_osds: "{{ devices | length | int }}"
+      when:
+        - devices | length > 0
+        - (osd_scenario == 'collocated' or osd_scenario == 'non-collocated')
+
+    - name: count number of osds for lvm scenario
+      set_fact:
+        num_osds: "{{ lvm_volumes | length | int }}"
+      when:
+        - lvm_volumes | length > 0
+        - osd_scenario == 'lvm'
+
+    - name: get number of osds for lvm-batch scenario
+      command: "ceph-volume lvm batch --report --format=json --osds-per-device {{ osds_per_device }} {{ devices | join(' ') }}"
+      register: lvm_batch_devices
+      when:
+        - devices | length > 0
+        - osd_scenario == 'lvm'
+
+    - name: set_fact num_osds
+      set_fact:
+        num_osds: "{{ (lvm_batch_devices.stdout | from_json).osds | length | int }}"
+      when:
+        - devices | length > 0
+        - osd_scenario == 'lvm'
+  when:
+    - inventory_hostname in groups.get(osd_group_name, [])
+
 - name: "generate ceph configuration file: {{ cluster }}.conf"
   action: config_template
   args:
diff --git a/roles/ceph-config/templates/ceph.conf.j2 b/roles/ceph-config/templates/ceph.conf.j2
index cee74ee15..75233bbb2 100644
--- a/roles/ceph-config/templates/ceph.conf.j2
+++ b/roles/ceph-config/templates/ceph.conf.j2
@@ -151,6 +151,20 @@ filestore xattr use omap = true
 {# else, default is false #}
 {% endif %}
 {% endif %}
+{% if osd_objectstore == 'bluestore' %}
+{% set _num_osds = num_osds | default(0) | int %}
+[osd]
+{% if is_hci and _num_osds > 0 %} {# hci_safety_factor is the safety factor for HCI deployments #}
+{% if ansible_memtotal_mb * hci_safety_factor / _num_osds > osd_memory_target %}
+{% set _osd_memory_target = (ansible_memtotal_mb * hci_safety_factor / _num_osds) %}
+{% endif %}
+{% elif _num_osds > 0 %} {# non_hci_safety_factor is the safety factor for dedicated nodes #}
+{% if ansible_memtotal_mb * non_hci_safety_factor / _num_osds > osd_memory_target %}
+{% set _osd_memory_target = (ansible_memtotal_mb * non_hci_safety_factor / _num_osds) %}
+{% endif %}
+{% endif %}
+osd memory target = {{ _osd_memory_target | default(osd_memory_target) }}
+{% endif %}
 {% endif %}
 
 {% if inventory_hostname in groups.get(rgw_group_name, []) %}
diff --git a/roles/ceph-defaults/defaults/main.yml b/roles/ceph-defaults/defaults/main.yml
index a87fcee42..a935b7d52 100644
--- a/roles/ceph-defaults/defaults/main.yml
+++ b/roles/ceph-defaults/defaults/main.yml
@@ -352,6 +352,10 @@ cephfs_pools:
 
 ## OSD options
 #
+is_hci: false
+hci_safety_factor: 0.2
+non_hci_safety_factor: 0.7
+osd_memory_target: 4000000000
 journal_size: 5120 # OSD journal size in MB
 public_network: 0.0.0.0/0
 cluster_network: "{{ public_network | regex_replace(' ', '') }}"
diff --git a/roles/ceph-defaults/tasks/facts.yml b/roles/ceph-defaults/tasks/facts.yml
index 745332a53..49f49338d 100644
--- a/roles/ceph-defaults/tasks/facts.yml
+++ b/roles/ceph-defaults/tasks/facts.yml
@@ -238,4 +238,4 @@
     - inventory_hostname in groups.get(rgw_group_name, []) or inventory_hostname in groups.get(nfs_group_name, [])
     - ceph_current_status['servicemap'] is defined
     - ceph_current_status['servicemap']['services'] is defined
-    - ceph_current_status['servicemap']['services']['rgw'] is defined # that's the way to cover ceph_release_num[ceph_release] >= ceph_release_num['luminous']
\ No newline at end of file
+    - ceph_current_status['servicemap']['services']['rgw'] is defined # that's the way to cover ceph_release_num[ceph_release] >= ceph_release_num['luminous']
-- 
2.39.5
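The sizing rule this patch encodes can be illustrated outside Ansible. Below is a
minimal Python sketch (not part of the patch) of the selection logic: give each OSD
its share of host memory scaled by the safety factor, and keep the configured
`osd_memory_target` whenever that share is not larger. The helper name, host sizes,
and OSD counts are illustrative assumptions; the sketch works in bytes for clarity,
whereas the template compares `ansible_memtotal_mb` directly against
`osd_memory_target`.

    # Minimal sketch of the per-OSD memory sizing rule; all inputs are examples.
    DEFAULT_OSD_MEMORY_TARGET = 4000000000  # bytes, the patch's osd_memory_target default


    def pick_osd_memory_target(host_memory_bytes, num_osds, safety_factor,
                               default=DEFAULT_OSD_MEMORY_TARGET):
        """Give each OSD its share of host memory, scaled by a safety factor,
        but never go below the configured default target."""
        if num_osds <= 0:
            return default
        share = host_memory_bytes * safety_factor / num_osds
        return int(share) if share > default else default


    # Hypothetical dedicated OSD node: 256 GiB RAM, 12 OSDs, non_hci_safety_factor 0.7
    print(pick_osd_memory_target(256 * 1024**3, 12, 0.7))  # ~16 GB per OSD
    # Hypothetical hyperconverged node: 128 GiB RAM, 8 OSDs, hci_safety_factor 0.2
    print(pick_osd_memory_target(128 * 1024**3, 8, 0.2))   # share ~3.4 GB -> keeps 4000000000

Taking the larger of the two values means generously provisioned hosts hand the
freed page-cache memory to BlueStore, while small hosts stay at the conservative
default rather than being squeezed further.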