From 7a1c7d3ed705637aa8d1aaab3e04c547d1269106 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 17 Jun 2020 14:12:01 +0200 Subject: [PATCH] rbd: recognize crush_location and read_from_replica map options Signed-off-by: Ilya Dryomov (cherry picked from commit e3874a25f0e828e2a6bd8735dbeb14197008896c) --- doc/man/8/rbd.rst | 41 ++++++++++++++++++++++++++++++++++ src/tools/rbd/action/Kernel.cc | 22 ++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/doc/man/8/rbd.rst b/doc/man/8/rbd.rst index b5a373584a213..ffc651546e944 100644 --- a/doc/man/8/rbd.rst +++ b/doc/man/8/rbd.rst @@ -792,6 +792,47 @@ Per mapping (block device) `rbd device map` options: solid-state drives). For filestore with filestore_punch_hole = false, the recommended setting is image object size (typically 4M). +* crush_location=x - Specify the location of the client in terms of CRUSH + hierarchy (since 5.8). This is a set of key-value pairs separated from + each other by '|', with keys separated from values by ':'. Note that '|' + may need to be quoted or escaped to avoid it being interpreted as a pipe + by the shell. The key is the bucket type name (e.g. rack, datacenter or + region with default bucket types) and the value is the bucket name. For + example, to indicate that the client is local to rack "myrack", data center + "mydc" and region "myregion":: + + crush_location=rack:myrack|datacenter:mydc|region:myregion + + Each key-value pair stands on its own: "myrack" doesn't need to reside in + "mydc", which in turn doesn't need to reside in "myregion". The location + is not a path to the root of the hierarchy but rather a set of nodes that + are matched independently, owing to the fact that bucket names are unique + within a CRUSH map. 
"Multipath" locations are supported, so it is possible + to indicate locality for multiple parallel hierarchies:: + + crush_location=rack:myrack1|rack:myrack2|datacenter:mydc + +* read_from_replica=no - Disable replica reads, always pick the primary OSD + (since 5.8, default). + +* read_from_replica=balance - When issued a read on a replicated pool, pick + a random OSD for serving it (since 5.8). + + This mode is safe for general use only since Octopus (i.e. after "ceph osd + require-osd-release octopus"). Otherwise it should be limited to read-only + workloads such as images mapped read-only everywhere or snapshots. + +* read_from_replica=localize - When issued a read on a replicated pool, pick + the most local OSD for serving it (since 5.8). The locality metric is + calculated against the location of the client given with crush_location; + a match with the lowest-valued bucket type wins. For example, with default + bucket types, an OSD in a matching rack is closer than an OSD in a matching + data center, which in turn is closer than an OSD in a matching region. + + This mode is safe for general use only since Octopus (i.e. after "ceph osd + require-osd-release octopus"). Otherwise it should be limited to read-only + workloads such as images mapped read-only everywhere or snapshots. + `rbd device unmap` options: * force - Force the unmapping of a block device that is open (since 4.9). 
The diff --git a/src/tools/rbd/action/Kernel.cc b/src/tools/rbd/action/Kernel.cc index 3ebb5bc1de40b..6013067ac9af2 100644 --- a/src/tools/rbd/action/Kernel.cc +++ b/src/tools/rbd/action/Kernel.cc @@ -64,6 +64,20 @@ static std::string map_option_int_cb(const char *value_char) return stringify(d); } +static std::string map_option_string_cb(const char *value_char) +{ + return value_char; +} + +static std::string map_option_read_from_replica_cb(const char *value_char) +{ + if (!strcmp(value_char, "no") || !strcmp(value_char, "balance") || + !strcmp(value_char, "localize")) { + return value_char; + } + return ""; +} + static void put_map_option(const std::string &key, const std::string &val) { map_options[key] = val; @@ -153,6 +167,14 @@ static int parse_map_options(const std::string &options_string) } else if (!strcmp(this_char, "alloc_size")) { if (put_map_option_value("alloc_size", value_char, map_option_int_cb)) return -EINVAL; + } else if (!strcmp(this_char, "crush_location")) { + if (put_map_option_value("crush_location", value_char, + map_option_string_cb)) + return -EINVAL; + } else if (!strcmp(this_char, "read_from_replica")) { + if (put_map_option_value("read_from_replica", value_char, + map_option_read_from_replica_cb)) + return -EINVAL; } else { std::cerr << "rbd: unknown map option '" << this_char << "'" << std::endl; return -EINVAL; -- 2.39.5