From a4439f0fccd5d5770d07ebf134980291eeeba4e3 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Sun, 25 Nov 2012 13:54:08 -0800
Subject: [PATCH] osd: move tmapup into a helper

Signed-off-by: Sage Weil
---
Review notes (below the "---" marker, so not part of the commit message):

 * do_tmapup() now returns error codes directly; the leftover "result"
   plumbing and the unreachable "break" after "return -EEXIST" are gone.
 * Decoding of the stored tmap and of the replacement header is now
   guarded like the other decodes and fails the op with -EINVAL.
 * do_tmapup_slow() is declared in ReplicatedPG.h but is not defined by
   this patch.
 * The blob ids on the "index" lines predate these review changes.

 src/osd/ReplicatedPG.cc | 395 ++++++++++++++++++++++------------------
 src/osd/ReplicatedPG.h  |   4 +
 2 files changed, 222 insertions(+), 177 deletions(-)

diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 3dd5e9566d626..5529d0a4d5c5d 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -1646,6 +1646,223 @@ void ReplicatedPG::remove_watchers_and_notifies()
 // ========================================================================
 // low level osd ops
 
+/*
+ * Apply a client tmap update (CEPH_OSD_OP_TMAPUP) to the object.
+ *
+ * bp points at the update stream: an optional CEPH_OSD_TMAP_HDR byte
+ * followed by a replacement header, then zero or more commands, each an
+ * op byte and a key, plus a value for SET and CREATE.  The existing
+ * object is read in full via do_osd_ops(), merged with the commands in
+ * one forward pass, and written back with CEPH_OSD_OP_WRITEFULL.
+ *
+ * Returns 0 on success (an empty stream is a successful no-op),
+ * -EINVAL if the stream or the stored tmap fails to decode, -EEXIST if
+ * CREATE names an existing key, -ENOENT if RM names a missing key.  No
+ * write is issued on any error path.
+ */
+int ReplicatedPG::do_tmapup(OpContext *ctx, bufferlist::iterator& bp, OSDOp& osd_op)
+{
+  if (bp.end()) {
+    dout(10) << "tmapup is a no-op" << dendl;
+    return 0;
+  }
+
+  // read the whole object
+  vector<OSDOp> nops(1);
+  OSDOp& newop = nops[0];
+  newop.op.op = CEPH_OSD_OP_READ;
+  newop.op.extent.offset = 0;
+  newop.op.extent.length = 0;
+  // NOTE(review): the result of this read is ignored, exactly as in the
+  // code this was moved from -- presumably so that a missing object is
+  // handled as an empty tmap (zero-length outdata below).  Confirm.
+  do_osd_ops(ctx, nops);
+
+  dout(10) << "tmapup read " << newop.outdata.length() << dendl;
+
+  dout(30) << " starting is \n";
+  newop.outdata.hexdump(*_dout);
+  *_dout << dendl;
+
+  bufferlist::iterator ip = newop.outdata.begin();
+  bufferlist obl;
+
+  dout(30) << "the update command is: \n";
+  osd_op.indata.hexdump(*_dout);
+  *_dout << dendl;
+
+  // header
+  bufferlist header;
+  __u32 nkeys = 0;
+  if (newop.outdata.length()) {
+    // the stored bytes may not be a well-formed tmap; fail the op
+    // instead of letting the decode exception escape
+    try {
+      ::decode(header, ip);
+      ::decode(nkeys, ip);
+    }
+    catch (buffer::error& e) {
+      dout(10) << "tmapup failed to decode existing header" << dendl;
+      return -EINVAL;
+    }
+  }
+  dout(10) << "tmapup header " << header.length() << dendl;
+
+  if (!bp.end() && *bp == CEPH_OSD_TMAP_HDR) {
+    ++bp;
+    try {
+      ::decode(header, bp);
+    }
+    catch (buffer::error& e) {
+      return -EINVAL;
+    }
+    dout(10) << "tmapup new header " << header.length() << dendl;
+  }
+
+  ::encode(header, obl);
+
+  dout(20) << "tmapup initial nkeys " << nkeys << dendl;
+
+  // update keys
+  bufferlist newkeydata;
+  string nextkey;
+  bufferlist nextval;
+  bool have_next = false;
+  if (!ip.end()) {
+    have_next = true;
+    try {
+      ::decode(nextkey, ip);
+      ::decode(nextval, ip);
+    }
+    catch (buffer::error& e) {
+      dout(10) << "tmapup failed to decode existing key/value" << dendl;
+      return -EINVAL;
+    }
+  }
+  while (!bp.end()) {
+    __u8 op;
+    string key;
+    try {
+      ::decode(op, bp);
+      ::decode(key, bp);
+    }
+    catch (buffer::error& e) {
+      return -EINVAL;
+    }
+
+    dout(10) << "tmapup op " << (int)op << " key " << key << dendl;
+
+    // skip existing intervening keys
+    bool key_exists = false;
+    while (have_next && !key_exists) {
+      dout(20) << " (have_next=" << have_next << " nextkey=" << nextkey << ")" << dendl;
+      if (nextkey > key)
+        break;
+      if (nextkey < key) {
+        // copy untouched.
+        ::encode(nextkey, newkeydata);
+        ::encode(nextval, newkeydata);
+        dout(20) << " keep " << nextkey << " " << nextval.length() << dendl;
+      } else {
+        // don't copy; discard old value. and stop.
+        dout(20) << " drop " << nextkey << " " << nextval.length() << dendl;
+        key_exists = true;
+        nkeys--;
+      }
+      if (!ip.end()) {
+        try {
+          ::decode(nextkey, ip);
+          ::decode(nextval, ip);
+        }
+        catch (buffer::error& e) {
+          dout(10) << "tmapup failed to decode existing key/value" << dendl;
+          return -EINVAL;
+        }
+      } else {
+        have_next = false;
+      }
+    }
+
+    if (op == CEPH_OSD_TMAP_SET) {
+      bufferlist val;
+      try {
+        ::decode(val, bp);
+      }
+      catch (buffer::error& e) {
+        return -EINVAL;
+      }
+      ::encode(key, newkeydata);
+      ::encode(val, newkeydata);
+      dout(20) << " set " << key << " " << val.length() << dendl;
+      nkeys++;
+    } else if (op == CEPH_OSD_TMAP_CREATE) {
+      if (key_exists) {
+        return -EEXIST;
+      }
+      bufferlist val;
+      try {
+        ::decode(val, bp);
+      }
+      catch (buffer::error& e) {
+        return -EINVAL;
+      }
+      ::encode(key, newkeydata);
+      ::encode(val, newkeydata);
+      dout(20) << " create " << key << " " << val.length() << dendl;
+      nkeys++;
+    } else if (op == CEPH_OSD_TMAP_RM) {
+      if (!key_exists) {
+        return -ENOENT;
+      }
+      // do nothing.
+    }
+  }
+
+  // copy remaining
+  if (have_next) {
+    ::encode(nextkey, newkeydata);
+    ::encode(nextval, newkeydata);
+    dout(20) << " keep " << nextkey << " " << nextval.length() << dendl;
+  }
+  if (!ip.end()) {
+    bufferlist rest;
+    rest.substr_of(newop.outdata, ip.get_off(), newop.outdata.length() - ip.get_off());
+    dout(20) << " keep trailing " << rest.length()
+             << " at " << newkeydata.length() << dendl;
+    newkeydata.claim_append(rest);
+  }
+
+  // encode final key count + key data
+  dout(20) << "tmapup final nkeys " << nkeys << dendl;
+  ::encode(nkeys, obl);
+  obl.claim_append(newkeydata);
+
+  if (0) {
+    dout(30) << " final is \n";
+    obl.hexdump(*_dout);
+    *_dout << dendl;
+
+    // sanity check
+    bufferlist::iterator tp = obl.begin();
+    bufferlist h;
+    ::decode(h, tp);
+    map<string,bufferlist> d;
+    ::decode(d, tp);
+    assert(tp.end());
+    dout(0) << " **** debug sanity check, looks ok ****" << dendl;
+  }
+
+  // write it out
+  dout(20) << "tmapput write " << obl.length() << dendl;
+  newop.op.op = CEPH_OSD_OP_WRITEFULL;
+  newop.op.extent.offset = 0;
+  newop.op.extent.length = obl.length();
+  newop.indata = obl;
+  do_osd_ops(ctx, nops);
+  osd_op.outdata.claim(newop.outdata);
+  return 0;
+}
+
 int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
 {
   int result = 0;
@@ -2405,183 +2622,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
       break;
 
     case CEPH_OSD_OP_TMAPUP:
-      if (bp.end()) {
-        dout(10) << "tmapup is a no-op" << dendl;
-      } else {
-        // read the whole object
-        vector<OSDOp> nops(1);
-        OSDOp& newop = nops[0];
-        newop.op.op = CEPH_OSD_OP_READ;
-        newop.op.extent.offset = 0;
-        newop.op.extent.length = 0;
-        do_osd_ops(ctx, nops);
-
-        dout(10) << "tmapup read " << newop.outdata.length() << dendl;
-
-        dout(30) << " starting is \n";
-        newop.outdata.hexdump(*_dout);
-        *_dout << dendl;
-
-        bufferlist::iterator ip = newop.outdata.begin();
-        bufferlist obl;
-
-        dout(30) << "the update command is: \n";
-        osd_op.indata.hexdump(*_dout);
-        *_dout << dendl;
-
-        // header
-        bufferlist header;
-        __u32 nkeys = 0;
-        if (newop.outdata.length()) {
-          ::decode(header, ip);
-          ::decode(nkeys, ip);
-        }
-        dout(10) << "tmapup header " << header.length() << dendl;
-
-        if (!bp.end() && *bp == CEPH_OSD_TMAP_HDR) {
-          ++bp;
-          ::decode(header, bp);
-          dout(10) << "tmapup new header " << header.length() << dendl;
-        }
-
-        ::encode(header, obl);
-
-        dout(20) << "tmapup initial nkeys " << nkeys << dendl;
-
-        // update keys
-        bufferlist newkeydata;
-        string nextkey;
-        bufferlist nextval;
-        bool have_next = false;
-        if (!ip.end()) {
-          have_next = true;
-          ::decode(nextkey, ip);
-          ::decode(nextval, ip);
-        }
-        result = 0;
-        while (!bp.end() && !result) {
-          __u8 op;
-          string key;
-          try {
-            ::decode(op, bp);
-            ::decode(key, bp);
-          }
-          catch (buffer::error& e) {
-            result = -EINVAL;
-            goto fail;
-          }
-
-          dout(10) << "tmapup op " << (int)op << " key " << key << dendl;
-
-          // skip existing intervening keys
-          bool key_exists = false;
-          while (have_next && !key_exists) {
-            dout(20) << " (have_next=" << have_next << " nextkey=" << nextkey << ")" << dendl;
-            if (nextkey > key)
-              break;
-            if (nextkey < key) {
-              // copy untouched.
-              ::encode(nextkey, newkeydata);
-              ::encode(nextval, newkeydata);
-              dout(20) << " keep " << nextkey << " " << nextval.length() << dendl;
-            } else {
-              // don't copy; discard old value. and stop.
-              dout(20) << " drop " << nextkey << " " << nextval.length() << dendl;
-              key_exists = true;
-              nkeys--;
-            }
-            if (!ip.end()) {
-              ::decode(nextkey, ip);
-              ::decode(nextval, ip);
-            } else {
-              have_next = false;
-            }
-          }
-
-          if (op == CEPH_OSD_TMAP_SET) {
-            bufferlist val;
-            try {
-              ::decode(val, bp);
-            }
-            catch (buffer::error& e) {
-              result = -EINVAL;
-              goto fail;
-            }
-            ::encode(key, newkeydata);
-            ::encode(val, newkeydata);
-            dout(20) << " set " << key << " " << val.length() << dendl;
-            nkeys++;
-          } else if (op == CEPH_OSD_TMAP_CREATE) {
-            if (key_exists) {
-              result = -EEXIST;
-              break;
-            }
-            bufferlist val;
-            try {
-              ::decode(val, bp);
-            }
-            catch (buffer::error& e) {
-              result = -EINVAL;
-              goto fail;
-            }
-            ::encode(key, newkeydata);
-            ::encode(val, newkeydata);
-            dout(20) << " create " << key << " " << val.length() << dendl;
-            nkeys++;
-          } else if (op == CEPH_OSD_TMAP_RM) {
-            if (!key_exists) {
-              result = -ENOENT;
-              break;
-            }
-            // do nothing.
-          }
-        }
-
-        // copy remaining
-        if (have_next) {
-          ::encode(nextkey, newkeydata);
-          ::encode(nextval, newkeydata);
-          dout(20) << " keep " << nextkey << " " << nextval.length() << dendl;
-        }
-        if (!ip.end()) {
-          bufferlist rest;
-          rest.substr_of(newop.outdata, ip.get_off(), newop.outdata.length() - ip.get_off());
-          dout(20) << " keep trailing " << rest.length()
-                   << " at " << newkeydata.length() << dendl;
-          newkeydata.claim_append(rest);
-        }
-
-        // encode final key count + key data
-        dout(20) << "tmapup final nkeys " << nkeys << dendl;
-        ::encode(nkeys, obl);
-        obl.claim_append(newkeydata);
-
-        if (0) {
-          dout(30) << " final is \n";
-          obl.hexdump(*_dout);
-          *_dout << dendl;
-
-          // sanity check
-          bufferlist::iterator tp = obl.begin();
-          bufferlist h;
-          ::decode(h, tp);
-          map<string,bufferlist> d;
-          ::decode(d, tp);
-          assert(tp.end());
-          dout(0) << " **** debug sanity check, looks ok ****" << dendl;
-        }
-
-        // write it out
-        if (!result) {
-          dout(20) << "tmapput write " << obl.length() << dendl;
-          newop.op.op = CEPH_OSD_OP_WRITEFULL;
-          newop.op.extent.offset = 0;
-          newop.op.extent.length = obl.length();
-          newop.indata = obl;
-          do_osd_ops(ctx, nops);
-          osd_op.outdata.claim(newop.outdata);
-        }
-      }
+      result = do_tmapup(ctx, bp, osd_op);
       break;
 
       // OMAP Read ops
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index 3cf3d97289c80..204a4623f147f 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -960,6 +960,10 @@ public:
   RepGather *trim_object(const hobject_t &coid, const snapid_t &sn);
   void snap_trimmer();
   int do_osd_ops(OpContext *ctx, vector<OSDOp>& ops);
+
+  int do_tmapup(OpContext *ctx, bufferlist::iterator& bp, OSDOp& osd_op);
+  int do_tmapup_slow(OpContext *ctx, bufferlist::iterator& bp, OSDOp& osd_op);
+
   void do_osd_op_effects(OpContext *ctx);
 private:
   bool temp_created;
-- 
2.47.3