--- /dev/null
+#!/usr/bin/env bash
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7147" # git grep '\<7147\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON --mon_min_osdmap_epochs=50 --paxos_service_trim_min=10"
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ $func $dir || return 1
+ done
+}
+
+function TEST_import_after_merge_and_gap() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+
+ ceph osd pool create foo 2 || return 1
+ wait_for_clean || return 1
+ rados -p foo bench 3 write -b 1024 --no-cleanup || return 1
+
+ kill_daemons $dir TERM osd.0
+ ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.1 --file $dir/1.1 --force || return 1
+ ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1
+ activate_osd $dir 0 || return 1
+
+ ceph osd pool set foo pg_num 1
+ sleep 5
+ while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 2 ; do sleep 1 ; done
+ wait_for_clean || return 1
+
+ #
+ kill_daemons $dir TERM osd.0
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1
+ # this will import both halves of the original pg
+ ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1
+ ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1
+ activate_osd $dir 0 || return 1
+
+ wait_for_clean || return 1
+
+ # make a map gap
+ for f in `seq 1 50` ; do
+ ceph osd set nodown
+ ceph osd unset nodown
+ done
+ wait_for_clean || return 1
+ ceph osd down 0
+ sleep 3
+ wait_for_clean || return 1
+
+ kill_daemons $dir TERM osd.0
+
+ # this should fail.. 1.1 still doesn't exist
+ ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1
+
+ # this should not
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1
+ ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1
+ ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1
+
+ activate_osd $dir 0 || return 1
+
+ wait_for_clean || return 1
+}
+
+function TEST_import_after_split() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+
+ ceph osd pool create foo 1 || return 1
+ wait_for_clean || return 1
+ rados -p foo bench 3 write -b 1024 --no-cleanup || return 1
+
+ kill_daemons $dir TERM osd.0
+ ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1
+ activate_osd $dir 0 || return 1
+
+ ceph osd pool set foo pg_num 2
+ sleep 5
+ while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 1 ; do sleep 1 ; done
+ wait_for_clean || return 1
+
+ kill_daemons $dir TERM osd.0
+
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1
+
+ # this should fail because 1.1 (split child) is there
+ ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1
+
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.1 --force || return 1
+ # now it will work (1.1 is gone)
+ ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1
+
+ activate_osd $dir 0 || return 1
+
+ wait_for_clean || return 1
+}
+
+
+main pg-split-merge "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/pg-split-merge.sh"
+# End:
logging.debug(cmd)
call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
- # On import can't specify a different shard
- BADPG = ONEECPG.split('s')[0] + "s10"
- cmd = (CFSD_PREFIX + "--op import --pgid {pg} --file {file}").format(osd=ONEECOSD, pg=BADPG, file=OTHERFILE)
- ERRORS += test_failure(cmd, "Can't specify a different shard, must be")
-
os.unlink(OTHERFILE)
# Prep a valid export file for import failure tests
logging.debug(cmd)
call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
- # On import can't specify a PG with a non-existent pool
- cmd = (CFSD_PREFIX + "--op import --pgid {pg} --file {file}").format(osd=ONEOSD, pg="10.0", file=OTHERFILE)
- ERRORS += test_failure(cmd, "Can't specify a different pgid pool, must be")
-
- # On import can't specify shard for a replicated export
- cmd = (CFSD_PREFIX + "--op import --pgid {pg}s0 --file {file}").format(osd=ONEOSD, pg=ONEPG, file=OTHERFILE)
- ERRORS += test_failure(cmd, "Can't specify a sharded pgid with a non-sharded export")
-
- # On import can't specify a PG with a bad seed
+ # On import can't specify a different pgid than the file
TMPPG="{pool}.80".format(pool=REPID)
- cmd = (CFSD_PREFIX + "--op import --pgid {pg} --file {file}").format(osd=ONEOSD, pg=TMPPG, file=OTHERFILE)
- ERRORS += test_failure(cmd, "PG {pg} no longer exists".format(pg=TMPPG))
+ cmd = (CFSD_PREFIX + "--op import --pgid 12.dd --file {file}").format(osd=ONEOSD, pg=TMPPG, file=OTHERFILE)
+ ERRORS += test_failure(cmd, "specified pgid 12.dd does not match actual pgid")
os.unlink(OTHERFILE)
cmd = (CFSD_PREFIX + "--op import --file {FOO}").format(osd=ONEOSD, FOO=OTHERFILE)
kill_daemons()
# Now 2 PGs, poolid.0 and poolid.1
+ # make note of pgs before we remove the pgs...
+ osds = get_osds("{pool}.0".format(pool=SPLITID), OSDDIR);
for seed in range(2):
pg = "{pool}.{seed}".format(pool=SPLITID, seed=seed)
- which = 0
- for osd in get_osds(pg, OSDDIR):
+ for osd in osds:
cmd = (CFSD_PREFIX + "--force --op remove --pgid {pg}").format(pg=pg, osd=osd)
logging.debug(cmd)
ret = call(cmd, shell=True, stdout=nullfd)
- # This is weird. The export files are based on only the EXPORT_PG
- # and where that pg was before the split. Use 'which' to use all
- # export copies in import.
- mydir = os.path.join(TESTDIR, export_osds[which])
- fname = os.path.join(mydir, EXPORT_PG)
- which += 1
- cmd = (CFSD_PREFIX + "--op import --pgid {pg} --file {file}").format(osd=osd, pg=pg, file=fname)
- logging.debug(cmd)
- ret = call(cmd, shell=True, stdout=nullfd)
- if ret != 0:
- logging.error("Import failed from {file} with {ret}".format(file=file, ret=ret))
- IMP_ERRORS += 1
+ which = 0
+ for osd in osds:
+ # This is weird. The export files are based on only the EXPORT_PG
+ # and where that pg was before the split. Use 'which' to use all
+ # export copies in import.
+ mydir = os.path.join(TESTDIR, export_osds[which])
+ fname = os.path.join(mydir, EXPORT_PG)
+ which += 1
+ cmd = (CFSD_PREFIX + "--op import --pgid {pg} --file {file}").format(osd=osd, pg=EXPORT_PG, file=fname)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Import failed from {file} with {ret}".format(file=fname, ret=ret))
+ IMP_ERRORS += 1
ERRORS += IMP_ERRORS
OSDriver& driver,
SnapMapper& mapper,
coll_t coll,
- bufferlist &bl, OSDMap &curmap,
+ bufferlist &bl, OSDMap &origmap,
bool *skipped_objects)
{
ObjectStore::Transaction tran;
if (ob.hoid.hobj.nspace != g_ceph_context->_conf->osd_hit_set_namespace) {
object_t oid = ob.hoid.hobj.oid;
object_locator_t loc(ob.hoid.hobj);
- pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
- pg_t pgid = curmap.raw_pg_to_pg(raw_pgid);
+ pg_t raw_pgid = origmap.object_locator_to_pg(oid, loc);
+ pg_t pgid = origmap.raw_pg_to_pg(raw_pgid);
spg_t coll_pgid;
if (coll.is_pg(&coll_pgid) == false) {
}
int get_pg_metadata(ObjectStore *store, bufferlist &bl, metadata_section &ms,
- const OSDSuperblock& sb, OSDMap& curmap, spg_t pgid)
+ const OSDSuperblock& sb, spg_t pgid)
{
auto ebliter = bl.cbegin();
ms.decode(ebliter);
cout << std::endl;
cout << "osd current epoch " << sb.current_epoch << std::endl;
- formatter->open_object_section("current OSDMap");
- curmap.dump(formatter);
- formatter->close_section();
- formatter->flush(cout);
- cout << std::endl;
formatter->open_object_section("info");
ms.info.dump(formatter);
return -EINVAL;
}
- // Pool verified to exist for call to get_pg_num().
- unsigned new_pg_num = curmap.get_pg_num(pgid.pgid.pool());
-
- if (pgid.pgid.ps() >= new_pg_num) {
- cerr << "Illegal pgid, the seed is larger than current pg_num" << std::endl;
- return -EINVAL;
- }
-
// Old exports didn't include OSDMap
if (ms.osdmap.get_epoch() == 0) {
cerr << "WARNING: No OSDMap in old export, this is an ancient export."
" Not supported." << std::endl;
return -EINVAL;
}
+
if (ms.osdmap.get_epoch() < sb.oldest_map) {
cerr << "PG export's map " << ms.osdmap.get_epoch()
<< " is older than OSD's oldest_map " << sb.oldest_map << std::endl;
return -EINVAL;
}
}
- unsigned old_pg_num = ms.osdmap.get_pg_num(pgid.pgid.pool());
-
- if (debug) {
- cerr << "old_pg_num " << old_pg_num << std::endl;
- cerr << "new_pg_num " << new_pg_num << std::endl;
- cerr << ms.osdmap << std::endl;
- cerr << curmap << std::endl;
- }
-
- // If we have managed to have a good OSDMap we can do these checks
- if (old_pg_num) {
- if (old_pgid.pgid.ps() >= old_pg_num) {
- cerr << "FATAL: pgid invalid for original map epoch" << std::endl;
- return -EFAULT;
- }
- if (pgid.pgid.ps() >= old_pg_num) {
- cout << "NOTICE: Post split pgid specified" << std::endl;
- } else {
- spg_t parent(pgid);
- if (parent.is_split(old_pg_num, new_pg_num, NULL)) {
- cerr << "WARNING: Split occurred, some objects may be ignored" << std::endl;
- }
- }
- }
-
if (debug) {
cerr << "Import pgid " << ms.info.pgid << std::endl;
cerr << "Previous past_intervals " << ms.past_intervals << std::endl;
- cerr << "history.same_interval_since " << ms.info.history.same_interval_since << std::endl;
- }
-
- if (debug)
- cerr << "Changing pg epoch " << ms.map_epoch << " to " << sb.current_epoch << std::endl;
-
- // advance map and fill in PastIntervals
- if (ms.map_epoch < sb.current_epoch) {
- if (debug)
- cerr << "Advancing PG from " << ms.map_epoch << " to " << sb.current_epoch
- << std::endl;
-
- OSDMap *startmap = new OSDMap;
- startmap->deepish_copy_from(ms.osdmap);
- OSDMapRef lastmap(startmap);
-
- int up_primary, acting_primary;
- vector<int> up, acting;
- lastmap->pg_to_up_acting_osds(
- ms.info.pgid.pgid, &up, &up_primary, &acting, &acting_primary);
- cerr << "initial e" << lastmap->get_epoch()
- << " up " << up << "/" << up_primary
- << " acting " << acting << "/" << acting_primary
- << std::endl;
- while (ms.map_epoch < sb.current_epoch) {
- ++ms.map_epoch;
- if (ms.map_epoch < sb.oldest_map) {
- // cheat!
- ms.map_epoch = sb.oldest_map;
- }
-
- OSDMap *t = new OSDMap;
- bufferlist nextmap_bl;
- int ret = get_osdmap(store, ms.map_epoch, *t, nextmap_bl);
- OSDMapRef nextmap(t);
- if (ret < 0) {
- cerr << "cannot load osdmap " << ms.map_epoch << std::endl;
- return -EINVAL;
- }
-
- int new_up_primary, new_acting_primary;
- vector<int> new_up, new_acting;
- nextmap->pg_to_up_acting_osds(
- ms.info.pgid.pgid, &new_up, &new_up_primary, &new_acting,
- &new_acting_primary);
-
- cerr << "e" << nextmap->get_epoch()
- << " up " << up << "/" << up_primary
- << " acting " << acting << "/" << acting_primary
- << std::endl;
-
- // this is a bit imprecise, but sufficient?
- struct min_size_predicate_t : public IsPGRecoverablePredicate {
- const pg_pool_t *pi;
- bool operator()(const set<pg_shard_t> &have) const {
- return have.size() >= pi->min_size;
- }
- min_size_predicate_t(const pg_pool_t *i) : pi(i) {}
- } min_size_predicate(nextmap->get_pg_pool(ms.info.pgid.pgid.pool()));
-
-
- bool new_interval = PastIntervals::check_new_interval(
- acting_primary,
- new_acting_primary,
- acting, new_acting,
- up_primary,
- new_up_primary,
- up, new_up,
- ms.info.history.same_interval_since,
- ms.info.history.last_epoch_clean,
- nextmap,
- lastmap,
- ms.info.pgid.pgid.get_ancestor(
- lastmap->get_pg_num(ms.info.pgid.pgid.pool())),
- &min_size_predicate,
- &ms.past_intervals,
- &cerr);
- if (new_interval) {
- ms.info.history.same_interval_since = nextmap->get_epoch();
- if (up != new_up) {
- ms.info.history.same_up_since = nextmap->get_epoch();
- }
- if (acting_primary != new_acting_primary) {
- ms.info.history.same_primary_since = nextmap->get_epoch();
- }
- unsigned old_pg_num = lastmap->get_pg_num(ms.info.pgid.pgid.pool());
- unsigned new_pg_num = nextmap->get_pg_num(ms.info.pgid.pgid.pool());
- if (pgid.pgid.m_seed >= old_pg_num) {
- if (pgid.pgid.m_seed < new_pg_num) {
- ms.info.history.epoch_created = nextmap->get_epoch();
- ms.info.history.last_epoch_split = nextmap->get_epoch();
- }
- // we don't actually care about the ancestor splits because only
- // the latest split is recorded.
- } else {
- if (pgid.pgid.is_split(old_pg_num, new_pg_num, nullptr)) {
- ms.info.history.last_epoch_split = nextmap->get_epoch();
- }
- }
- up = new_up;
- acting = new_acting;
- up_primary = new_up_primary;
- acting_primary = new_acting_primary;
- }
- lastmap = nextmap;
- }
- if (debug)
- cerr << "new PastIntervals " << ms.past_intervals << std::endl;
+ cerr << "history.same_interval_since "
+ << ms.info.history.same_interval_since << std::endl;
}
return 0;
pg_begin pgb;
pgb.decode(ebliter);
spg_t pgid = pgb.pgid;
- spg_t orig_pgid = pgid;
if (pgidstr.length()) {
spg_t user_pgid;
// This succeeded in main() already
ceph_assert(ok);
if (pgid != user_pgid) {
- if (pgid.pool() != user_pgid.pool()) {
- cerr << "Can't specify a different pgid pool, must be " << pgid.pool() << std::endl;
- return -EINVAL;
- }
- if (pgid.is_no_shard() && !user_pgid.is_no_shard()) {
- cerr << "Can't specify a sharded pgid with a non-sharded export" << std::endl;
- return -EINVAL;
- }
- // Get shard from export information if not specified
- if (!pgid.is_no_shard() && user_pgid.is_no_shard()) {
- user_pgid.shard = pgid.shard;
- }
- if (pgid.shard != user_pgid.shard) {
- cerr << "Can't specify a different shard, must be " << pgid.shard << std::endl;
- return -EINVAL;
- }
- pgid = user_pgid;
+ cerr << "specified pgid " << user_pgid
+ << " does not match actual pgid " << pgid << std::endl;
+ return -EINVAL;
}
}
cerr << "Exported features: " << pgb.superblock.compat_features << std::endl;
}
- // Special case: Old export has SHARDS incompat feature on replicated pg, remove it
+ // Special case: Old export has SHARDS incompat feature on replicated pg, remove it
if (pgid.is_no_shard())
pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
return 11; // Positive return means exit status
}
- // Don't import if pool no longer exists
+ // we need the latest OSDMap to check for collisions
OSDMap curmap;
bufferlist bl;
ret = get_osdmap(store, sb.current_epoch, curmap, bl);
if (ret) {
- cerr << "Can't find local OSDMap" << std::endl;
+ cerr << "Can't find latest local OSDMap " << sb.current_epoch << std::endl;
return ret;
}
- if (!curmap.have_pg_pool(pgid.pgid.m_pool)) {
- cerr << "Pool " << pgid.pgid.m_pool << " no longer exists" << std::endl;
- // Special exit code for this error, used by test code
- return 10; // Positive return means exit status
- }
-
- const pg_pool_t *pi = curmap.get_pg_pool(pgid.pgid.m_pool);
- if (pi->get_pg_num() <= pgid.pgid.m_seed) {
- cerr << "PG " << pgid.pgid << " no longer exists" << std::endl;
- // Special exit code for this error, used by test code
- return 12; // Positive return means exit status
- }
pool_pg_num_history_t pg_num_history;
get_pg_num_history(store, &pg_num_history);
ghobject_t pgmeta_oid = pgid.make_pgmeta_oid();
- //Check for PG already present.
+ // Check for PG already present.
coll_t coll(pgid);
if (store->collection_exists(coll)) {
cerr << "pgid " << pgid << " already exists" << std::endl;
SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pgid.shard);
cout << "Importing pgid " << pgid;
- if (orig_pgid != pgid) {
- cout << " exported as " << orig_pgid;
- }
cout << std::endl;
bool done = false;
switch(type) {
case TYPE_OBJECT_BEGIN:
ceph_assert(found_metadata);
- ret = get_object(store, driver, mapper, coll, ebl, curmap, &skipped_objects);
+ ret = get_object(store, driver, mapper, coll, ebl, ms.osdmap,
+ &skipped_objects);
if (ret) return ret;
break;
case TYPE_PG_METADATA:
- ret = get_pg_metadata(store, ebl, ms, sb, curmap, pgid);
+ ret = get_pg_metadata(store, ebl, ms, sb, pgid);
if (ret) return ret;
found_metadata = true;
- // make sure there are no conflicting merges
- {
+ if (pgid != ms.info.pgid) {
+ cerr << "specified pgid " << pgid << " does not match import file pgid "
+ << ms.info.pgid << std::endl;
+ return -EINVAL;
+ }
+
+ // make sure there are no conflicting splits or merges
+ if (ms.osdmap.have_pg_pool(pgid.pgid.pool())) {
auto p = pg_num_history.pg_nums.find(pgid.pgid.m_pool);
- if (p != pg_num_history.pg_nums.end()) {
- unsigned pg_num = ms.osdmap.get_pg_num(pgid.pgid.m_pool);
+ if (p != pg_num_history.pg_nums.end() &&
+ !p->second.empty()) {
+ unsigned pg_num = ms.osdmap.get_pg_num(pgid.pgid.pool());
for (auto q = p->second.lower_bound(ms.map_epoch);
q != p->second.end();
++q) {
- pg_t parent;
- if (pgid.pgid.is_merge_source(pg_num, q->second, &parent)) {
- cerr << "PG " << pgid.pgid << " merge source in epoch "
- << q->first << " pg_num " << pg_num
- << " -> " << q->second << std::endl;
- return 12;
+ unsigned new_pg_num = q->second;
+ cout << "pool " << pgid.pgid.pool() << " pg_num " << pg_num
+ << " -> " << new_pg_num << std::endl;
+
+ // check for merge target
+ spg_t target;
+ if (pgid.is_merge_source(pg_num, new_pg_num, &target)) {
+ // FIXME: this check assumes the OSD's PG is at the OSD's
+ // map epoch; it could be, say, at *our* epoch, pre-merge.
+ coll_t coll(target);
+ if (store->collection_exists(coll)) {
+ cerr << "pgid " << pgid << " merges to target " << target
+ << " which already exists" << std::endl;
+ return 12;
+ }
}
- if (pgid.pgid.is_merge_target(pg_num, q->second)) {
- cerr << "PG " << pgid.pgid << " merge target in epoch "
- << q->first << " pg_num " << pg_num
- << " -> " << q->second << std::endl;
- return 12;
+
+ // check for split children
+ set<spg_t> children;
+ if (pgid.is_split(pg_num, new_pg_num, &children)) {
+ for (auto child : children) {
+ coll_t coll(child);
+ if (store->collection_exists(coll)) {
+ cerr << "pgid " << pgid << " splits to " << children
+ << " and " << child << " exists" << std::endl;
+ return 12;
+ }
+ }
}
- pg_num = q->second;
+ pg_num = new_pg_num;
}
}
+ } else {
+ cout << "pool " << pgid.pgid.pool() << " doesn't exist, not checking"
+ << " for splits or mergers" << std::endl;
}
if (!dry_run) {
ObjectStore::Transaction t;
ch = store->create_new_collection(coll);
- PG::_create(t, pgid,
- pgid.get_split_bits(curmap.get_pg_pool(pgid.pool())->get_pg_num()));
+ PG::_create(
+ t, pgid,
+ pgid.get_split_bits(ms.osdmap.get_pg_pool(pgid.pool())->get_pg_num()));
PG::_init(t, pgid, NULL);
// mark this coll for removal until we're done
ObjectStore::Transaction t;
if (!dry_run) {
pg_log_t newlog, reject;
- pg_log_t::filter_log(pgid, curmap, g_ceph_context->_conf->osd_hit_set_namespace,
+ pg_log_t::filter_log(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
ms.log, newlog, reject);
if (debug) {
for (list<pg_log_entry_t>::iterator i = newlog.log.begin();
}
divergent_priors_t newdp, rejectdp;
- filter_divergent_priors(pgid, curmap, g_ceph_context->_conf->osd_hit_set_namespace,
+ filter_divergent_priors(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
ms.divergent_priors, newdp, rejectdp);
ms.divergent_priors = newdp;
if (debug) {
ceph_assert(!obj.is_temp());
object_t oid = obj.oid;
object_locator_t loc(obj);
- pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
- pg_t _pgid = curmap.raw_pg_to_pg(raw_pgid);
+ pg_t raw_pgid = ms.osdmap.object_locator_to_pg(oid, loc);
+ pg_t _pgid = ms.osdmap.raw_pg_to_pg(raw_pgid);
return pgid.pgid != _pgid;
});