--- /dev/null
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2018 Red Hat <contact@redhat.com>
+#
+# Author: Josh Durgin <jdurgin@redhat.com>
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7100" # git grep '\<7100\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function test_log_size()
+{
+ PGID=$1
+ EXPECTED=$2
+ ceph tell osd.\* flush_pg_stats
+ sleep 3
+ ceph pg $PGID query | jq .info.stats.log_size
+ ceph pg $PGID query | jq .info.stats.log_size | grep "${EXPECTED}"
+}
+
+function do_repro_long_log() {
+ local dir=$1
+ local which=$2
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ ceph osd pool create test 1 1 || true
+ POOL_ID=$(ceph osd dump --format json | jq '.pools[] | select(.pool_name == "test") | .pool')
+ PGID="${POOL_ID}.0"
+
+ ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 20 || return 1
+ ceph tell osd.\* injectargs -- --osd-max-pg-log-entries 30 || return 1
+ ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 10 || return 1
+ ceph tell osd.\* injectargs -- --osd-pg-log-dups-tracked 10 || return 1
+
+ touch foo
+ for i in $(seq 1 20)
+ do
+ rados -p test put foo foo || return 1
+ done
+
+ test_log_size $PGID 20 || return 1
+
+ rados -p test rm foo || return 1
+
+ # generate error entries
+ for i in $(seq 1 20)
+ do
+ rados -p test rm foo
+ done
+
+ # this demonstrates the problem - it should fail
+ test_log_size $PGID 41 || return 1
+
+ if [ "$which" = "test1" ];
+ then
+ # regular write should trim the log
+ rados -p test put foo foo || return 1
+ test_log_size $PGID 22 || return 1
+ else
+ PRIMARY=$(ceph pg $PGID query | jq '.info.stats.up_primary')
+ kill_daemons $dir TERM osd.$PRIMARY || return 1
+ CEPH_ARGS="--osd-max-pg-log-entries=30 --osd-pg-log-trim-max=5" ceph-objectstore-tool --data-path $dir/$PRIMARY --pgid $PGID --op trim-pg-log || return 1
+ run_osd $dir $PRIMARY || return 1
+ wait_for_clean || return 1
+ test_log_size $PGID 30 || return 1
+ fi
+}
+
+function TEST_repro_long_log1()
+{
+ local dir=$1
+
+ do_repro_long_log $dir test1
+}
+
+function TEST_repro_long_log2()
+{
+ local dir=$1
+
+ do_repro_long_log $dir test2
+}
+
+main repro-long-log "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh repro_long_log.sh"
+# End:
return 0;
}
+int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
+ pg_info_t &info, const spg_t &pgid,
+ epoch_t map_epoch,
+ PastIntervals &past_intervals)
+{
+ ghobject_t oid = pgid.make_pgmeta_oid();
+ struct stat st;
+ auto ch = store->open_collection(coll);
+ int r = store->stat(ch, oid, &st);
+ assert(r == 0);
+ assert(st.st_size == 0);
+
+ cerr << "Log bounds are: " << "(" << info.log_tail << ","
+ << info.last_update << "]" << std::endl;
+
+ uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries;
+ if (info.last_update.version - info.log_tail.version <= max_entries) {
+ cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl;
+ return 0;
+ }
+
+ assert(info.last_update.version > max_entries);
+ version_t trim_to = info.last_update.version - max_entries;
+ size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max;
+ eversion_t new_tail;
+ bool done = false;
+
+ while (!done) {
+ // gather keys so we can delete them in a batch without
+ // affecting the iterator
+ set<string> keys_to_trim;
+ {
+ ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
+ if (!p)
+ break;
+ for (p->seek_to_first(); p->valid(); p->next(false)) {
+ if (p->key()[0] == '_')
+ continue;
+ if (p->key() == "can_rollback_to")
+ continue;
+ if (p->key() == "divergent_priors")
+ continue;
+ if (p->key() == "rollback_info_trimmed_to")
+ continue;
+ if (p->key() == "may_include_deletes_in_missing")
+ continue;
+ if (p->key().substr(0, 7) == string("missing"))
+ continue;
+ if (p->key().substr(0, 4) == string("dup_"))
+ continue;
+
+ bufferlist bl = p->value();
+ bufferlist::iterator bp = bl.begin();
+ pg_log_entry_t e;
+ try {
+ e.decode_with_checksum(bp);
+ } catch (const buffer::error &e) {
+ cerr << "Error reading pg log entry: " << e << std::endl;
+ }
+ if (debug) {
+ cerr << "read entry " << e << std::endl;
+ }
+ if (e.version.version > trim_to) {
+ done = true;
+ break;
+ }
+ keys_to_trim.insert(p->key());
+ new_tail = e.version;
+ if (keys_to_trim.size() >= trim_at_once)
+ break;
+ }
+
+ if (!p->valid())
+ done = true;
+ } // deconstruct ObjectMapIterator
+
+ // delete the keys
+ if (!dry_run && !keys_to_trim.empty()) {
+ cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
+ ObjectStore::Transaction t;
+ t.omap_rmkeys(coll, oid, keys_to_trim);
+ store->queue_transaction(ch, std::move(t));
+ ch->flush();
+ }
+ }
+
+ // update pg info with new tail
+ if (!dry_run && new_tail != eversion_t()) {
+ info.log_tail = new_tail;
+ ObjectStore::Transaction t;
+ int ret = write_info(t, map_epoch, info, past_intervals);
+ if (ret)
+ return ret;
+ store->queue_transaction(ch, std::move(t));
+ ch->flush();
+ }
+
+ // compact the db since we just removed a bunch of data
+ cerr << "Finished trimming, now compacting..." << std::endl;
+ if (!dry_run)
+ store->compact();
+ return 0;
+}
+
const int OMAP_BATCH_SIZE = 25;
void get_omap_batch(ObjectMap::ObjectMapIterator &iter, map<string, bufferlist> &oset)
{
("journal-path", po::value<string>(&jpath),
"path to journal, use if tool can't find it")
("pgid", po::value<string>(&pgidstr),
- "PG id, mandatory for info, log, remove, export, export-remove, mark-complete, and mandatory for apply-layout-settings if --pool is not specified")
+ "PG id, mandatory for info, log, remove, export, export-remove, mark-complete, trim-pg-log, and mandatory for apply-layout-settings if --pool is not specified")
("pool", po::value<string>(&pool),
"Pool name, mandatory for apply-layout-settings if --pgid is not specified")
("op", po::value<string>(&op),
"Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
- "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, apply-layout-settings, update-mon-db, dump-import]")
+ "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, apply-layout-settings, update-mon-db, dump-import, trim-pg-log]")
("epoch", po::value<unsigned>(&epoch),
"epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified")
("file", po::value<string>(&file),
// The ops which require --pgid option are checked here and
// mentioned in the usage for --pgid.
if ((op == "info" || op == "log" || op == "remove" || op == "export"
- || op == "export-remove" || op == "mark-complete") &&
+ || op == "export-remove" || op == "mark-complete"
+ || op == "trim-pg-log") &&
pgidstr.length() == 0) {
cerr << "Must provide pgid" << std::endl;
usage(desc);
// If not an object command nor any of the ops handled below, then output this usage
// before complaining about a bad pgid
- if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete") {
+ if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log") {
cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
- "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, dump-import)"
+ "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, dump-import, trim-pg-log)"
<< std::endl;
usage(desc);
ret = 1;
fs->queue_transaction(ch, std::move(*t));
}
cout << "Marking complete succeeded" << std::endl;
+ } else if (op == "trim-pg-log") {
+ ret = do_trim_pg_log(fs, coll, info, pgid,
+ map_epoch, past_intervals);
+ if (ret < 0) {
+ cerr << "Error trimming pg log: " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+ cout << "Finished trimming pg log" << std::endl;
+ goto out;
} else {
assert(!"Should have already checked for valid --op");
}