OPTION(osd_min_pg_log_entries, OPT_U32) // number of entries to keep in the pg log when trimming it
OPTION(osd_max_pg_log_entries, OPT_U32) // max entries, say when degraded, before we trim
+OPTION(osd_pg_log_dups_tracked, OPT_U32) // how many versions back to track for duplicate-op detection, counted across the pg log's regular entries plus its dup entries
OPTION(osd_force_recovery_pg_log_entries_factor, OPT_FLOAT) // max entries factor before force recovery
OPTION(osd_pg_log_trim_min, OPT_U32)
OPTION(osd_op_complaint_time, OPT_FLOAT) // how many seconds old makes an op complaint-worthy
.set_description(""),
Option("osd_min_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
- .set_default(3000)
- .set_description(""),
+ .set_default(1500)
+ .set_description("minimum number of entries to maintain in the PG log")
+ .add_service("osd")
+ .add_see_also("osd_max_pg_log_entries")
+ .add_see_also("osd_pg_log_dups_tracked"),
Option("osd_max_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(10000)
- .set_description(""),
+ .set_description("maximum number of entries to maintain in the PG log when degraded before we trim")
+ .add_service("osd")
+ .add_see_also("osd_min_pg_log_entries")
+ .add_see_also("osd_pg_log_dups_tracked"),
+
+ Option("osd_pg_log_dups_tracked", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+ .set_default(3000)
+ .set_description("how many versions back to track in order to detect duplicate ops; this is combined with both the regular pg log entries and additional minimal dup detection entries")
+ .add_service("osd")
+ .add_see_also("osd_min_pg_log_entries")
+ .add_see_also("osd_max_pg_log_entries"),
Option("osd_force_recovery_pg_log_entries_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(1.3)
}
#endif
+
void PGPool::update(OSDMapRef map)
{
const pg_pool_t *pi = map->get_pg_pool(id);
dirty_info(false), dirty_big_info(false),
info(p),
info_struct_v(0),
- coll(p), pg_log(cct),
+ coll(p),
+ pg_log(cct),
pgmeta_oid(p.make_pgmeta_oid()),
missing_loc(this),
past_intervals(
auto last = logv.rbegin();
if (is_primary() && last != logv.rend()) {
projected_log.skip_can_rollback_to_to_head();
- projected_log.trim(cct, last->version, nullptr);
+ projected_log.trim(cct, last->version, nullptr, nullptr, nullptr);
}
if (transaction_applied && roll_forward_to > pg_log.get_can_rollback_to()) {
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
+ * License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
- *
+ *
*/
#include "PGLog.h"
PGLog::IndexedLog *target)
{
unindex();
- *target = pg_log_t::split_out_child(child_pgid, split_bits);
+ *target = IndexedLog(pg_log_t::split_out_child(child_pgid, split_bits));
index();
target->index();
reset_rollback_info_trimmed_to_riter();
CephContext* cct,
eversion_t s,
set<eversion_t> *trimmed,
- set<string> *trimmed_dups)
+ set<string>* trimmed_dups,
+ bool* dirty_dups)
{
if (complete_to != log.end() &&
complete_to->version <= s) {
assert(s <= can_rollback_to);
+ auto earliest_dup_version =
+ log.rbegin()->version.version < cct->_conf->osd_pg_log_dups_tracked
+ ? 0u
+ : log.rbegin()->version.version - cct->_conf->osd_pg_log_dups_tracked;
+
while (!log.empty()) {
- pg_log_entry_t &e = *log.begin();
+ const pg_log_entry_t &e = *log.begin();
if (e.version > s)
break;
generic_dout(20) << "trim " << e << dendl;
unindex(e); // remove from index,
// add to dup list
- if (e.version.version + 1000 > s.version) {
- dirty_dups = true;
+ if (e.version.version >= earliest_dup_version) {
+ if (dirty_dups) *dirty_dups = true;
dups.push_back(pg_log_dup_t(e));
- dup_index[e.reqid] = &(dups.back());
+ index(dups.back());
for (const auto& extra : e.extra_reqids) {
+ // note: extras have the same version as outer op
dups.push_back(pg_log_dup_t(e.version, extra.second,
extra.first, e.return_code));
- dup_index[extra->first] = &(dups.back());
+ index(dups.back());
}
}
}
while (!dups.empty()) {
- auto &e = *dups.begin();
- if (e.version.version + 1000 > s.version)
+ const auto& e = *dups.begin();
+ if (e.version.version >= earliest_dup_version)
break;
generic_dout(20) << "trim dup " << e << dendl;
if (trimmed_dups)
trimmed_dups->insert(e.get_key_name());
- dup_index.erase(e.reqid);
+ if (indexed_data & PGLOG_INDEXED_DUPS) {
+ dup_index.erase(e.reqid);
+ }
dups.pop_front();
}
for (list<pg_log_entry_t>::const_iterator p = log.begin();
p != log.end();
++p) {
- out << *p << " " << (logged_object(p->soid) ? "indexed":"NOT INDEXED") << std::endl;
+ out << *p << " " <<
+ (logged_object(p->soid) ? "indexed" : "NOT INDEXED") <<
+ std::endl;
assert(!p->reqid_is_indexed() || logged_req(p->reqid));
}
+
+ for (list<pg_log_dup_t>::const_iterator p = dups.begin();
+ p != dups.end();
+ ++p) {
+ out << *p << std::endl;
+ }
+
return out;
}
assert(trim_to <= info.last_complete);
dout(10) << "trim " << log << " to " << trim_to << dendl;
- log.trim(cct, trim_to, &trimmed, &trimmed_dups);
+ log.trim(cct, trim_to, &trimmed, &trimmed_dups, &dirty_dups);
info.log_tail = log.tail;
}
}
} else {
oinfo.last_complete = oinfo.last_update;
}
-}
+} // proc_replica_log
/**
* rewind divergent entries at the head of the log
pg_info_t &info, LogEntryHandler *rollbacker,
bool &dirty_info, bool &dirty_big_info)
{
- dout(10) << "rewind_divergent_log truncate divergent future " << newhead << dendl;
+ dout(10) << "rewind_divergent_log truncate divergent future " <<
+ newhead << dendl;
if (info.last_complete > newhead)
// splice into our log.
log.log.splice(log.log.begin(),
olog.log, from, to);
-
+
info.log_tail = log.tail = olog.tail;
changed = true;
}
// extend on head?
if (olog.head > log.head) {
dout(10) << "merge_log extending head to " << olog.head << dendl;
-
+
// find start point in olog
list<pg_log_entry_t>::iterator to = olog.log.end();
list<pg_log_entry_t>::iterator from = olog.log.end();
changed = true;
}
-
- dout(10) << "merge_log result " << log << " " << missing << " changed=" << changed << dendl;
+
+ // now handle dups
+ if (merge_log_dups(olog)) {
+ dirty_dups = true;
+ changed = true;
+ }
+
+ dout(10) << "merge_log result " << log << " " << missing <<
+ " changed=" << changed << dendl;
if (changed) {
dirty_info = true;
}
}
+
+/**
+ * Merge the dup-detection entries of another log into log.dups.
+ *
+ * log.dups is kept ordered oldest to newest.  Entries of olog.dups are
+ * copied in (and indexed) when they are newer than our newest dup or
+ * older than our oldest dup; entries inside the range we already hold
+ * are left untouched.  Afterwards every dup whose version falls inside
+ * the regular pg log is removed again.
+ *
+ * @param olog  the other log whose dups are merged in; not modified
+ * @return true if any changes were made to log.dups
+ */
+bool PGLog::merge_log_dups(const pg_log_t& olog) {
+  bool changed = false;
+
+  if (!olog.dups.empty()) {
+    if (log.dups.empty()) {
+      dout(10) << "merge_log copying olog dups to log " <<
+	olog.dups.front().version << " to " <<
+	olog.dups.back().version << dendl;
+      changed = true;
+      // since our log.dups is empty just copy them
+      for (const auto& i : olog.dups) {
+	log.dups.push_back(i);
+	log.index(log.dups.back());
+      }
+    } else {
+      // since our log.dups is not empty try to extend on each end
+
+      if (olog.dups.back().version > log.dups.back().version) {
+	// extend the dups's tail (i.e., newer dups)
+	dout(10) << "merge_log extending dups tail to " <<
+	  olog.dups.back().version << dendl;
+	changed = true;
+
+	auto log_tail_version = log.dups.back().version;
+
+	// walk olog from newest to oldest, always inserting in front of the
+	// previously inserted element so the result stays oldest-to-newest
+	auto insert_cursor = log.dups.end();
+	for (auto i = olog.dups.crbegin(); i != olog.dups.crend(); ++i) {
+	  if (i->version <= log_tail_version) break;
+	  log.dups.insert(insert_cursor, *i);
+
+	  auto prev = insert_cursor;
+	  --prev;
+	  // be sure to pass reference of copy in log.dups
+	  log.index(*prev);
+
+	  --insert_cursor; // make sure we insert in reverse order
+	}
+      }
+
+      if (olog.dups.front().version < log.dups.front().version) {
+	// extend the dups's head (i.e., older dups)
+	dout(10) << "merge_log extending dups head to " <<
+	  olog.dups.front().version << dendl;
+	changed = true;
+
+	// insert_cursor stays on our original oldest entry; each older olog
+	// entry is inserted directly in front of it, preserving order
+	auto insert_cursor = log.dups.begin();
+	for (auto i = olog.dups.cbegin(); i != olog.dups.cend(); ++i) {
+	  if (i->version >= insert_cursor->version) break;
+	  log.dups.insert(insert_cursor, *i);
+	  auto prev = insert_cursor;
+	  --prev;
+	  // be sure to pass address of copy in log.dups
+	  log.index(*prev);
+	}
+      }
+    }
+  }
+
+  // remove any dup entries that overlap with pglog; the pg log covers
+  // (tail, head], so a dup at exactly log.tail is NOT in the log and
+  // must be kept -- only versions strictly newer than the tail overlap
+  if (!log.dups.empty() && log.dups.back().version > log.tail) {
+    dout(10) << "merge_log removed dups overlapping log entries (" <<
+      log.tail << "," << log.dups.back().version << "]" << dendl;
+    changed = true;
+
+    while (!log.dups.empty() && log.dups.back().version > log.tail) {
+      log.unindex(log.dups.back());
+      log.dups.pop_back();
+    }
+  }
+
+  return changed;
+}
+
void PGLog::check() {
if (!pg_log_debug)
return;
}
}
+// non-static
void PGLog::write_log_and_missing(
ObjectStore::Transaction& t,
map<string,bufferlist> *km,
- const coll_t& coll, const ghobject_t &log_oid,
+ const coll_t& coll,
+ const ghobject_t &log_oid,
bool require_rollback)
{
if (is_dirty()) {
!touched_log,
require_rollback,
clear_divergent_priors,
+ dirty_dups,
&rebuilt_missing_with_deletes,
- (pg_log_debug ? &log_keys_debug : 0));
+ (pg_log_debug ? &log_keys_debug : nullptr));
undirty();
} else {
dout(10) << "log is not dirty" << dendl;
}
}
+// static
+// Convenience wrapper around _write_log_and_missing_wo_missing(): it passes
+// empty set<eversion_t> and set<string> arguments, sets the two flags that
+// precede require_rollback (dirty_divergent_priors and touch_log in the
+// callee's signature) to true, forwards dirty_dups unchanged, and supplies
+// no log_keys_debug set.
void PGLog::write_log_and_missing_wo_missing(
ObjectStore::Transaction& t,
map<string,bufferlist> *km,
pg_log_t &log,
const coll_t& coll, const ghobject_t &log_oid,
map<eversion_t, hobject_t> &divergent_priors,
- bool require_rollback)
+ bool require_rollback,
+ bool dirty_dups)
{
_write_log_and_missing_wo_missing(
t, km, log, coll, log_oid,
divergent_priors, eversion_t::max(), eversion_t(), eversion_t(),
set<eversion_t>(),
- true, true, require_rollback, 0);
+ set<string>(),
+ true, true, require_rollback, dirty_dups, nullptr);
}
+// static
void PGLog::write_log_and_missing(
ObjectStore::Transaction& t,
map<string,bufferlist> *km,
const ghobject_t &log_oid,
const pg_missing_tracker_t &missing,
bool require_rollback,
+ bool dirty_dups,
bool *rebuilt_missing_with_deletes)
{
_write_log_and_missing(
eversion_t(),
eversion_t(),
set<eversion_t>(),
+ set<string>(),
missing,
- true, require_rollback, false, rebuilt_missing_with_deletes, 0);
+ true, require_rollback, false, dirty_dups, rebuilt_missing_with_deletes, nullptr);
}
+// static
void PGLog::_write_log_and_missing_wo_missing(
ObjectStore::Transaction& t,
map<string,bufferlist> *km,
bool dirty_divergent_priors,
bool touch_log,
bool require_rollback,
+ bool dirty_dups,
set<string> *log_keys_debug
)
{
}
}
-//dout(10) << "write_log_and_missing, clearing up to " << dirty_to << dendl;
+ // dout(10) << "write_log_and_missing, clearing up to " << dirty_to << dendl;
if (touch_log)
t.touch(coll, log_oid);
if (dirty_to != eversion_t()) {
clear_up_to(log_keys_debug, dirty_to.get_key_name());
}
if (dirty_to != eversion_t::max() && dirty_from != eversion_t::max()) {
- // dout(10) << "write_log_and_missing, clearing from " << dirty_from << dendl;
+ // dout(10) << "write_log_and_missing, clearing from " << dirty_from << dendl;
t.omap_rmkeyrange(
coll, log_oid,
dirty_from.get_key_name(), eversion_t::max().get_key_name());
(*km)[p->get_key_name()].claim(bl);
}
+ if (log_keys_debug) {
+ for (map<string, bufferlist>::iterator i = (*km).begin();
+ i != (*km).end();
+ ++i) {
+ if (i->first[0] == '_')
+ continue;
+ assert(!log_keys_debug->count(i->first));
+ log_keys_debug->insert(i->first);
+ }
+ }
+
+ // process dirty_dups after log_keys_debug is filled, so dups do not
+ // end up in that set
if (dirty_dups) {
pg_log_dup_t min;
t.omap_rmkeyrange(
}
}
- if (log_keys_debug) {
- for (map<string, bufferlist>::iterator i = (*km).begin();
- i != (*km).end();
- ++i) {
- if (i->first[0] == '_')
- continue;
- assert(!log_keys_debug->count(i->first));
- log_keys_debug->insert(i->first);
- }
- }
-
if (dirty_divergent_priors) {
//dout(10) << "write_log_and_missing: writing divergent_priors" << dendl;
::encode(divergent_priors, (*km)["divergent_priors"]);
t.omap_rmkeys(coll, log_oid, to_remove);
}
+// static
void PGLog::_write_log_and_missing(
ObjectStore::Transaction& t,
map<string,bufferlist>* km,
bool touch_log,
bool require_rollback,
bool clear_divergent_priors,
+ bool dirty_dups,
bool *rebuilt_missing_with_deletes, // in/out param
set<string> *log_keys_debug
) {
(*km)[p->get_key_name()].claim(bl);
}
+ if (log_keys_debug) {
+ for (map<string, bufferlist>::iterator i = (*km).begin();
+ i != (*km).end();
+ ++i) {
+ if (i->first[0] == '_')
+ continue;
+ assert(!log_keys_debug->count(i->first));
+ log_keys_debug->insert(i->first);
+ }
+ }
+
+ // process dirty_dups after log_keys_debug is filled, so dups do not
+ // end up in that set
if (dirty_dups) {
pg_log_dup_t min;
t.omap_rmkeyrange(
}
}
- if (log_keys_debug) {
- for (map<string, bufferlist>::iterator i = (*km).begin();
- i != (*km).end();
- ++i) {
- if (i->first[0] == '_')
- continue;
- assert(!log_keys_debug->count(i->first));
- log_keys_debug->insert(i->first);
- }
- }
-
if (clear_divergent_priors) {
//dout(10) << "write_log_and_missing: writing divergent_priors" << dendl;
to_remove.insert("divergent_priors");
* Foundation. See file COPYING.
*
*/
-#ifndef CEPH_PG_LOG_H
-#define CEPH_PG_LOG_H
+#pragma once
// re-include our assert to clobber boost's
#include "include/assert.h"
#define PGLOG_INDEXED_OBJECTS (1 << 0)
#define PGLOG_INDEXED_CALLER_OPS (1 << 1)
#define PGLOG_INDEXED_EXTRA_CALLER_OPS (1 << 2)
-#define PGLOG_INDEXED_ALL (PGLOG_INDEXED_OBJECTS | PGLOG_INDEXED_CALLER_OPS | PGLOG_INDEXED_EXTRA_CALLER_OPS)
+#define PGLOG_INDEXED_DUPS (1 << 3)
+#define PGLOG_INDEXED_ALL (PGLOG_INDEXED_OBJECTS | \
+ PGLOG_INDEXED_CALLER_OPS | \
+ PGLOG_INDEXED_EXTRA_CALLER_OPS | \
+ PGLOG_INDEXED_DUPS)
class CephContext;
mutable ceph::unordered_map<hobject_t,pg_log_entry_t*> objects; // ptrs into log. be careful!
mutable ceph::unordered_map<osd_reqid_t,pg_log_entry_t*> caller_ops;
mutable ceph::unordered_multimap<osd_reqid_t,pg_log_entry_t*> extra_caller_ops;
- mutable ceph::unordered_map<osd_reqid_t, pg_log_dup_t*> dup_index;
+ mutable ceph::unordered_map<osd_reqid_t,pg_log_dup_t*> dup_index;
// recovery pointers
list<pg_log_entry_t>::iterator complete_to; // not inclusive of referenced item
last_requested(0),
indexed_data(0),
rollback_info_trimmed_to_riter(log.rbegin())
- {}
+ { }
template <typename... Args>
IndexedLog(Args&&... args) :
complete_to(log.end()),
last_requested(0),
indexed_data(0),
- rollback_info_trimmed_to_riter(log.rbegin()) {
+ rollback_info_trimmed_to_riter(log.rbegin())
+ {
reset_rollback_info_trimmed_to_riter();
index();
}
complete_to(log.end()),
last_requested(rhs.last_requested),
indexed_data(0),
- rollback_info_trimmed_to_riter(log.rbegin()) {
+ rollback_info_trimmed_to_riter(log.rbegin())
+ {
reset_rollback_info_trimmed_to_riter();
index(rhs.indexed_data);
}
+
IndexedLog &operator=(const IndexedLog &rhs) {
this->~IndexedLog();
new (this) IndexedLog(rhs);
const osd_reqid_t &r,
eversion_t *version,
version_t *user_version,
- int *return_code) const {
+ int *return_code) const
+ {
assert(version);
assert(user_version);
assert(return_code);
}
assert(0 == "in extra_caller_ops but not extra_reqids");
}
+
+ if (!(indexed_data & PGLOG_INDEXED_DUPS)) {
+ index_dups();
+ }
+ auto q = dup_index.find(r);
+ if (q != dup_index.end()) {
+ *version = q->second->version;
+ *user_version = q->second->user_version;
+ *return_code = q->second->return_code;
+ return true;
+ }
+
return false;
}
}
}
}
-
+
+ // (Re)build the lookup tables selected by the to_index bitmask
+ // (PGLOG_INDEXED_* flags): each selected table is cleared, repopulated
+ // from dups and/or log, and its flag is then set in indexed_data.
+ // Tables whose flag is not in to_index are left as they are.
void index(__u16 to_index = PGLOG_INDEXED_ALL) const {
+ // if to_index is 0, no need to run any of this code, especially
+ // loop below; this can happen with copy constructor for
+ // IndexedLog (and indirectly through assignment operator)
+ if (!to_index) return;
+
if (to_index & PGLOG_INDEXED_OBJECTS)
objects.clear();
if (to_index & PGLOG_INDEXED_CALLER_OPS)
caller_ops.clear();
if (to_index & PGLOG_INDEXED_EXTRA_CALLER_OPS)
extra_caller_ops.clear();
+ if (to_index & PGLOG_INDEXED_DUPS) {
+ dup_index.clear();
+ // the dup index is built from dups, independently of the log walk below
+ for (auto& i : dups) {
+ dup_index[i.reqid] = const_cast<pg_log_dup_t*>(&i);
+ }
+ }
- for (list<pg_log_entry_t>::const_iterator i = log.begin();
- i != log.end();
- ++i) {
- if (to_index & PGLOG_INDEXED_OBJECTS) {
- if (i->object_is_indexed()) {
- objects[i->soid] = const_cast<pg_log_entry_t*>(&(*i));
+ // only walk the log when at least one log-entry-based table was requested
+ constexpr __u16 any_log_entry_index =
+ PGLOG_INDEXED_OBJECTS |
+ PGLOG_INDEXED_CALLER_OPS |
+ PGLOG_INDEXED_EXTRA_CALLER_OPS;
+
+ if (to_index & any_log_entry_index) {
+ for (list<pg_log_entry_t>::const_iterator i = log.begin();
+ i != log.end();
+ ++i) {
+ if (to_index & PGLOG_INDEXED_OBJECTS) {
+ if (i->object_is_indexed()) {
+ objects[i->soid] = const_cast<pg_log_entry_t*>(&(*i));
+ }
}
- }
- if (to_index & PGLOG_INDEXED_CALLER_OPS) {
- if (i->reqid_is_indexed()) {
- caller_ops[i->reqid] = const_cast<pg_log_entry_t*>(&(*i));
+ if (to_index & PGLOG_INDEXED_CALLER_OPS) {
+ if (i->reqid_is_indexed()) {
+ caller_ops[i->reqid] = const_cast<pg_log_entry_t*>(&(*i));
+ }
}
- }
-
- if (to_index & PGLOG_INDEXED_EXTRA_CALLER_OPS) {
- for (auto j = i->extra_reqids.begin();
- j != i->extra_reqids.end();
- ++j) {
- extra_caller_ops.insert(
- make_pair(j->first, const_cast<pg_log_entry_t*>(&(*i))));
+
+ if (to_index & PGLOG_INDEXED_EXTRA_CALLER_OPS) {
+ for (auto j = i->extra_reqids.begin();
+ j != i->extra_reqids.end();
+ ++j) {
+ extra_caller_ops.insert(
+ make_pair(j->first, const_cast<pg_log_entry_t*>(&(*i))));
+ }
}
}
}
-
+
indexed_data |= to_index;
}
index(PGLOG_INDEXED_EXTRA_CALLER_OPS);
}
+ // Build (or rebuild) only the dup_index lookup table, by delegating to
+ // index() with the PGLOG_INDEXED_DUPS flag.
+ void index_dups() const {
+ index(PGLOG_INDEXED_DUPS);
+ }
+
void index(pg_log_entry_t& e) {
if ((indexed_data & PGLOG_INDEXED_OBJECTS) && e.object_is_indexed()) {
if (objects.count(e.soid) == 0 ||
}
}
}
+
void unindex() {
objects.clear();
caller_ops.clear();
dup_index.clear();
indexed_data = 0;
}
- void unindex(pg_log_entry_t& e) {
+
+ void unindex(const pg_log_entry_t& e) {
// NOTE: this only works if we remove from the _tail_ of the log!
if (indexed_data & PGLOG_INDEXED_OBJECTS) {
if (objects.count(e.soid) && objects[e.soid]->version == e.version)
if (indexed_data & PGLOG_INDEXED_CALLER_OPS) {
// divergent merge_log indexes new before unindexing old
if (caller_ops.count(e.reqid) && caller_ops[e.reqid] == &e)
- caller_ops.erase(e.reqid);
+ caller_ops.erase(e.reqid);
}
}
if (indexed_data & PGLOG_INDEXED_EXTRA_CALLER_OPS) {
}
}
+    /// Record a single dup entry in dup_index.
+    ///
+    /// Does nothing unless the dup index is currently live, i.e. the
+    /// PGLOG_INDEXED_DUPS bit is set in indexed_data; otherwise the table
+    /// is rebuilt from scratch by the next index_dups() call.  Testing the
+    /// bare PGLOG_INDEXED_DUPS constant would always be true.
+    ///
+    /// @param e  entry to index; must be the element stored in dups, since
+    ///           its address is kept in the table
+    void index(pg_log_dup_t& e) {
+      if (indexed_data & PGLOG_INDEXED_DUPS) {
+        dup_index[e.reqid] = &e;
+      }
+    }
+
+    /// Remove a single dup entry from dup_index, if the dup index is live
+    /// and an entry with the same reqid is present.
+    void unindex(const pg_log_dup_t& e) {
+      if (indexed_data & PGLOG_INDEXED_DUPS) {
+        auto i = dup_index.find(e.reqid);
+        if (i != dup_index.end()) {
+          dup_index.erase(i);
+        }
+      }
+    }
+
// actors
void add(const pg_log_entry_t& e, bool applied = true) {
if (!applied) {
caller_ops[e.reqid] = &(log.back());
}
}
-
+
if (indexed_data & PGLOG_INDEXED_EXTRA_CALLER_OPS) {
for (auto j = e.extra_reqids.begin();
j != e.extra_reqids.end();
if (!applied) {
skip_can_rollback_to_to_head();
}
- }
+ } // add
void trim(
CephContext* cct,
eversion_t s,
set<eversion_t> *trimmed,
- set<string> *trimmed_dups);
+ set<string>* trimmed_dups,
+ bool* dirty_dups);
ostream& print(ostream& out) const;
- };
+ }; // IndexedLog
protected:
eversion_t dirty_from; ///< must clear/writeout all keys >= dirty_from
eversion_t writeout_from; ///< must writout keys >= writeout_from
set<eversion_t> trimmed; ///< must clear keys in trimmed
- set<string> trimmed_dups; ///< must clear keys in trimmed_dups
+ set<string> trimmed_dups; ///< must clear keys in trimmed_dups
CephContext *cct;
bool pg_log_debug;
/// Log is clean on [dirty_to, dirty_from)
!(trimmed.empty()) ||
!missing.is_clean() ||
!(trimmed_dups.empty()) ||
+ dirty_dups ||
rebuilt_missing_with_deletes;
}
void mark_log_for_rewrite() {
dirty_dups = false;
}
public:
+
// cppcheck-suppress noExplicitConstructor
- PGLog(CephContext *cct, DoutPrefixProvider *dpp = 0) :
+ PGLog(CephContext *cct, DoutPrefixProvider *dpp = nullptr) :
prefix_provider(dpp),
dirty_from(eversion_t::max()),
writeout_from(eversion_t::max()),
cct(cct),
+ // pg_log_debug ends up true when cct is null or when
+ // osd_debug_pg_log_writeout is set in the config
pg_log_debug(!(cct && !(cct->_conf->osd_debug_pg_log_writeout))),
touched_log(false),
- clear_divergent_priors(false) {}
-
+ clear_divergent_priors(false),
+ dirty_dups(false)
+ { }
void reset_backfill();
void split_into(
pg_t child_pgid,
unsigned split_bits,
- PGLog *opg_log) {
+ PGLog *opg_log) {
log.split_out_child(child_pgid, split_bits, &opg_log->log);
missing.split_into(child_pgid, split_bits, &(opg_log->missing));
opg_log->mark_dirty_to(eversion_t::max());
void recover_got(hobject_t oid, eversion_t v, pg_info_t &info) {
if (missing.is_missing(oid, v)) {
missing.got(oid, v);
-
+
// raise last_complete?
if (missing.get_items().empty()) {
log.complete_to = log.log.end();
while (!missing.get_items().empty() && log.complete_to->version <
missing.get_items().at(
missing.get_rmissing().begin()->second
- ).need)
+ ).need) {
++log.complete_to;
+ }
assert(log.complete_to != log.log.end());
if (log.complete_to == log.log.begin()) {
if (info)
const mempool::osd_pglog::list<pg_log_entry_t> &orig_entries, ///< [in] entries for hoid to merge
const pg_info_t &info, ///< [in] info for merging entries
eversion_t olog_can_rollback_to, ///< [in] rollback boundary
- missing_type &missing, ///< [in,out] missing to adjust, use
+ missing_type &missing, ///< [in,out] missing to adjust, use
LogEntryHandler *rollbacker, ///< [in] optional rollbacker object
const DoutPrefixProvider *dpp ///< [in] logging provider
) {
rollbacker,
this);
}
+
+ bool merge_log_dups(const pg_log_t& olog);
+
public:
+
void rewind_divergent_log(eversion_t newhead,
pg_info_t &info,
LogEntryHandler *rollbacker,
return invalidate_stats;
}
- void write_log_and_missing(ObjectStore::Transaction& t,
- map<string,bufferlist> *km,
- const coll_t& coll,
- const ghobject_t &log_oid,
- bool require_rollback);
+ void write_log_and_missing(
+ ObjectStore::Transaction& t,
+ map<string,bufferlist> *km,
+ const coll_t& coll,
+ const ghobject_t &log_oid,
+ bool require_rollback);
static void write_log_and_missing_wo_missing(
ObjectStore::Transaction& t,
pg_log_t &log,
const coll_t& coll,
const ghobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors,
- bool require_rollback);
+ bool require_rollback,
+ bool dirty_dups);
static void write_log_and_missing(
ObjectStore::Transaction& t,
const ghobject_t &log_oid,
const pg_missing_tracker_t &missing,
bool require_rollback,
+ bool dirty_dups,
bool *rebuilt_missing_set_with_deletes);
static void _write_log_and_missing_wo_missing(
bool dirty_divergent_priors,
bool touch_log,
bool require_rollback,
+ bool dirty_dups,
set<string> *log_keys_debug
);
bool touch_log,
bool require_rollback,
bool clear_divergent_priors,
+ bool dirty_dups,
bool *rebuilt_missing_with_deletes,
set<string> *log_keys_debug
);
void read_log_and_missing(
- ObjectStore *store, coll_t pg_coll,
- coll_t log_coll, ghobject_t log_oid,
+ ObjectStore *store,
+ coll_t pg_coll,
+ coll_t log_coll,
+ ghobject_t log_oid,
const pg_info_t &info,
ostringstream &oss,
bool tolerate_divergent_missing_log,
tolerate_divergent_missing_log,
&clear_divergent_priors,
this,
- (pg_log_debug ? &log_keys_debug : 0),
+ (pg_log_debug ? &log_keys_debug : nullptr),
debug_verify_stored_missing);
}
template <typename missing_type>
- static void read_log_and_missing(ObjectStore *store, coll_t pg_coll,
- coll_t log_coll, ghobject_t log_oid,
+ static void read_log_and_missing(
+ ObjectStore *store,
+ coll_t pg_coll,
+ coll_t log_coll,
+ ghobject_t log_oid,
const pg_info_t &info,
IndexedLog &log,
- missing_type &missing, ostringstream &oss,
+ missing_type &missing,
+ ostringstream &oss,
bool tolerate_divergent_missing_log,
- bool *clear_divergent_priors = NULL,
- const DoutPrefixProvider *dpp = NULL,
- set<string> *log_keys_debug = 0,
+ bool *clear_divergent_priors = nullptr,
+ const DoutPrefixProvider *dpp = nullptr,
+ set<string> *log_keys_debug = nullptr,
bool debug_verify_stored_missing = false
) {
ldpp_dout(dpp, 20) << "read_log_and_missing coll " << pg_coll
if (has_divergent_priors || debug_verify_stored_missing) {
// build missing
if (debug_verify_stored_missing || info.last_complete < info.last_update) {
- ldpp_dout(dpp, 10) << "read_log_and_missing checking for missing items over interval ("
- << info.last_complete
- << "," << info.last_update << "]" << dendl;
+ ldpp_dout(dpp, 10)
+ << "read_log_and_missing checking for missing items over interval ("
+ << info.last_complete
+ << "," << info.last_update << "]" << dendl;
set<hobject_t> did;
set<hobject_t> checked;
missing.flush();
}
ldpp_dout(dpp, 10) << "read_log_and_missing done" << dendl;
- }
-};
-
-#endif // CEPH_PG_LOG_H
+ } // static read_log_and_missing
+}; // struct PGLog
+// Appends three heap-allocated sample instances to o: a default-constructed
+// one and two built with explicit fields (the second carrying -ENOENT as
+// its return code).
void pg_log_dup_t::generate_test_instances(list<pg_log_dup_t*>& o)
{
o.push_back(new pg_log_dup_t());
- o.push_back(new pg_log_dup_t(osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
- eversion_t(1,2), 1, 0);
- o.push_back(new pg_log_dup_t(osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
- eversion_t(1,2), 2, -ENOENT);
+ // arguments are passed as (version, user_version, reqid, return_code)
+ o.push_back(new pg_log_dup_t(eversion_t(1,2),
+ 1,
+ osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
+ 0));
+ o.push_back(new pg_log_dup_t(eversion_t(1,2),
+ 2,
+ osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
+ -ENOENT));
}
-ostream& operator<<(ostream& out, const pg_log_dup_t& e)
-{
- out << e.reqid << " v" << e.version << " uv" << e.user_version
- << " rc=" << e.return_code;
- return out;
+
+/// Stream a dup entry as "log_dup(reqid=... v=... uv=... rc=...)".
+std::ostream& operator<<(std::ostream& out, const pg_log_dup_t& e) {
+  out << "log_dup(reqid=" << e.reqid;
+  out << " v=" << e.version;
+  out << " uv=" << e.user_version;
+  out << " rc=" << e.return_code;
+  out << ")";
+  return out;
+}
*/
struct osd_reqid_t {
entity_name_t name; // who
- ceph_tid_t tid;
+ ceph_tid_t tid;
int32_t inc; // incarnation
osd_reqid_t()
- : tid(0), inc(0) {}
+ : tid(0), inc(0)
+ {}
+  // Copying is a plain memberwise copy of name, tid and inc, so let the
+  // compiler generate it; a hand-written version would have to be kept in
+  // sync with the member list by hand.
+  osd_reqid_t(const osd_reqid_t& other) = default;
osd_reqid_t(const entity_name_t& a, int i, ceph_tid_t t)
- : name(a), tid(t), inc(i) {}
+ : name(a), tid(t), inc(i)
+ {}
DENC(osd_reqid_t, v, p) {
DENC_START(2, 2, p);
eversion_t(epoch_t e, version_t v) : version(v), epoch(e), __pad(0) {}
// cppcheck-suppress noExplicitConstructor
- eversion_t(const ceph_eversion& ce) :
+ eversion_t(const ceph_eversion& ce) :
version(ce.version),
epoch(ce.epoch),
__pad(0) { }
int32_t return_code; // only stored for ERRORs for dup detection
pg_log_dup_t()
- : user_version(0), return_code(0) {}
- pg_log_dup_t(const pg_log_entry_t &entry) explicit
+ : user_version(0), return_code(0)
+ {}
+ explicit pg_log_dup_t(const pg_log_entry_t& entry)
: reqid(entry.reqid), version(entry.version),
user_version(entry.user_version), return_code(entry.return_code)
{}
: reqid(rid), version(v), user_version(uv),
return_code(return_code)
{}
+
string get_key_name() const;
void encode(bufferlist &bl) const;
void decode(bufferlist::iterator &bl);
void dump(Formatter *f) const;
static void generate_test_instances(list<pg_log_dup_t*>& o);
+
+ friend std::ostream& operator<<(std::ostream& out, const pg_log_dup_t& e);
};
WRITE_CLASS_ENCODER(pg_log_dup_t)
+std::ostream& operator<<(std::ostream& out, const pg_log_dup_t& e);
+
/**
* pg_log_t - incremental log of recent pg changes.
*
eversion_t rollback_info_trimmed_to;
public:
- mempool::osd_pglog::list<pg_log_entry_t> log; // the actual log.
- mempool::osd_pglog::list<pg_log_dup_t> dups; // entries just for dup op detection
-
+ // the actual log
+ mempool::osd_pglog::list<pg_log_entry_t> log;
+
+ // entries just for dup op detection ordered oldest to newest
+ mempool::osd_pglog::list<pg_log_dup_t> dups;
+
pg_log_t() = default;
pg_log_t(const eversion_t &last_update,
const eversion_t &log_tail,
rollback_info_trimmed_to,
std::move(childlog),
std::move(childdups));
- }
+ }
mempool::osd_pglog::list<pg_log_entry_t> rewind_from_head(eversion_t newhead) {
assert(newhead >= tail);
};
WRITE_CLASS_ENCODER(pg_log_t)
-inline ostream& operator<<(ostream& out, const pg_log_t& log)
+inline ostream& operator<<(ostream& out, const pg_log_t& log)
{
out << "log((" << log.tail << "," << log.head << "], crt="
<< log.get_can_rollback_to() << ")";
}
-
-
-
-
// ---------------------------------------
class OSDSuperblock {
#include "include/coredumpctl.h"
+#include "common/ceph_context.h"
+#include "common/config.h"
#include "../objectstore/store_test_fixture.h"
-class PGLogTest : virtual public ::testing::Test, protected PGLog {
-public:
- PGLogTest() : PGLog(g_ceph_context) {}
- void SetUp() override {
- missing.may_include_deletes = true;
- }
-
- void TearDown() override {
- clear();
- }
+struct PGLogTestBase {
static hobject_t mk_obj(unsigned id) {
hobject_t hoid;
stringstream ss;
return eversion_t(ep, v);
}
+ // Build a MODIFY log entry for hoid at version v with prior version pv,
+ // marked unrollbackable and tagged with the given reqid.
static pg_log_entry_t mk_ple_mod(
- const hobject_t &hoid, eversion_t v, eversion_t pv) {
+ const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) {
pg_log_entry_t e;
e.mark_unrollbackable();
e.op = pg_log_entry_t::MODIFY;
e.soid = hoid;
e.version = v;
e.prior_version = pv;
+ e.reqid = reqid;
return e;
}
+ // Build a DELETE log entry for hoid at version v with prior version pv,
+ // marked unrollbackable and tagged with the given reqid.
static pg_log_entry_t mk_ple_dt(
- const hobject_t &hoid, eversion_t v, eversion_t pv) {
+ const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) {
pg_log_entry_t e;
e.mark_unrollbackable();
e.op = pg_log_entry_t::DELETE;
e.soid = hoid;
e.version = v;
e.prior_version = pv;
+ e.reqid = reqid;
return e;
}
static pg_log_entry_t mk_ple_ldt(
return e;
}
+ // Build a MODIFY log entry for hoid at version v with prior version pv,
+ // tagged with the given reqid; unlike mk_ple_mod() it does not call
+ // mark_unrollbackable() on the entry.
static pg_log_entry_t mk_ple_mod_rb(
- const hobject_t &hoid, eversion_t v, eversion_t pv) {
+ const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) {
pg_log_entry_t e;
e.op = pg_log_entry_t::MODIFY;
e.soid = hoid;
e.version = v;
e.prior_version = pv;
+ e.reqid = reqid;
return e;
}
+ // Build a DELETE log entry for hoid at version v with prior version pv,
+ // tagged with the given reqid; unlike mk_ple_dt() it does not call
+ // mark_unrollbackable() on the entry.
static pg_log_entry_t mk_ple_dt_rb(
- const hobject_t &hoid, eversion_t v, eversion_t pv) {
+ const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) {
pg_log_entry_t e;
e.op = pg_log_entry_t::DELETE;
e.soid = hoid;
e.version = v;
e.prior_version = pv;
+ e.reqid = reqid;
return e;
}
+  /// Three-argument convenience overload: forwards to the four-argument
+  /// mk_ple_mod() with a default-constructed request id.
+  static pg_log_entry_t mk_ple_mod(
+    const hobject_t &hoid, eversion_t v, eversion_t pv) {
+    const osd_reqid_t no_reqid;
+    return mk_ple_mod(hoid, v, pv, no_reqid);
+  }
+  /// Three-argument convenience overload: forwards to the four-argument
+  /// mk_ple_dt() with a default-constructed request id.
+  static pg_log_entry_t mk_ple_dt(
+    const hobject_t &hoid, eversion_t v, eversion_t pv) {
+    const osd_reqid_t no_reqid;
+    return mk_ple_dt(hoid, v, pv, no_reqid);
+  }
+  /// Three-argument convenience overload: forwards to the four-argument
+  /// mk_ple_mod_rb() with a default-constructed request id.
+  static pg_log_entry_t mk_ple_mod_rb(
+    const hobject_t &hoid, eversion_t v, eversion_t pv) {
+    const osd_reqid_t no_reqid;
+    return mk_ple_mod_rb(hoid, v, pv, no_reqid);
+  }
+  /// Three-argument convenience overload: forwards to the four-argument
+  /// mk_ple_dt_rb() with a default-constructed request id.
+  static pg_log_entry_t mk_ple_dt_rb(
+    const hobject_t &hoid, eversion_t v, eversion_t pv) {
+    const osd_reqid_t no_reqid;
+    return mk_ple_dt_rb(hoid, v, pv, no_reqid);
+  }
+}; // PGLogTestBase
+
+
+class PGLogTest : virtual public ::testing::Test, protected PGLog, public PGLogTestBase {
+public:
+ PGLogTest() : PGLog(g_ceph_context) {}
+  /// Runs before each test: sets missing.may_include_deletes to true.
+  void SetUp() override {
+    missing.may_include_deletes = true;
+  }
+
+  /// Runs after each test: resets the inherited PGLog state via clear().
+  void TearDown() override {
+    clear();
+  }
+
+
struct TestCase {
list<pg_log_entry_t> base;
const IndexedLog &get_fulldiv() const { return fulldiv; }
const pg_info_t &get_authinfo() const { return authinfo; }
const pg_info_t &get_divinfo() const { return divinfo; }
- };
+ }; // struct TestCase
struct LogHandler : public PGLog::LogEntryHandler {
set<hobject_t> removed;
list<pg_log_entry_t> rolledback;
-
+
void rollback(
const pg_log_entry_t &entry) override {
rolledback.push_back(entry);
ASSERT_EQ(info.last_update, oinfo.last_update);
verify_missing(tcase, missing);
verify_sideeffects(tcase, h);
- };
+ }
+
void test_proc_replica_log(const TestCase &tcase) {
clear();
log = tcase.get_fullauth();
}
}
verify_missing(tcase, omissing);
- }
+ } // test_proc_replica_log
+
void run_test_case(const TestCase &tcase) {
test_merge_log(tcase);
test_proc_replica_log(tcase);
}
-};
+}; // class PGLogTest
struct TestHandler : public PGLog::LogEntryHandler {
list<hobject_t> &removed;
// the old entry (from the log entry given in argument) is not a CLONE and
// the old entry (from the log entry given in argument) is not a DELETE and
// the old entry prior_version is lower than the tail of the log :
- // add the old object to the remove_snap list and
+ // add the old object to the remove_snap list and
// add the old object to divergent priors and
// add or update the prior_version of the object to missing and
// return false
| | | DELETE |
| | | |
+--------+-------+---------+
-
+
The log entry (1,3) deletes the object x9 and the olog entry
(2,3) also deletes it : do nothing. The olog tail is ignored
because it is before the log tail.
-
+
*/
{
clear();
run_rebuild_missing_test(expected);
}
+
+// Fixture for the dup-merging tests below.  It derives from PGLog so the
+// tests can reach the protected log state directly, and it provides builders
+// for dup entries plus helpers that validate the resulting dup list.
+class PGLogMergeDupsTest : public ::testing::Test, protected PGLog {
+
+public:
+
+  PGLogMergeDupsTest() : PGLog(g_ceph_context) { }
+
+  void SetUp() override { }
+
+  // Reset the inherited PGLog state after every test.
+  void TearDown() override {
+    clear();
+  }
+
+  // Build a dup entry whose version is (a, b).  `unsigned` is spelled out
+  // because `uint` is a platform typedef rather than a standard C++ type.
+  static pg_log_dup_t create_dup_entry(unsigned a, unsigned b) {
+    // make each dup_entry unique by using different client id's
+    static unsigned client_id = 777;
+    return pg_log_dup_t(eversion_t(a, b),
+                        a,
+                        osd_reqid_t(entity_name_t::CLIENT(client_id++), 8, 1),
+                        0);
+  }
+
+  // Five entries spanning versions (10, 11) .. (13, 99).
+  static std::vector<pg_log_dup_t> example_dups_1() {
+    std::vector<pg_log_dup_t> result = {
+      create_dup_entry(10, 11),
+      create_dup_entry(10, 12),
+      create_dup_entry(11, 1),
+      create_dup_entry(12, 3),
+      create_dup_entry(13, 99)
+    };
+    return result;
+  }
+
+  // Five entries spanning versions (12, 3) .. (16, 32); the first two
+  // versions also appear in example_dups_1().
+  static std::vector<pg_log_dup_t> example_dups_2() {
+    std::vector<pg_log_dup_t> result = {
+      create_dup_entry(12, 3),
+      create_dup_entry(13, 99),
+      create_dup_entry(15, 11),
+      create_dup_entry(16, 14),
+      create_dup_entry(16, 32)
+    };
+    return result;
+  }
+
+  // Append a single freshly built dup entry to this fixture's own log.
+  void add_dups(unsigned a, unsigned b) {
+    log.dups.push_back(create_dup_entry(a, b));
+  }
+
+  // Append every entry of `l` to this fixture's own log.
+  void add_dups(const std::vector<pg_log_dup_t>& l) {
+    add_dups(log, l);
+  }
+
+  // Append every entry of `dups` to the given log's dup list.
+  static void add_dups(IndexedLog& log, const std::vector<pg_log_dup_t>& dups) {
+    for (const auto& i : dups) {
+      log.dups.push_back(i);
+    }
+  }
+
+  // Verify that dup versions are strictly increasing from front to back.
+  void check_order() {
+    eversion_t prev(0, 0);
+
+    for (const auto& i : log.dups) {
+      EXPECT_LT(prev, i.version) << "verify versions monotonically increase";
+      prev = i.version;
+    }
+  }
+
+  // Verify that the dup index has one slot per dup entry and that every
+  // entry's request id is present in it exactly once.
+  void check_index() {
+    EXPECT_EQ(log.dups.size(), log.dup_index.size());
+    for (const auto& i : log.dups) {
+      EXPECT_EQ(1u, log.dup_index.count(i.reqid));
+    }
+  }
+};
+
+TEST_F(PGLogMergeDupsTest, OtherEmpty) {
+  // The local log carries the first example set; the other log has no dups,
+  // so the merge is expected to report no change.
+  IndexedLog other;
+  log.tail = eversion_t(14, 5);
+  add_dups(example_dups_1());
+  index();
+
+  const bool merged = merge_log_dups(other);
+  EXPECT_FALSE(merged);
+
+  const auto dup_count = log.dups.size();
+  EXPECT_EQ(5u, dup_count);
+
+  // Only look at the ends when the size is right, so a size failure is not
+  // followed by misleading front/back failures.
+  if (dup_count == 5u) {
+    const eversion_t &oldest = log.dups.front().version;
+    const eversion_t &newest = log.dups.back().version;
+    EXPECT_EQ(10u, oldest.epoch);
+    EXPECT_EQ(11u, oldest.version);
+    EXPECT_EQ(13u, newest.epoch);
+    EXPECT_EQ(99u, newest.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+TEST_F(PGLogMergeDupsTest, AmEmpty) {
+  // The local log starts without dups; all five entries of the other log
+  // are expected to be taken over.
+  IndexedLog other;
+  add_dups(other, example_dups_1());
+
+  log.tail = eversion_t(14, 5);
+  index();
+
+  const bool merged = merge_log_dups(other);
+  EXPECT_TRUE(merged);
+
+  const auto dup_count = log.dups.size();
+  EXPECT_EQ(5u, dup_count);
+
+  if (dup_count == 5u) {
+    const eversion_t &oldest = log.dups.front().version;
+    EXPECT_EQ(10u, oldest.epoch);
+    EXPECT_EQ(11u, oldest.version);
+
+    const eversion_t &newest = log.dups.back().version;
+    EXPECT_EQ(13u, newest.epoch);
+    EXPECT_EQ(99u, newest.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+TEST_F(PGLogMergeDupsTest, AmEmptyOverlap) {
+  // Same as AmEmpty, but the local tail sits at (12, 3): the test expects
+  // only the three entries older than that tail to end up in the dup list.
+  IndexedLog other;
+  add_dups(other, example_dups_1());
+
+  log.tail = eversion_t(12, 3);
+  index();
+
+  const bool merged = merge_log_dups(other);
+  EXPECT_TRUE(merged);
+
+  const auto dup_count = log.dups.size();
+  EXPECT_EQ(3u, dup_count);
+
+  if (dup_count == 3u) {
+    const eversion_t &oldest = log.dups.front().version;
+    EXPECT_EQ(10u, oldest.epoch);
+    EXPECT_EQ(11u, oldest.version);
+
+    const eversion_t &newest = log.dups.back().version;
+    EXPECT_EQ(11u, newest.epoch);
+    EXPECT_EQ(1u, newest.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+TEST_F(PGLogMergeDupsTest, Same) {
+  // Both logs hold dups with the same versions; the merge is expected to
+  // leave the local dup list untouched.
+  log.tail = eversion_t(14, 1);
+  add_dups(example_dups_1());
+  index();
+
+  IndexedLog other;
+  add_dups(other, example_dups_1());
+
+  const bool merged = merge_log_dups(other);
+  EXPECT_FALSE(merged);
+
+  const auto dup_count = log.dups.size();
+  EXPECT_EQ(5u, dup_count);
+
+  if (dup_count == 5u) {
+    const eversion_t &oldest = log.dups.front().version;
+    EXPECT_EQ(10u, oldest.epoch);
+    EXPECT_EQ(11u, oldest.version);
+
+    const eversion_t &newest = log.dups.back().version;
+    EXPECT_EQ(13u, newest.epoch);
+    EXPECT_EQ(99u, newest.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+
+TEST_F(PGLogMergeDupsTest, Later) {
+  // The other log's dups extend past the local ones.  With the local tail at
+  // (16, 14) the test expects six entries, the newest being (15, 11).
+  log.tail = eversion_t(16, 14);
+  add_dups(example_dups_1());
+  index();
+
+  IndexedLog other;
+  add_dups(other, example_dups_2());
+
+  const bool merged = merge_log_dups(other);
+  EXPECT_TRUE(merged);
+
+  const auto dup_count = log.dups.size();
+  EXPECT_EQ(6u, dup_count);
+
+  if (dup_count == 6u) {
+    const eversion_t &oldest = log.dups.front().version;
+    EXPECT_EQ(10u, oldest.epoch);
+    EXPECT_EQ(11u, oldest.version);
+
+    const eversion_t &newest = log.dups.back().version;
+    EXPECT_EQ(15u, newest.epoch);
+    EXPECT_EQ(11u, newest.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+
+TEST_F(PGLogMergeDupsTest, Earlier) {
+  // The local log holds the later example set and the other log the earlier
+  // one.  The union of their versions has eight distinct entries, running
+  // from (10, 11) to (16, 32), all older than the tail at (17, 2).
+  log.tail = eversion_t(17, 2);
+
+  IndexedLog olog;
+
+  add_dups(example_dups_2());
+  index();
+  add_dups(olog, example_dups_1());
+
+  bool changed = merge_log_dups(olog);
+
+  EXPECT_TRUE(changed);
+  EXPECT_EQ(8u, log.dups.size());
+
+  // The guard must match the expected size above; otherwise the front/back
+  // checks are silently skipped even when the merge result is correct.
+  if (8u == log.dups.size()) {
+    EXPECT_EQ(10u, log.dups.front().version.epoch);
+    EXPECT_EQ(11u, log.dups.front().version.version);
+
+    EXPECT_EQ(16u, log.dups.back().version.epoch);
+    EXPECT_EQ(32u, log.dups.back().version.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+
+TEST_F(PGLogMergeDupsTest, Superset) {
+  // The other log brackets the local dups with one older entry (9, 5) and
+  // one newer entry (15, 11); both are expected to be added.
+  log.tail = eversion_t(17, 2);
+  add_dups(example_dups_1());
+  index();
+
+  IndexedLog other;
+  other.dups.push_back(create_dup_entry(9, 5));
+  other.dups.push_back(create_dup_entry(15, 11));
+
+  const bool merged = merge_log_dups(other);
+  EXPECT_TRUE(merged);
+
+  const auto dup_count = log.dups.size();
+  EXPECT_EQ(7u, dup_count);
+
+  if (dup_count == 7u) {
+    const eversion_t &oldest = log.dups.front().version;
+    EXPECT_EQ(9u, oldest.epoch);
+    EXPECT_EQ(5u, oldest.version);
+
+    const eversion_t &newest = log.dups.back().version;
+    EXPECT_EQ(15u, newest.epoch);
+    EXPECT_EQ(11u, newest.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+
+// Fixture for the trim tests.  Every test gets its own CephContext so the
+// pg-log related options can be overridden per test through the
+// three-argument SetUp() overload.
+struct PGLogTrimTest :
+  public ::testing::Test,
+  public PGLogTestBase,
+  public PGLog::IndexedLog
+{
+  std::list<hobject_t*> test_hobjects;
+  CephContext *cct = nullptr;
+
+  // Create the per-test context and the hobject test instances.
+  void SetUp() override {
+    // NOTE(review): get() is called on a freshly constructed context while
+    // TearDown() calls put() only once; presumably the constructor already
+    // holds a reference -- confirm the context is not leaked.
+    cct = (new CephContext(CEPH_ENTITY_TYPE_OSD))->get();
+
+    hobject_t::generate_test_instances(test_hobjects);
+  }
+
+  // Override the three pg-log options on this test's context.  Called
+  // explicitly from the test bodies, not by the test framework.
+  void SetUp(unsigned min_entries, unsigned max_entries, unsigned dup_track) {
+    // Three decimal digits per byte are always enough (a byte holds at most
+    // 255), plus one for the terminating NUL.  A fixed size of 10 would
+    // truncate ten-digit values.
+    constexpr size_t size = 3 * sizeof(unsigned) + 1;
+
+    char min_entries_s[size];
+    char max_entries_s[size];
+    char dup_track_s[size];
+
+    snprintf(min_entries_s, size, "%u", min_entries);
+    snprintf(max_entries_s, size, "%u", max_entries);
+    snprintf(dup_track_s, size, "%u", dup_track);
+
+    cct->_conf->set_val_or_die("osd_min_pg_log_entries", min_entries_s);
+    cct->_conf->set_val_or_die("osd_max_pg_log_entries", max_entries_s);
+    cct->_conf->set_val_or_die("osd_pg_log_dups_tracked", dup_track_s);
+  }
+
+  // Free the generated hobject instances and drop the context reference.
+  void TearDown() override {
+    while (!test_hobjects.empty()) {
+      delete test_hobjects.front();
+      test_hobjects.pop_front();
+    }
+
+    cct->put();
+  }
+}; // struct PGLogTrimTest
+
+
+# if 0 // NOTE(review): Trim1 is compiled out; it only builds t.auth and calls t.setup() without asserting anything -- presumably unfinished, confirm whether to complete or remove
+TEST_F(PGLogTest, Trim1) {
+ TestCase t;
+
+ t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
+ t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(15, 150), mk_evt(10, 100)));
+ t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(15, 155), mk_evt(15, 150)));
+ t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(20, 160), mk_evt(25, 152)));
+ t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(21, 165), mk_evt(26, 160)));
+ t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(21, 165), mk_evt(31, 171)));
+
+ t.setup();
+}
+#endif // 0 (disabled Trim1 test)
+
+
+TEST_F(PGLogTrimTest, TestMakingCephContext)
+{
+  // Sanity check: the values pushed through SetUp() must be readable back
+  // from the per-test context's configuration.
+  SetUp(1, 2, 5);
+
+  const auto min_entries = cct->_conf->osd_min_pg_log_entries;
+  const auto max_entries = cct->_conf->osd_max_pg_log_entries;
+  const auto dups_tracked = cct->_conf->osd_pg_log_dups_tracked;
+
+  EXPECT_EQ(1u, min_entries);
+  EXPECT_EQ(2u, max_entries);
+  EXPECT_EQ(5u, dups_tracked);
+}
+
+
+TEST_F(PGLogTrimTest, TestPartialTrim)
+{
+  SetUp(1, 2, 20);
+
+  PGLog::IndexedLog log;
+  log.head = mk_evt(24, 0);
+  log.skip_can_rollback_to_to_head();
+  log.head = mk_evt(9, 0);
+
+  log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
+  log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
+  log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150)));
+  log.add(mk_ple_mod(mk_obj(1), mk_evt(19, 160), mk_evt(25, 152)));
+  log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
+  log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
+
+  // First pass: trim up to (19, 157) while tracking 20 versions of dups.
+  // Three log entries are expected to go, two of them surviving as dups.
+  {
+    std::set<eversion_t> dropped;
+    std::set<std::string> dropped_dups;
+    bool dups_dirtied = false;
+
+    log.trim(cct, mk_evt(19, 157), &dropped, &dropped_dups, &dups_dirtied);
+
+    EXPECT_TRUE(dups_dirtied);
+    EXPECT_EQ(3u, log.log.size());
+    EXPECT_EQ(3u, dropped.size());
+    EXPECT_EQ(2u, log.dups.size());
+    EXPECT_EQ(0u, dropped_dups.size());
+  }
+
+  // Second pass: narrow dup tracking to 15 versions and trim up to
+  // (20, 164).  One more log entry and one dup are expected to be dropped.
+  SetUp(1, 2, 15);
+  {
+    std::set<eversion_t> dropped;
+    std::set<std::string> dropped_dups;
+    bool dups_dirtied = false;
+
+    log.trim(cct, mk_evt(20, 164), &dropped, &dropped_dups, &dups_dirtied);
+
+    EXPECT_TRUE(dups_dirtied);
+    EXPECT_EQ(2u, log.log.size());
+    EXPECT_EQ(1u, dropped.size());
+    EXPECT_EQ(2u, log.dups.size());
+    EXPECT_EQ(1u, dropped_dups.size());
+  }
+}
+
+
+TEST_F(PGLogTrimTest, TestTrimNoTrimmed) {
+  SetUp(1, 2, 20);
+
+  PGLog::IndexedLog log;
+  log.head = mk_evt(20, 0);
+  log.skip_can_rollback_to_to_head();
+  log.head = mk_evt(9, 0);
+
+  log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
+  log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
+  log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150)));
+  log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 160), mk_evt(25, 152)));
+  log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
+  log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
+
+  // trim() is called without the two "trimmed" output sets; only the
+  // dirty flag and the resulting sizes are checked.
+  bool dups_dirtied = false;
+  log.trim(cct, mk_evt(19, 157), nullptr, nullptr, &dups_dirtied);
+
+  EXPECT_TRUE(dups_dirtied);
+  EXPECT_EQ(3u, log.log.size());
+  EXPECT_EQ(2u, log.dups.size());
+}
+
+
+TEST_F(PGLogTrimTest, TestTrimNoDups)
+{
+  // Dup tracking is limited to 10 versions here; the test expects that none
+  // of the trimmed entries is kept as a dup.
+  SetUp(1, 2, 10);
+
+  PGLog::IndexedLog log;
+  log.head = mk_evt(20, 0);
+  log.skip_can_rollback_to_to_head();
+  log.head = mk_evt(9, 0);
+
+  log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
+  log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
+  log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150)));
+  log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 160), mk_evt(25, 152)));
+  log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
+  log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
+
+  std::set<eversion_t> dropped;
+  std::set<std::string> dropped_dups;
+  bool dups_dirtied = false;
+
+  log.trim(cct, mk_evt(19, 157), &dropped, &dropped_dups, &dups_dirtied);
+
+  EXPECT_FALSE(dups_dirtied);
+  EXPECT_EQ(3u, log.log.size());
+  EXPECT_EQ(3u, dropped.size());
+  EXPECT_EQ(0u, log.dups.size());
+  EXPECT_EQ(0u, dropped_dups.size());
+}
+
+TEST_F(PGLogTrimTest, TestNoTrim)
+{
+  SetUp(1, 2, 20);
+
+  PGLog::IndexedLog log;
+  log.head = mk_evt(24, 0);
+  log.skip_can_rollback_to_to_head();
+  log.head = mk_evt(9, 0);
+
+  log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
+  log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
+  log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150)));
+  log.add(mk_ple_mod(mk_obj(1), mk_evt(19, 160), mk_evt(25, 152)));
+  log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
+  log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
+
+  std::set<eversion_t> dropped;
+  std::set<std::string> dropped_dups;
+  bool dups_dirtied = false;
+
+  // The trim point (9, 99) precedes every entry added above, so the test
+  // expects the log to be left exactly as it was.
+  log.trim(cct, mk_evt(9, 99), &dropped, &dropped_dups, &dups_dirtied);
+
+  EXPECT_FALSE(dups_dirtied);
+  EXPECT_EQ(6u, log.log.size());
+  EXPECT_EQ(0u, dropped.size());
+  EXPECT_EQ(0u, log.dups.size());
+  EXPECT_EQ(0u, dropped_dups.size());
+}
+
+TEST_F(PGLogTrimTest, TestTrimAll)
+{
+  SetUp(1, 2, 20);
+
+  PGLog::IndexedLog log;
+  log.head = mk_evt(24, 0);
+  log.skip_can_rollback_to_to_head();
+  log.head = mk_evt(9, 0);
+
+  log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
+  log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
+  log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150)));
+  log.add(mk_ple_mod(mk_obj(1), mk_evt(19, 160), mk_evt(25, 152)));
+  log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
+  log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
+
+  std::set<eversion_t> dropped;
+  std::set<std::string> dropped_dups;
+  bool dups_dirtied = false;
+
+  // The trim point (22, 180) lies past every entry added above: the whole
+  // log is expected to be trimmed, with five entries surviving as dups.
+  log.trim(cct, mk_evt(22, 180), &dropped, &dropped_dups, &dups_dirtied);
+
+  EXPECT_TRUE(dups_dirtied);
+  EXPECT_EQ(0u, log.log.size());
+  EXPECT_EQ(6u, dropped.size());
+  EXPECT_EQ(5u, log.dups.size());
+  EXPECT_EQ(0u, dropped_dups.size());
+}
+
+
+TEST_F(PGLogTrimTest, TestGetRequest) {
+  SetUp(1, 2, 20);
+
+  PGLog::IndexedLog log;
+  log.head = mk_evt(20, 0);
+  log.skip_can_rollback_to_to_head();
+  log.head = mk_evt(9, 0);
+
+  // All six entries come from the same client; only the last component of
+  // the request id (1..6) tells them apart.
+  const entity_name_t client = entity_name_t::CLIENT(777);
+
+  log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70),
+                     osd_reqid_t(client, 8, 1)));
+  log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100),
+                    osd_reqid_t(client, 8, 2)));
+  log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150),
+                        osd_reqid_t(client, 8, 3)));
+  log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 160), mk_evt(25, 152),
+                     osd_reqid_t(client, 8, 4)));
+  log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160),
+                     osd_reqid_t(client, 8, 5)));
+  log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166),
+                       osd_reqid_t(client, 8, 6)));
+
+  bool dups_dirtied = false;
+  log.trim(cct, mk_evt(19, 157), nullptr, nullptr, &dups_dirtied);
+
+  EXPECT_TRUE(dups_dirtied);
+  EXPECT_EQ(3u, log.log.size());
+  EXPECT_EQ(2u, log.dups.size());
+
+  // Output slots filled in by get_request().
+  eversion_t version;
+  version_t user_version;
+  int return_code;
+
+  // A request whose entry is still in the regular log.
+  const osd_reqid_t in_log(client, 8, 5);
+  EXPECT_TRUE(log.get_request(in_log, &version, &user_version, &return_code));
+  EXPECT_EQ(mk_evt(21, 165), version);
+
+  // A request whose entry was trimmed but is expected to be found as a dup.
+  const osd_reqid_t in_dups(client, 8, 3);
+  EXPECT_TRUE(log.get_request(in_dups, &version, &user_version, &return_code));
+  EXPECT_EQ(mk_evt(15, 155), version);
+
+  // A request that is expected to be found in neither place.
+  const osd_reqid_t gone(client, 8, 1);
+  EXPECT_FALSE(log.get_request(gone, &version, &user_version, &return_code));
+}
+
+
// Local Variables:
// compile-command: "cd ../.. ; make unittest_pglog ; ./unittest_pglog --log-to-stderr=true --debug-osd=20 # --gtest_filter=*.* "
// End:
if (!divergent.empty()) {
assert(missing.get_items().empty());
PGLog::write_log_and_missing_wo_missing(
- t, &km, log, coll, info.pgid.make_pgmeta_oid(), divergent, true);
+ t, &km, log, coll, info.pgid.make_pgmeta_oid(), divergent, true, true);
} else {
pg_missing_tracker_t tmissing(missing);
bool rebuilt_missing_set_with_deletes = missing.may_include_deletes;
PGLog::write_log_and_missing(
- t, &km, log, coll, info.pgid.make_pgmeta_oid(), tmissing, true,
+ t, &km, log, coll, info.pgid.make_pgmeta_oid(), tmissing, true, true,
&rebuilt_missing_set_with_deletes);
}
t.omap_setkeys(coll, info.pgid.make_pgmeta_oid(), km);