}
}
+void PeeringState::consider_rollback_pwlc(eversion_t last_complete)
+{
+ for (const auto & [shard, versionrange] :
+ info.partial_writes_last_complete) {
+ auto [fromversion, toversion] = versionrange;
+ if (last_complete < fromversion) {
+ // It is possible that we need to rollback pwlc, this can happen if
+ // peering is attempted with an OSD missing but does not manage to
+ // activate (typically because of a wait upthru) before the missing
+ // OSD returns
+ info.partial_writes_last_complete[shard] = std::pair(last_complete,
+ last_complete);
+ // Assign the current epoch to the version number so that this is
+ // recognised as the newest pwlc update
+ info.partial_writes_last_complete[shard].second.epoch =
+ get_osdmap_epoch();
+ psdout(10) << "shard " << shard << " pwlc rolled back to "
+ << info.partial_writes_last_complete[shard] << dendl;
+ } else if (last_complete < toversion) {
+ info.partial_writes_last_complete[shard].second = last_complete;
+ psdout(10) << "shard " << shard << " pwlc rolled back to "
+ << info.partial_writes_last_complete[shard] << dendl;
+ }
+ }
+}
+
void PeeringState::proc_master_log(
ObjectStore::Transaction& t, pg_info_t &oinfo,
pg_log_t&& olog, pg_missing_t&& omissing, pg_shard_t from)
<< olog << " " << omissing << dendl;
ceph_assert(!is_peered() && is_primary());
+ if (info.partial_writes_last_complete.contains(from.shard)) {
+ // Check if last_complete and last_update can be advanced based on
+ // knowledge of partial_writes
+ const auto & [fromversion, toversion] =
+ info.partial_writes_last_complete[from.shard];
+ if (toversion > oinfo.last_complete) {
+ if (fromversion <= oinfo.last_complete) {
+ psdout(10) << "osd." << from << " has last_complete "
+ << oinfo.last_complete
+ << " but pwlc says its at " << toversion << dendl;
+ oinfo.last_complete = toversion;
+ if (toversion > oinfo.last_update) {
+ oinfo.last_update = toversion;
+ }
+ if (toversion > olog.head) {
+ olog.head = toversion;
+ }
+ } else {
+ psdout(10) << "osd." << from << " has last_complete "
+ << oinfo.last_complete << " cannot apply pwlc from "
+ << fromversion << " to " << toversion << dendl;
+ }
+ }
+ }
+ // For partial writes we may be able to keep some of the divergent entries
+ if (olog.head < pg_log.get_head()) {
+ // Iterate backwards to divergence
+ auto p = pg_log.get_log().log.end();
+ while (true) {
+ if (p == pg_log.get_log().log.begin()) {
+ break;
+ }
+ --p;
+ if (p->version.version <= olog.head.version) {
+ break;
+ }
+ }
+ // See if we can wind forward partially written entries
+ map<pg_shard_t, pg_info_t> all_info(peer_info.begin(), peer_info.end());
+ all_info[pg_whoami] = info;
+ while (p->version == olog.head) {
+ ++p;
+ if (p == pg_log.get_log().log.end()) {
+ break;
+ }
+ if (p->is_written_shard(from.shard)) {
+ psdout(10) << "entry " << p->version << " has written shards "
+ << p->written_shards << " so is divergent" << dendl;
+ // This entry was meant to be written on from, this is the first
+ // divergent entry
+ break;
+ } else {
+ // Test if enough shards have the update
+ shard_id_set shards_with_update;
+ shard_id_set shards_without_update;
+ for (auto&& [pg_shard, pi] : all_info) {
+ psdout(20) << "version " << p->version
+ << " testing osd " << pg_shard
+ << " written=" << p->written_shards
+ << " present=" << p->present_shards << dendl;
+ if (p->is_present_shard(pg_shard.shard) &&
+ p->is_written_shard(pg_shard.shard)) {
+ if (pi.last_update < p->version) {
+ if (!shards_with_update.contains(pg_shard.shard)) {
+ shards_without_update.insert(pg_shard.shard);
+ }
+ } else {
+ shards_with_update.insert(pg_shard.shard);
+ shards_without_update.erase(pg_shard.shard);
+ }
+ }
+ }
+ psdout(20) << "shards_with_update=" << shards_with_update
+ << " shards_without_update=" << shards_without_update
+ << dendl;
+ if (!shards_without_update.empty()) {
+ // A shard is missing this write - this is the first divergent entry
+ break;
+ }
+ // This entry can be kept, only shards that didn't participate in
+ // the partial write missed the update
+ psdout(20) << "keeping entry " << p->version << dendl;
+ olog.head = p->version;
+ }
+ }
+ }
// merge log into our own log to build master log. no need to
// make any adjustments to their missing map; we are taking their
// log to be authoritative (i.e., their entries are by definitely
<< " " << omissing << dendl;
might_have_unfound.insert(from);
+ // our log is now authoritative - update pwlc information based
+ // on the log head
+ consider_rollback_pwlc(pg_log.get_head());
+
// See doc/dev/osd_internals/last_epoch_started
if (oinfo.last_epoch_started > info.last_epoch_started) {
info.last_epoch_started = oinfo.last_epoch_started;