From: David Zafman Date: Wed, 30 Jan 2013 01:59:45 +0000 (-0800) Subject: Move read_log() function to prep for next commit X-Git-Tag: v0.57~73 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4a950aa94f04b60fc072c47bbacd82a2c3ddb0c0;p=ceph.git Move read_log() function to prep for next commit Signed-off-by: David Zafman Reviewed-by: Samuel Just --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 51dbee46f61f..ff879ec46fc0 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2511,203 +2511,6 @@ void PG::append_log(vector& logv, eversion_t trim_to, ObjectStor write_info(t); } -void PG::read_log(ObjectStore *store) -{ - // load bounds - ondisklog.tail = ondisklog.head = 0; - - bufferlist blb; - store->collection_getattr(coll, "ondisklog", blb); - bufferlist::iterator p = blb.begin(); - ::decode(ondisklog, p); - - dout(10) << "read_log " << ondisklog.tail << "~" << ondisklog.length() << dendl; - - log.tail = info.log_tail; - - // In case of sobject_t based encoding, may need to list objects in the store - // to find hashes - bool listed_collection = false; - vector ls; - - if (ondisklog.head > 0) { - // read - bufferlist bl; - store->read(coll_t::META_COLL, log_oid, ondisklog.tail, ondisklog.length(), bl); - if (bl.length() < ondisklog.length()) { - std::ostringstream oss; - oss << "read_log got " << bl.length() << " bytes, expected " - << ondisklog.head << "-" << ondisklog.tail << "=" - << ondisklog.length(); - throw read_log_error(oss.str().c_str()); - } - - pg_log_entry_t e; - bufferlist::iterator p = bl.begin(); - assert(log.empty()); - eversion_t last; - bool reorder = false; - while (!p.end()) { - uint64_t pos = ondisklog.tail + p.get_off(); - if (ondisklog.has_checksums) { - bufferlist ebl; - ::decode(ebl, p); - __u32 crc; - ::decode(crc, p); - - __u32 got = ebl.crc32c(0); - if (crc == got) { - bufferlist::iterator q = ebl.begin(); - ::decode(e, q); - } else { - std::ostringstream oss; - oss << "read_log " << pos << " bad crc got " << got << " expected" << crc; - throw read_log_error(oss.str().c_str()); - } - } else { - ::decode(e, p); - } - dout(20) << "read_log " << pos << " " << e << dendl; - - // [repair] in order? - if (e.version < last) { - dout(0) << "read_log " << pos << " out of order entry " << e << " follows " << last << dendl; - osd->clog.error() << info.pgid << " log has out of order entry " - << e << " following " << last << "\n"; - reorder = true; - } - - if (e.version <= log.tail) { - dout(20) << "read_log ignoring entry at " << pos << " below log.tail" << dendl; - continue; - } - if (last.version == e.version.version) { - dout(0) << "read_log got dup " << e.version << " (last was " << last << ", dropping that one)" << dendl; - log.log.pop_back(); - osd->clog.error() << info.pgid << " read_log got dup " - << e.version << " after " << last << "\n"; - } - - if (e.invalid_hash) { - // We need to find the object in the store to get the hash - if (!listed_collection) { - store->collection_list(coll, ls); - listed_collection = true; - } - bool found = false; - for (vector::iterator i = ls.begin(); - i != ls.end(); - ++i) { - if (i->oid == e.soid.oid && i->snap == e.soid.snap) { - e.soid = *i; - found = true; - break; - } - } - if (!found) { - // Didn't find the correct hash - std::ostringstream oss; - oss << "Could not find hash for hoid " << e.soid << std::endl; - throw read_log_error(oss.str().c_str()); - } - } - - if (e.invalid_pool) { - e.soid.pool = info.pgid.pool(); - } - - e.offset = pos; - uint64_t endpos = ondisklog.tail + p.get_off(); - log.log.push_back(e); - last = e.version; - - // [repair] at end of log? - if (!p.end() && e.version == info.last_update) { - osd->clog.error() << info.pgid << " log has extra data at " - << endpos << "~" << (ondisklog.head-endpos) << " after " - << info.last_update << "\n"; - - dout(0) << "read_log " << endpos << " *** extra gunk at end of log, " - << "adjusting ondisklog.head" << dendl; - ondisklog.head = endpos; - break; - } - } - - if (reorder) { - dout(0) << "read_log reordering log" << dendl; - map m; - for (list::iterator p = log.log.begin(); p != log.log.end(); p++) - m[p->version] = *p; - log.log.clear(); - for (map::iterator p = m.begin(); p != m.end(); p++) - log.log.push_back(p->second); - } - } - - log.head = info.last_update; - log.index(); - - // build missing - if (info.last_complete < info.last_update) { - dout(10) << "read_log checking for missing items over interval (" << info.last_complete - << "," << info.last_update << "]" << dendl; - - set did; - for (list::reverse_iterator i = log.log.rbegin(); - i != log.log.rend(); - i++) { - if (i->version <= info.last_complete) break; - if (did.count(i->soid)) continue; - did.insert(i->soid); - - if (i->is_delete()) continue; - - bufferlist bv; - int r = osd->store->getattr(coll, i->soid, OI_ATTR, bv); - if (r >= 0) { - object_info_t oi(bv); - if (oi.version < i->version) { - dout(15) << "read_log missing " << *i << " (have " << oi.version << ")" << dendl; - missing.add(i->soid, i->version, oi.version); - } - } else { - dout(15) << "read_log missing " << *i << dendl; - missing.add(i->soid, i->version, eversion_t()); - } - } - for (map::reverse_iterator i = - ondisklog.divergent_priors.rbegin(); - i != ondisklog.divergent_priors.rend(); - ++i) { - if (i->first <= info.last_complete) break; - if (did.count(i->second)) continue; - did.insert(i->second); - bufferlist bv; - int r = osd->store->getattr(coll, i->second, OI_ATTR, bv); - if (r >= 0) { - object_info_t oi(bv); - /** - * 1) we see this entry in the divergent priors mapping - * 2) we didn't see an entry for this object in the log - * - * From 1 & 2 we know that either the object does not exist - * or it is at the version specified in the divergent_priors - * map since the object would have been deleted atomically - * with the addition of the divergent_priors entry, an older - * version would not have been recovered, and a newer version - * would show up in the log above. - */ - assert(oi.version == i->first); - } else { - dout(15) << "read_log missing " << *i << dendl; - missing.add(i->second, i->first, eversion_t()); - } - } - } - dout(10) << "read_log done" << dendl; -} - bool PG::check_log_for_corruption(ObjectStore *store) { OndiskLog bounds; @@ -5221,6 +5024,203 @@ std::ostream& operator<<(std::ostream& oss, return oss; } +void PG::read_log(ObjectStore *store) +{ + // load bounds + ondisklog.tail = ondisklog.head = 0; + + bufferlist blb; + store->collection_getattr(coll, "ondisklog", blb); + bufferlist::iterator p = blb.begin(); + ::decode(ondisklog, p); + + dout(10) << "read_log " << ondisklog.tail << "~" << ondisklog.length() << dendl; + + log.tail = info.log_tail; + + // In case of sobject_t based encoding, may need to list objects in the store + // to find hashes + bool listed_collection = false; + vector ls; + + if (ondisklog.head > 0) { + // read + bufferlist bl; + store->read(coll_t::META_COLL, log_oid, ondisklog.tail, ondisklog.length(), bl); + if (bl.length() < ondisklog.length()) { + std::ostringstream oss; + oss << "read_log got " << bl.length() << " bytes, expected " + << ondisklog.head << "-" << ondisklog.tail << "=" + << ondisklog.length(); + throw read_log_error(oss.str().c_str()); + } + + pg_log_entry_t e; + bufferlist::iterator p = bl.begin(); + assert(log.empty()); + eversion_t last; + bool reorder = false; + while (!p.end()) { + uint64_t pos = ondisklog.tail + p.get_off(); + if (ondisklog.has_checksums) { + bufferlist ebl; + ::decode(ebl, p); + __u32 crc; + ::decode(crc, p); + + __u32 got = ebl.crc32c(0); + if (crc == got) { + bufferlist::iterator q = ebl.begin(); + ::decode(e, q); + } else { + std::ostringstream oss; + oss << "read_log " << pos << " bad crc got " << got << " expected" << crc; + throw read_log_error(oss.str().c_str()); + } + } else { + ::decode(e, p); + } + dout(20) << "read_log " << pos << " " << e << dendl; + + // [repair] in order? + if (e.version < last) { + dout(0) << "read_log " << pos << " out of order entry " << e << " follows " << last << dendl; + osd->clog.error() << info.pgid << " log has out of order entry " + << e << " following " << last << "\n"; + reorder = true; + } + + if (e.version <= log.tail) { + dout(20) << "read_log ignoring entry at " << pos << " below log.tail" << dendl; + continue; + } + if (last.version == e.version.version) { + dout(0) << "read_log got dup " << e.version << " (last was " << last << ", dropping that one)" << dendl; + log.log.pop_back(); + osd->clog.error() << info.pgid << " read_log got dup " + << e.version << " after " << last << "\n"; + } + + if (e.invalid_hash) { + // We need to find the object in the store to get the hash + if (!listed_collection) { + store->collection_list(coll, ls); + listed_collection = true; + } + bool found = false; + for (vector::iterator i = ls.begin(); + i != ls.end(); + ++i) { + if (i->oid == e.soid.oid && i->snap == e.soid.snap) { + e.soid = *i; + found = true; + break; + } + } + if (!found) { + // Didn't find the correct hash + std::ostringstream oss; + oss << "Could not find hash for hoid " << e.soid << std::endl; + throw read_log_error(oss.str().c_str()); + } + } + + if (e.invalid_pool) { + e.soid.pool = info.pgid.pool(); + } + + e.offset = pos; + uint64_t endpos = ondisklog.tail + p.get_off(); + log.log.push_back(e); + last = e.version; + + // [repair] at end of log? + if (!p.end() && e.version == info.last_update) { + osd->clog.error() << info.pgid << " log has extra data at " + << endpos << "~" << (ondisklog.head-endpos) << " after " + << info.last_update << "\n"; + + dout(0) << "read_log " << endpos << " *** extra gunk at end of log, " + << "adjusting ondisklog.head" << dendl; + ondisklog.head = endpos; + break; + } + } + + if (reorder) { + dout(0) << "read_log reordering log" << dendl; + map m; + for (list::iterator p = log.log.begin(); p != log.log.end(); p++) + m[p->version] = *p; + log.log.clear(); + for (map::iterator p = m.begin(); p != m.end(); p++) + log.log.push_back(p->second); + } + } + + log.head = info.last_update; + log.index(); + + // build missing + if (info.last_complete < info.last_update) { + dout(10) << "read_log checking for missing items over interval (" << info.last_complete + << "," << info.last_update << "]" << dendl; + + set did; + for (list::reverse_iterator i = log.log.rbegin(); + i != log.log.rend(); + i++) { + if (i->version <= info.last_complete) break; + if (did.count(i->soid)) continue; + did.insert(i->soid); + + if (i->is_delete()) continue; + + bufferlist bv; + int r = osd->store->getattr(coll, i->soid, OI_ATTR, bv); + if (r >= 0) { + object_info_t oi(bv); + if (oi.version < i->version) { + dout(15) << "read_log missing " << *i << " (have " << oi.version << ")" << dendl; + missing.add(i->soid, i->version, oi.version); + } + } else { + dout(15) << "read_log missing " << *i << dendl; + missing.add(i->soid, i->version, eversion_t()); + } + } + for (map::reverse_iterator i = + ondisklog.divergent_priors.rbegin(); + i != ondisklog.divergent_priors.rend(); + ++i) { + if (i->first <= info.last_complete) break; + if (did.count(i->second)) continue; + did.insert(i->second); + bufferlist bv; + int r = osd->store->getattr(coll, i->second, OI_ATTR, bv); + if (r >= 0) { + object_info_t oi(bv); + /** + * 1) we see this entry in the divergent priors mapping + * 2) we didn't see an entry for this object in the log + * + * From 1 & 2 we know that either the object does not exist + * or it is at the version specified in the divergent_priors + * map since the object would have been deleted atomically + * with the addition of the divergent_priors entry, an older + * version would not have been recovered, and a newer version + * would show up in the log above. + */ + assert(oi.version == i->first); + } else { + dout(15) << "read_log missing " << *i << dendl; + missing.add(i->second, i->first, eversion_t()); + } + } + } + dout(10) << "read_log done" << dendl; +} + /*------------ Recovery State Machine----------------*/ #undef dout_prefix #define dout_prefix (*_dout << context< RecoveryMachine >().pg->gen_prefix() \