From: Sage Weil
Date: Wed, 5 Jun 2013 05:42:52 +0000 (-0700)
Subject: osd: do not use temp_coll for single-step pushes
X-Git-Tag: v0.67-rc1~138^2~1^2~14
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=194e858c8f760cbdca2eac22dfc66d186cd7cb81;p=ceph.git

osd: do not use temp_coll for single-step pushes

If we are recovering an object in a single step, there is no need to write
it to temp and then move it. Avoiding that is a very good thing when the
FileStore has to do an fsync() for non-btrfs fs's.

Signed-off-by: Sage Weil

Conflicts: src/osd/ReplicatedPG.cc
---
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index a8b40b4b5cbd..3a664a94cbd1 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -5390,6 +5390,7 @@ int ReplicatedPG::send_pull(int prio, int peer, void ReplicatedPG::submit_push_data( const ObjectRecoveryInfo &recovery_info, bool first, + bool complete, const interval_set &intervals_included, bufferlist data_included, bufferlist omap_header, @@ -5397,12 +5398,19 @@ void ReplicatedPG::submit_push_data( map &omap_entries, ObjectStore::Transaction *t) { + coll_t target_coll; + if (first && complete) + target_coll = coll; + else + target_coll = get_temp_coll(t); + if (first) { pg_log.revise_have(recovery_info.soid, eversion_t()); remove_snap_mapped_object(*t, recovery_info.soid); + t->remove(coll, recovery_info.soid); t->remove(get_temp_coll(t), recovery_info.soid); - t->touch(get_temp_coll(t), recovery_info.soid); - t->omap_setheader(get_temp_coll(t), recovery_info.soid, omap_header); + t->touch(target_coll, recovery_info.soid); + t->omap_setheader(target_coll, recovery_info.soid, omap_header); } uint64_t off = 0; for (interval_set::const_iterator p = intervals_included.begin(); @@ -5410,21 +5418,27 @@ void ReplicatedPG::submit_push_data( ++p) { bufferlist bit; bit.substr_of(data_included, off, p.get_len()); - t->write(get_temp_coll(t), recovery_info.soid, + t->write(target_coll, 
recovery_info.soid, p.get_start(), p.get_len(), bit); off += p.get_len(); } - t->omap_setkeys(get_temp_coll(t), recovery_info.soid, + t->omap_setkeys(target_coll, recovery_info.soid, omap_entries); - t->setattrs(get_temp_coll(t), recovery_info.soid, + t->setattrs(target_coll, recovery_info.soid, attrs); + + if (complete) { + if (!first) + t->collection_move(coll, target_coll, recovery_info.soid); + + submit_push_complete(recovery_info, t); + } } void ReplicatedPG::submit_push_complete(ObjectRecoveryInfo &recovery_info, ObjectStore::Transaction *t) { - t->collection_move(coll, get_temp_coll(t), recovery_info.soid); for (map >::const_iterator p = recovery_info.clone_subset.begin(); p != recovery_info.clone_subset.end(); @@ -5574,6 +5588,7 @@ void ReplicatedPG::handle_pull_response(OpRequestRef op) Context *onreadable_sync = 0; Context *oncomplete = 0; submit_push_data(pi.recovery_info, first, + complete, data_included, data, m->omap_header, m->attrset, @@ -5583,7 +5598,6 @@ void ReplicatedPG::handle_pull_response(OpRequestRef op) info.stats.stats.sum.num_keys_recovered += m->omap_entries.size(); if (complete) { - submit_push_complete(pi.recovery_info, t); info.stats.stats.sum.num_objects_recovered++; SnapSetContext *ssc; @@ -5671,15 +5685,13 @@ void ReplicatedPG::handle_push(OpRequestRef op) Context *onreadable_sync = 0; submit_push_data(m->recovery_info, first, + complete, m->data_included, data, m->omap_header, m->attrset, m->omap_entries, t); - if (complete) - submit_push_complete(m->recovery_info, - t); int r = osd->store-> queue_transaction( diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index d38fdcd51f2d..099f8b5d93b6 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -566,6 +566,7 @@ protected: ObjectRecoveryProgress progress); void submit_push_data(const ObjectRecoveryInfo &recovery_info, bool first, + bool complete, const interval_set &intervals_included, bufferlist data_included, bufferlist omap_header,