*
*/
-
#ifndef __MOSDPGTRIM_H
#define __MOSDPGTRIM_H
goto out;
}
assert(pg);
- assert(from == pg->acting[0]);
- ObjectStore::Transaction t;
- pg->trim(t, m->trim_to);
- pg->write_info(t);
+ if (pg->is_primary()) {
+ // peer is informing us of their last_complete_ondisk
+ dout(10) << *pg << " replica osd" << from << " lcod " << m->trim_to << dendl;
+ pg->peer_last_complete_ondisk[from] = m->trim_to;
+ if (pg->calc_min_last_complete_ondisk()) {
+ dout(10) << *pg << " min lcod now " << pg->min_last_complete_ondisk << dendl;
+ pg->trim_peers();
+ }
+ } else {
+ // primary is instructing us to trim
+ ObjectStore::Transaction t;
+ pg->trim(t, m->trim_to);
+ pg->write_info(t);
+ store->apply_transaction(t);
+ }
pg->unlock();
-
- store->apply_transaction(t);
}
out:
#include "messages/MOSDPGLog.h"
#include "messages/MOSDPGRemove.h"
#include "messages/MOSDPGInfo.h"
+#include "messages/MOSDPGTrim.h"
#include "messages/MOSDSubOp.h"
#include "messages/MOSDSubOpReply.h"
t.zero(0, log_oid, 0, ondisklog.bottom & ~4095);
}
+void PG::trim_peers()  // Send an MOSDPGTrim carrying min_last_complete_ondisk to every acting-set member except index 0.
+{
+ dout(10) << "trim_peers" << dendl;
+ for (unsigned i=1; i<acting.size(); i++)  // starts at 1: acting[0] is skipped (presumably the primary, i.e. this OSD -- confirm against callers)
+ osd->messenger->send_message(new MOSDPGTrim(osd->osdmap->get_epoch(), info.pgid, min_last_complete_ondisk),  // message is stamped with the current osdmap epoch and this PG's id
+ osd->osdmap->get_inst(acting[i]));  // destination instance is looked up in the current osdmap
+}
+
void PG::add_log_entry(Log::Entry& e, bufferlist& log_bl)
{
void trim(ObjectStore::Transaction &t, eversion_t s);
void trim_write_ahead(eversion_t last_update);
+
ostream& print(ostream& out) const;
};
if (a < min)
min = a;
}
+ if (min == min_last_complete_ondisk)
+ return false;
min_last_complete_ondisk = min;
return true;
}
void read_log(ObjectStore *store);
void trim(ObjectStore::Transaction& t, eversion_t v);
void trim_ondisklog_to(ObjectStore::Transaction& t, eversion_t v);
+ void trim_peers();
void read_state(ObjectStore *store);
coll_t make_snap_collection(ObjectStore::Transaction& t, snapid_t sn);
if (same_since == info.history.same_since) {
dout(10) << "_committed last_complete " << last_complete << " now ondisk" << dendl;
last_complete_ondisk = last_complete;
+
+ if (last_complete_ondisk == info.last_update) {
+ if (is_replica()) {
+ // we are fully up to date. tell the primary!
+ osd->messenger->send_message(new MOSDPGTrim(osd->osdmap->get_epoch(), info.pgid,
+ last_complete_ondisk),
+ osd->osdmap->get_inst(get_primary()));
+ } else if (is_primary()) {
+ // we are the primary. tell replicas to trim?
+ if (calc_min_last_complete_ondisk())
+ trim_peers();
+ }
+ }
+
} else {
dout(10) << "_committed pg has changed, not touching last_complete_ondisk" << dendl;
}
if (is_all_uptodate()) {
dout(-7) << "recover_primary complete" << dendl;
finish_recovery();
- trim_replicas();
} else {
dout(-10) << "recover_primary primary now complete, starting peer recovery" << dendl;
}
if (is_all_uptodate()) {
finish_recovery();
- trim_replicas();
} else {
dout(10) << "recover_replicas not all uptodate, acting " << acting << ", uptodate " << uptodate_set << dendl;
}
}
-void ReplicatedPG::trim_replicas()
-{
- dout(10) << "trim_replicas" << dendl;
-
- return; // hmm FIXME
-
-
- // trim myself
- eversion_t trim_to;
-
- for (unsigned i=1; i<acting.size(); i++)
- osd->messenger->send_message(new MOSDPGTrim(osd->osdmap->get_epoch(), info.pgid, trim_to),
- osd->osdmap->get_inst(acting[i]));
-}
-
/** clean_up_local
* remove any objects that we're storing but shouldn't.
void finish_recovery_op();
int recover_primary(int max);
int recover_replicas(int max);
- void trim_replicas();
void sub_op_modify(MOSDSubOp *op);
void sub_op_modify_ondisk(MOSDSubOp *op, int ackerosd, eversion_t last_complete);