Among other things, a stray/bad REPLAY flag can trigger a false activation
from the replay queue check, which then fails an assertion like so:
osd/PG.cc: In function 'void PG::activate(ObjectStore::Transaction&, std::list<Context*, std::allocator<Context*> >&, std::map<int, MOSDPGInfo*, std::less<int>, std::allocator<std::pair<const int, MOSDPGInfo*> > >*)':
osd/PG.cc:1641: FAILED assert(peer_info.count(peer))
1: (PG::activate(ObjectStore::Transaction&, std::list<Context*, std::allocator<Context*> >&, std::map<int, MOSDPGInfo*, std::less<int>, std::allocator<std::pair<int const, MOSDPGInfo*> > >*)+0x817) [0x75e637]
2: (OSD::activate_pg(pg_t, utime_t)+0x136) [0x6bfe86]
3: (OSD::check_replay_queue()+0x134) [0x6c0074]
4: (OSD::tick()+0x1db) [0x6d96eb]
5: (OSD::C_Tick::finish(int)+0x1c) [0x74844e]
6: (SafeTimer::EventWrapper::finish(int)+0x6d) [0x7fa2cf]
7: (Timer::timer_entry()+0x454) [0x7fb52a]
8: (Timer::TimerThread::entry()+0x19) [0x647c09]
9: (Thread::_entry_func(void*)+0x20) [0x659f18]
10: /lib/libpthread.so.0 [0x7fceb73d473a]
11: (clone()+0x6d) [0x7fceb65fe69d]
pg->state_clear(PG_STATE_DOWN);
pg->state_clear(PG_STATE_PEERING); // we'll need to restart peering
pg->state_clear(PG_STATE_DEGRADED);
+ pg->state_clear(PG_STATE_REPLAY);
if (pg->is_primary()) {
if (osdmap->get_pg_size(pg->info.pgid) != pg->acting.size())
if (role == 0) {
// i am (still) primary. but my replica set changed.
pg->state_clear(PG_STATE_CLEAN);
- pg->state_clear(PG_STATE_REPLAY);
dout(10) << *pg << " " << oldacting << " -> " << pg->acting
<< ", replicas changed" << dendl;
if (lost)
out << " l=" << lost;
}
- if (pg.info.snap_trimq.size())
- out << " snaptrimq=" << pg.info.snap_trimq;
+ //if (pg.info.snap_trimq.size())
+ //out << " snaptrimq=" << pg.info.snap_trimq;
if (pg.deleting)
out << " DELETING";