From 4d6a6bba70fd3cb17003db60d8ce9849ac573810 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 7 Jul 2008 10:21:47 -0700 Subject: [PATCH] mds: cache snap vectors, adjust snap_highwater meaning a bit --- src/client/Client.cc | 66 ++++++++++++++++---------------------- src/client/Client.h | 5 +-- src/mds/MDCache.cc | 9 +++--- src/mds/Migrator.cc | 4 +-- src/mds/Server.cc | 48 +++++++++++++++++---------- src/mds/snap.cc | 34 +++++++++++++------- src/mds/snap.h | 14 ++++---- src/messages/MClientSnap.h | 34 ++++++++------------ 8 files changed, 112 insertions(+), 102 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 2b1abab97981c..5d6dba39542f5 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -1580,49 +1580,39 @@ void Client::handle_snap(MClientSnap *m) { dout(10) << "handle_snap " << *m << dendl; - SnapRealm *realm = get_snap_realm(m->realm); - - switch (m->op) { - case CEPH_SNAP_OP_UPDATE: - maybe_update_snaprealm(realm, m->snap_created, m->snap_highwater, m->snaps); - break; - - case CEPH_SNAP_OP_SPLIT: - { - /* - * fixme: this blindly moves the inode from whatever its prior - * realm is. this needs to somehow be safe from races from - * multiple mds's... - */ - for (list::iterator p = m->split_inos.begin(); - p != m->split_inos.end(); - p++) { - if (inode_map.count(*p)) { - Inode *in = inode_map[*p]; - if (in->snaprealm) { - if (in->snaprealm->created > m->snap_created) { - dout(10) << " NOT moving " << *in << " from _newer_ realm " - << *in->snaprealm << dendl; - continue; - } - put_snap_realm(in->snaprealm); - } - dout(10) << " moving " << *in << " from old realm " << m->split_parent << dendl; - in->snaprealm = realm; - realm->nref++; + // split? + if (m->split) { + SnapRealm *realm = get_snap_realm(m->split); + realm->created = m->snap_created; + dout(10) << " splitting off " << *realm << dendl; + for (list::iterator p = m->split_inos.begin(); + p != m->split_inos.end(); + p++) { + if (inode_map.count(*p)) { + Inode *in = inode_map[*p]; + if (!in->snaprealm) + continue; + if (in->snaprealm->created > m->snap_created) { + dout(10) << " NOT moving " << *in << " from _newer_ realm " + << *in->snaprealm << dendl; + continue; } + dout(10) << " moving " << *in << " from " << *in->snaprealm << dendl; + put_snap_realm(in->snaprealm); + in->snaprealm = realm; + realm->nref++; } - // update it too - maybe_update_snaprealm(realm, m->snap_created, m->snap_highwater, m->snaps); } - break; - - default: - assert(0); + put_snap_realm(realm); } - // done. - put_snap_realm(realm); + for (map >::iterator p = m->realms.begin(); + p != m->realms.end(); + p++) { + SnapRealm *realm = get_snap_realm(p->first); + maybe_update_snaprealm(realm, 0, m->snap_highwater, p->second); + put_snap_realm(realm); + } delete m; } diff --git a/src/client/Client.h b/src/client/Client.h index 3390812bccb24..bd9da386466c9 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -138,8 +138,9 @@ struct SnapRealm { dirino(i), nref(0), created(0), highwater(0) { } bool maybe_update(snapid_t c, snapid_t sh, vector &s) { - created = c; - if (sh > highwater) { + if (c) + created = c; + if (sh >= highwater) { highwater = sh; snaps = s; return true; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 3d7c0d1304f1a..506b9e0bdd890 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -2797,9 +2797,9 @@ void MDCache::rejoin_import_cap(CInode *in, int client, inode_caps_reconnect_t& cap->pending(), cap->wanted(), cap->get_mseq()); - realm->get_snap_vector(reap->get_snaps()); + reap->get_snaps() = *realm->get_snap_vector(); reap->set_snap_created(realm->created); - reap->set_snap_highwater(realm->highwater); + reap->set_snap_highwater(realm->snap_highwater); mds->messenger->send_message(reap, session->inst); } @@ -2970,14 +2970,13 @@ void MDCache::do_file_recover() CInode *in = *file_recover_queue.begin(); file_recover_queue.erase(in); - vector snaps; - in->find_containing_snaprealm()->get_snap_vector(snaps); + vector *snaps = in->find_containing_snaprealm()->get_snap_vector(); if (in->inode.max_size > in->inode.size) { dout(10) << "do_file_recover starting " << in->inode.size << "/" << in->inode.max_size << " " << *in << dendl; file_recovering.insert(in); - mds->filer->probe(in->inode.ino, &in->inode.layout, CEPH_NOSNAP, snaps, + mds->filer->probe(in->inode.ino, &in->inode.layout, CEPH_NOSNAP, *snaps, in->inode.max_size, &in->inode.size, false, 0, new C_MDC_Recover(this, in)); } else { diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 53e8543e035fc..1e5d159c991da 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -2061,9 +2061,9 @@ void Migrator::finish_import_inode_caps(CInode *in, int from, cap->pending(), cap->wanted(), cap->get_mseq()); - realm->get_snap_vector(caps->get_snaps()); + caps->get_snaps() = *realm->get_snap_vector(); caps->set_snap_created(realm->created); - caps->set_snap_highwater(realm->highwater); + caps->set_snap_highwater(realm->snap_highwater); mds->send_message_client(caps, session->inst); } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 0de2fa6d269fe..23b91844f51f7 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4434,8 +4434,8 @@ void Server::_do_open(MDRequest *mdr, CInode *cur) reply->set_file_caps_mseq(cap->get_mseq()); SnapRealm *realm = cur->find_containing_snaprealm(); - realm->get_snap_vector(reply->get_snaps()); - reply->set_snap_info(realm->inode->ino(), realm->created, realm->highwater); + reply->get_snaps() = *realm->get_snap_vector(); + reply->set_snap_info(realm->inode->ino(), realm->created, realm->snap_highwater); dout(10) << " snaprealm is " << *realm << " snaps=" << reply->get_snaps() << " on " << *realm->inode << dendl; //reply->set_file_data_version(fdv); @@ -4726,40 +4726,48 @@ void Server::handle_client_mksnap(MDRequest *mdr) diri->snaprealm->snaps[snapid] = info; dout(10) << "snaprealm now " << *diri->snaprealm << dendl; - // build new snaps list - vector snaps; - diri->snaprealm->get_snap_vector(snaps); - // notify clients of update|split list split_inos; if (split_parent) for (xlist::iterator p = diri->snaprealm->inodes_with_caps.begin(); !p.end(); ++p) split_inos.push_back((*p)->ino()); + list realms; + map updates; list q; q.push_back(diri->snaprealm); while (!q.empty()) { SnapRealm *realm = q.front(); q.pop_front(); - dout(10) << " updating caps under realm " << *realm + // build new snaps list + vector *snaps; + snaps = diri->snaprealm->update_snap_vector(snapid); + + dout(10) << " realm " << *realm + << " snaps " << *snaps << " on " << *realm->inode << dendl; for (map >::iterator p = realm->client_caps.begin(); p != realm->client_caps.end(); p++) { assert(!p->second.empty()); - - MClientSnap *update = new MClientSnap(split_parent ? CEPH_SNAP_OP_SPLIT:CEPH_SNAP_OP_UPDATE, - realm->inode->ino()); - update->snaps = snaps; - update->snap_created = diri->snaprealm->created; - update->snap_highwater = diri->snaprealm->highwater; - update->split_parent = split_parent; - update->split_inos = split_inos; - mds->send_message_client(update, p->first); + MClientSnap *update; + update = updates[p->first]; + if (!update) { + update = new MClientSnap; + update->snap_highwater = snapid; + if (split_parent) { + update->snap_created = diri->snaprealm->created; + update->split = snapid; + update->split_inos = split_inos; + split_parent = 0; + } + updates[p->first] = update; + } + update->realms[realm->inode->ino()] = *snaps; } - + // notify for active children, too. dout(10) << " " << realm << " open_children are " << realm->open_children << dendl; for (set::iterator p = realm->open_children.begin(); @@ -4768,6 +4776,12 @@ void Server::handle_client_mksnap(MDRequest *mdr) q.push_back(*p); } + // send + for (map::iterator p = updates.begin(); + p != updates.end(); + p++) + mds->send_message_client(p->second, p->first); + // yay reply_request(mdr, 0, diri); } diff --git a/src/mds/snap.cc b/src/mds/snap.cc index 01888f03bdbbe..ffc8fcf71bca1 100644 --- a/src/mds/snap.cc +++ b/src/mds/snap.cc @@ -69,27 +69,39 @@ void SnapRealm::get_snap_set(set &s, snapid_t first, snapid_t last) MAX(first, p->second.first), MIN(last, p->first)); } - - if (!s.empty()) { - snapid_t t = *s.rbegin(); - if (highwater < t) - highwater = t; - } } /* * build vector in reverse sorted order */ -void SnapRealm::get_snap_vector(vector &v) +vector *SnapRealm::get_snap_vector() { + if (!cached_snaps.size()) { + dout(10) << "get_snap_vector " << cached_snaps << " (cached)" << dendl; + return &cached_snaps; + } + set s; - get_snap_set(s); - v.resize(s.size()); + get_snap_set(s, 0, CEPH_NOSNAP); + cached_snaps.resize(s.size()); int i = 0; for (set::reverse_iterator p = s.rbegin(); p != s.rend(); p++) - v[i++] = *p; + cached_snaps[i++] = *p; - dout(10) << "get_snap_vector " << v << " (highwater " << highwater << ")" << dendl; + dout(10) << "get_snap_vector " << cached_snaps << dendl; + //" (highwater " << cached_snaps_stamp << ")" << dendl; + return &cached_snaps; +} + +vector *SnapRealm::update_snap_vector(snapid_t creating) +{ + if (cached_snaps.empty() && !snap_highwater) { + snap_highwater = creating; + return get_snap_vector(); + } + snap_highwater = creating; + cached_snaps.push_back(creating); + return &cached_snaps; } diff --git a/src/mds/snap.h b/src/mds/snap.h index ef12bb14c1c5b..3f06c7ecf0d43 100644 --- a/src/mds/snap.h +++ b/src/mds/snap.h @@ -102,25 +102,27 @@ struct SnapRealm { MDCache *mdcache; CInode *inode; - snapid_t highwater; // largest snap this realm has exposed to clients (implicitly or explicitly) - // caches? SnapRealm *open_parent; set open_children; // active children that are currently open - //set cached_snaps; + vector cached_snaps; + snapid_t snap_highwater; xlist inodes_with_caps; // for efficient realm splits map > client_caps; // to identify clients who need snap notifications SnapRealm(MDCache *c, CInode *in) : created(0), - mdcache(c), inode(in), highwater(0), - open_parent(0) {} + mdcache(c), inode(in), + open_parent(0), + snap_highwater(0) + { } bool open_parents(MDRequest *mdr); void get_snap_set(set& s, snapid_t first=0, snapid_t last=CEPH_NOSNAP); - void get_snap_vector(vector& s); + vector *get_snap_vector(); + vector *update_snap_vector(snapid_t adding=0); void split_at(SnapRealm *child); diff --git a/src/messages/MClientSnap.h b/src/messages/MClientSnap.h index bf8f8628e5a97..64971f742ee94 100644 --- a/src/messages/MClientSnap.h +++ b/src/messages/MClientSnap.h @@ -27,47 +27,39 @@ struct MClientSnap : public Message { } } - __u32 op; - inodeno_t realm; + map > realms; // new snap state snapid_t snap_created, snap_highwater; - vector snaps; // (for split only) - inodeno_t split_parent; + inodeno_t split; list split_inos; - MClientSnap() : Message(CEPH_MSG_CLIENT_SNAP) {} - MClientSnap(int o, inodeno_t r) : + MClientSnap() : Message(CEPH_MSG_CLIENT_SNAP), - op(o), realm(r), - split_parent(0) {} + snap_created(0), snap_highwater(0), + split(0) {} const char *get_type_name() { return "Csnap"; } void print(ostream& out) { - out << "client_snap(" << get_opname(op) << " " << realm - << " " << snaps; - if (split_parent) - out << " split_parent=" << split_parent; - out << ")"; + out << "client_snap("; + if (split) + out << "split=" << split << " "; + out << realms << ")"; } void encode_payload() { - ::encode(op, payload); - ::encode(realm, payload); + ::encode(realms, payload); ::encode(snap_highwater, payload); - ::encode(snaps, payload); - ::encode(split_parent, payload); + ::encode(split, payload); ::encode(split_inos, payload); } void decode_payload() { bufferlist::iterator p = payload.begin(); - ::decode(op, p); - ::decode(realm, p); + ::decode(realms, p); ::decode(snap_highwater, p); - ::decode(snaps, p); - ::decode(split_parent, p); + ::decode(split, p); ::decode(split_inos, p); assert(p.end()); } -- 2.39.5