From 597f69d9538eb1b373611feaf5e96f0ec4c92605 Mon Sep 17 00:00:00 2001 From: Matt Benjamin Date: Thu, 24 Dec 2015 12:45:16 -0500 Subject: [PATCH] librgw: dir traversal state GC added To minimize overhead, traversal events are modeled as a linear sequence of (fh_key, timestamp) tuples. GC operates out-of-line in FIFO order (oldest event first), and takes and then releases a reference on each directory handle it visits. If the matching directory object can't be found by handle, it's already gone, and nothing happens. To avoid lock contention, scanning of the event sequence is done in groups of 24 by default, but in groups of one quarter of the sequence length once the event sequence reaches 500 or more events. Push READDIR events on successful component readdir (bucket or object listing partial result, as appropriate). Revise locking, and update the timestamp and link count in all cases. Link counts are approximate (the metadata needed to represent them is not stored). Signed-off-by: Matt Benjamin --- src/rgw/rgw_file.cc | 77 ++++++++++++++++++++++++++++++++++++++++----- src/rgw/rgw_file.h | 44 ++++++++++++++++++++++---- 2 files changed, 108 insertions(+), 13 deletions(-) diff --git a/src/rgw/rgw_file.cc b/src/rgw/rgw_file.cc index c4aa8b646afaa..8fcefd0074141 100644 --- a/src/rgw/rgw_file.cc +++ b/src/rgw/rgw_file.cc @@ -122,7 +122,7 @@ namespace rgw { void RGWLibFS::close() { - flags |= FLAG_CLOSED; + state.flags |= FLAG_CLOSED; class ObjUnref { @@ -148,6 +148,60 @@ namespace rgw { void RGWLibFS::gc() { + using std::get; + using directory = RGWFileHandle::directory; + + static constexpr uint32_t max_ev = 24; + static constexpr uint16_t expire_s = 300; /* 5m */ + + struct timespec now; + event_vector ve; + bool stop = false; + std::deque &events = state.events; + (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); + + do { + { + lock_guard guard(state.mtx); /* LOCKED */ + uint32_t _max_ev = + (events.size() < 500) ? 
max_ev : (events.size() / 4); + for (uint32_t ix = 0; (ix < _max_ev) && (events.size() > 0); ++ix) { + event& ev = events.front(); + if (ev.ts.tv_sec < (now.tv_sec + expire_s)) { + stop = true; + break; + } + ve.push_back(ev); + events.pop_front(); + } + } /* anon */ + /* !LOCKED */ + for (auto& ev : ve) { + if (likely(ev.t == event::type::READDIR)) { + RGWFileHandle* rgw_fh = lookup_handle(ev.fhk.fh_hk); + if (rgw_fh) { + RGWFileHandle::directory* d; + if (unlikely(! rgw_fh->is_dir())) { + lsubdout(get_context(), rgw, 0) + << __func__ + << " BUG non-directory found with READDIR event " + << "(" << rgw_fh->bucket_name() << "," + << rgw_fh->object_name() << ")" + << dendl; + goto rele; + } + /* clear state */ + d = get(&rgw_fh->variant_type); + if (d) { + lock_guard guard(rgw_fh->mtx); + d->clear_state(); + } + rele: + unref(rgw_fh); + } /* rgw_fh */ + } /* event::type::READDIR */ + } /* ev */ + } while (! stop); } /* RGWLibFS::gc */ bool RGWFileHandle::reclaim() { @@ -158,27 +212,36 @@ namespace rgw { int RGWFileHandle::readdir(rgw_readdir_cb rcb, void *cb_arg, uint64_t *offset, bool *eof) { + using event = RGWLibFS::event; int rc = 0; + struct timespec now; CephContext* cct = fs->get_context(); + directory* d = parent->get_directory(); /* already type-checked */ + + (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */ + + // XXXX finish marker handling if (is_root()) { RGWListBucketsRequest req(cct, fs->get_user(), this, rcb, cb_arg, offset); rc = librgw.get_fe()->execute_req(&req); if (! rc) { - lock_guard guard(mtx); - (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &state.atime); + parent->set_nlink(3 + d->name_cache.size()); + state.atime = now; *eof = req.eof(); + event ev(event::type::READDIR, get_key(), state.atime); + fs->state.push_event(ev); } } else { - // XXX finish marker handling rgw_obj_key marker{"", ""}; RGWReaddirRequest req(cct, fs->get_user(), this, rcb, cb_arg, offset); rc = librgw.get_fe()->execute_req(&req); if (! 
rc) { - lock_guard guard(mtx); - /* XXX update link count (incorrectly) */ - parent->set_nlink(3 + *offset); + state.atime = now; + parent->set_nlink(3 + d->name_cache.size()); *eof = req.eof(); + event ev(event::type::READDIR, get_key(), state.atime); + fs->state.push_event(ev); } } return rc; diff --git a/src/rgw/rgw_file.h b/src/rgw/rgw_file.h index aeb734530b819..85f548560d3eb 100644 --- a/src/rgw/rgw_file.h +++ b/src/rgw/rgw_file.h @@ -9,11 +9,14 @@ /* internal header */ #include #include +#include #include #include #include #include +#include +#include #include #include #include @@ -173,9 +176,13 @@ namespace rgw { directory() : flags(FLAG_NONE) {} - void set_overflow() { + void clear_state() { marker_cache.clear(); name_cache.clear(); + } + + void set_overflow() { + clear_state(); flags |= FLAG_OVERFLOW; } }; @@ -551,7 +558,33 @@ namespace rgw { static atomic fs_inst; std::string fsid; - uint32_t flags; + + using lock_guard = std::lock_guard; + using unique_lock = std::unique_lock; + + struct event + { + enum class type : uint8_t { READDIR } ; + type t; + const fh_key fhk; + struct timespec ts; + event(type t, const fh_key& k, const struct timespec& ts) + : t(t), fhk(k), ts(ts) {} + }; + + using event_vector = /* boost::small_vector */ + std::vector; + + struct state { + std::mutex mtx; + std::atomic flags; + std::deque events; + state() : flags(0) {} + void push_event(const event& ev) { + lock_guard guard(mtx); + events.push_back(ev); + } + } state; friend class RGWFileHandle; @@ -567,8 +600,7 @@ namespace rgw { cct->_conf->rgw_nfs_fhcache_size), fh_lru(cct->_conf->rgw_nfs_lru_lanes, cct->_conf->rgw_nfs_lru_lane_hiwat), - uid(_uid), key(_user_id, _key), - flags(0) { + uid(_uid), key(_user_id, _key) { /* no bucket may be named rgw_fs_inst-(.*) */ fsid = RGWFileHandle::root_name + "rgw_fs_inst-" + @@ -623,7 +655,7 @@ namespace rgw { LookupFHResult fhr { nullptr, RGWFileHandle::FLAG_NONE }; - if (flags & FLAG_CLOSED) + if (state.flags & FLAG_CLOSED) return 
fhr; RGWFileHandle::FHCache::Latch lat; @@ -688,7 +720,7 @@ namespace rgw { /* find existing RGWFileHandle */ RGWFileHandle* lookup_handle(struct rgw_fh_hk fh_hk) { - if (flags & FLAG_CLOSED) + if (state.flags & FLAG_CLOSED) return nullptr; RGWFileHandle::FHCache::Latch lat; -- 2.39.5