From: Sage Weil Date: Thu, 28 Oct 2010 18:53:38 +0000 (-0700) Subject: objecter: refactor interface with object_locator_t X-Git-Tag: v0.23~90 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ee27a61b01d4a500a9d66c23b11ba3de178f145e;p=ceph.git objecter: refactor interface with object_locator_t This paves the way for a locator that lets the user specify an arbitrary string to hash for placement (instead of the object name). It also captures everything that would affect placement (pool, preferred, etc.) in a single type. Just the client side; no server side or protocol changes yet. Signed-off-by: Sage Weil --- diff --git a/src/client/Client.cc b/src/client/Client.cc index 495a1e46d96..f8378cb2864 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -6357,7 +6357,7 @@ int Client::get_file_stripe_address(int fd, loff_t offset, string& address) assert(extents.size() == 1); // now we have the object and its 'layout' - pg_t pg = (pg_t)extents[0].layout.ol_pgid; + pg_t pg = osdmap->object_locator_to_pg(extents[0].oid, extents[0].oloc); vector osds; osdmap->pg_to_osds(pg, osds); if (!osds.size()) diff --git a/src/client/SyntheticClient.cc b/src/client/SyntheticClient.cc index d589dbe8ff3..97c228c479d 100644 --- a/src/client/SyntheticClient.cc +++ b/src/client/SyntheticClient.cc @@ -1359,10 +1359,10 @@ int SyntheticClient::play_trace(Trace& t, string& prefix, bool metadata_only) int64_t ol = t.get_int(); object_t oid = file_object_t(oh, ol); lock.Lock(); - ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE); + object_locator_t oloc(CEPH_CASDATA_RULE); uint64_t size; utime_t mtime; - client->objecter->stat(oid, layout, CEPH_NOSNAP, &size, &mtime, 0, new C_SafeCond(&lock, &cond, &ack)); + client->objecter->stat(oid, oloc, CEPH_NOSNAP, &size, &mtime, 0, new C_SafeCond(&lock, &cond, &ack)); while (!ack) cond.Wait(lock); lock.Unlock(); } @@ -1372,10 +1372,10 @@ int SyntheticClient::play_trace(Trace& t, string& prefix, bool metadata_only) int64_t off = t.get_int(); int64_t len = t.get_int(); object_t oid = file_object_t(oh, ol); + object_locator_t oloc(CEPH_CASDATA_RULE); lock.Lock(); - ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE); bufferlist bl; - client->objecter->read(oid, layout, off, len, CEPH_NOSNAP, &bl, 0, new C_SafeCond(&lock, &cond, &ack)); + client->objecter->read(oid, oloc, off, len, CEPH_NOSNAP, &bl, 0, new C_SafeCond(&lock, &cond, &ack)); while (!ack) cond.Wait(lock); lock.Unlock(); } @@ -1385,13 +1385,13 @@ int SyntheticClient::play_trace(Trace& t, string& prefix, bool metadata_only) int64_t off = t.get_int(); int64_t len = t.get_int(); object_t oid = file_object_t(oh, ol); + object_locator_t oloc(CEPH_CASDATA_RULE); lock.Lock(); - ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE); bufferptr bp(len); bufferlist bl; bl.push_back(bp); SnapContext snapc; - client->objecter->write(oid, layout, off, len, snapc, bl, g_clock.now(), 0, + client->objecter->write(oid, oloc, off, len, snapc, bl, g_clock.now(), 0, new C_SafeCond(&lock, &cond, &ack), safeg->new_sub()); while (!ack) cond.Wait(lock); @@ -1403,10 +1403,10 @@ int SyntheticClient::play_trace(Trace& t, string& prefix, bool metadata_only) int64_t off = t.get_int(); int64_t len = t.get_int(); object_t oid = file_object_t(oh, ol); + object_locator_t oloc(CEPH_CASDATA_RULE); lock.Lock(); - ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE); SnapContext snapc; - client->objecter->zero(oid, layout, off, len, snapc, g_clock.now(), 0, + client->objecter->zero(oid, oloc, off, len, snapc, g_clock.now(), 0, new C_SafeCond(&lock, &cond, &ack), safeg->new_sub()); while (!ack) cond.Wait(lock); @@ -1633,7 +1633,7 @@ int SyntheticClient::dump_placement(string& fn) { for (vector::iterator i = extents.begin(); i != extents.end(); ++i) { - int osd = client->osdmap->get_pg_primary(pg_t(i->layout.ol_pgid)); + int osd = client->osdmap->get_pg_primary(client->osdmap->object_locator_to_pg(i->oid, i->oloc)); // run through all the buffer extents for (map<__u32,__u32>::iterator j = i ->buffer_extents.begin(); @@ -1914,7 +1914,8 @@ int SyntheticClient::overload_osd_0(int n, int size, int wrsize) { int SyntheticClient::check_first_primary(int fh) { vector extents; client->enumerate_layout(fh, extents, 1, 0); - return client->osdmap->get_pg_primary(pg_t((extents.begin())->layout.ol_pgid)); + return client->osdmap->get_pg_primary(client->osdmap->object_locator_to_pg(extents.begin()->oid, + extents.begin()->oloc)); } int SyntheticClient::rm_file(string& fn) @@ -2161,7 +2162,7 @@ int SyntheticClient::create_objects(int nobj, int osize, int inflight) if (time_to_stop()) break; object_t oid = file_object_t(999, i); - ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE); + object_locator_t oloc(CEPH_CASDATA_RULE); SnapContext snapc; if (i % inflight == 0) { @@ -2171,7 +2172,7 @@ int SyntheticClient::create_objects(int nobj, int osize, int inflight) starts.push_back(g_clock.now()); client->client_lock.Lock(); - client->objecter->write(oid, layout, 0, osize, snapc, bl, g_clock.now(), 0, + client->objecter->write(oid, oloc, 0, osize, snapc, bl, g_clock.now(), 0, new C_Ref(lock, cond, &unack), new C_Ref(lock, cond, &unsafe)); client->client_lock.Unlock(); @@ -2263,8 +2264,7 @@ int SyntheticClient::object_rw(int nobj, int osize, int wrpc, o = (long)trunc(pow(r, rskew) * (double)nobj); // exponentially skew towards 0 } object_t oid = file_object_t(999, o); - - ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE); + object_locator_t oloc(CEPH_CASDATA_RULE); SnapContext snapc; client->client_lock.Lock(); @@ -2284,7 +2284,7 @@ int SyntheticClient::object_rw(int nobj, int osize, int wrpc, op.op.op = CEPH_OSD_OP_STARTSYNC; m.ops.push_back(op); } - client->objecter->mutate(oid, layout, m, snapc, g_clock.now(), 0, + client->objecter->mutate(oid, oloc, m, snapc, g_clock.now(), 0, NULL, new C_Ref(lock, cond, &unack)); /*client->objecter->write(oid, layout, 0, osize, snapc, bl, 0, new C_Ref(lock, cond, &unack), @@ -2292,7 +2292,7 @@ int SyntheticClient::object_rw(int nobj, int osize, int wrpc, } else { dout(10) << "read from " << oid << dendl; bufferlist inbl; - client->objecter->read(oid, layout, 0, osize, CEPH_NOSNAP, &inbl, 0, + client->objecter->read(oid, oloc, 0, osize, CEPH_NOSNAP, &inbl, 0, new C_Ref(lock, cond, &unack)); } client->client_lock.Unlock(); diff --git a/src/include/object.h b/src/include/object.h index 1039e2f783d..bb668a884f9 100644 --- a/src/include/object.h +++ b/src/include/object.h @@ -101,6 +101,33 @@ struct file_object_t { }; +// a locator constrains the placement of an object. mainly, which pool +// does it go in. +struct object_locator_t { + int pool; + int preferred; + string key; + + object_locator_t(int po=-1, int pre=-1) : pool(po), preferred(pre) {} + + int get_pool() const { + return pool; + } + int get_preferred() const { + return preferred; + } +}; + +inline ostream& operator<<(ostream& out, const object_locator_t& loc) +{ + out << "@" << loc.pool; + if (loc.preferred >= 0) + out << "p" << loc.preferred; + if (loc.key.length()) + out << ":" << loc.key; + return out; +} + // --------------------------- // snaps diff --git a/src/librados.cc b/src/librados.cc index c20a7cfbb13..fd359816d57 100644 --- a/src/librados.cc +++ b/src/librados.cc @@ -580,8 +580,8 @@ int RadosClient::selfmanaged_snap_rollback_object(const rados_pool_t pool, { int reply; PoolCtx* ctx = (PoolCtx *) pool; - ceph_object_layout layout = objecter->osdmap - ->make_object_layout(oid, ctx->poolid); + + object_locator_t oloc(ctx->poolid); Mutex mylock("RadosClient::snap_rollback::mylock"); Cond cond; @@ -589,7 +589,7 @@ int RadosClient::selfmanaged_snap_rollback_object(const rados_pool_t pool, Context *onack = new C_SafeCond(&mylock, &cond, &done, &reply); lock.Lock(); - objecter->rollback_object(oid, layout, snapc, snapid, + objecter->rollback_object(oid, oloc, snapc, snapid, g_clock.now(), onack, NULL); lock.Unlock(); @@ -801,8 +801,8 @@ int RadosClient::create(PoolCtx& pool, const object_t& oid, bool exclusive) Context *onack = new C_SafeCond(&mylock, &cond, &done, &r); lock.Lock(); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->create(oid, layout, + object_locator_t oloc(pool.poolid); + objecter->create(oid, oloc, pool.snapc, ut, 0, (exclusive ? CEPH_OSD_OP_FLAG_EXCL : 0), onack, NULL); lock.Unlock(); @@ -854,8 +854,8 @@ int RadosClient::write(PoolCtx& pool, const object_t& oid, off_t off, bufferlist Context *onack = new C_SafeCond(&mylock, &cond, &done, &r); lock.Lock(); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->write(oid, layout, + object_locator_t oloc(pool.poolid); + objecter->write(oid, oloc, off, len, pool.snapc, bl, ut, 0, onack, NULL); lock.Unlock(); @@ -887,8 +887,8 @@ int RadosClient::write_full(PoolCtx& pool, const object_t& oid, bufferlist& bl) Context *onack = new C_SafeCond(&mylock, &cond, &done, &r); lock.Lock(); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->write_full(oid, layout, + object_locator_t oloc(pool.poolid); + objecter->write_full(oid, oloc, pool.snapc, bl, ut, 0, onack, NULL); lock.Unlock(); @@ -910,8 +910,8 @@ int RadosClient::aio_read(PoolCtx& pool, const object_t oid, off_t off, bufferli c->pbl = pbl; Mutex::Locker l(lock); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->read(oid, layout, + object_locator_t oloc(pool.poolid); + objecter->read(oid, oloc, off, len, pool.snap_seq, &c->bl, 0, onack); @@ -926,8 +926,8 @@ int RadosClient::aio_read(PoolCtx& pool, const object_t oid, off_t off, char *bu c->maxlen = len; Mutex::Locker l(lock); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->read(oid, layout, + object_locator_t oloc(pool.poolid); + objecter->read(oid, oloc, off, len, pool.snap_seq, &c->bl, 0, onack); @@ -944,8 +944,8 @@ int RadosClient::aio_write(PoolCtx& pool, const object_t oid, off_t off, const b Context *onsafe = new C_aio_Safe(c); Mutex::Locker l(lock); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->write(oid, layout, + object_locator_t oloc(pool.poolid); + objecter->write(oid, oloc, off, len, pool.snapc, bl, ut, 0, onack, onsafe); @@ -962,8 +962,8 @@ int RadosClient::aio_write_full(PoolCtx& pool, const object_t oid, const bufferl Context *onsafe = new C_aio_Safe(c); Mutex::Locker l(lock); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->write_full(oid, layout, + object_locator_t oloc(pool.poolid); + objecter->write_full(oid, oloc, pool.snapc, bl, ut, 0, onack, onsafe); @@ -982,8 +982,8 @@ int RadosClient::remove(PoolCtx& pool, const object_t& oid) Context *onack = new C_SafeCond(&mylock, &cond, &done, &r); lock.Lock(); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->remove(oid, layout, + object_locator_t oloc(pool.poolid); + objecter->remove(oid, oloc, snapc, ut, 0, onack, NULL); lock.Unlock(); @@ -1012,8 +1012,8 @@ int RadosClient::trunc(PoolCtx& pool, const object_t& oid, size_t size) Context *onack = new C_SafeCond(&mylock, &cond, &done, &r); lock.Lock(); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->trunc(oid, layout, + object_locator_t oloc(pool.poolid); + objecter->trunc(oid, oloc, pool.snapc, ut, 0, size, 0, onack, NULL); @@ -1041,10 +1041,10 @@ int RadosClient::tmap_update(PoolCtx& pool, const object_t& oid, bufferlist& cmd lock.Lock(); SnapContext snapc; - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); + object_locator_t oloc(pool.poolid); ObjectOperation wr; wr.tmap_update(cmdbl); - objecter->mutate(oid, layout, wr, snapc, ut, 0, onack, NULL); + objecter->mutate(oid, oloc, wr, snapc, ut, 0, onack, NULL); lock.Unlock(); mylock.Lock(); @@ -1069,10 +1069,10 @@ int RadosClient::exec(PoolCtx& pool, const object_t& oid, const char *cls, const lock.Lock(); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); + object_locator_t oloc(pool.poolid); ObjectOperation rd; rd.call(cls, method, inbl); - objecter->read(oid, layout, rd, pool.snap_seq, &outbl, 0, onack); + objecter->read(oid, oloc, rd, pool.snap_seq, &outbl, 0, onack); lock.Unlock(); mylock.Lock(); @@ -1094,8 +1094,8 @@ int RadosClient::read(PoolCtx& pool, const object_t& oid, off_t off, bufferlist& Context *onack = new C_SafeCond(&mylock, &cond, &done, &r); lock.Lock(); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->read(oid, layout, + object_locator_t oloc(pool.poolid); + objecter->read(oid, oloc, off, len, pool.snap_seq, &bl, 0, onack); lock.Unlock(); @@ -1133,8 +1133,8 @@ int RadosClient::stat(PoolCtx& pool, const object_t& oid, uint64_t *psize, time_ psize = &size; lock.Lock(); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->stat(oid, layout, + object_locator_t oloc(pool.poolid); + objecter->stat(oid, oloc, pool.snap_seq, psize, &mtime, 0, onack); lock.Unlock(); @@ -1163,8 +1163,8 @@ int RadosClient::getxattr(PoolCtx& pool, const object_t& oid, const char *name, Context *onack = new C_SafeCond(&mylock, &cond, &done, &r); lock.Lock(); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->getxattr(oid, layout, + object_locator_t oloc(pool.poolid); + objecter->getxattr(oid, oloc, name, pool.snap_seq, &bl, 0, onack); lock.Unlock(); @@ -1196,9 +1196,10 @@ int RadosClient::rmxattr(PoolCtx& pool, const object_t& oid, const char *name) Context *onack = new C_SafeCond(&mylock, &cond, &done, &r); + object_locator_t oloc(pool.poolid); + lock.Lock(); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->removexattr(oid, layout, name, + objecter->removexattr(oid, oloc, name, pool.snapc, ut, 0, onack, NULL); lock.Unlock(); @@ -1230,8 +1231,8 @@ int RadosClient::setxattr(PoolCtx& pool, const object_t& oid, const char *name, Context *onack = new C_SafeCond(&mylock, &cond, &done, &r); lock.Lock(); - ceph_object_layout layout = objecter->osdmap->make_object_layout(oid, pool.poolid); - objecter->setxattr(oid, layout, name, + object_locator_t oloc(pool.poolid); + objecter->setxattr(oid, oloc, name, pool.snapc, bl, ut, 0, onack, NULL); lock.Unlock(); @@ -1263,9 +1264,9 @@ int RadosClient::getxattrs(PoolCtx& pool, const object_t& oid, maposdmap->make_object_layout(oid, pool.poolid); + object_locator_t oloc(pool.poolid); map aset; - objecter->getxattrs(oid, layout, pool.snap_seq, + objecter->getxattrs(oid, oloc, pool.snap_seq, aset, 0, onack); lock.Unlock(); diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 266a7527e9b..6060981c654 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -1123,12 +1123,10 @@ void CDir::fetch(Context *c, const string& want_dn, bool ignore_authpinnability) // start by reading the first hunk of it C_Dir_Fetch *fin = new C_Dir_Fetch(this, want_dn); object_t oid = get_ondisk_object(); - OSDMap *osdmap = cache->mds->objecter->osdmap; - ceph_object_layout ol = osdmap->make_object_layout(oid, - cache->mds->mdsmap->get_metadata_pg_pool()); + object_locator_t oloc(cache->mds->mdsmap->get_metadata_pg_pool()); ObjectOperation rd; rd.tmap_get(); - cache->mds->objecter->read(oid, ol, rd, CEPH_NOSNAP, &fin->bl, 0, fin); + cache->mds->objecter->read(oid, oloc, rd, CEPH_NOSNAP, &fin->bl, 0, fin); } void CDir::_fetched(bufferlist &bl, const string& want_dn) @@ -1681,13 +1679,11 @@ void CDir::_commit(version_t want) SnapContext snapc; object_t oid = get_ondisk_object(); - OSDMap *osdmap = cache->mds->objecter->osdmap; - ceph_object_layout ol = osdmap->make_object_layout(oid, - cache->mds->mdsmap->get_metadata_pg_pool()); + object_locator_t oloc(cache->mds->mdsmap->get_metadata_pg_pool()); m.priority = CEPH_MSG_PRIO_LOW; // set priority lower than journal! - cache->mds->objecter->mutate(oid, ol, m, snapc, g_clock.now(), 0, + cache->mds->objecter->mutate(oid, oloc, m, snapc, g_clock.now(), 0, NULL, new C_Dir_Committed(this, get_version(), inode->inode.last_renamed_version) ); } diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 5f400bdabbe..00db3a723f6 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -845,11 +845,9 @@ void CInode::store(Context *fin) char n[30]; snprintf(n, sizeof(n), "%llx.%08llx.inode", (long long unsigned)ino(), (long long unsigned)frag_t()); object_t oid(n); - OSDMap *osdmap = mdcache->mds->objecter->osdmap; - ceph_object_layout ol = osdmap->make_object_layout(oid, - mdcache->mds->mdsmap->get_metadata_pg_pool()); + object_locator_t oloc(mdcache->mds->mdsmap->get_metadata_pg_pool()); - mdcache->mds->objecter->mutate(oid, ol, m, snapc, g_clock.now(), 0, + mdcache->mds->objecter->mutate(oid, oloc, m, snapc, g_clock.now(), 0, NULL, new C_Inode_Stored(this, get_version(), fin) ); } @@ -883,25 +881,17 @@ void CInode::fetch(Context *fin) char n[30]; snprintf(n, sizeof(n), "%llx.%08llx", (long long unsigned)ino(), (long long unsigned)frag_t()); object_t oid(n); + object_locator_t oloc(mdcache->mds->mdsmap->get_metadata_pg_pool()); ObjectOperation rd; rd.getxattr("inode"); - OSDMap *osdmap = mdcache->mds->objecter->osdmap; - ceph_object_layout ol = osdmap->make_object_layout(oid, - mdcache->mds->mdsmap->get_metadata_pg_pool()); + mdcache->mds->objecter->read(oid, oloc, rd, CEPH_NOSNAP, &c->bl, 0, gather->new_sub()); - mdcache->mds->objecter->read(oid, ol, rd, CEPH_NOSNAP, &c->bl, 0, gather->new_sub() ); - - // read from separate object too snprintf(n, sizeof(n), "%llx.%08llx.inode", (long long unsigned)ino(), (long long unsigned)frag_t()); object_t oid2(n); - - ceph_object_layout ol2 = osdmap->make_object_layout(oid2, - mdcache->mds->mdsmap->get_metadata_pg_pool()); - - mdcache->mds->objecter->read(oid2, ol2, 0, 0, CEPH_NOSNAP, &c->bl2, 0, gather->new_sub() ); + mdcache->mds->objecter->read(oid2, oloc, 0, 0, CEPH_NOSNAP, &c->bl2, 0, gather->new_sub()); } void CInode::_fetched(bufferlist& bl, bufferlist& bl2, Context *fin) @@ -974,11 +964,9 @@ void CInode::store_parent(Context *fin) char n[30]; snprintf(n, sizeof(n), "%llx.%08llx", (long long unsigned)ino(), (long long unsigned)frag_t()); object_t oid(n); - OSDMap *osdmap = mdcache->mds->objecter->osdmap; - ceph_object_layout ol = osdmap->make_object_layout(oid, - mdcache->mds->mdsmap->get_metadata_pg_pool()); + object_locator_t oloc(mdcache->mds->mdsmap->get_metadata_pg_pool()); - mdcache->mds->objecter->mutate(oid, ol, m, snapc, g_clock.now(), 0, + mdcache->mds->objecter->mutate(oid, oloc, m, snapc, g_clock.now(), 0, NULL, new C_Inode_StoredParent(this, inode.last_renamed_version, fin) ); } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 2aa2fecb769..95787853514 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -4668,11 +4668,11 @@ void MDCache::purge_prealloc_ino(inodeno_t ino, Context *fin) char n[30]; snprintf(n, sizeof(n), "%llx.%08llx", (long long unsigned)ino, 0ull); object_t oid(n); + object_locator_t oloc(mds->mdsmap->get_metadata_pg_pool()); + dout(10) << "purge_prealloc_ino " << ino << " oid " << oid << dendl; - ceph_object_layout ol = mds->osdmap->make_object_layout(oid, - mds->mdsmap->get_metadata_pg_pool()); SnapContext snapc; - mds->objecter->remove(oid, ol, snapc, g_clock.now(), 0, 0, fin); + mds->objecter->remove(oid, oloc, snapc, g_clock.now(), 0, 0, fin); } diff --git a/src/mds/MDSTable.cc b/src/mds/MDSTable.cc index d92b6f345be..4ea7b336080 100644 --- a/src/mds/MDSTable.cc +++ b/src/mds/MDSTable.cc @@ -63,11 +63,8 @@ void MDSTable::save(Context *onfinish, version_t v) // write (async) SnapContext snapc; object_t oid = get_object_name(); - OSDMap *osdmap = mds->objecter->osdmap; - ceph_object_layout ol = osdmap->make_object_layout(oid, - mds->mdsmap->get_metadata_pg_pool()); - - mds->objecter->write_full(oid, ol, + object_locator_t oloc(mds->mdsmap->get_metadata_pg_pool()); + mds->objecter->write_full(oid, oloc, snapc, bl, g_clock.now(), 0, NULL, new C_MT_Save(this, version)); @@ -129,10 +126,8 @@ void MDSTable::load(Context *onfinish) C_MT_Load *c = new C_MT_Load(this, onfinish); object_t oid = get_object_name(); - OSDMap *osdmap = mds->objecter->osdmap; - ceph_object_layout ol = osdmap->make_object_layout(oid, - mds->mdsmap->get_metadata_pg_pool()); - mds->objecter->read_full(oid, ol, CEPH_NOSNAP, &c->bl, 0, c); + object_locator_t oloc(mds->mdsmap->get_metadata_pg_pool()); + mds->objecter->read_full(oid, oloc, CEPH_NOSNAP, &c->bl, 0, c); } void MDSTable::load_2(int r, bufferlist& bl, Context *onfinish) diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc index 9ffcb240ed5..01da0218d4c 100644 --- a/src/mds/SessionMap.cc +++ b/src/mds/SessionMap.cc @@ -69,10 +69,8 @@ void SessionMap::load(Context *onload) C_SM_Load *c = new C_SM_Load(this); object_t oid = get_object_name(); - OSDMap *osdmap = mds->objecter->osdmap; - ceph_object_layout ol = osdmap->make_object_layout(oid, - mds->mdsmap->get_metadata_pg_pool()); - mds->objecter->read_full(oid, ol, CEPH_NOSNAP, &c->bl, 0, c); + object_locator_t oloc(mds->mdsmap->get_metadata_pg_pool()); + mds->objecter->read_full(oid, oloc, CEPH_NOSNAP, &c->bl, 0, c); } void SessionMap::_load_finish(int r, bufferlist &bl) @@ -121,11 +119,9 @@ void SessionMap::save(Context *onsave, version_t needv) committing = version; SnapContext snapc; object_t oid = get_object_name(); - OSDMap *osdmap = mds->objecter->osdmap; - ceph_object_layout ol = osdmap->make_object_layout(oid, - mds->mdsmap->get_metadata_pg_pool()); + object_locator_t oloc(mds->mdsmap->get_metadata_pg_pool()); - mds->objecter->write_full(oid, ol, + mds->objecter->write_full(oid, oloc, snapc, bl, g_clock.now(), 0, NULL, new C_SM_Save(this, version)); diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index df888b83d17..a2c4479429a 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -737,47 +737,50 @@ private: /**** mapping facilities ****/ - // oid -> pg - ceph_object_layout file_to_object_layout(object_t oid, ceph_file_layout& layout) { - return make_object_layout(oid, layout.fl_pg_pool, - layout.fl_pg_preferred, - layout.fl_stripe_unit); - } - - ceph_object_layout make_object_layout(object_t oid, int pg_pool, int preferred=-1, int object_stripe_unit = 0) { + pg_t object_locator_to_pg(const object_t& oid, const object_locator_t& loc) { // calculate ps (placement seed) - const pg_pool_t *pool = get_pg_pool(pg_pool); - ps_t ps = ceph_str_hash(pool->v.object_hash, oid.name.c_str(), oid.name.length()); + const pg_pool_t *pool = get_pg_pool(loc.get_pool()); + ps_t ps; + if (loc.key.length()) + ps = ceph_str_hash(pool->v.object_hash, loc.key.c_str(), loc.key.length()); + else + ps = ceph_str_hash(pool->v.object_hash, oid.name.c_str(), oid.name.length()); // mix in preferred osd, so we don't get the same peers for // all of the placement pgs (e.g. 0.0p*) - if (preferred >= 0) - ps += preferred; + if (loc.get_preferred() >= 0) + ps += loc.get_preferred(); //cout << "preferred " << preferred << " num " // << num << " mask " << num_mask << " ps " << ps << endl; - // construct object layout - pg_t pgid = pg_t(ps, pg_pool, preferred); - ceph_object_layout layout; - layout.ol_pgid = pgid.v; - layout.ol_stripe_unit = object_stripe_unit; - return layout; + return pg_t(ps, loc.get_pool(), loc.get_preferred()); } - int get_pg_num(int pg_pool) - { - const pg_pool_t *pool = get_pg_pool(pg_pool); - return pool->get_pg_num(); + object_locator_t file_to_object_locator(const ceph_file_layout& layout) { + return object_locator_t(layout.fl_pg_pool, layout.fl_pg_preferred); } - int get_pg_layout(int pg_pool, int seed, ceph_object_layout& layout) { - const pg_pool_t *pool = get_pg_pool(pg_pool); + // oid -> pg + ceph_object_layout file_to_object_layout(object_t oid, ceph_file_layout& layout) { + return make_object_layout(oid, layout.fl_pg_pool, + layout.fl_pg_preferred); + } - pg_t pgid = pg_t(seed, pg_pool, -1); - layout.ol_pgid = pgid.v; - layout.ol_stripe_unit = 0; + ceph_object_layout make_object_layout(object_t oid, int pg_pool, int preferred=-1) { + object_locator_t loc(pg_pool); + loc.preferred = preferred; + + ceph_object_layout ol; + pg_t pgid = object_locator_to_pg(oid, loc); + ol.ol_pgid = pgid.v; + ol.ol_stripe_unit = 0; + return ol; + } + int get_pg_num(int pg_pool) + { + const pg_pool_t *pool = get_pg_pool(pg_pool); return pool->get_pg_num(); } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 4ffee949653..706e647a923 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1088,7 +1088,7 @@ class ObjectExtent { __u32 offset; // in object __u32 length; // in object - ceph_object_layout layout; // object layout (pgid, etc.) + object_locator_t oloc; // object locator (pool etc) map<__u32, __u32> buffer_extents; // off -> len. extents in buffer being mapped (may be fragmented bc of striping!) @@ -1099,7 +1099,7 @@ class ObjectExtent { inline ostream& operator<<(ostream& out, ObjectExtent &ex) { return out << "extent(" - << ex.oid << " in " << ex.layout + << ex.oid << " in " << ex.oloc << " " << ex.offset << "~" << ex.length << ")"; } diff --git a/src/osdc/Filer.cc b/src/osdc/Filer.cc index fe8901767ad..a9d55c2347e 100644 --- a/src/osdc/Filer.cc +++ b/src/osdc/Filer.cc @@ -100,7 +100,7 @@ void Filer::_probe(Probe *probe) p++) { dout(10) << "_probe probing " << p->oid << dendl; C_Probe *c = new C_Probe(this, probe, p->oid); - probe->ops[p->oid] = objecter->stat(p->oid, p->layout, probe->snapid, &c->size, &c->mtime, probe->flags, c); + probe->ops[p->oid] = objecter->stat(p->oid, p->oloc, probe->snapid, &c->size, &c->mtime, probe->flags, c); } } @@ -236,8 +236,8 @@ int Filer::purge_range(inodeno_t ino, // single object? easy! if (num_obj == 1) { object_t oid = file_object_t(ino, first_obj); - ceph_object_layout ol = objecter->osdmap->file_to_object_layout(oid, *layout); - objecter->remove(oid, ol, snapc, mtime, flags, NULL, oncommit); + object_locator_t oloc = objecter->osdmap->file_to_object_locator(*layout); + objecter->remove(oid, oloc, snapc, mtime, flags, NULL, oncommit); return 0; } @@ -282,8 +282,8 @@ void Filer::_do_purge_range(PurgeRange *pr, int fin) int max = 10 - pr->uncommitted; while (pr->num > 0 && max > 0) { object_t oid = file_object_t(pr->ino, pr->first); - ceph_object_layout ol = objecter->osdmap->file_to_object_layout(oid, pr->layout); - objecter->remove(oid, ol, pr->snapc, pr->mtime, pr->flags, + object_locator_t oloc = objecter->osdmap->file_to_object_locator(pr->layout); + objecter->remove(oid, oloc, pr->snapc, pr->mtime, pr->flags, NULL, new C_PurgeRange(this, pr)); pr->uncommitted++; pr->first++; @@ -335,7 +335,7 @@ void Filer::file_to_extents(inodeno_t ino, ceph_file_layout *layout, else { ex = &object_extents[oid]; ex->oid = oid; - ex->layout = objecter->osdmap->file_to_object_layout( oid, *layout ); + ex->oloc = objecter->osdmap->file_to_object_locator(*layout); } // map range into object @@ -362,7 +362,7 @@ void Filer::file_to_extents(inodeno_t ino, ceph_file_layout *layout, } ex->buffer_extents[cur-offset] = x_len; - dout(15) << "file_to_extents " << *ex << " in " << ex->layout << dendl; + dout(15) << "file_to_extents " << *ex << " in " << ex->oloc << dendl; //dout(0) << "map: ino " << ino << " oid " << ex.oid << " osd " << ex.osd << " offset " << ex.offset << " len " << ex.len << " ... left " << left << dendl; left -= x_len; diff --git a/src/osdc/Filer.h b/src/osdc/Filer.h index a06c99c6d6b..0ca90a4285f 100644 --- a/src/osdc/Filer.h +++ b/src/osdc/Filer.h @@ -190,7 +190,7 @@ class Filer { ops[0].op.op = CEPH_OSD_OP_TRIMTRUNC; ops[0].op.extent.truncate_seq = truncate_seq; ops[0].op.extent.truncate_size = extents[0].offset; - objecter->_modify(extents[0].oid, extents[0].layout, ops, mtime, snapc, flags, onack, oncommit); + objecter->_modify(extents[0].oid, extents[0].oloc, ops, mtime, snapc, flags, onack, oncommit); } else { C_Gather *gack = 0, *gcom = 0; if (onack) @@ -202,7 +202,7 @@ class Filer { ops[0].op.op = CEPH_OSD_OP_TRIMTRUNC; ops[0].op.extent.truncate_size = p->offset; ops[0].op.extent.truncate_seq = truncate_seq; - objecter->_modify(p->oid, p->layout, ops, mtime, snapc, flags, + objecter->_modify(p->oid, p->oloc, ops, mtime, snapc, flags, gack ? gack->new_sub():0, gcom ? gcom->new_sub():0); } @@ -222,7 +222,7 @@ class Filer { vector extents; file_to_extents(ino, layout, offset, len, extents); if (extents.size() == 1) { - objecter->zero(extents[0].oid, extents[0].layout, extents[0].offset, extents[0].length, + objecter->zero(extents[0].oid, extents[0].oloc, extents[0].offset, extents[0].length, snapc, mtime, flags, onack, oncommit); } else { C_Gather *gack = 0, *gcom = 0; @@ -231,7 +231,7 @@ class Filer { if (oncommit) gcom = new C_Gather(oncommit); for (vector::iterator p = extents.begin(); p != extents.end(); p++) { - objecter->zero(p->oid, p->layout, p->offset, p->length, + objecter->zero(p->oid, p->oloc, p->offset, p->length, snapc, mtime, flags, gack ? gack->new_sub():0, gcom ? gcom->new_sub():0); diff --git a/src/osdc/Journaler.cc b/src/osdc/Journaler.cc index 641fe6da125..5cc80b2ae5d 100644 --- a/src/osdc/Journaler.cc +++ b/src/osdc/Journaler.cc @@ -102,8 +102,8 @@ void Journaler::recover(Context *onread) vector snaps; object_t oid = file_object_t(ino, 0); - ceph_object_layout ol = objecter->osdmap->make_object_layout(oid, pg_pool); - objecter->read_full(oid, ol, CEPH_NOSNAP, &fin->bl, 0, fin); + object_locator_t oloc(pg_pool); + objecter->read_full(oid, oloc, CEPH_NOSNAP, &fin->bl, 0, fin); } void Journaler::_finish_read_head(int r, bufferlist& bl) @@ -205,8 +205,8 @@ void Journaler::write_head(Context *oncommit) SnapContext snapc; object_t oid = file_object_t(ino, 0); - ceph_object_layout ol = objecter->osdmap->make_object_layout(oid, pg_pool); - objecter->write_full(oid, ol, snapc, bl, g_clock.now(), 0, + object_locator_t oloc(pg_pool); + objecter->write_full(oid, oloc, snapc, bl, g_clock.now(), 0, NULL, new C_WriteHead(this, last_written, oncommit)); } diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc index 6654774ae7c..3e34ab692f9 100644 --- a/src/osdc/ObjectCacher.cc +++ b/src/osdc/ObjectCacher.cc @@ -427,7 +427,7 @@ void ObjectCacher::close_object(Object *ob) assert(ob->can_close()); // ok! - objects[ob->layout.ol_pgid.pool].erase(ob->get_soid()); + objects[ob->oloc.pool].erase(ob->get_soid()); delete ob; } @@ -441,13 +441,13 @@ void ObjectCacher::bh_read(BufferHead *bh) mark_rx(bh); // finisher - C_ReadFinish *onfinish = new C_ReadFinish(this, bh->ob->layout.ol_pgid.pool, + C_ReadFinish *onfinish = new C_ReadFinish(this, bh->ob->oloc.pool, bh->ob->get_soid(), bh->start(), bh->length()); ObjectSet *oset = bh->ob->oset; // go - objecter->read_trunc(bh->ob->get_oid(), bh->ob->get_layout(), + objecter->read_trunc(bh->ob->get_oid(), bh->ob->get_oloc(), bh->start(), bh->length(), bh->ob->get_snap(), &onfinish->bl, 0, oset->truncate_size, oset->truncate_seq, @@ -535,15 +535,15 @@ void ObjectCacher::bh_write(BufferHead *bh) dout(7) << "bh_write " << *bh << dendl; // finishers - C_WriteAck *onack = new C_WriteAck(this, bh->ob->layout.ol_pgid.pool, + C_WriteAck *onack = new C_WriteAck(this, bh->ob->oloc.pool, bh->ob->get_soid(), bh->start(), bh->length()); - C_WriteCommit *oncommit = new C_WriteCommit(this, bh->ob->layout.ol_pgid.pool, + C_WriteCommit *oncommit = new C_WriteCommit(this, bh->ob->oloc.pool, bh->ob->get_soid(), bh->start(), bh->length()); ObjectSet *oset = bh->ob->oset; // go - tid_t tid = objecter->write_trunc(bh->ob->get_oid(), bh->ob->get_layout(), + tid_t tid = objecter->write_trunc(bh->ob->get_oid(), bh->ob->get_oloc(), bh->start(), bh->length(), bh->snapc, bh->bl, bh->last_write, 0, oset->truncate_size, oset->truncate_seq, @@ -802,7 +802,7 @@ bool ObjectCacher::is_cached(ObjectSet *oset, vector& extents, sna // get Object cache sobject_t soid(ex_it->oid, snapid); - Object *o = get_object_maybe(soid, ex_it->layout); + Object *o = get_object_maybe(soid, ex_it->oloc); if (!o) return false; if (!o->is_cached(ex_it->offset, ex_it->length)) @@ -829,7 +829,7 @@ int ObjectCacher::readx(OSDRead *rd, ObjectSet *oset, Context *onfinish) // get Object cache sobject_t soid(ex_it->oid, rd->snap); - Object *o = get_object(soid, oset, ex_it->layout); + Object *o = get_object(soid, oset, ex_it->oloc); // map extent into bufferheads map hits, missing, rx; @@ -967,7 +967,7 @@ int ObjectCacher::writex(OSDWrite *wr, ObjectSet *oset) ex_it++) { // get object cache sobject_t soid(ex_it->oid, CEPH_NOSNAP); - Object *o = get_object(soid, oset, ex_it->layout); + Object *o = get_object(soid, oset, ex_it->oloc); // map it all into a single bufferhead. BufferHead *bh = o->map_write(wr); @@ -1104,7 +1104,7 @@ int ObjectCacher::atomic_sync_readx(OSDRead *rd, ObjectSet *oset, Mutex& lock) Mutex flock("ObjectCacher::atomic_sync_readx flock 1"); Cond cond; bool done = false; - objecter->read_trunc(rd->extents[0].oid, rd->extents[0].layout, + objecter->read_trunc(rd->extents[0].oid, rd->extents[0].oloc, rd->extents[0].offset, rd->extents[0].length, rd->snap, rd->bl, 0, oset->truncate_size, oset->truncate_seq, @@ -1127,7 +1127,7 @@ int ObjectCacher::atomic_sync_readx(OSDRead *rd, ObjectSet *oset, Mutex& lock) i != by_oid.end(); i++) { sobject_t soid(i->first, rd->snap); - Object *o = get_object(soid, oset, i->second.layout); + Object *o = get_object(soid, oset, i->second.oloc); rdlock(o); } @@ -1172,7 +1172,7 @@ int ObjectCacher::atomic_sync_writex(OSDWrite *wr, ObjectSet *oset, Mutex& lock) sobject_t oid(wr->extents.front().oid, CEPH_NOSNAP); Object *o = 0; if (objects[oset->poolid].count(oid)) - o = get_object(oid, oset, wr->extents.front().layout); + o = get_object(oid, oset, wr->extents.front().oloc); if (!o || (o->lock_state != Object::LOCK_WRLOCK && o->lock_state != Object::LOCK_WRLOCKING && @@ -1209,7 +1209,7 @@ int ObjectCacher::atomic_sync_writex(OSDWrite *wr, ObjectSet *oset, Mutex& lock) i != by_oid.end(); i++) { sobject_t soid(i->first, CEPH_NOSNAP); - Object *o = get_object(soid, oset, i->second.layout); + Object *o = get_object(soid, oset, i->second.oloc); wrlock(o); } @@ -1248,13 +1248,13 @@ void ObjectCacher::rdlock(Object *o) o->lock_state = Object::LOCK_RDLOCKING; - C_LockAck *ack = new C_LockAck(this, o->layout.ol_pgid.pool, o->get_soid()); - C_WriteCommit *commit = new C_WriteCommit(this, o->layout.ol_pgid.pool, + C_LockAck *ack = new C_LockAck(this, o->oloc.pool, o->get_soid()); + C_WriteCommit *commit = new C_WriteCommit(this, o->oloc.pool, o->get_soid(), 0, 0); commit->tid = ack->tid = - o->last_write_tid = objecter->lock(o->get_oid(), o->get_layout(), CEPH_OSD_OP_RDLOCK, 0, ack, commit); + o->last_write_tid = objecter->lock(o->get_oid(), o->get_oloc(), CEPH_OSD_OP_RDLOCK, 0, ack, commit); } // stake our claim. @@ -1293,13 +1293,13 @@ void ObjectCacher::wrlock(Object *o) op = CEPH_OSD_OP_WRLOCK; } - C_LockAck *ack = new C_LockAck(this, o->layout.ol_pgid.pool, o->get_soid()); - C_WriteCommit *commit = new C_WriteCommit(this, o->layout.ol_pgid.pool, + C_LockAck *ack = new C_LockAck(this, o->oloc.pool, o->get_soid()); + C_WriteCommit *commit = new C_WriteCommit(this, o->oloc.pool, o->get_soid(), 0, 0); commit->tid = ack->tid = - o->last_write_tid = objecter->lock(o->get_oid(), o->get_layout(), op, 0, ack, commit); + o->last_write_tid = objecter->lock(o->get_oid(), o->get_oloc(), op, 0, ack, commit); } // stake our claim. @@ -1339,12 +1339,12 @@ void ObjectCacher::rdunlock(Object *o) o->lock_state = Object::LOCK_RDUNLOCKING; - C_LockAck *lockack = new C_LockAck(this, o->layout.ol_pgid.pool, o->get_soid()); - C_WriteCommit *commit = new C_WriteCommit(this, o->layout.ol_pgid.pool, + C_LockAck *lockack = new C_LockAck(this, o->oloc.pool, o->get_soid()); + C_WriteCommit *commit = new C_WriteCommit(this, o->oloc.pool, o->get_soid(), 0, 0); commit->tid = lockack->tid = - o->last_write_tid = objecter->lock(o->get_oid(), o->get_layout(), CEPH_OSD_OP_RDUNLOCK, 0, lockack, commit); + o->last_write_tid = objecter->lock(o->get_oid(), o->get_oloc(), CEPH_OSD_OP_RDUNLOCK, 0, lockack, commit); } void ObjectCacher::wrunlock(Object *o) @@ -1372,12 +1372,12 @@ void ObjectCacher::wrunlock(Object *o) o->lock_state = Object::LOCK_WRUNLOCKING; } - C_LockAck *lockack = new C_LockAck(this, o->layout.ol_pgid.pool, o->get_soid()); - C_WriteCommit *commit = new C_WriteCommit(this, o->layout.ol_pgid.pool, + C_LockAck *lockack = new C_LockAck(this, o->oloc.pool, o->get_soid()); + C_WriteCommit *commit = new C_WriteCommit(this, o->oloc.pool, o->get_soid(), 0, 0); commit->tid = lockack->tid = - o->last_write_tid = objecter->lock(o->get_oid(), o->get_layout(), op, 0, lockack, commit); + o->last_write_tid = objecter->lock(o->get_oid(), o->get_oloc(), op, 0, lockack, commit); } diff --git a/src/osdc/ObjectCacher.h b/src/osdc/ObjectCacher.h index 563bf079080..2e10b487f47 100644 --- a/src/osdc/ObjectCacher.h +++ b/src/osdc/ObjectCacher.h @@ -137,7 +137,8 @@ class ObjectCacher { public: ObjectSet *oset; xlist::item set_item; - ceph_object_layout layout; + object_locator_t oloc; + public: map data; @@ -168,9 +169,9 @@ class ObjectCacher { int rdlock_ref; // how many ppl want or are using a READ lock public: - Object(ObjectCacher *_oc, sobject_t o, ObjectSet *os, ceph_object_layout& l) : + Object(ObjectCacher *_oc, sobject_t o, ObjectSet *os, object_locator_t& l) : oc(_oc), - oid(o), oset(os), set_item(this), layout(l), + oid(o), oset(os), set_item(this), oloc(l), last_write_tid(0), last_ack_tid(0), last_commit_tid(0), uncommitted_item(this), lock_state(LOCK_NONE), wrlock_ref(0), rdlock_ref(0) { @@ -187,8 +188,8 @@ class ObjectCacher { snapid_t get_snap() { return oid.snap; } ObjectSet *get_object_set() { return oset; } - ceph_object_layout& get_layout() { return layout; } - void set_layout(ceph_object_layout& l) { layout = l; } + object_locator_t& get_oloc() { return oloc; } + void set_object_locator(object_locator_t& l) { oloc = l; } bool can_close() { return data.empty() && lock_state == LOCK_NONE && @@ -293,26 +294,28 @@ class ObjectCacher { // objects - Object *get_object_maybe(sobject_t oid, ceph_object_layout &l) { + Object *get_object_maybe(sobject_t oid, object_locator_t &l) { // have it? - if ((l.ol_pgid.pool < objects.size()) && - (objects[l.ol_pgid.pool].count(oid))) - return objects[l.ol_pgid.pool][oid]; + assert(l.pool >= 0); + if ((l.pool < (int)objects.size()) && + (objects[l.pool].count(oid))) + return objects[l.pool][oid]; return NULL; } - Object *get_object(sobject_t oid, ObjectSet *oset, ceph_object_layout &l) { + Object *get_object(sobject_t oid, ObjectSet *oset, object_locator_t &l) { // have it? - if (l.ol_pgid.pool < objects.size()) { - if (objects[l.ol_pgid.pool].count(oid)) - return objects[l.ol_pgid.pool][oid]; + assert(l.pool >= 0); + if (l.pool < (int)objects.size()) { + if (objects[l.pool].count(oid)) + return objects[l.pool][oid]; } else { - objects.resize(l.ol_pgid.pool+1); + objects.resize(l.pool+1); } // create it. Object *o = new Object(this, oid, oset, l); - objects[l.ol_pgid.pool][oid] = o; + objects[l.pool][oid] = o; return o; } void close_object(Object *ob); diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 69d8222b9c1..21e2f8bb9ec 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -396,8 +396,12 @@ void Objecter::resend_mon_ops() tid_t Objecter::op_submit(Op *op) { + + if (op->oid.name.length()) + op->pgid = osdmap->object_locator_to_pg(op->oid, op->oloc); + // find - PG &pg = get_pg( pg_t(op->layout.ol_pgid) ); + PG &pg = get_pg(op->pgid); // pick tid if (!op->tid) @@ -425,8 +429,8 @@ tid_t Objecter::op_submit(Op *op) // send? dout(10) << "op_submit oid " << op->oid + << " " << op->oloc << " " << op->ops << " tid " << op->tid - << " " << op->layout << " osd" << pg.primary() << dendl; @@ -454,8 +458,12 @@ tid_t Objecter::op_submit(Op *op) if (op->onack) flags |= CEPH_OSD_FLAG_ACK; + ceph_object_layout ol; + ol.ol_pgid = op->pgid.v; + ol.ol_stripe_unit = 0; + MOSDOp *m = new MOSDOp(client_inc, op->tid, - op->oid, op->layout, osdmap->get_epoch(), + op->oid, ol, osdmap->get_epoch(), flags); m->set_snapid(op->snapid); @@ -625,10 +633,9 @@ void Objecter::list_objects(ListContext *list_context, Context *onfinish) { return; } - ceph_object_layout layout; - object_t oid; + const pg_pool_t *pool = osdmap->get_pg_pool(list_context->pool_id); + int pg_num = pool->get_pg_num(); - int pg_num = osdmap->get_pg_layout(list_context->pool_id, list_context->current_pg, layout); if (list_context->starting_pg_num == 0) { // there can't be zero pgs! list_context->starting_pg_num = pg_num; dout(20) << pg_num << " placement groups" << dendl; @@ -639,7 +646,6 @@ void Objecter::list_objects(ListContext *list_context, Context *onfinish) { list_context->current_pg = 0; list_context->cookie = 0; list_context->starting_pg_num = pg_num; - osdmap->get_pg_layout(list_context->pool_id, list_context->current_pg, layout); } if (list_context->current_pg == pg_num){ //this context got all the way through onfinish->finish(0); @@ -652,7 +658,19 @@ void Objecter::list_objects(ListContext *list_context, Context *onfinish) { bufferlist *bl = new bufferlist(); C_List *onack = new C_List(list_context, onfinish, bl, this); - read(oid, layout, op, list_context->pool_snap_seq, bl, 0, onack); + + object_t oid; + object_locator_t oloc(list_context->pool_id); + + // + Op *o = new Op(oid, oloc, op.ops, CEPH_OSD_FLAG_READ, onack, NULL); + o->priority = op.priority; + o->snapid = list_context->pool_snap_seq; + o->outbl = bl; + + o->pgid = pg_t(list_context->current_pg, list_context->pool_id, -1); + + op_submit(o); } void Objecter::_list_reply(ListContext *list_context, bufferlist *bl, Context *final_finish) diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index cd9e883d8ec..8826740ff5a 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -213,7 +213,9 @@ public: xlist::item session_item; object_t oid; - ceph_object_layout layout; + object_locator_t oloc; + pg_t pgid; + vector ops; snapid_t snapid; @@ -231,10 +233,10 @@ public: bool paused; - Op(const object_t& o, ceph_object_layout& l, vector& op, + Op(const object_t& o, const object_locator_t& ol, vector& op, int f, Context *ac, Context *co) : session_item(this), - oid(o), layout(l), + oid(o), oloc(ol), snapid(CEPH_NOSNAP), outbl(0), flags(f), priority(0), onack(ac), oncommit(co), tid(0), attempts(0), paused(false) { @@ -487,21 +489,21 @@ private: } // mid-level helpers - tid_t mutate(const object_t& oid, ceph_object_layout ol, + tid_t mutate(const object_t& oid, const object_locator_t& oloc, ObjectOperation& op, const SnapContext& snapc, utime_t mtime, int flags, Context *onack, Context *oncommit) { - Op *o = new Op(oid, ol, op.ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); + Op *o = new Op(oid, oloc, op.ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); o->priority = op.priority; o->mtime = mtime; o->snapc = snapc; return op_submit(o); } - tid_t read(const object_t& oid, ceph_object_layout ol, + tid_t read(const object_t& oid, const object_locator_t& oloc, ObjectOperation& op, snapid_t snapid, bufferlist *pbl, int flags, Context *onack) { - Op *o = new Op(oid, ol, op.ops, flags | CEPH_OSD_FLAG_READ, onack, NULL); + Op *o = new Op(oid, oloc, op.ops, flags | CEPH_OSD_FLAG_READ, onack, NULL); o->priority = op.priority; o->snapid = snapid; o->outbl = pbl; @@ -509,19 +511,19 @@ private: } // high-level helpers - tid_t stat(const object_t& oid, ceph_object_layout ol, snapid_t snap, + tid_t stat(const object_t& oid, const object_locator_t& oloc, snapid_t snap, uint64_t *psize, utime_t *pmtime, int flags, Context *onfinish) { vector ops(1); ops[0].op.op = CEPH_OSD_OP_STAT; C_Stat *fin = new C_Stat(psize, pmtime, onfinish); - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_READ, fin, 0); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_READ, fin, 0); o->snapid = snap; o->outbl = &fin->bl; return op_submit(o); } - tid_t read(const object_t& oid, ceph_object_layout ol, + tid_t read(const object_t& oid, const object_locator_t& oloc, uint64_t off, uint64_t len, snapid_t snap, bufferlist *pbl, int flags, Context *onfinish) { vector ops(1); @@ -530,12 +532,12 @@ private: ops[0].op.extent.length = len; ops[0].op.extent.truncate_size = 0; ops[0].op.extent.truncate_seq = 0; - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_READ, onfinish, 0); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_READ, onfinish, 0); o->snapid = snap; o->outbl = pbl; return op_submit(o); } - tid_t read_trunc(const object_t& oid, ceph_object_layout ol, + tid_t read_trunc(const object_t& oid, const object_locator_t& oloc, uint64_t off, uint64_t len, snapid_t snap, bufferlist *pbl, int flags, uint64_t trunc_size, __u32 trunc_seq, Context *onfinish) { @@ -545,13 +547,13 @@ private: ops[0].op.extent.length = len; ops[0].op.extent.truncate_size = trunc_size; ops[0].op.extent.truncate_seq = trunc_seq; - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_READ, onfinish, 0); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_READ, onfinish, 0); o->snapid = snap; o->outbl = pbl; return op_submit(o); } - tid_t getxattr(const object_t& oid, ceph_object_layout ol, + tid_t getxattr(const object_t& oid, const object_locator_t& oloc, const char *name, snapid_t snap, bufferlist *pbl, int flags, Context *onfinish) { vector ops(1); @@ -560,41 +562,41 @@ private: ops[0].op.xattr.value_len = 0; if (name) ops[0].data.append(name); - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_READ, onfinish, 0); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_READ, onfinish, 0); o->snapid = snap; o->outbl = pbl; return op_submit(o); } - tid_t getxattrs(const object_t& oid, ceph_object_layout ol, snapid_t snap, + tid_t getxattrs(const object_t& oid, const object_locator_t& oloc, snapid_t snap, map& attrset, int flags, Context *onfinish) { vector ops(1); ops[0].op.op = CEPH_OSD_OP_GETXATTRS; C_GetAttrs *fin = new C_GetAttrs(attrset, onfinish); - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_READ, fin, 0); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_READ, fin, 0); o->snapid = snap; o->outbl = &fin->bl; return op_submit(o); } - tid_t read_full(const object_t& oid, ceph_object_layout ol, + tid_t read_full(const object_t& oid, const object_locator_t& oloc, snapid_t snap, bufferlist *pbl, int flags, Context *onfinish) { - return read(oid, ol, 0, 0, snap, pbl, flags | CEPH_OSD_FLAG_READ, onfinish); + return read(oid, oloc, 0, 0, snap, pbl, flags | CEPH_OSD_FLAG_READ, onfinish); } // writes - tid_t _modify(const object_t& oid, ceph_object_layout ol, + tid_t _modify(const object_t& oid, const object_locator_t& oloc, vector& ops, utime_t mtime, const SnapContext& snapc, int flags, Context *onack, Context *oncommit) { - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); o->mtime = mtime; o->snapc = snapc; return op_submit(o); } - tid_t write(const object_t& oid, ceph_object_layout ol, + tid_t write(const object_t& oid, const object_locator_t& oloc, uint64_t off, uint64_t len, const SnapContext& snapc, const bufferlist &bl, utime_t mtime, int flags, Context *onack, Context *oncommit) { @@ -605,12 +607,12 @@ private: ops[0].op.extent.truncate_size = 0; ops[0].op.extent.truncate_seq = 0; ops[0].data = bl; - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); o->mtime = mtime; o->snapc = snapc; return op_submit(o); } - tid_t write_trunc(const object_t& oid, ceph_object_layout ol, + tid_t write_trunc(const object_t& oid, const object_locator_t& oloc, uint64_t off, uint64_t len, const SnapContext& snapc, const bufferlist &bl, utime_t mtime, int flags, uint64_t trunc_size, __u32 trunc_seq, @@ -622,12 +624,12 @@ private: ops[0].op.extent.truncate_size = trunc_size; ops[0].op.extent.truncate_seq = trunc_seq; ops[0].data = bl; - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); o->mtime = mtime; o->snapc = snapc; return op_submit(o); } - tid_t write_full(const object_t& oid, ceph_object_layout ol, + tid_t write_full(const object_t& oid, const object_locator_t& oloc, const SnapContext& snapc, const bufferlist &bl, utime_t mtime, int flags, Context *onack, Context *oncommit) { vector ops(1); @@ -635,12 +637,12 @@ private: ops[0].op.extent.offset = 0; ops[0].op.extent.length = bl.length(); ops[0].data = bl; - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); o->mtime = mtime; o->snapc = snapc; return op_submit(o); } - tid_t trunc(const object_t& oid, ceph_object_layout ol, + tid_t trunc(const object_t& oid, const object_locator_t& oloc, const SnapContext& snapc, utime_t mtime, int flags, uint64_t trunc_size, __u32 trunc_seq, @@ -650,67 +652,67 @@ private: ops[0].op.extent.offset = trunc_size; ops[0].op.extent.truncate_size = trunc_size; ops[0].op.extent.truncate_seq = trunc_seq; - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); o->mtime = mtime; o->snapc = snapc; return op_submit(o); } - tid_t zero(const object_t& oid, ceph_object_layout ol, + tid_t zero(const object_t& oid, const object_locator_t& oloc, uint64_t off, uint64_t len, const SnapContext& snapc, utime_t mtime, int flags, Context *onack, Context *oncommit) { vector ops(1); ops[0].op.op = CEPH_OSD_OP_ZERO; ops[0].op.extent.offset = off; ops[0].op.extent.length = len; - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); o->mtime = mtime; o->snapc = snapc; return op_submit(o); } - tid_t rollback_object(const object_t& oid, ceph_object_layout ol, + tid_t rollback_object(const object_t& oid, const object_locator_t& oloc, const SnapContext& snapc, snapid_t snapid, utime_t mtime, Context *onack, Context *oncommit) { vector ops(1); ops[0].op.op = CEPH_OSD_OP_ROLLBACK; ops[0].op.snap.snapid = snapid; - Op *o = new Op(oid, ol, ops, CEPH_OSD_FLAG_WRITE, onack, oncommit); + Op *o = new Op(oid, oloc, ops, CEPH_OSD_FLAG_WRITE, onack, oncommit); o->mtime = mtime; o->snapc = snapc; return op_submit(o); } - tid_t create(const object_t& oid, ceph_object_layout ol, + tid_t create(const object_t& oid, const object_locator_t& oloc, const SnapContext& snapc, utime_t mtime, int global_flags, int create_flags, Context *onack, Context *oncommit) { vector ops(1); ops[0].op.op = CEPH_OSD_OP_CREATE; ops[0].op.flags = create_flags; - Op *o = new Op(oid, ol, ops, global_flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); + Op *o = new Op(oid, oloc, ops, global_flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); o->mtime = mtime; o->snapc = snapc; return op_submit(o); } - tid_t remove(const object_t& oid, ceph_object_layout ol, + tid_t remove(const object_t& oid, const object_locator_t& oloc, const SnapContext& snapc, utime_t mtime, int flags, Context *onack, Context *oncommit) { vector ops(1); ops[0].op.op = CEPH_OSD_OP_DELETE; - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); o->mtime = mtime; o->snapc = snapc; return op_submit(o); } - tid_t lock(const object_t& oid, ceph_object_layout ol, int op, int flags, + tid_t lock(const object_t& oid, const object_locator_t& oloc, int op, int flags, Context *onack, Context *oncommit) { SnapContext snapc; // no snapc for lock ops vector ops(1); ops[0].op.op = op; - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); o->snapc = snapc; return op_submit(o); } - tid_t setxattr(const object_t& oid, ceph_object_layout ol, + tid_t setxattr(const object_t& oid, const object_locator_t& oloc, const char *name, const SnapContext& snapc, const bufferlist &bl, utime_t mtime, int flags, Context *onack, Context *oncommit) { @@ -720,13 +722,13 @@ private: ops[0].op.xattr.value_len = bl.length(); if (name) ops[0].data.append(name); - ops[0].data.append(bl); - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); + ops[0].data.append(bl); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); o->mtime = mtime; o->snapc = snapc; return op_submit(o); } - tid_t removexattr(const object_t& oid, ceph_object_layout ol, + tid_t removexattr(const object_t& oid, const object_locator_t& oloc, const char *name, const SnapContext& snapc, utime_t mtime, int flags, Context *onack, Context *oncommit) { @@ -736,7 +738,7 @@ private: ops[0].op.xattr.value_len = 0; if (name) ops[0].data.append(name); - Op *o = new Op(oid, ol, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); + Op *o = new Op(oid, oloc, ops, flags | CEPH_OSD_FLAG_WRITE, onack, oncommit); o->mtime = mtime; o->snapc = snapc; return op_submit(o); @@ -804,14 +806,14 @@ public: void sg_read_trunc(vector& extents, snapid_t snap, bufferlist *bl, int flags, uint64_t trunc_size, __u32 trunc_seq, Context *onfinish) { if (extents.size() == 1) { - read_trunc(extents[0].oid, extents[0].layout, extents[0].offset, extents[0].length, + read_trunc(extents[0].oid, extents[0].oloc, extents[0].offset, extents[0].length, snap, bl, flags, trunc_size, trunc_seq, onfinish); } else { C_Gather *g = new C_Gather; vector resultbl(extents.size()); int i=0; for (vector::iterator p = extents.begin(); p != extents.end(); p++) { - read_trunc(p->oid, p->layout, p->offset, p->length, + read_trunc(p->oid, p->oloc, p->offset, p->length, snap, &resultbl[i++], flags, trunc_size, trunc_seq, g->new_sub()); } g->set_finisher(new C_SGRead(this, extents, resultbl, bl, onfinish)); @@ -826,7 +828,7 @@ public: int flags, uint64_t trunc_size, __u32 trunc_seq, Context *onack, Context *oncommit) { if (extents.size() == 1) { - write_trunc(extents[0].oid, extents[0].layout, extents[0].offset, extents[0].length, + write_trunc(extents[0].oid, extents[0].oloc, extents[0].offset, extents[0].length, snapc, bl, mtime, flags, trunc_size, trunc_seq, onack, oncommit); } else { C_Gather *gack = 0, *gcom = 0; @@ -841,7 +843,7 @@ public: bit++) bl.copy(bit->first, bit->second, cur); assert(cur.length() == p->length); - write_trunc(p->oid, p->layout, p->offset, p->length, + write_trunc(p->oid, p->oloc, p->offset, p->length, snapc, cur, mtime, flags, trunc_size, trunc_seq, gack ? gack->new_sub():0, gcom ? gcom->new_sub():0);