]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
ObjectStore: Add collection_list_partial for hash order
author Sage Weil <sage.weil@dreamhost.com>
Fri, 2 Dec 2011 04:57:23 +0000 (20:57 -0800)
committer Samuel Just <samuel.just@dreamhost.com>
Wed, 7 Dec 2011 19:40:11 +0000 (11:40 -0800)
Signed-off-by: Samuel Just <samuel.just@dreamhost.com>
src/os/CollectionIndex.h
src/os/FileStore.cc
src/os/FileStore.h
src/os/FlatIndex.cc
src/os/FlatIndex.h
src/os/HashIndex.cc
src/os/HashIndex.h
src/os/LFNIndex.cc
src/os/LFNIndex.h
src/os/ObjectStore.h
src/test/store_test.cc

index 2fca26fb0a6e55298b26a92f2d818ef863e4340b..c6af505d1e6d5f4e4b64a34c796c2553d2a0b488 100644 (file)
@@ -142,6 +142,15 @@ protected:
     collection_list_handle_t *last
     ) = 0;
 
+  /// List contents of collection by hash
+  virtual int collection_list_partial(
+    const hobject_t &start, ///< [in] object at which to start
+    int min_count,          ///< [in] get at least min_count objects
+    int max_count,          ///< [in] return at most max_count objects
+    vector<hobject_t> *ls,  ///< [out] Listed objects
+    hobject_t *next         ///< [out] Next object to list; start value for a follow-up call
+    ) = 0; ///< @return Error Code, 0 on success
+
   /// List contents of collection.
   virtual int collection_list(
     vector<hobject_t> *ls ///< [out] Listed Objects
index 7f88ea91a30e942c83d2a3a513fa4f9c6e1e26e4..d6117402628e7143175c0ffb4d9ac3805c363f1e 100644 (file)
@@ -3740,6 +3740,23 @@ int FileStore::collection_list_partial(coll_t c, snapid_t seq, vector<hobject_t>
   return 0;
 }
 
+// List objects of collection c by hash position: look up the collection's
+// index and forward the request to it. Negative results from either step are
+// returned as-is; any other result is reported as 0.
+int FileStore::collection_list_partial(coll_t c, hobject_t start,
+                                      int min, int max,
+                                      vector<hobject_t> *ls, hobject_t *next)
+{
+  // Bail out with -1 when fake_collections is set.
+  if (fake_collections)
+    return -1;
+
+  Index coll_index;
+  const int lookup = get_index(c, &coll_index);
+  if (lookup < 0)
+    return lookup;
+
+  const int listed =
+    coll_index->collection_list_partial(start, min, max, ls, next);
+  return listed < 0 ? listed : 0;
+}
+
 int FileStore::collection_list(coll_t c, vector<hobject_t>& ls) 
 {  
   if (fake_collections) return collections.collection_list(c, ls);
index 3eb1400f3c4ef2d73d238a2451fec76978e6268d..ce4464aa54ffb232158af30381cbcfcbcdce203a 100644 (file)
@@ -354,6 +354,9 @@ public:
   bool collection_empty(coll_t c);
   int collection_list_partial(coll_t c, snapid_t seq, vector<hobject_t>& o, int count, collection_list_handle_t *handle);
   int collection_list(coll_t c, vector<hobject_t>& o);
+  /// List by hash position; signature matches ObjectStore::collection_list_partial(coll_t, hobject_t, ...)
+  int collection_list_partial(coll_t c, hobject_t start,
+                             int min, int max,
+                             vector<hobject_t> *ls, hobject_t *next);
 
   int _create_collection(coll_t c);
   int _destroy_collection(coll_t c);
index 93e02f38f1ec7605e34dfdec46ede3414dd92e11..19fe57ba3ea61d6b1cd1a69b0e2418dde676526e 100644 (file)
@@ -383,6 +383,15 @@ int FlatIndex::collection_list_partial(snapid_t seq, int max_count,
   return 0;
 }
 
+// Hash-position listing entry point for FlatIndex. The body only asserts:
+// none of the arguments are read and neither *ls nor *next is written.
+int FlatIndex::collection_list_partial(const hobject_t &start,
+                                      int min_count,
+                                      int max_count,
+                                      vector<hobject_t> *ls,
+                                      hobject_t *next) {
+  assert(0); // Should not be called
+  return 0;  // reached only if assert() does not abort
+}
+
 int FlatIndex::collection_list(vector<hobject_t> *ls) {
   char dir_name[PATH_MAX], buf[PATH_MAX], new_name[PATH_MAX];
   strncpy(dir_name, base_path.c_str(), sizeof(dir_name));
index 53e27f5ec08ac3a66e64b6c3abe2f656be2a4b11..30c710764840d924d4082785e8b1ee9490412fe3 100644 (file)
@@ -76,6 +76,15 @@ public:
   int collection_list(
     vector<hobject_t> *ls
     );
+
+  /// @see CollectionIndex (FlatIndex's implementation asserts: should not be called)
+  int collection_list_partial(
+    const hobject_t &start,
+    int min_count,
+    int max_count,
+    vector<hobject_t> *ls,
+    hobject_t *next
+    );
 };
 
 #endif
index cfbd9bf0cf4525560c64ed3187f92aa4ebc1647e..279cbccd61f6e3dbbd664f06315aeacde9317b85 100644 (file)
@@ -163,6 +163,16 @@ int HashIndex::_collection_list(vector<hobject_t> *ls) {
   return list(path, NULL, NULL, NULL, NULL, ls);
 }
 
+// Hash-position listing for the whole collection: seed the in/out cursor
+// *next with start and walk from the top-level (empty) path.
+int HashIndex::_collection_list_partial(const hobject_t &start,
+                                       int min_count, int max_count,
+                                       vector<hobject_t> *ls,
+                                       hobject_t *next) {
+  *next = start;
+  return list_by_hash(vector<string>(), min_count, max_count, next, ls);
+}
+
 int HashIndex::start_split(const vector<string> &path) {
   bufferlist bl;
   InProgressOp op_tag(InProgressOp::SPLIT, path);
@@ -371,12 +381,12 @@ int HashIndex::complete_split(const vector<string> &path, subdir_info_s info) {
 void HashIndex::get_path_components(const hobject_t &hoid,
                                    vector<string> *path) {
   char buf[MAX_HASH_LEVEL + 1];
-  snprintf(buf, sizeof(buf), "%.*X", MAX_HASH_LEVEL, hoid.hash);
+  snprintf(buf, sizeof(buf), "%.*X", MAX_HASH_LEVEL, hoid.get_filestore_key());
 
-  // Path components are the hex characters of hoid.hash in, least
+  // Path components are the hex characters of hoid.hash, least
   // significant first
   for (int i = 0; i < MAX_HASH_LEVEL; ++i) {
-    path->push_back(string(&buf[MAX_HASH_LEVEL - 1 - i], 1));
+    path->push_back(string(&buf[i], 1));
   }
 }
 
@@ -394,28 +404,34 @@ string HashIndex::get_path_str(const hobject_t &hoid) {
   return get_hash_str(hoid.hash);
 }
 
-int HashIndex::list(const vector<string> &path,
-                   const int *max_count,
-                   const snapid_t *seq,
-                   const string *lower_bound,
-                   uint32_t *index,
-                   vector<hobject_t> *out) {
-  if (lower_bound)
-    assert(index);
-  vector<string> next_path = path;
-  next_path.push_back("");
-  set<string> hash_prefixes;
-  multimap<string, hobject_t> objects;
+/**
+ * Convert a hexadecimal hash prefix string to a 32-bit hash.
+ *
+ * The prefix is right-padded with '0' to 8 hex digits, parsed, and the
+ * nibbles of the parsed value are then reversed,
+ * e.g. "FFFFAB" -> 0xFFFFAB00 -> 0x00BAFFFF.
+ *
+ * @param prefix hex digits; taken by value because it is padded locally
+ * @return the nibble-reversed value
+ */
+uint32_t HashIndex::hash_prefix_to_hash(string prefix) {
+  while (prefix.size() < sizeof(uint32_t) * 2) {
+    prefix.push_back('0');
+  }
+  // Parse into an unsigned int (the type %x expects) that starts at 0, so the
+  // result is never an indeterminate value. The field width caps the parse at
+  // the 8 digits a 32-bit hash can hold.
+  unsigned int parsed = 0;
+  int matched = sscanf(prefix.c_str(), "%8x", &parsed);
+  assert(matched == 1); // prefix must begin with a hex digit
+  (void)matched;        // keeps builds quiet if assert() compiles to nothing
+  uint32_t hash = parsed;
+  // nibble reverse: swap nibbles within bytes, bytes within 16-bit halves,
+  // then the two halves
+  hash = ((hash & 0x0f0f0f0f) << 4) | ((hash & 0xf0f0f0f0) >> 4);
+  hash = ((hash & 0x00ff00ff) << 8) | ((hash & 0xff00ff00) >> 8);
+  hash = ((hash & 0x0000ffff) << 16) | ((hash & 0xffff0000) >> 16);
+  return hash;
+}
+
+int HashIndex::get_path_contents_by_hash(const vector<string> &path,
+                                        const string *lower_bound,
+                                        const hobject_t *next_object,
+                                        const snapid_t *seq,
+                                        set<string> *hash_prefixes,
+                                        multimap<string, hobject_t> *objects) {
+  set<string> subdirs;
   map<string, hobject_t> rev_objects;
   int r;
-  int max = max_count ? *max_count : 0;
   string cur_prefix;
   for (vector<string>::const_iterator i = path.begin();
        i != path.end();
        ++i) {
     cur_prefix.append(*i);
   }
-
   r = list_objects(path, 0, 0, &rev_objects);
   if (r < 0)
     return r;
@@ -425,12 +441,13 @@ int HashIndex::list(const vector<string> &path,
     string hash_prefix = get_path_str(i->second);
     if (lower_bound && hash_prefix < *lower_bound)
       continue;
+    if (next_object && i->second < *next_object)
+      continue;
     if (seq && i->second.snap < *seq)
       continue;
-    hash_prefixes.insert(hash_prefix);
-    objects.insert(pair<string, hobject_t>(hash_prefix, i->second));
+    hash_prefixes->insert(hash_prefix);
+    objects->insert(pair<string, hobject_t>(hash_prefix, i->second));
   }
-  set<string> subdirs;
   r = list_subdirs(path, &subdirs);
   if (r < 0)
     return r;
@@ -440,9 +457,92 @@ int HashIndex::list(const vector<string> &path,
     string candidate = cur_prefix + *i;
     if (lower_bound && candidate < lower_bound->substr(0, candidate.size()))
       continue;
-    hash_prefixes.insert(cur_prefix + *i);
+    if (next_object &&
+       candidate < get_path_str(*next_object).substr(0, candidate.size()))
+      continue;
+    hash_prefixes->insert(cur_prefix + *i);
   }
+  return 0;
+}
 
+/**
+ * List the objects stored under path in hash-prefix order, appending to *out.
+ *
+ * *next is both input and output. On entry only objects >= *next are
+ * listed. On return it holds the position to resume from:
+ *  - the object that did not fit once *out held max_count entries,
+ *  - an object built from the hash prefix of a not yet visited
+ *    subdirectory once *out held more than min_count entries, or
+ *  - hobject_t::get_max() when everything under path has been listed.
+ *
+ * @return Error Code, 0 on success
+ */
+int HashIndex::list_by_hash(const vector<string> &path,
+                           int min_count,
+                           int max_count,
+                           hobject_t *next,
+                           vector<hobject_t> *out) {
+  assert(next);
+  assert(out);
+  // Path of the subdirectory to descend into; the last component is filled
+  // in per prefix below.
+  vector<string> next_path = path;
+  next_path.push_back("");
+  set<string> hash_prefixes;
+  multimap<string, hobject_t> objects;
+  int r = get_path_contents_by_hash(path,
+                                   NULL,
+                                   next,
+                                   NULL,
+                                   &hash_prefixes,
+                                   &objects);
+  if (r < 0)
+    return r;
+  for (set<string>::iterator i = hash_prefixes.begin();
+       i != hash_prefixes.end();
+       ++i) {
+    // lower_bound rather than find: multimap::find may return any entry
+    // with an equivalent key, while the loop below has to start at the first.
+    multimap<string, hobject_t>::iterator j = objects.lower_bound(*i);
+    if (j == objects.end() || j->first != *i) {
+      // No object carries this prefix, so it names a subdirectory.
+      if (out->size() > (unsigned)min_count) {
+       *next = hobject_t("", "", CEPH_NOSNAP, hash_prefix_to_hash(*i));
+       return 0;
+      }
+      *(next_path.rbegin()) = *(i->rbegin());
+      hobject_t next_recurse = *next;
+      r = list_by_hash(next_path,
+                      min_count,
+                      max_count,
+                      &next_recurse,
+                      out);
+
+      if (r < 0)
+       return r;
+      // A non-max cursor means the recursion stopped early; hand its resume
+      // position to the caller.
+      if (!next_recurse.max) {
+       *next = next_recurse;
+       return 0;
+      }
+    } else {
+      while (j != objects.end() && j->first == *i) {
+       if (out->size() == (unsigned)max_count) {
+         *next = j->second;
+         return 0;
+       }
+       if (j->second >= *next) {
+         out->push_back(j->second);
+       }
+       ++j;
+      }
+    }
+  }
+  *next = hobject_t::get_max();
+  return 0;
+}
+
+int HashIndex::list(const vector<string> &path,
+                   const int *max_count,
+                   const snapid_t *seq,
+                   const string *lower_bound,
+                   uint32_t *index,
+                   vector<hobject_t> *out) {
+  if (lower_bound)
+    assert(index);
+  vector<string> next_path = path;
+  next_path.push_back("");
+  int max = max_count ? *max_count : 0;
+  set<string> hash_prefixes;
+  multimap<string, hobject_t> objects;
+  int r = get_path_contents_by_hash(path,
+                                   lower_bound,
+                                   NULL,
+                                   seq,
+                                   &hash_prefixes,
+                                   &objects);
   uint32_t counter = 0;
   for (set<string>::iterator i = hash_prefixes.begin();
        i != hash_prefixes.end() && (!max_count || max > 0);
index a4256e3fd24d49e3219af7a753368905bb66c7ca..5f568068f0d6c5cb3218f3330a9c25c1d8b65f21 100644 (file)
@@ -167,6 +167,13 @@ protected:
   int _collection_list(
     vector<hobject_t> *ls
     );
+  int _collection_list_partial(
+    const hobject_t &start, ///< [in] object at which to start; copied into *next
+    int min_count,          ///< [in] list at least min_count
+    int max_count,          ///< [in] list at most max_count
+    vector<hobject_t> *ls,  ///< [out] Listed objects
+    hobject_t *next         ///< [out] Next object to list
+    ); ///< @return Error Code, 0 on success
 private:
   /// Tag root directory at beginning of split
   int start_split(
@@ -246,7 +253,22 @@ private:
     uint32_t hash ///< [in] Hash to convert to a string.
     ); ///< @return String representation of hash
 
-  /** 
+  /// Get hash from hash prefix string: the prefix is padded with '0' to 8 hex
+  /// digits, parsed, then nibble-reversed, e.g. "FFFFAB" -> 0x00BAFFFF
+  uint32_t hash_prefix_to_hash(
+    string prefix ///< [in] string to convert
+    ); ///< @return Hash
+
+  /// Get path contents by hash
+  int get_path_contents_by_hash(
+    const vector<string> &path,          ///< [in] Path to list
+    const string *lower_bound,           ///< [in] list >= *lower_bound (hash string), may be NULL
+    const hobject_t *next_object,        ///< [in] list >= *next_object, may be NULL
+    const snapid_t *seq,                 ///< [in] list >= *seq, may be NULL
+    set<string> *hash_prefixes,          ///< [out] prefixes in dir
+    multimap<string, hobject_t> *objects ///< [out] objects
+    );
+
+  /**
    * Recursively lists all objects in path.
    *
    * Lists all objects in path or a subdirectory of path which
@@ -267,6 +289,15 @@ private:
     uint32_t *index,           ///< [in,out] last index (NULL iff !lower_bound)
     vector<hobject_t> *out     ///< [out] Listed objects
     ); ///< @return Error Code, 0 on success
+
+  /// List objects in collection in hobject_t order
+  int list_by_hash(
+    const vector<string> &path, ///< [in] Path to list
+    int min_count,              ///< [in] List at least min_count
+    int max_count,              ///< [in] List at most max_count
+    hobject_t *next,            ///< [in,out] List objects >= *next
+    vector<hobject_t> *out      ///< [out] Listed objects
+    ); ///< @return Error Code, 0 on success
 };
 
 #endif
index 7684b1d746cee6c17e83c62f2404b1760a6a6063..cf4f2d50d1f6972438c2b1d99842fc01e73f556e 100644 (file)
@@ -123,6 +123,15 @@ int LFNIndex::collection_list(vector<hobject_t> *ls) {
   return _collection_list(ls);
 }
 
+
+// Public entry point: passes every argument through unchanged to the pure
+// virtual _collection_list_partial and returns its result.
+int LFNIndex::collection_list_partial(const hobject_t &start,
+                                     int min_count,
+                                     int max_count,
+                                     vector<hobject_t> *ls,
+                                     hobject_t *next) {
+  return _collection_list_partial(start, min_count, max_count, ls, next);
+}
+
 /* Derived class utility methods */
 
 int LFNIndex::fsync_dir(const vector<string> &path) {
index e0321b45dbfed2748b00fcef8b0f6ea2949f6b48..cad93eedc281b726cd43f309a378dd42ab3c933e 100644 (file)
@@ -140,6 +140,15 @@ public:
     vector<hobject_t> *ls
     );
 
+  /// @see CollectionIndex; forwards to _collection_list_partial
+  int collection_list_partial(
+    const hobject_t &start,
+    int min_count,
+    int max_count,
+    vector<hobject_t> *ls,
+    hobject_t *next
+    );
+
 protected:
   virtual int _init() = 0;
 
@@ -187,6 +196,15 @@ protected:
     vector<hobject_t> *ls ///< [out] Listed objects.
     ) = 0;
 
+  /// Implementation hook called by collection_list_partial; @see CollectionIndex
+  virtual int _collection_list_partial(
+    const hobject_t &start,
+    int min_count,
+    int max_count,
+    vector<hobject_t> *ls,
+    hobject_t *next
+    ) = 0;
+
 protected:
 
   /* Non-virtual utility methods */
index 63a68e51977bd096628373f62c344e73824ecc7b..35b59bf009ba2a95ea7bb8bdeb8ab832cba7b699 100644 (file)
@@ -623,6 +623,23 @@ public:
   virtual int collection_list_partial(coll_t c, snapid_t seq, vector<hobject_t>& o, int count, collection_list_handle_t *handle) = 0;
   virtual int collection_list(coll_t c, vector<hobject_t>& o) = 0;
 
+
+  /**
+   * list partial contents of collection relative to a hash offset/position
+   *
+   * @param c collection
+   * @param start list objects that sort >= this value
+   * @param min return at least this many results, unless we reach the end
+   * @param max return no more than this many results
+   * @param ls [out] result (the HashIndex-backed listing appends to ls and
+   *           does not clear it first)
+   * @param next [out] next item sorts >= this value; pass it as start to
+   *             continue. The HashIndex-backed listing sets it to
+   *             hobject_t::get_max() once everything has been listed.
+   * @return zero on success, or negative error
+   */
+  virtual int collection_list_partial(coll_t c, hobject_t start,
+                                     int min, int max,
+                                     vector<hobject_t> *ls, hobject_t *next) = 0;
+
+
   /*
   virtual int _create_collection(coll_t c) = 0;
   virtual int _destroy_collection(coll_t c) = 0;
index 568fa1296c8764004ffb37c0fccd4862a8153c28..b18a3c80e40f8f5447277847fefbee37129fbd30 100644 (file)
@@ -369,12 +369,29 @@ public:
       if (objects.size() < 50) break;
       objects.clear();
     }
+    hobject_t next, current;
+    set<hobject_t> objects_set2;
+    while (1) {
+      cerr << "scanning (by hash)..." << std::endl;
+      int r = store->collection_list_partial(cid, current, 50, 100, &objects, &next);
+      ASSERT_EQ(r, 0);
+      objects_set2.insert(objects.begin(), objects.end());
+      if (next.max) break;
+      objects.clear();
+      current = next;
+    }
     ASSERT_EQ(objects_set.size(), available_objects.size());
+    ASSERT_EQ(objects_set2.size(), available_objects.size());
     for (set<hobject_t>::iterator i = objects_set.begin();
         i != objects_set.end();
         ++i) {
       ASSERT_GT(available_objects.count(*i), (unsigned)0);
     }
+    for (set<hobject_t>::iterator i = objects_set2.begin();
+        i != objects_set2.end();
+        ++i) {
+      ASSERT_GT(available_objects.count(*i), (unsigned)0);
+    }
   }
 
   int stat() {