Simplify bucketing logic in ldb-ttl

author Mayank Agarwal <amayank@fb.com>

Thu, 20 Jun 2013 18:50:33 +0000 (11:50 -0700)

committer Mayank Agarwal <amayank@fb.com>

Fri, 21 Jun 2013 16:49:24 +0000 (09:49 -0700)
author Mayank Agarwal <amayank@fb.com>
Thu, 20 Jun 2013 18:50:33 +0000 (11:50 -0700)
committer Mayank Agarwal <amayank@fb.com>
Fri, 21 Jun 2013 16:49:24 +0000 (09:49 -0700)
diff --git a/util/ldb_cmd.cc b/util/ldb_cmd.cc

index 8476f3e8eae23e43cecc67e661592432f89e626b..780d3d2a4afe4984ca3c8b54852afacd797ff10c 100644 (file)
--- a/util/ldb_cmd.cc
+++ b/util/ldb_cmd.cc
@@ -538,20 +538,16 @@ string ReadableTime(int unixtime) {
  
  // This function only called when it's the sane case of >1 buckets in time-range
  // Also called only when timekv falls between ttl_start and ttl_end provided
-void IncBucketCounts(uint64_t* bucket_counts, int ttl_start, int time_range,
-      int bucket_size, int timekv, int num_buckets) {
-  if (time_range <= 0 || timekv < ttl_start || timekv > (ttl_start + time_range)
-      || bucket_size <= 0 || num_buckets < 2) {
-    fprintf(stderr, "Error: bucketizing\n");
-    return;
-  }
-  int bucket = (timekv - ttl_start);
-  bucket = (bucket == 0) ? 1 : ceil(bucket / (double)bucket_size);
-  bucket_counts[bucket - 1]++;
+void IncBucketCounts(vector<uint64_t>& bucket_counts, int ttl_start,
+      int time_range, int bucket_size, int timekv, int num_buckets) {
+  assert(time_range > 0 && timekv >= ttl_start && bucket_size > 0 &&
+    timekv < (ttl_start + time_range) && num_buckets > 1);
+  int bucket = (timekv - ttl_start) / bucket_size;
+  bucket_counts[bucket]++;
  }
  
-void PrintBucketCounts(uint64_t* bucket_counts, int ttl_start, int ttl_end,
-      int bucket_size, int num_buckets) {
+void PrintBucketCounts(const vector<uint64_t>& bucket_counts, int ttl_start,
+      int ttl_end, int bucket_size, int num_buckets) {
    int time_point = ttl_start;
    for(int i = 0; i < num_buckets - 1; i++, time_point += bucket_size) {
      fprintf(stdout, "Keys in range %s to %s : %lu\n",
@@ -629,8 +625,8 @@ void DBDumperCommand::Help(string& ret) {
    ret.append(" [--" + ARG_COUNT_ONLY + "]");
    ret.append(" [--" + ARG_STATS + "]");
    ret.append(" [--" + ARG_TTL_BUCKET + "=<N>]");
-  ret.append(" [--" + ARG_TTL_START + "=<N>]");
-  ret.append(" [--" + ARG_TTL_END + "=<N>]");
+  ret.append(" [--" + ARG_TTL_START + "=<N>:- is inclusive]");
+  ret.append(" [--" + ARG_TTL_END + "=<N>:- is exclusive]");
    ret.append("\n");
  }
  
@@ -682,10 +678,9 @@ void DBDumperCommand::DoCommand() {
      bucket_size = time_range; // Will have just 1 bucket by default
    }
    // At this point, bucket_size=0 => time_range=0
-  uint64_t num_buckets =
-    bucket_size >= time_range ? 1 : ceil((double)time_range/bucket_size);
-  unique_ptr<uint64_t[]> bucket_counts(new uint64_t[num_buckets]);
-  fill(bucket_counts.get(), bucket_counts.get() + num_buckets, 0);
+  uint64_t num_buckets = (bucket_size >= time_range) ? 1 :
+    ((time_range + bucket_size - 1) / bucket_size);
+  vector<uint64_t> bucket_counts(num_buckets, 0);
    if (is_db_ttl_ && !count_only_ && timestamp_) {
      fprintf(stdout, "Dumping key-values from %s to %s\n",
              ReadableTime(ttl_start).c_str(), ReadableTime(ttl_end).c_str());
@@ -693,7 +688,6 @@ void DBDumperCommand::DoCommand() {
  
    for (; iter->Valid(); iter->Next()) {
      int rawtime = 0;
-    string value;
      // If end marker was specified, we stop before it
      if (!null_to_ && (iter->key().ToString() >= to_))
        break;
@@ -702,20 +696,16 @@ void DBDumperCommand::DoCommand() {
        break;
      if (is_db_ttl_) {
        TtlIterator* it_ttl = (TtlIterator*)iter;
-      struct ValueAndTimestamp val_ts = it_ttl->ValueWithTimestamp();
-      value = val_ts.value.ToString();
-      rawtime = val_ts.timestamp;
-      if (rawtime < ttl_start || rawtime > ttl_end) {
+      rawtime = it_ttl->timestamp();
+      if (rawtime < ttl_start || rawtime >= ttl_end) {
          continue;
        }
-    } else {
-      value = iter->value().ToString();
      }
      if (max_keys > 0) {
        --max_keys;
      }
      if (is_db_ttl_ && num_buckets > 1) {
-      IncBucketCounts(bucket_counts.get(), ttl_start, time_range, bucket_size,
+      IncBucketCounts(bucket_counts, ttl_start, time_range, bucket_size,
                        rawtime, num_buckets);
      }
      ++count;
@@ -724,12 +714,13 @@ void DBDumperCommand::DoCommand() {
          fprintf(stdout, "%s ", ReadableTime(rawtime).c_str());
        }
        string str = PrintKeyValue(iter->key().ToString(),
-                                 value, is_key_hex_, is_value_hex_);
+                                 iter->value().ToString(), is_key_hex_,
+                                 is_value_hex_);
        fprintf(stdout, "%s\n", str.c_str());
      }
    }
    if (num_buckets > 1 && is_db_ttl_) {
-    PrintBucketCounts(bucket_counts.get(), ttl_start, ttl_end, bucket_size,
+    PrintBucketCounts(bucket_counts, ttl_start, ttl_end, bucket_size,
                        num_buckets);
    } else {
      fprintf(stdout, "Keys in range: %lld\n", (long long) count);
@@ -1189,8 +1180,8 @@ void ScanCommand::Help(string& ret) {
    ret.append(" [--" + ARG_TTL + "]");
    ret.append(" [--" + ARG_TIMESTAMP + "]");
    ret.append(" [--" + ARG_MAX_KEYS + "=<N>q] ");
-  ret.append(" [--" + ARG_TTL_START + "=<N>]");
-  ret.append(" [--" + ARG_TTL_END + "=<N>]");
+  ret.append(" [--" + ARG_TTL_START + "=<N>:- is inclusive]");
+  ret.append(" [--" + ARG_TTL_END + "=<N>:- is exclusive]");
    ret.append("\n");
  }
  
@@ -1224,21 +1215,17 @@ void ScanCommand::DoCommand() {
          it->Valid() && (!end_key_specified_ || it->key().ToString() < end_key_);
          it->Next()) {
      string key = it->key().ToString();
-    string value;
      if (is_db_ttl_) {
        TtlIterator* it_ttl = (TtlIterator*)it;
-      struct ValueAndTimestamp val_ts = it_ttl->ValueWithTimestamp();
-      int rawtime = val_ts.timestamp;
-      value = val_ts.value.ToString();
-      if (rawtime < ttl_start || rawtime > ttl_end) {
+      int rawtime = it_ttl->timestamp();
+      if (rawtime < ttl_start || rawtime >= ttl_end) {
          continue;
        }
        if (timestamp_) {
          fprintf(stdout, "%s ", ReadableTime(rawtime).c_str());
        }
-    } else {
-      value = it->value().ToString();
      }
+    string value = it->value().ToString();
      fprintf(stdout, "%s : %s\n",
            (is_key_hex_ ? StringToHex(key) : key).c_str(),
            (is_value_hex_ ? StringToHex(value) : value).c_str()
diff --git a/utilities/ttl/db_ttl.cc b/utilities/ttl/db_ttl.cc

index af99f80e4b57a9bef50a054aa234eedee8aae784..d201208d38536800ae523be3081598b78741147b 100644 (file)
--- a/utilities/ttl/db_ttl.cc
+++ b/utilities/ttl/db_ttl.cc
@@ -84,8 +84,8 @@ Status DBWithTTL::AppendTS(const Slice& val, std::string& val_with_ts) {
  
  // Returns corruption if the length of the string is lesser than timestamp, or
  // timestamp refers to a time lesser than ttl-feature release time
-Status DBWithTTL::SanityCheckTimestamp(const std::string& str) {
-  if (str.length() < (unsigned)kTSLength) {
+Status DBWithTTL::SanityCheckTimestamp(const Slice& str) {
+  if (str.size() < (unsigned)kTSLength) {
      return Status::Corruption("Error: value's length less than timestamp's\n");
    }
    // Checks that TS is not lesser than kMinTimestamp
@@ -200,7 +200,7 @@ Status DBWithTTL::Write(const WriteOptions& opts, WriteBatch* updates) {
  }
  
  Iterator* DBWithTTL::NewIterator(const ReadOptions& opts) {
-  return new TtlIterator(db_->NewIterator(opts), kTSLength);
+  return new TtlIterator(db_->NewIterator(opts));
  }
  
  const Snapshot* DBWithTTL::GetSnapshot() {
diff --git a/utilities/ttl/db_ttl.h b/utilities/ttl/db_ttl.h

index b7533cd49ceb4af256b04796e4a72d98d8a64132..2e01c1e3d9b42eb24ede42f3971446ce5486ecfa 100644 (file)
--- a/utilities/ttl/db_ttl.h
+++ b/utilities/ttl/db_ttl.h
@@ -89,7 +89,7 @@ class DBWithTTL : public DB, CompactionFilter {
  
    static Status AppendTS(const Slice& val, std::string& val_with_ts);
  
-  static Status SanityCheckTimestamp(const std::string& str);
+  static Status SanityCheckTimestamp(const Slice& str);
  
    static Status StripTS(std::string* str);
  
@@ -106,17 +106,11 @@ class DBWithTTL : public DB, CompactionFilter {
    int32_t ttl_;
  };
  
-struct ValueAndTimestamp {
-  Slice value;
-  int32_t timestamp;
-};
-
  class TtlIterator : public Iterator {
  
   public:
-  TtlIterator(Iterator* iter, int32_t ts_len)
-    : iter_(iter),
-      ts_len_(ts_len) {
+  explicit TtlIterator(Iterator* iter)
+    : iter_(iter) {
      assert(iter_);
    }
  
@@ -152,20 +146,16 @@ class TtlIterator : public Iterator {
      return iter_->key();
    }
  
-  struct ValueAndTimestamp ValueWithTimestamp() const {
-    assert(DBWithTTL::SanityCheckTimestamp(iter_->value().ToString()).ok());
-    struct ValueAndTimestamp val_ts;
-    val_ts.timestamp = DecodeFixed32(
+  int32_t timestamp() const {
+    return DecodeFixed32(
        iter_->value().data() + iter_->value().size() - DBWithTTL::kTSLength);
-    val_ts.value = iter_->value();
-    val_ts.value.size_ -= ts_len_;
-    return val_ts;
    }
  
    Slice value() const {
-    assert(DBWithTTL::SanityCheckTimestamp(iter_->value().ToString()).ok());
+    //TODO: handle timestamp corruption like in general iterator semantics
+    assert(DBWithTTL::SanityCheckTimestamp(iter_->value()).ok());
      Slice trimmed_value = iter_->value();
-    trimmed_value.size_ -= ts_len_;
+    trimmed_value.size_ -= DBWithTTL::kTSLength;
      return trimmed_value;
    }
  
@@ -175,7 +165,6 @@ class TtlIterator : public Iterator {
  
   private:
    Iterator* iter_;
-  int32_t ts_len_;
  };
  
  }
author	Mayank Agarwal <amayank@fb.com>
	Thu, 20 Jun 2013 18:50:33 +0000 (11:50 -0700)
committer	Mayank Agarwal <amayank@fb.com>
	Fri, 21 Jun 2013 16:49:24 +0000 (09:49 -0700)
util/ldb_cmd.cc		patch \| blob \| history
utilities/ttl/db_ttl.cc		patch \| blob \| history
utilities/ttl/db_ttl.h		patch \| blob \| history