git-server-git.apps.pok.os.sepia.ceph.com Git - s3-tests.git/commitdiff
precompute files in readwrite tool
author Steven Berler <steven.berler@dreamhost.com>
Fri, 30 Dec 2011 19:05:12 +0000 (19:05 +0000)
committer Steven Berler <steven.berler@dreamhost.com>
Fri, 30 Dec 2011 19:05:12 +0000 (19:05 +0000)
Makes the readwrite tool precompute a set of files and reuse them
when writing objects rather than generating each file on the fly.

s3tests/readwrite.py
s3tests/realistic.py

index a06210b4aa5450368ccb226f92878bfe5b5a4d59..3298a64877a9d420d6ac066aea66af93130afa7c 100644 (file)
@@ -168,7 +168,7 @@ def main():
             )
         file_names = itertools.islice(file_names, config.readwrite.files.num)
         file_names = list(file_names)
-        files = realistic.files(
+        files = realistic.files2(
             mean=1024 * config.readwrite.files.size,
             stddev=1024 * config.readwrite.files.stddev,
             seed=seeds['contents'],
index 38fd9ba47b41f4fa104998cecf73b3c767a4f44e..1829048ddfd4d1428ecb02ee5f0d4534837331a2 100644 (file)
@@ -4,6 +4,8 @@ import string
 import struct
 import time
 import math
+import tempfile
+import shutil
 
 
 NANOSECOND = int(1e9)
@@ -84,6 +86,37 @@ class RandomContentFile(object):
 
         return ''.join(r)
 
+class PrecomputedContentFile(object):
+    def __init__(self, f):
+        self._file = tempfile.SpooledTemporaryFile()
+        f.seek(0)
+        shutil.copyfileobj(f, self._file)
+        
+        self.last_chunks = self.chunks = None
+        self.seek(0)
+
+    def seek(self, offset):
+        self._file.seek(offset)
+
+        if offset == 0:
+            # only reset the chunks when seeking to the beginning
+            self.last_chunks = self.chunks
+            self.last_seek = time.time()
+            self.chunks = []
+
+    def tell(self):
+        return self._file.tell()
+
+    def read(self, size=-1):
+        data = self._file.read(size)
+        self._mark_chunk()
+        return data
+
+    def _mark_chunk(self):
+        elapsed = time.time() - self.last_seek
+        elapsed_nsec = int(round(elapsed * NANOSECOND))
+        self.chunks.append([self.tell(), elapsed_nsec])
+
 class FileVerifier(object):
     def __init__(self):
         self.size = 0
@@ -141,6 +174,28 @@ def files(mean, stddev, seed=None):
                 break
         yield RandomContentFile(size=size, seed=rand.getrandbits(32))
 
+def files2(mean, stddev, seed=None, numfiles=10):
+    """
+    Yields file objects with effectively random contents, where the
+    size of each file follows the normal distribution with `mean` and
+    `stddev`.
+
+    Rather than continuously generating new files, this pre-computes and
+    stores `numfiles` files and yields them in a loop.
+    """
+    # pre-compute all the files (and save with TemporaryFiles)
+    rand_files = files(mean, stddev, seed)
+    fs = []
+    for _ in xrange(numfiles):
+        f = next(rand_files)
+        t = tempfile.SpooledTemporaryFile()
+        shutil.copyfileobj(f, t)
+        fs.append(t)
+
+    while True:
+        for f in fs:
+            yield PrecomputedContentFile(f)
+
 def names(mean, stddev, charset=None, seed=None):
     """
     Yields strings that are somewhat plausible as file names, where