]> git-server-git.apps.pok.os.sepia.ceph.com Git - teuthology.git/commitdiff
task.internal.archive: gzip archived file size > 128MB 1586/head
author Kefu Chai <kchai@redhat.com>
Wed, 2 Dec 2020 05:11:09 +0000 (13:11 +0800)
committer Kefu Chai <kchai@redhat.com>
Wed, 2 Dec 2020 05:24:54 +0000 (13:24 +0800)
* misc: add an optional write_to argument to misc.pull_directory()
        so the caller can optionally specify the function used to
        write each pulled file to a local file.
* task/internal: add a global option "log-compress-min-size" which
        defaults to "128MB". if the size of a file pulled from a remote
        host is greater than or equal to the specified size, it will be
        compressed with gzip and given the ".gz" extension before
        being stored in the archive directory.

Signed-off-by: Kefu Chai <kchai@redhat.com>
docs/detailed_test_config.rst
teuthology/misc.py
teuthology/task/internal/__init__.py

index 42b17f1f359dfe238d3fe868cb73ecefc33c5b93..319eefb3d603e4b83b3ef653f57cb276efa7df66 100644 (file)
@@ -239,6 +239,15 @@ using the top-level configuration, like::
 If ``archive-on-error`` is ``true``, the ``archive`` subdirectory is
 archived only for failed tests.
 
+If the size of an archived file is 128MB or larger, the file will be compressed
+using gzip and stored with a ``.gz`` extension. This threshold can be configured
+using the top-level option named ``log-compress-min-size``, like::
+
+  log-compress-min-size: 256GB
+
+Other size unit suffixes are also supported;
+see the `humanfriendly documentation <https://pypi.org/project/humanfriendly/#a-note-about-size-units>`__
+for more details.
 
 Situ Debugging
 --------------
index 29c21286d495f352bcb6879a66dc4cc61aee7aee..4d6cfd5b5950cd86715522faa951111b0478d67e 100644 (file)
@@ -7,6 +7,7 @@ import os
 import logging
 import configobj
 import getpass
+import shutil
 import socket
 import subprocess
 import tarfile
@@ -714,9 +715,24 @@ def get_file(remote, path, sudo=False, dest_dir='/tmp'):
     return file_data
 
 
-def pull_directory(remote, remotedir, localdir):
+def copy_fileobj(src, tarinfo, local_path):
+    with open(local_path, 'wb') as dest:
+        shutil.copyfileobj(src, dest)
+
+
+def pull_directory(remote, remotedir, localdir, write_to=copy_fileobj):
     """
     Copy a remote directory to a local directory.
+
+    :param remote: the remote object representing the remote host from where
+                   the specified directory is pulled
+    :param remotedir: the source directory on remote host
+    :param localdir: the destination directory on localhost
+    :param write_to: optional function to write the file to localdir.
+                     its signature should be:
+                     func(src: fileobj,
+                          tarinfo: tarfile.TarInfo,
+                          local_path: str)
     """
     log.debug('Transferring archived files from %s:%s to %s',
               remote.shortname, remotedir, localdir)
@@ -736,7 +752,8 @@ def pull_directory(remote, remotedir, localdir):
         elif ti.isfile():
             sub = safepath.munge(ti.name)
             safepath.makedirs(root=localdir, path=os.path.dirname(sub))
-            tar.makefile(ti, targetpath=os.path.join(localdir, sub))
+            with tar.extractfile(ti) as src:
+                write_to(src, ti, os.path.join(localdir, sub))
         else:
             if ti.isdev():
                 type_ = 'device'
index 640008e7e3c2f90fd1c236b3344bf1954009682a..5e63d91c1cabb11c4fe095926a2ae2a8a388ff91 100644 (file)
@@ -4,18 +4,23 @@ Note that there is no corresponding task defined for this module.  All of
 the calls are made from other modules, most notably teuthology/run.py
 """
 import contextlib
+import functools
+import gzip
 import logging
 import os
+import shutil
 import time
 import yaml
 import subprocess
 
+import humanfriendly
+
 import teuthology.lock.ops
 from teuthology import misc
 from teuthology.packaging import get_builder_project
 from teuthology import report
 from teuthology.config import config as teuth_config
-from teuthology.exceptions import VersionNotFoundError
+from teuthology.exceptions import ConfigError, VersionNotFoundError
 from teuthology.job_status import get_status, set_status
 from teuthology.orchestra import cluster, remote, run
 # the below import with noqa is to workaround run.py which does not support multilevel submodule import
@@ -334,6 +339,14 @@ def fetch_binaries_for_coredumps(path, remote):
             remote.get_file(debug_path, coredump_path)
 
 
+def gzip_if_too_large(compress_min_size, src, tarinfo, local_path):
+    if tarinfo.size >= compress_min_size:
+        with gzip.open(local_path + '.gz', 'wb') as dest:
+            shutil.copyfileobj(src, dest)
+    else:
+        misc.copy_fileobj(src, tarinfo, local_path)
+
+
 @contextlib.contextmanager
 def archive(ctx, config):
     """
@@ -364,7 +377,18 @@ def archive(ctx, config):
                 os.mkdir(logdir)
             for rem in ctx.cluster.remotes.keys():
                 path = os.path.join(logdir, rem.shortname)
-                misc.pull_directory(rem, archive_dir, path)
+                min_size_option = ctx.config.get('log-compress-min-size',
+                                                 '128MB')
+                try:
+                    compress_min_size_bytes = \
+                        humanfriendly.parse_size(min_size_option)
+                except humanfriendly.InvalidSize:
+                    msg = 'invalid "log-compress-min-size": {}'.format(min_size_option)
+                    log.error(msg)
+                    raise ConfigError(msg)
+                maybe_compress = functools.partial(gzip_if_too_large,
+                                                   compress_min_size_bytes)
+                misc.pull_directory(rem, archive_dir, path, maybe_compress)
                 # Check for coredumps and pull binaries
                 fetch_binaries_for_coredumps(path, rem)