From 15af4a245af1c2b774fa0ac3aa03c4c1d00b46df Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Wed, 2 Dec 2020 13:11:09 +0800 Subject: [PATCH] task.internal.archive: gzip archived files >= 128MB * misc: add an optional write_to argument to misc.pull_directory() so the caller can specify the function that writes each pulled file to the local file. * task/internal: add a global option "log-compress-min-size" which defaults to "128MB". If the size of a file pulled from the remote host is greater than or equal to the specified size, it will be compressed with gzip and given the ".gz" extension before being stored in the archive directory. Signed-off-by: Kefu Chai --- docs/detailed_test_config.rst | 9 +++++++++ teuthology/misc.py | 21 +++++++++++++++++++-- teuthology/task/internal/__init__.py | 28 ++++++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/docs/detailed_test_config.rst b/docs/detailed_test_config.rst index 42b17f1f..319eefb3 100644 --- a/docs/detailed_test_config.rst +++ b/docs/detailed_test_config.rst @@ -239,6 +239,15 @@ using the top-level configuration, like:: If ``archive-on-error`` is ``true``, the ``archive`` subdirectory is archived only for failed tests. +If the size of an archived file is greater than or equal to 128MB, the file +will be compressed using gzip. This threshold can be configured using the top-level option +named ``log-compress-min-size``, like:: + + log-compress-min-size: 256GB + +Other size unit suffixes are also supported, +see the `humanfriendly documentation <https://pypi.org/project/humanfriendly/#a-note-about-size-units>`__ +for more details. 
Situ Debugging -------------- diff --git a/teuthology/misc.py b/teuthology/misc.py index 29c21286..4d6cfd5b 100644 --- a/teuthology/misc.py +++ b/teuthology/misc.py @@ -7,6 +7,7 @@ import os import logging import configobj import getpass +import shutil import socket import subprocess import tarfile @@ -714,9 +715,24 @@ def get_file(remote, path, sudo=False, dest_dir='/tmp'): return file_data -def pull_directory(remote, remotedir, localdir): +def copy_fileobj(src, tarinfo, local_path): + with open(local_path, 'wb') as dest: + shutil.copyfileobj(src, dest) + + +def pull_directory(remote, remotedir, localdir, write_to=copy_fileobj): """ Copy a remote directory to a local directory. + + :param remote: the remote object representing the remote host from where + the specified directory is pulled + :param remotedir: the source directory on remote host + :param localdir: the destination directory on localhost + :param write_to: optional function to write the file to localdir. + its signature should be: + func(src: fileobj, + tarinfo: tarfile.TarInfo, + local_path: str) """ log.debug('Transferring archived files from %s:%s to %s', remote.shortname, remotedir, localdir) @@ -736,7 +752,8 @@ def pull_directory(remote, remotedir, localdir): elif ti.isfile(): sub = safepath.munge(ti.name) safepath.makedirs(root=localdir, path=os.path.dirname(sub)) - tar.makefile(ti, targetpath=os.path.join(localdir, sub)) + with tar.extractfile(ti) as src: + write_to(src, ti, os.path.join(localdir, sub)) else: if ti.isdev(): type_ = 'device' diff --git a/teuthology/task/internal/__init__.py b/teuthology/task/internal/__init__.py index 640008e7..5e63d91c 100644 --- a/teuthology/task/internal/__init__.py +++ b/teuthology/task/internal/__init__.py @@ -4,18 +4,23 @@ Note that there is no corresponding task defined for this module. 
All of the calls are made from other modules, most notably teuthology/run.py """ import contextlib +import functools +import gzip import logging import os +import shutil import time import yaml import subprocess +import humanfriendly + import teuthology.lock.ops from teuthology import misc from teuthology.packaging import get_builder_project from teuthology import report from teuthology.config import config as teuth_config -from teuthology.exceptions import VersionNotFoundError +from teuthology.exceptions import ConfigError, VersionNotFoundError from teuthology.job_status import get_status, set_status from teuthology.orchestra import cluster, remote, run # the below import with noqa is to workaround run.py which does not support multilevel submodule import @@ -334,6 +339,14 @@ def fetch_binaries_for_coredumps(path, remote): remote.get_file(debug_path, coredump_path) +def gzip_if_too_large(compress_min_size, src, tarinfo, local_path): + if tarinfo.size >= compress_min_size: + with gzip.open(local_path + '.gz', 'wb') as dest: + shutil.copyfileobj(src, dest) + else: + misc.copy_fileobj(src, tarinfo, local_path) + + @contextlib.contextmanager def archive(ctx, config): """ @@ -364,7 +377,18 @@ def archive(ctx, config): os.mkdir(logdir) for rem in ctx.cluster.remotes.keys(): path = os.path.join(logdir, rem.shortname) - misc.pull_directory(rem, archive_dir, path) + min_size_option = ctx.config.get('log-compress-min-size', + '128MB') + try: + compress_min_size_bytes = \ + humanfriendly.parse_size(min_size_option) + except humanfriendly.InvalidSize: + msg = 'invalid "log-compress-min-size": {}'.format(min_size_option) + log.error(msg) + raise ConfigError(msg) + maybe_compress = functools.partial(gzip_if_too_large, + compress_min_size_bytes) + misc.pull_directory(rem, archive_dir, path, maybe_compress) # Check for coredumps and pull binaries fetch_binaries_for_coredumps(path, rem) -- 2.47.3