From e0a671aac772c20660a006b7f14706d8701e871d Mon Sep 17 00:00:00 2001 From: Nitzan Mordechai Date: Mon, 8 Dec 2025 12:25:39 +0000 Subject: [PATCH] script/gen-corpus: cleanup and improve readability and performance - gen-corpus cleanup never removed the temporary directory (the final rm was missing the '$'); remove it from an EXIT trap instead. - only add the 'cc' remote when it is not already configured, so a re-run does not fail on "git remote add" - improve it a bit for readability - import.sh script was slow, improve performance by using fewer forks and batch processing Signed-off-by: Nitzan Mordechai --- src/script/gen-corpus.sh | 18 ++++++------------ src/test/encoding/import.sh | 20 ++++++++++---------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/src/script/gen-corpus.sh b/src/script/gen-corpus.sh index 8550c2080706..1188b7327961 100755 --- a/src/script/gen-corpus.sh +++ b/src/script/gen-corpus.sh @@ -6,18 +6,14 @@ set -ex function get_jobs() { local jobs=$(nproc) - if [ $jobs -ge 8 ] ; then - echo 8 - else - echo $jobs - fi + [ $jobs -gt 8 ] && jobs=8 + echo $jobs } [ -z "$BUILD_DIR" ] && BUILD_DIR=build function build() { local encode_dump_path=$1 - shift ./do_cmake.sh \ -DWITH_MGR_DASHBOARD_FRONTEND=OFF \ @@ -54,9 +50,7 @@ function run() { function import_corpus() { local encode_dump_path=$1 - shift - local version=$1 - shift + local version=$2 # import the corpus ../src/test/encoding/import.sh \ @@ -77,18 +71,18 @@ function verify() { function commit_and_push() { local version=$1 - shift pushd ../ceph-object-corpus git checkout -b wip-${version} git add archive/${version} git commit --signoff --message=${version} - git remote add cc git@github.com:ceph/ceph-object-corpus.git + git remote get-url cc &>/dev/null || git remote add cc git@github.com:ceph/ceph-object-corpus.git git push cc wip-${version} popd } encode_dump_path=$(mktemp -d) +trap 'rm -rf -- "$encode_dump_path"' EXIT build $encode_dump_path echo "generating corpus objects.." run @@ -99,4 +93,4 @@ echo "verifying imported corpus.." verify echo "all good, pushing to remote repo.." 
commit_and_push ${version} -rm -rf encode_dump_path + diff --git a/src/test/encoding/import.sh b/src/test/encoding/import.sh index eea96e353a98..e7faa6bb0fc8 100755 --- a/src/test/encoding/import.sh +++ b/src/test/encoding/import.sh @@ -6,18 +6,18 @@ archive=$3 [ -d "$archive" ] && [ -d "$src" ] || echo "usage: $0 " -[ -d "$archive/$ver" ] || mkdir "$archive/$ver" +mkdir -p "$archive/$ver" -dest_dir="$archive/$ver/objects" +dest_base="$archive/$ver/objects" -[ -d "$dest_dir" ] || mkdir "$dest_dir" +mkdir -p "$dest_base" -for f in `find $src -type f` -do - n=`basename $f` - type=`echo $n | sed 's/__.*//'` - md=`md5sum $f | awk '{print $1}'` +find "$src" -type f -exec md5sum {} + | \ +while read -r md_hash path; do + filename=${path##*/} + prefix=${filename%%__*} # type name = everything before the first "__", as the old sed did + dest_dir="$dest_base/$prefix" - [ -d "$dest_dir/$type" ] || mkdir $dest_dir/$type - [ -e "$dest_dir/$type/$md" ] || cp $f $dest_dir/$type/$md + mkdir -p "$dest_dir" + [ -e "$dest_dir/$md_hash" ] || cp "$path" "$dest_dir/$md_hash" done -- 2.47.3