From bd1bd2e80509a19526eaa9f2a96023d222dbcc48 Mon Sep 17 00:00:00 2001 From: Lucian Petrut Date: Thu, 2 May 2024 09:02:08 +0300 Subject: [PATCH] windows: increase log collection timeout Some failed Windows jobs are missing the build artifacts and it seems like the log collection function times out. https://jenkins.ceph.com/job/ceph-windows-pull-requests/39380/ We'll increase the timeout from 30s to 10m. While at it, we're updating the scp_download and scp_upload helpers, checking for timeout errors. Signed-off-by: Lucian Petrut --- ceph-windows-installer-build/build/build | 2 +- scripts/build_utils.sh | 22 ++++++++++++++++++++-- scripts/ceph-windows/run_tests | 3 ++- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/ceph-windows-installer-build/build/build b/ceph-windows-installer-build/build/build index b8ddd1b1..55d9fd28 100644 --- a/ceph-windows-installer-build/build/build +++ b/ceph-windows-installer-build/build/build @@ -33,7 +33,7 @@ popd # # Upload ceph-windows-installer repo to the Windows VM # -SSH_TIMEOUT=5m scp_upload $WORKSPACE/ceph-windows-installer /workspace/ceph-windows-installer +scp_upload $WORKSPACE/ceph-windows-installer /workspace/ceph-windows-installer # # Build the Visual Studio project diff --git a/scripts/build_utils.sh b/scripts/build_utils.sh index 63eddd60..37e10333 100755 --- a/scripts/build_utils.sh +++ b/scripts/build_utils.sh @@ -1747,7 +1747,16 @@ function scp_upload() { if [[ ! -z $SSH_KNOWN_HOSTS_FILE ]]; then SSH_OPTS="$SSH_OPTS -o UserKnownHostsFile=$SSH_KNOWN_HOSTS_FILE" fi - timeout ${SSH_TIMEOUT:-"30s"} scp -i ${SSH_KEY:-"$HOME/.ssh/id_rsa"} $SSH_OPTS -r $LOCAL_FILE ${SSH_USER}@${SSH_ADDRESS}:${REMOTE_FILE} + timeout ${SSH_TIMEOUT:-"10m"} scp -i ${SSH_KEY:-"$HOME/.ssh/id_rsa"} $SSH_OPTS -r $LOCAL_FILE ${SSH_USER}@${SSH_ADDRESS}:${REMOTE_FILE} || { + EXIT_CODE=$? + # By default, the "timeout" CLI tool always exits with 124 when the + # timeout is exceeded. 
Unless the "--preserve-status" argument is used, the + # exit code is never set to the exit code of the command that timed out. + if [[ $EXIT_CODE -eq 124 ]]; then + echo "ERROR: scp upload timed out" + fi + return $EXIT_CODE + } } function scp_download() { @@ -1765,7 +1774,16 @@ function scp_download() { if [[ ! -z $SSH_KNOWN_HOSTS_FILE ]]; then SSH_OPTS="$SSH_OPTS -o UserKnownHostsFile=$SSH_KNOWN_HOSTS_FILE" fi - timeout ${SSH_TIMEOUT:-"30s"} scp -i ${SSH_KEY:-"$HOME/.ssh/id_rsa"} $SSH_OPTS -r ${SSH_USER}@${SSH_ADDRESS}:${REMOTE_FILE} $LOCAL_FILE + timeout ${SSH_TIMEOUT:-"10m"} scp -i ${SSH_KEY:-"$HOME/.ssh/id_rsa"} $SSH_OPTS -r ${SSH_USER}@${SSH_ADDRESS}:${REMOTE_FILE} $LOCAL_FILE || { + EXIT_CODE=$? + # By default, the "timeout" CLI tool always exits with 124 when the + # timeout is exceeded. Unless the "--preserve-status" argument is used, the + # exit code is never set to the exit code of the command that timed out. + if [[ $EXIT_CODE -eq 124 ]]; then + echo "ERROR: scp download timed out" + fi + return $EXIT_CODE + } } function retrycmd_if_failure() { diff --git a/scripts/ceph-windows/run_tests b/scripts/ceph-windows/run_tests index 56d8775d..6e2da2d7 100644 --- a/scripts/ceph-windows/run_tests +++ b/scripts/ceph-windows/run_tests @@ -38,7 +38,7 @@ scp_upload $CEPH_KEYRING /ProgramData/ceph/keyring # # Setup the Ceph Windows build in the Windows VM # -SSH_TIMEOUT=5m scp_upload $WORKSPACE/ceph.zip /ceph.zip +scp_upload $WORKSPACE/ceph.zip /ceph.zip SSH_TIMEOUT=10m ssh_exec powershell.exe "\$ProgressPreference='SilentlyContinue'; Expand-Archive -Path /ceph.zip -DestinationPath / -Force" ssh_exec powershell.exe "Add-MpPreference -ExclusionPath 'C:\ceph'" ssh_exec powershell.exe "New-Service -Name ceph-rbd -BinaryPathName 'c:\ceph\rbd-wnbd.exe service'" @@ -83,6 +83,7 @@ function collect_artifacts() { ssh_exec curl.exe --retry-max-time 30 --retry 10 -L -o /workspace/collect-event-logs.ps1 $COLLECT_EVENT_LOGS_SCRIPT_URL SSH_TIMEOUT=30m ssh_exec powershell.exe
/workspace/collect-event-logs.ps1 -LogDirectory /workspace/eventlogs scp_download /workspace/eventlogs $WORKSPACE/artifacts/client/eventlogs + echo "Successfully retrieved artifacts." } trap collect_artifacts EXIT -- 2.39.5