cephadm: fix timeout argument to call function 52909/head
author John Mulligan <jmulligan@redhat.com>
Mon, 27 Feb 2023 19:38:50 +0000 (14:38 -0500)
committer Adam King <adking@redhat.com>
Wed, 9 Aug 2023 20:42:35 +0000 (16:42 -0400)
The timeout argument to the call function, used for executing sub-processes,
did not work; this patch makes the timeout behave as (probably) intended.
Use the `process.communicate()` method rather than the `tee` helper to
handle IO collection. Since no logging is done until after the exit code
is known, the tee calls are not necessary. Add calls to kill the child
process when the timeout occurs. This helps prevent event loop "leaks"
that generate Python warnings.

Signed-off-by: John Mulligan <jmulligan@redhat.com>
(cherry picked from commit cefe44cde8d53d7bfd935435f87205a01d677986)
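
For context, a minimal standalone sketch of the pattern the patch adopts
(in cephadm the equivalent coroutine is nested inside call() and closes over
command and timeout; the logging, verbosity handling, and CephadmContext are
omitted here, so this illustrates the technique rather than the cephadm code
itself):

```python
# Standalone sketch of the timeout handling introduced by the patch.
import asyncio
from typing import List, Tuple


async def run_with_timeout(command: List[str], timeout: float) -> Tuple[str, str, int]:
    process = await asyncio.create_subprocess_exec(
        *command,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        # communicate() reads both pipes to EOF and waits for the child to
        # exit, so a single wait_for covers output collection and the exit
        # code; separate tee() coroutines are no longer needed.
        stdout, stderr = await asyncio.wait_for(process.communicate(), timeout)
    except asyncio.TimeoutError:
        # Kill the child and reap it so the transport is detached before the
        # event loop goes away; otherwise Python may warn about the still
        # running subprocess (the "event loop leaks" mentioned above).
        process.kill()
        await process.wait()
        return '', '', 124
    assert process.returncode is not None
    return stdout.decode('utf-8'), stderr.decode('utf-8'), process.returncode
```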

src/cephadm/cephadm

index 7b022037e09c4caaea12c14e42b1dbec20fb1667..eaaeb840664172ed5f9ca9d524f90ab7bfc40cf1 100755 (executable)
@@ -1747,13 +1747,6 @@ def call(ctx: CephadmContext,
         prefix += ': '
     timeout = timeout or ctx.timeout
 
-    async def tee(reader: asyncio.StreamReader) -> str:
-        collected = StringIO()
-        async for line in reader:
-            message = line.decode('utf-8')
-            collected.write(message)
-        return collected.getvalue()
-
     async def run_with_timeout() -> Tuple[str, str, int]:
         process = await asyncio.create_subprocess_exec(
             *command,
@@ -1763,14 +1756,29 @@ def call(ctx: CephadmContext,
         assert process.stdout
         assert process.stderr
         try:
-            stdout, stderr = await asyncio.gather(tee(process.stdout),
-                                                  tee(process.stderr))
-            returncode = await asyncio.wait_for(process.wait(), timeout)
+            stdout, stderr = await asyncio.wait_for(
+                process.communicate(),
+                timeout,
+            )
         except asyncio.TimeoutError:
+            # try to terminate the process assuming it is still running.  It's
+            # possible that even after killing the process it will not
+            # complete, particularly if it is D-state.  If that happens the
+            # process.wait call will block, but we're no worse off than before
+            # when the timeout did not work.  Additionally, there are other
+            # corner-cases we could try and handle here but we decided to start
+            # simple.
+            process.kill()
+            await process.wait()
             logger.info(prefix + f'timeout after {timeout} seconds')
             return '', '', 124
         else:
-            return stdout, stderr, returncode
+            assert process.returncode is not None
+            return (
+                stdout.decode('utf-8'),
+                stderr.decode('utf-8'),
+                process.returncode,
+            )
 
     stdout, stderr, returncode = async_run(run_with_timeout())
     log_level = verbosity.success_log_level()
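
A hedged usage example of the sketch above (in cephadm itself the coroutine
is driven by the async_run helper visible in the trailing context, which
serves the same purpose as asyncio.run does here): exercising the timeout
path should yield empty output and exit code 124.

```python
# Assumes the run_with_timeout sketch above. A command that outlives its
# timeout is killed and reported with exit code 124, the same convention
# coreutils' timeout(1) uses.
import asyncio

out, err, rc = asyncio.run(run_with_timeout(['sleep', '10'], timeout=1.0))
assert (out, err, rc) == ('', '', 124)
```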