fix pexpect child shutdown race (#1331) (#1336)

* ensure that any stdout data produced by the child after the last `expect()` call is consumed Co-authored-by: Matt Davis <[email protected]>
ansible · Feb 13, 2024 · c129775 · c129775
1 parent c311e81
commit c129775
Show file tree

Hide file tree

Showing 3 changed files with 38 additions and 6 deletions.
diff --git a/ansible_runner/runner.py b/ansible_runner/runner.py
@@ -294,10 +294,11 @@ def run(self):
                 child.logfile_read = stdout_handle
             except pexpect.exceptions.ExceptionPexpect as e:
                 child = collections.namedtuple(
-                    'MissingProcess', 'exitstatus isalive close'
+                    'MissingProcess', 'exitstatus isalive expect close'
                 )(
                     exitstatus=127,
                     isalive=lambda: False,
+                    expect=lambda *args, **kwargs: None,
                     close=lambda: None,
                 )
 
@@ -339,9 +340,17 @@ def _decode(x):
                     Runner.handle_termination(child.pid, is_cancel=False)
                     self.timed_out = True
 
+            # fix for https://github.com/ansible/ansible-runner/issues/1330
+            # Since we're (ab)using pexpect's logging callback as our source of stdout data, we need to pump the stream one last
+            # time, in case any new output was written by the child between the last return from expect and its termination. Ideally
+            # this would have an arbitrarily large timeout value as well, in case a ridiculous amount of data was written, but just
+            # invoking one last pump should cover the vast majority of real-world cases.
+            child.expect(pexpect.EOF, timeout=5)
+
+            # close the child to ensure no more output will be written before we close the stream interposers
+            child.close()
             stdout_handle.close()
             stderr_handle.close()
-            child.close()
             self.rc = child.exitstatus if not (self.timed_out or self.canceled) else 254
 
         if self.canceled:

diff --git a/test/fixtures/projects/pexpect_timeout_data_loss/project/pb.yml b/test/fixtures/projects/pexpect_timeout_data_loss/project/pb.yml
@@ -0,0 +1,9 @@
+# part of the regression test for https://github.com/ansible/ansible-runner/issues/1330
+
+- hosts: localhost
+  gather_facts: no
+  tasks:
+    # sleep significantly longer than the configured pexpect timeout; the cancel callback will inject
+    # additional delay before the next process status sampling interval that can cause further output to be lost;
+    # if all is well, we'll do another loop over the child output until it's all been consumed...
+    - raw: sleep 2
diff --git a/test/integration/test_runner.py b/test/integration/test_runner.py
@@ -3,15 +3,16 @@
 import json
 import os
 import re
-import pytest
 import six
 import sys
+import time
 
-from ansible_runner import Runner
+from test.utils.common import iterate_timeout
 
-from ansible_runner.exceptions import AnsibleRunnerException
+import pytest
 
-from test.utils.common import iterate_timeout
+from ansible_runner import Runner, run
+from ansible_runner.exceptions import AnsibleRunnerException
 
 
 @pytest.mark.xfail(reason='Test is unstable')
@@ -281,3 +282,16 @@ def test_set_extra_vars(rc):
         with open(os.path.join(rc.artifact_dir, 'stdout')) as f:
             if 'hello there' in f.read():
                 break
+
+
+# regression test for https://github.com/ansible/ansible-runner/issues/1330
+def test_pexpect_timeout(project_fixtures):
+    r = run(
+        private_data_dir=str(project_fixtures / 'pexpect_timeout_data_loss'),
+        playbook='pb.yml',
+        settings={"pexpect_timeout": 0.1},  # set the pexpect timeout very low
+        cancel_callback=lambda: time.sleep(3) or False,  # induce enough delay in the child polling loop that the child will exit before being polled again
+    )
+
+    # ensure we got playbook_on_stats; if pexpect ate it, we won't...
+    assert any(ev for ev in r.events if ev.get('event', None) == 'playbook_on_stats')