1
0
Fork 0
mirror of https://github.com/ansible-collections/community.general.git synced 2026-04-21 11:19:00 +00:00

pacemaker: fix race condition on resource creation (#11750)

* remove pacemaker wait arg and fix race condition

* fix up pacemaker resource and stonith polling

* add changelog for pacemaker timeout bug

* remove env from test case and fix changelog file name

* Update changelogs/fragments/11750-pacemaker-wait-race-condition.yml

Co-authored-by: Felix Fontein <felix@fontein.de>

---------

Co-authored-by: Felix Fontein <felix@fontein.de>
This commit is contained in:
munchtoast 2026-04-18 16:45:58 -04:00 committed by GitHub
parent afe9de7562
commit 6c809dd9db
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 382 additions and 17 deletions

View file

@ -10,8 +10,138 @@
from __future__ import annotations
import json
import pytest
from ansible_collections.community.general.plugins.modules import pacemaker_resource
from .uthelper import RunCommandMock, UTHelper
# Generate the standard table-driven test cases for pacemaker_resource from the
# companion YAML fixture, with run_command mocked so no real `pcs` binary runs.
UTHelper.from_module(pacemaker_resource, __name__, mocks=[RunCommandMock])
# ---------------------------------------------------------------------------
# Race condition tests: resource starts after one or more Stopped polls
# ---------------------------------------------------------------------------
# Canned `pcs property config` stdout for a cluster that is NOT in maintenance
# mode (no `maintenance-mode=true` line), used by the race-condition tests.
NO_MAINTENANCE_OUT = "\n".join([
    "Cluster Properties: cib-bootstrap-options",
    "cluster-infrastructure=corosync",
    "cluster-name=hacluster",
    "dc-version=2.1.9-1.fc41-7188dbf",
    "have-watchdog=false",
]) + "\n"
@pytest.fixture
def patch_bin(mocker):
    """Redirect AnsibleModule.get_bin_path so every binary resolves under /testbin/."""
    def fake_get_bin_path(self_, path, *args, **kwargs):
        # No real PATH lookup: the mocked run_command never executes these paths.
        return "/testbin/{0}".format(path)

    mocker.patch("ansible.module_utils.basic.AnsibleModule.get_bin_path", fake_get_bin_path)
@pytest.mark.usefixtures("patch_bin")
def test_present_race_condition_stopped_then_started(mocker, capfd) -> None:
    """Resource reports Stopped on the first poll then Started on the second — must succeed."""
    # Neutralize the poll-interval sleep so the polling loop runs instantly.
    mocker.patch("ansible_collections.community.general.plugins.module_utils.pacemaker.time.sleep")
    # Scripted (rc, stdout, stderr) replies, consumed one per run_command call.
    # The order is load-bearing — it must match the module's call sequence:
    # 1. initial _get(): resource status → not found (rc=1)
    # 2. state_present: property config → no maintenance
    # 3. state_present: resource create → rc=0
    # 4. post-create maintenance check → no maintenance
    # 5. wait_for_resource poll 1: status → Stopped (not yet running)
    # 6. wait_for_resource poll 2: status → Started
    # 7. __quit_module__ _get(): status → Started
    run_command_calls = [
        (1, "", "Error: resource or tag id 'virtual-ip' not found"),
        (1, NO_MAINTENANCE_OUT, ""),
        (0, "Assumed agent name 'ocf:heartbeat:IPaddr2'", ""),
        (1, NO_MAINTENANCE_OUT, ""),
        (0, " * virtual-ip\t(ocf:heartbeat:IPAddr2):\t Stopped", ""),
        (0, " * virtual-ip\t(ocf:heartbeat:IPAddr2):\t Started", ""),
        (0, " * virtual-ip\t(ocf:heartbeat:IPAddr2):\t Started", ""),
    ]

    def side_effect(self_, **kwargs):
        # Each invocation consumes the next scripted reply; a pop from an empty
        # list (unexpected extra call) fails the test loudly with IndexError.
        return run_command_calls.pop(0)

    mocker.patch("ansible.module_utils.basic.AnsibleModule.run_command", side_effect=side_effect)
    # The module always exits via sys.exit (exit_json/fail_json), hence SystemExit.
    with pytest.raises(SystemExit):
        with pytest.MonkeyPatch().context() as mp:
            mp.setattr(
                "ansible.module_utils.basic._ANSIBLE_ARGS",
                json.dumps(
                    {
                        "ANSIBLE_MODULE_ARGS": {
                            "state": "present",
                            "name": "virtual-ip",
                            "resource_type": {"resource_name": "IPaddr2"},
                            "resource_option": ["ip=192.168.2.1"],
                            "wait": 30,
                        }
                    }
                ).encode(),
            )
            # NOTE(review): raising=False tolerates ansible-core versions where
            # _ANSIBLE_PROFILE does not exist — confirm against supported cores.
            mp.setattr("ansible.module_utils.basic._ANSIBLE_PROFILE", "legacy", raising=False)
            pacemaker_resource.main()
    out, _err = capfd.readouterr()
    result = json.loads(out)
    assert result["changed"] is True
    assert result.get("failed") is not True
    assert "Started" in result["value"]
@pytest.mark.usefixtures("patch_bin")
def test_present_wait_timeout_raises(mocker, capfd) -> None:
    """Resource never starts within the wait window — must fail with a timeout message."""
    # Neutralize the poll-interval sleep so the test runs instantly.
    mocker.patch("ansible_collections.community.general.plugins.module_utils.pacemaker.time.sleep")
    # Simulate time advancing past the deadline immediately on the first poll:
    # the first monotonic() reading (0.0) seeds the deadline, the second (999.0)
    # is already far beyond wait=10 seconds.
    monotonic_values = iter([0.0, 999.0])
    mocker.patch(
        "ansible_collections.community.general.plugins.module_utils.pacemaker.time.monotonic",
        side_effect=lambda: next(monotonic_values),
    )
    # Scripted (rc, stdout, stderr) replies, consumed one per run_command call:
    # _get() → not found, property config, create, maintenance check, one Stopped poll.
    run_command_calls = [
        (1, "", "Error: resource or tag id 'virtual-ip' not found"),
        (1, NO_MAINTENANCE_OUT, ""),
        (0, "Assumed agent name 'ocf:heartbeat:IPaddr2'", ""),
        (1, NO_MAINTENANCE_OUT, ""),
        (0, " * virtual-ip\t(ocf:heartbeat:IPAddr2):\t Stopped", ""),
    ]

    def side_effect(self_, **kwargs):
        # Each invocation consumes the next scripted reply in order.
        return run_command_calls.pop(0)

    mocker.patch("ansible.module_utils.basic.AnsibleModule.run_command", side_effect=side_effect)
    # The module exits via fail_json on timeout, which raises SystemExit.
    with pytest.raises(SystemExit):
        with pytest.MonkeyPatch().context() as mp:
            mp.setattr(
                "ansible.module_utils.basic._ANSIBLE_ARGS",
                json.dumps(
                    {
                        "ANSIBLE_MODULE_ARGS": {
                            "state": "present",
                            "name": "virtual-ip",
                            "resource_type": {"resource_name": "IPaddr2"},
                            "resource_option": ["ip=192.168.2.1"],
                            "wait": 10,
                        }
                    }
                ).encode(),
            )
            # NOTE(review): raising=False tolerates cores without _ANSIBLE_PROFILE.
            mp.setattr("ansible.module_utils.basic._ANSIBLE_PROFILE", "legacy", raising=False)
            pacemaker_resource.main()
    out, _err = capfd.readouterr()
    result = json.loads(out)
    assert result.get("failed") is True
    assert "Timed out" in result["msg"]
    assert "virtual-ip" in result["msg"]

View file

@ -40,11 +40,26 @@ test_cases:
dc-version=2.1.9-1.fc41-7188dbf
have-watchdog=false
err: ""
- command: [/testbin/pcs, resource, create, virtual-ip, IPaddr2, "ip=[192.168.2.1]", --wait=300]
- command: [/testbin/pcs, resource, create, virtual-ip, IPaddr2, "ip=[192.168.2.1]"]
environ: *env-def
rc: 0
out: "Assumed agent name 'ocf:heartbeat:IPaddr2' (deduced from 'IPAddr2')"
err: ""
- command: [/testbin/pcs, property, config]
environ: *env-def
rc: 1
out: |
Cluster Properties: cib-bootstrap-options
cluster-infrastructure=corosync
cluster-name=hacluster
dc-version=2.1.9-1.fc41-7188dbf
have-watchdog=false
err: ""
- command: [/testbin/pcs, resource, status, virtual-ip]
environ: *env-def
rc: 0
out: " * virtual-ip\t(ocf:heartbeat:IPAddr2):\t Started"
err: ""
- command: [/testbin/pcs, resource, status, virtual-ip]
environ: *env-def
rc: 0
@ -103,11 +118,26 @@ test_cases:
dc-version=2.1.9-1.fc41-7188dbf
have-watchdog=false
err: ""
- command: [/testbin/pcs, resource, create, virtual-ip, ocf:heartbeat:IPaddr2, "ip=[192.168.2.1]", op, start, timeout=1200, op, stop, timeout=1200, op, monitor, timeout=1200, meta, test_meta1=123, meta, test_meta2=456, --group, test_group, clone, test_clone, test_clone_meta1=789, --wait=200]
- command: [/testbin/pcs, resource, create, virtual-ip, ocf:heartbeat:IPaddr2, "ip=[192.168.2.1]", op, start, timeout=1200, op, stop, timeout=1200, op, monitor, timeout=1200, meta, test_meta1=123, meta, test_meta2=456, --group, test_group, clone, test_clone, test_clone_meta1=789]
environ: *env-def
rc: 0
out: "Assumed agent name 'ocf:heartbeat:IPaddr2'"
err: ""
- command: [/testbin/pcs, property, config]
environ: *env-def
rc: 1
out: |
Cluster Properties: cib-bootstrap-options
cluster-infrastructure=corosync
cluster-name=hacluster
dc-version=2.1.9-1.fc41-7188dbf
have-watchdog=false
err: ""
- command: [/testbin/pcs, resource, status, virtual-ip]
environ: *env-def
rc: 0
out: " * virtual-ip\t(ocf:heartbeat:IPAddr2):\t Started"
err: ""
- command: [/testbin/pcs, resource, status, virtual-ip]
environ: *env-def
rc: 0
@ -142,11 +172,26 @@ test_cases:
dc-version=2.1.9-1.fc41-7188dbf
have-watchdog=false
err: ""
- command: [/testbin/pcs, resource, create, virtual-ip, IPaddr2, "ip=[192.168.2.1]", --wait=300]
- command: [/testbin/pcs, resource, create, virtual-ip, IPaddr2, "ip=[192.168.2.1]"]
environ: *env-def
rc: 1
out: ""
err: "Error: 'virtual-ip' already exists\n"
- command: [/testbin/pcs, property, config]
environ: *env-def
rc: 1
out: |
Cluster Properties: cib-bootstrap-options
cluster-infrastructure=corosync
cluster-name=hacluster
dc-version=2.1.9-1.fc41-7188dbf
have-watchdog=false
err: ""
- command: [/testbin/pcs, resource, status, virtual-ip]
environ: *env-def
rc: 0
out: " * virtual-ip\t(ocf:heartbeat:IPAddr2):\t Started"
err: ""
- command: [/testbin/pcs, resource, status, virtual-ip]
environ: *env-def
rc: 0
@ -182,11 +227,22 @@ test_cases:
have-watchdog=false
maintenance-mode=true
err: ""
- command: [/testbin/pcs, resource, create, virtual-ip, IPaddr2, "ip=[192.168.2.1]", --wait=300]
- command: [/testbin/pcs, resource, create, virtual-ip, IPaddr2, "ip=[192.168.2.1]"]
environ: *env-def
rc: 1
rc: 0
out: ""
err: "Error: resource 'virtual-ip' is not running on any node"
err: ""
- command: [/testbin/pcs, property, config]
environ: *env-def
rc: 0
out: |
Cluster Properties: cib-bootstrap-options
cluster-infrastructure=corosync
cluster-name=hacluster
dc-version=2.1.9-1.fc41-7188dbf
have-watchdog=false
maintenance-mode=true
err: ""
- command: [/testbin/pcs, resource, status, virtual-ip]
environ: *env-def
rc: 0
@ -222,11 +278,22 @@ test_cases:
have-watchdog: false
maintenance-mode: true
err: ""
- command: [/testbin/pcs, resource, create, virtual-ip, IPaddr2, "ip=[192.168.2.1]", --wait=300]
- command: [/testbin/pcs, resource, create, virtual-ip, IPaddr2, "ip=[192.168.2.1]"]
environ: *env-def
rc: 1
rc: 0
out: ""
err: "Error: resource 'virtual-ip' is not running on any node"
err: ""
- command: [/testbin/pcs, property, config]
environ: *env-def
rc: 0
out: |
Cluster Properties: cib-bootstrap-options
cluster-infrastructure: corosync
cluster-name: hacluster
dc-version: 2.1.9-1.fc41-7188dbf
have-watchdog: false
maintenance-mode: true
err: ""
- command: [/testbin/pcs, resource, status, virtual-ip]
environ: *env-def
rc: 0
@ -523,7 +590,7 @@ test_cases:
dc-version=2.1.9-1.fc41-7188dbf
have-watchdog=false
err: ""
- command: [/testbin/pcs, resource, clone, virtual-ip, --wait=300]
- command: [/testbin/pcs, resource, clone, virtual-ip]
environ: *env-def
rc: 1
out: ""
@ -558,11 +625,26 @@ test_cases:
dc-version=2.1.9-1.fc41-7188dbf
have-watchdog=false
err: ""
- command: [/testbin/pcs, resource, clone, virtual-ip, test_clone, meta, test_clone_meta1=789, --wait=200]
- command: [/testbin/pcs, resource, clone, virtual-ip, test_clone, meta, test_clone_meta1=789]
environ: *env-def
rc: 0
out: ""
err: ""
- command: [/testbin/pcs, property, config]
environ: *env-def
rc: 1
out: |
Cluster Properties: cib-bootstrap-options
cluster-infrastructure=corosync
cluster-name=hacluster
dc-version=2.1.9-1.fc41-7188dbf
have-watchdog=false
err: ""
- command: [/testbin/pcs, resource, status, virtual-ip]
environ: *env-def
rc: 0
out: " * Clone Set: virtual-ip-clone [virtual-ip]\t(ocf:heartbeat:IPAddr2):\t Started"
err: ""
- command: [/testbin/pcs, resource, status, virtual-ip]
environ: *env-def
rc: 0

View file

@ -10,8 +10,125 @@
from __future__ import annotations
import json
import pytest
from ansible_collections.community.general.plugins.modules import pacemaker_stonith
from .uthelper import RunCommandMock, UTHelper
# Generate the standard table-driven test cases for pacemaker_stonith from the
# companion YAML fixture, with run_command mocked so no real `pcs` binary runs.
UTHelper.from_module(pacemaker_stonith, __name__, mocks=[RunCommandMock])
# ---------------------------------------------------------------------------
# Race condition tests: resource starts after one or more Stopped polls
# ---------------------------------------------------------------------------
@pytest.fixture
def patch_bin(mocker):
    """Redirect AnsibleModule.get_bin_path so every binary resolves under /testbin/."""
    def fake_get_bin_path(self_, path, *args, **kwargs):
        # No real PATH lookup: the mocked run_command never executes these paths.
        return "/testbin/{0}".format(path)

    mocker.patch("ansible.module_utils.basic.AnsibleModule.get_bin_path", fake_get_bin_path)
@pytest.mark.usefixtures("patch_bin")
def test_present_race_condition_stopped_then_started(mocker, capfd) -> None:
    """Resource reports Stopped on the first poll then Started on the second — must succeed."""
    # Neutralize the poll-interval sleep so the polling loop runs instantly.
    mocker.patch("ansible_collections.community.general.plugins.module_utils.pacemaker.time.sleep")
    # Scripted (rc, stdout, stderr) replies, consumed one per run_command call.
    # The order is load-bearing — it must match the module's call sequence:
    # 1. initial _get(): stonith status → not found (rc=1)
    # 2. state_present create → rc=0
    # 3. wait_for_resource poll 1: status → Stopped (not yet running)
    # 4. wait_for_resource poll 2: status → Started
    # 5. __quit_module__ _get(): status → Started
    run_command_calls = [
        (1, "", ""),
        (0, "", ""),
        (0, " * virtual-stonith\t(stonith:fence_virt):\t Stopped", ""),
        (0, " * virtual-stonith\t(stonith:fence_virt):\t Started", ""),
        (0, " * virtual-stonith\t(stonith:fence_virt):\t Started", ""),
    ]

    def side_effect(self_, **kwargs):
        # Each invocation consumes the next scripted reply in order.
        return run_command_calls.pop(0)

    mocker.patch("ansible.module_utils.basic.AnsibleModule.run_command", side_effect=side_effect)
    # The module always exits via sys.exit (exit_json/fail_json), hence SystemExit.
    with pytest.raises(SystemExit):
        with pytest.MonkeyPatch().context() as mp:
            mp.setattr(
                "ansible.module_utils.basic._ANSIBLE_ARGS",
                json.dumps(
                    {
                        "ANSIBLE_MODULE_ARGS": {
                            "state": "present",
                            "name": "virtual-stonith",
                            "stonith_type": "fence_virt",
                            "stonith_options": ["pcmk_host_list=f1"],
                            "wait": 30,
                        }
                    }
                ).encode(),
            )
            # NOTE(review): raising=False tolerates cores without _ANSIBLE_PROFILE.
            mp.setattr("ansible.module_utils.basic._ANSIBLE_PROFILE", "legacy", raising=False)
            pacemaker_stonith.main()
    out, _err = capfd.readouterr()
    result = json.loads(out)
    assert result["changed"] is True
    assert result.get("failed") is not True
    assert "Started" in result["value"]
@pytest.mark.usefixtures("patch_bin")
def test_present_wait_timeout_raises(mocker, capfd) -> None:
    """Resource never starts within the wait window — must fail with a timeout message."""
    # Neutralize the poll-interval sleep so the test runs instantly.
    mocker.patch("ansible_collections.community.general.plugins.module_utils.pacemaker.time.sleep")
    # Simulate time advancing past the deadline immediately on the first poll:
    # the first monotonic() reading (0.0) seeds the deadline, the second (999.0)
    # is already far beyond wait=10 seconds.
    monotonic_values = iter([0.0, 999.0])
    mocker.patch(
        "ansible_collections.community.general.plugins.module_utils.pacemaker.time.monotonic",
        side_effect=lambda: next(monotonic_values),
    )
    # Sequence: initial _get() → not found, create → rc=0, poll → Stopped (deadline exceeded)
    run_command_calls = [
        (1, "", ""),
        (0, "", ""),
        (0, " * virtual-stonith\t(stonith:fence_virt):\t Stopped", ""),
    ]

    def side_effect(self_, **kwargs):
        # Each invocation consumes the next scripted reply in order.
        return run_command_calls.pop(0)

    mocker.patch("ansible.module_utils.basic.AnsibleModule.run_command", side_effect=side_effect)
    # The module exits via fail_json on timeout, which raises SystemExit.
    with pytest.raises(SystemExit):
        with pytest.MonkeyPatch().context() as mp:
            mp.setattr(
                "ansible.module_utils.basic._ANSIBLE_ARGS",
                json.dumps(
                    {
                        "ANSIBLE_MODULE_ARGS": {
                            "state": "present",
                            "name": "virtual-stonith",
                            "stonith_type": "fence_virt",
                            "stonith_options": ["pcmk_host_list=f1"],
                            "wait": 10,
                        }
                    }
                ).encode(),
            )
            # NOTE(review): raising=False tolerates cores without _ANSIBLE_PROFILE.
            mp.setattr("ansible.module_utils.basic._ANSIBLE_PROFILE", "legacy", raising=False)
            pacemaker_stonith.main()
    out, _err = capfd.readouterr()
    result = json.loads(out)
    assert result.get("failed") is True
    assert "Timed out" in result["msg"]
    assert "virtual-stonith" in result["msg"]

View file

@ -33,7 +33,7 @@ test_cases:
rc: 1
out: ""
err: ""
- command: ["/testbin/pcs", stonith, create, virtual-stonith, fence_virt, "pcmk_host_list=f1", "op", "monitor", "interval=30s", "--wait=300"]
- command: ["/testbin/pcs", stonith, create, virtual-stonith, fence_virt, "pcmk_host_list=f1", "op", "monitor", "interval=30s"]
environ: *env-def
rc: 0
out: ""
@ -43,6 +43,11 @@ test_cases:
rc: 0
out: " * virtual-stonith\t(stonith:fence_virt):\t Started"
err: ""
- command: ["/testbin/pcs", stonith, status, virtual-stonith]
environ: *env-def
rc: 0
out: " * virtual-stonith\t(stonith:fence_virt):\t Started"
err: ""
- id: test_present_minimal_input_stonith_exists
input:
state: present
@ -65,7 +70,7 @@ test_cases:
rc: 0
out: " * virtual-stonith\t(stonith:fence_virt):\t Started"
err: ""
- command: ["/testbin/pcs", stonith, create, virtual-stonith, fence_virt, "pcmk_host_list=f1", "op", "monitor", "interval=30s", "--wait=300"]
- command: ["/testbin/pcs", stonith, create, virtual-stonith, fence_virt, "pcmk_host_list=f1", "op", "monitor", "interval=30s"]
environ: *env-def
rc: 0
out: ""
@ -75,6 +80,11 @@ test_cases:
rc: 0
out: " * virtual-stonith\t(stonith:fence_virt):\t Started"
err: ""
- command: ["/testbin/pcs", stonith, status, virtual-stonith]
environ: *env-def
rc: 0
out: " * virtual-stonith\t(stonith:fence_virt):\t Started"
err: ""
- id: test_absent_minimal_input_stonith_not_exists
input:
state: absent