Skip to content

Commit 4cdb81d

Browse files
authored
[RUN-4297] ansible inventory fix (#432)
* fix race condition, now each instance uses its own tmp directory * it keeps previous behavior not validatin tmp folder to exist * add tests
1 parent cfe9b5a commit 4cdb81d

2 files changed

Lines changed: 235 additions & 23 deletions

File tree

src/main/groovy/com/rundeck/plugins/ansible/ansible/AnsibleRunnerContextBuilder.java

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,12 @@ public class AnsibleRunnerContextBuilder {
4242
private final Map<String, Object> jobConf;
4343
private final Collection<INodeEntry> nodes;
4444
private final Collection<File> tempFiles;
45+
/**
46+
* Per-builder working directory (e.g. {@code ansible-exec-<executionId>-builder-<rand>/})
47+
* created atomically with
48+
* {@link Files#createTempDirectory(java.nio.file.Path, String, java.nio.file.attribute.FileAttribute[])}.
49+
* EXCLUSIVELY owned by this builder; safe to delete recursively.
50+
*/
4551
private File executionSpecificDir;
4652

4753
private AnsiblePluginGroup pluginGroup;
@@ -827,20 +833,25 @@ public void cleanupTempFiles() {
827833
}
828834
tempFiles.clear();
829835

830-
// Clean up execution-specific directory (including group_vars when inventory was generated/inline)
831-
// Note: group_vars created alongside user-provided inventory files is cleaned up by AnsibleRunner
836+
// Clean up the builder-specific working directory (including group_vars when inventory
837+
// was generated/inline). Note: group_vars created alongside user-provided inventory files
838+
// is cleaned up by AnsibleRunner.
839+
// executionSpecificDir is an EXCLUSIVE per-builder directory (ansible-exec-<execId>-builder-<rand>/),
840+
// so recursive deletion is always safe — no sibling builder shares files with it.
832841
if (executionSpecificDir != null && executionSpecificDir.exists()) {
833842
if (!getDebug()) {
834-
log.debug("Cleaning up execution-specific directory: {}", executionSpecificDir.getAbsolutePath());
843+
log.debug("Cleaning up builder-specific directory: {}", executionSpecificDir.getAbsolutePath());
835844
if (!deleteDirectoryRecursively(executionSpecificDir)) {
836-
log.warn("Failed to completely delete execution-specific directory: {}", executionSpecificDir.getAbsolutePath());
845+
log.warn("Failed to completely delete builder-specific directory: {}", executionSpecificDir.getAbsolutePath());
837846
} else {
838-
log.debug("Successfully deleted execution-specific directory: {}", executionSpecificDir.getAbsolutePath());
847+
log.debug("Successfully deleted builder-specific directory: {}", executionSpecificDir.getAbsolutePath());
839848
}
840849
} else {
841-
// In debug mode, the execution-specific directory is intentionally preserved for troubleshooting.
842-
// Note: These directories will accumulate over time and may require periodic manual cleanup.
843-
log.debug("Debug mode enabled; not cleaning up execution-specific directory: {}", executionSpecificDir.getAbsolutePath());
850+
// In debug mode the per-builder directory is intentionally preserved for troubleshooting.
851+
// Sibling directories of the same Rundeck execution share the ansible-exec-<execId>-* prefix
852+
// for easy filtering. These directories will accumulate over time and may require periodic
853+
// manual cleanup.
854+
log.debug("Debug mode enabled; not cleaning up builder-specific directory: {}", executionSpecificDir.getAbsolutePath());
844855
}
845856
}
846857
}
@@ -1145,16 +1156,25 @@ public Boolean generateInventoryNodesAuth() {
11451156
}
11461157

11471158
/**
1148-
* Creates and returns an execution-specific temporary directory path.
1149-
* This ensures that each execution has its own isolated directory for inventory and group_vars,
1150-
* preventing conflicts when multiple workflow step executions run in parallel.
1159+
* Creates and returns a builder-specific temporary directory path.
1160+
*
1161+
* <p>Each builder receives a unique flat directory directly under the configured base tmp
1162+
* dir, named {@code ansible-exec-<executionId>-builder-<rand>/}. The directory is created
1163+
* atomically with {@link Files#createTempDirectory(java.nio.file.Path, String, java.nio.file.attribute.FileAttribute[])},
1164+
* which guarantees uniqueness without any coordination between sibling threads sharing the
1165+
* same Rundeck executionId.</p>
1166+
*
1167+
* <p>The {@code ansible-exec-<executionId>} prefix preserves the ability to filter all
1168+
* directories belonging to a given Rundeck execution (useful in debug mode), while keeping
1169+
* each directory exclusively owned by a single builder. Recursive cleanup of one builder's
1170+
* directory cannot affect siblings.</p>
11511171
*
1152-
* @return The path to the execution-specific directory
1172+
* @return The absolute path to the builder-specific directory
11531173
*/
11541174
String getExecutionSpecificTmpDir() {
11551175
// Return cached directory if already created
11561176
if (executionSpecificDir != null) {
1157-
log.debug("Using cached execution-specific directory: {}", executionSpecificDir.getAbsolutePath());
1177+
log.debug("Using cached builder-specific directory: {}", executionSpecificDir.getAbsolutePath());
11581178
return executionSpecificDir.getAbsolutePath();
11591179
}
11601180

@@ -1169,18 +1189,17 @@ String getExecutionSpecificTmpDir() {
11691189
// Get base tmp directory
11701190
String baseTmpDir = AnsibleUtil.getCustomTmpPathDir(framework);
11711191

1172-
// Create execution-specific directory
11731192
if (executionId != null && !executionId.isEmpty()) {
1174-
executionSpecificDir = new File(baseTmpDir, "ansible-exec-" + executionId);
1175-
log.debug("Creating execution-specific directory: {}", executionSpecificDir.getAbsolutePath());
1193+
// Atomic + unique by construction. No coordination needed between sibling threads
1194+
// sharing the same Rundeck executionId. The executionId prefix lets operators filter
1195+
// all directories belonging to a given Rundeck execution with a single glob.
11761196
try {
1177-
// Use Files.createDirectories() which is idempotent (safe to call if directory exists)
1178-
// and handles race conditions properly. Unlike mkdirs(), it doesn't return false
1179-
// when the directory already exists - it only throws IOException on actual failure.
1180-
Files.createDirectories(executionSpecificDir.toPath());
1181-
log.debug("Successfully ensured execution-specific directory exists: {}", executionSpecificDir.getAbsolutePath());
1197+
executionSpecificDir = Files
1198+
.createTempDirectory(Paths.get(baseTmpDir), "ansible-exec-" + executionId + "-builder-")
1199+
.toFile();
1200+
log.debug("Created builder-specific directory: {}", executionSpecificDir.getAbsolutePath());
11821201
} catch (IOException e) {
1183-
String errorMsg = "Failed to create execution-specific directory: " + executionSpecificDir.getAbsolutePath();
1202+
String errorMsg = "Failed to create builder-specific directory under: " + baseTmpDir;
11841203
log.error(errorMsg, e);
11851204
throw new IllegalStateException(errorMsg, e);
11861205
}

src/test/groovy/com/rundeck/plugins/ansible/ansible/AnsibleRunnerContextBuilderSpec.groovy

Lines changed: 194 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,26 @@ package com.rundeck.plugins.ansible.ansible
33
import com.dtolabs.rundeck.core.common.Framework
44
import com.dtolabs.rundeck.core.common.INodeSet
55
import com.dtolabs.rundeck.core.execution.ExecutionContext
6+
import com.dtolabs.rundeck.core.execution.ExecutionLogger
7+
import com.dtolabs.rundeck.core.utils.PropertyLookup
68
import com.dtolabs.rundeck.plugins.step.PluginStepContext
79
import com.rundeck.plugins.ansible.plugin.AnsiblePluginGroup
810
import spock.lang.Specification
9-
import com.dtolabs.rundeck.core.execution.ExecutionLogger
11+
import spock.lang.TempDir
12+
13+
import java.nio.file.Files
14+
import java.util.concurrent.ConcurrentLinkedQueue
15+
import java.util.concurrent.CountDownLatch
16+
import java.util.concurrent.CyclicBarrier
17+
import java.util.concurrent.ExecutorService
18+
import java.util.concurrent.Executors
19+
import java.util.concurrent.TimeUnit
1020

1121
class AnsibleRunnerContextBuilderSpec extends Specification {
1222

23+
@TempDir
24+
File baseTmpDir
25+
1326
def "test plugin group"(){
1427
given:
1528

@@ -80,4 +93,184 @@ class AnsibleRunnerContextBuilderSpec extends Specification {
8093
!contextBuilder.encryptExtraVars()
8194
contextBuilder.getPlaybookPath() == "path/to/playbook"
8295
}
96+
97+
// ----------------------------------------------------------------------------
98+
// Race condition fix: getExecutionSpecificTmpDir() / cleanupTempFiles()
99+
// See ansible-plugin-race-condition-context.md
100+
// ----------------------------------------------------------------------------
101+
102+
/**
103+
* Builds a fresh AnsibleRunnerContextBuilder wired so that
104+
* AnsibleUtil.getCustomTmpPathDir(framework) resolves to {@code baseTmpDir} and
105+
* the data context exposes the given {@code execId}.
106+
*/
107+
private AnsibleRunnerContextBuilder newBuilder(String execId) {
108+
def propertyLookup = Mock(PropertyLookup)
109+
propertyLookup.getProperty("framework.tmp.dir") >> baseTmpDir.absolutePath
110+
111+
def framework = Mock(Framework)
112+
framework.getPropertyLookup() >> propertyLookup
113+
framework.hasProjectProperty(_, _) >> false
114+
framework.hasProperty(_) >> false
115+
116+
def execContext = Mock(ExecutionContext)
117+
execContext.getDataContext() >> ['job': ['execid': execId]]
118+
execContext.getFrameworkProject() >> 'test-project'
119+
120+
def nodes = Mock(INodeSet)
121+
nodes.getNodes() >> []
122+
123+
return new AnsibleRunnerContextBuilder(execContext, framework, nodes, [:])
124+
}
125+
126+
/**
127+
* Drops a fake "inventory" file inside the builder's working directory, mirroring
128+
* what AnsibleInventoryBuilder.buildInventory() does in production.
129+
*/
130+
private File seedInventoryFile(AnsibleRunnerContextBuilder builder) {
131+
File dir = new File(builder.getExecutionSpecificTmpDir())
132+
File inventory = File.createTempFile("ansible-inventory", ".json", dir)
133+
inventory.text = '{"all":{"hosts":{}}}'
134+
return inventory
135+
}
136+
137+
def "two builders with the same executionId get distinct directories with the executionId prefix"() {
138+
given:
139+
def builderA = newBuilder("4242")
140+
def builderB = newBuilder("4242")
141+
142+
when:
143+
File dirA = new File(builderA.getExecutionSpecificTmpDir())
144+
File dirB = new File(builderB.getExecutionSpecificTmpDir())
145+
146+
then: "both directories exist"
147+
dirA.exists() && dirA.isDirectory()
148+
dirB.exists() && dirB.isDirectory()
149+
150+
and: "they are different directories"
151+
dirA.absolutePath != dirB.absolutePath
152+
153+
and: "they sit directly under the configured base tmp dir (no shared parent)"
154+
dirA.parentFile.absolutePath == baseTmpDir.absolutePath
155+
dirB.parentFile.absolutePath == baseTmpDir.absolutePath
156+
157+
and: "their names start with the ansible-exec-<execId>-builder- prefix for filtering"
158+
dirA.name.startsWith("ansible-exec-4242-builder-")
159+
dirB.name.startsWith("ansible-exec-4242-builder-")
160+
}
161+
162+
def "directories of distinct executions can coexist under the same base tmp dir"() {
163+
given:
164+
def builderX = newBuilder("4242")
165+
def builderY = newBuilder("9999")
166+
167+
when:
168+
File dirX = new File(builderX.getExecutionSpecificTmpDir())
169+
File dirY = new File(builderY.getExecutionSpecificTmpDir())
170+
171+
then:
172+
dirX.parentFile.absolutePath == baseTmpDir.absolutePath
173+
dirY.parentFile.absolutePath == baseTmpDir.absolutePath
174+
dirX.name.startsWith("ansible-exec-4242-builder-")
175+
dirY.name.startsWith("ansible-exec-9999-builder-")
176+
}
177+
178+
def "cleanupTempFiles on one builder must not delete a sibling's inventory"() {
179+
given:
180+
def builderA = newBuilder("9001")
181+
def builderB = newBuilder("9001")
182+
183+
File invA = seedInventoryFile(builderA)
184+
File invB = seedInventoryFile(builderB)
185+
File dirA = new File(builderA.getExecutionSpecificTmpDir())
186+
File dirB = new File(builderB.getExecutionSpecificTmpDir())
187+
188+
expect:
189+
invA.exists()
190+
invB.exists()
191+
192+
when: "builder A finishes and cleans up"
193+
builderA.cleanupTempFiles()
194+
195+
then: "A's directory and inventory are gone, but B's is intact"
196+
!dirA.exists()
197+
!invA.exists()
198+
dirB.exists()
199+
invB.exists()
200+
201+
when: "builder B finishes and cleans up"
202+
builderB.cleanupTempFiles()
203+
204+
then: "B's directory is also gone"
205+
!dirB.exists()
206+
!invB.exists()
207+
}
208+
209+
def "parallel builders never lose their inventory files before their own cleanup"() {
210+
given:
211+
int parallelism = 16
212+
ExecutorService pool = Executors.newFixedThreadPool(parallelism)
213+
CyclicBarrier barrier = new CyclicBarrier(parallelism)
214+
CountDownLatch done = new CountDownLatch(parallelism)
215+
ConcurrentLinkedQueue<String> failures = new ConcurrentLinkedQueue<>()
216+
217+
when:
218+
parallelism.times {
219+
pool.submit({
220+
AnsibleRunnerContextBuilder builder = null
221+
try {
222+
builder = newBuilder("55555")
223+
File workDir = new File(builder.getExecutionSpecificTmpDir())
224+
File inventory = File.createTempFile("ansible-inventory", ".json", workDir)
225+
inventory.text = '{"all":{"hosts":{}}}'
226+
227+
// All threads have created their own dir+inventory; now hold them open
228+
// simultaneously to maximise the chance of a sibling's cleanup running while
229+
// ours is still alive.
230+
barrier.await(10, TimeUnit.SECONDS)
231+
232+
if (!inventory.exists()) {
233+
failures.add("inventory disappeared while builder was alive: ${inventory.absolutePath}")
234+
}
235+
if (!workDir.exists()) {
236+
failures.add("work dir disappeared while builder was alive: ${workDir.absolutePath}")
237+
}
238+
} catch (Throwable t) {
239+
failures.add("worker threw: ${t.message}")
240+
} finally {
241+
builder?.cleanupTempFiles()
242+
done.countDown()
243+
}
244+
} as Runnable)
245+
}
246+
247+
boolean finished = done.await(30, TimeUnit.SECONDS)
248+
pool.shutdownNow()
249+
250+
then:
251+
finished
252+
failures.isEmpty()
253+
254+
and: "no leftover ansible-exec-55555-builder-* directories remain after cleanup"
255+
baseTmpDir.listFiles({ f -> f.name.startsWith("ansible-exec-55555-builder-") } as FileFilter).length == 0
256+
}
257+
258+
def "debug mode preserves the per-builder directory"() {
259+
given:
260+
def builder = newBuilder("debug-123")
261+
// ansible-debug=true must short-circuit cleanup paths.
262+
Map<String, Object> jobConf = (Map<String, Object>) builder.getJobConf()
263+
jobConf.put("ansible-debug", "true")
264+
265+
File workDir = new File(builder.getExecutionSpecificTmpDir())
266+
File inventory = seedInventoryFile(builder)
267+
268+
when:
269+
builder.cleanupTempFiles()
270+
271+
then: "nothing is deleted in debug mode, mirroring legacy behaviour"
272+
inventory.exists()
273+
workDir.exists()
274+
workDir.name.startsWith("ansible-exec-debug-123-builder-")
275+
}
83276
}

0 commit comments

Comments
 (0)