fix: Refactor docker container state handling to properly restart when exited (#841)

Martin 2026-01-27 12:15:55 +01:00 committed by GitHub
parent 1ddcec5725
commit 0eeb0feb4a
6 changed files with 103 additions and 96 deletions


@@ -70,7 +70,7 @@ resource "docker_image" "ubuntu" {
- `memory_reservation` (Number) The memory reservation for the container in MBs. Defaults to 0. Allows you to specify a soft limit smaller than `memory` which is activated when Docker detects contention or low memory on the host machine. If you use `memory-reservation`, it must be set lower than `memory` for it to take precedence. Because it is a soft limit, it doesn't guarantee that the container doesn't exceed the limit.
- `memory_swap` (Number) The total memory limit (memory + swap) for the container in MBs. This setting may compute to `-1` after `terraform apply` if the target host doesn't support memory swap; in that case Docker falls back to a soft limit.
- `mounts` (Block Set) Specification for mounts to be added to containers created as part of the service. (see [below for nested schema](#nestedblock--mounts))
- `must_run` (Boolean) If `true`, then the Docker container will be kept running. If `false`, then as long as the container exists, Terraform assumes it is successful. Defaults to `true`.
- `must_run` (Boolean) If `true`, then the Docker container will be kept running. If `false`, Terraform leaves the container alone. This attribute is also used to trigger a restart of a stopped container: if the container is stopped, the provider sets `must_run` to `false` in state, which shows up as a change in `plan` and restarts the container on `apply`. Defaults to `true`. See the example after this list.
- `network_mode` (String) Network mode of the container. Defaults to `bridge`. If your host runs a non-Linux OS, you need to set this value explicitly, e.g. `nat` when your container will be running on a Windows host. See https://docs.docker.com/engine/network/ for more information.
- `networks_advanced` (Block Set) The networks the container is attached to (see [below for nested schema](#nestedblock--networks_advanced))
- `pid_mode` (String) The PID (Process) Namespace mode for the container. Either `container:<name|id>` or `host`.
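For containers that are intentionally kept stopped, e.g. while being actively debugged, `must_run` can be set to `false` so the provider leaves them alone. A minimal sketch (the resource and image names are illustrative, not part of this commit):

resource "docker_container" "debug" {
  image = docker_image.redis.image_id # hypothetical docker_image resource
  name  = "debug-redis"

  # Opt out of the keep-running behavior: with must_run = false, the provider
  # neither flags this container as drifted nor restarts it when it is stopped.
  must_run = false
}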


@@ -5,6 +5,11 @@
The minimum Terraform version is bumped to `1.1.5` or newer. This is done as part of introducing the new `terraform-plugin-framework` used to develop this provider.
## `docker_container`
Reworked handling of stopped containers: If a container is stopped (or exited for some other reason), Terraform now correctly shows a change on `plan` and restarts the container on `apply`. To trigger the change, the `must_run` attribute is used: `must_run` defaults to `true`, and when a container is not in a running state, the provider sets `must_run` to `false` to trigger a state change.
This fixes the cases where a stopped container was deleted during a `plan`.
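As an illustration (the resource and image names are hypothetical, not taken from this commit): with a plain container definition, stopping the container outside of Terraform now surfaces as an in-place update of `must_run` on the next `plan`, and `apply` starts the container again.

resource "docker_container" "web" {
  image = docker_image.nginx.image_id # hypothetical docker_image resource
  name  = "web"

  # must_run defaults to true. If "web" is stopped out-of-band, the provider
  # records must_run = false in state; the next plan then shows an update back
  # to true, and apply restarts the container.
}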
## `docker_network`
Removed attributes:


@@ -97,7 +97,7 @@ func resourceDockerContainer() *schema.Resource {
// An assumption is made that configured containers
// should be running; if not, they should not be in
// the configuration. Therefore a stopped container
// should be started. Set to false to have the
// should be restarted. Set to false to have the
// provider leave the container alone.
//
// Actively-debugged containers are likely to be
@@ -109,7 +109,7 @@ func resourceDockerContainer() *schema.Resource {
// should be pristine when started.
"must_run": {
Type: schema.TypeBool,
Description: "If `true`, then the Docker container will be kept running. If `false`, then as long as the container exists, Terraform assumes it is successful. Defaults to `true`.",
Description: "If `true`, then the Docker container will be kept running. If `false`, Terraform leaves the container alone. This attribute is also used to trigger a restart of a stopped container: if the container is stopped, the provider sets `must_run` to `false` in state, which shows up as a change in `plan` and restarts the container on `apply`. Defaults to `true`.",
Default: true,
Optional: true,
},


@@ -24,7 +24,6 @@ import (
"github.com/docker/go-connections/nat"
"github.com/docker/go-units"
"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
)
@@ -35,17 +34,9 @@ const (
)
var (
errContainerFailedToBeCreated = errors.New("container failed to be created")
errContainerFailedToBeDeleted = errors.New("container failed to be deleted")
errContainerExitedImmediately = errors.New("container exited immediately")
errContainerFailedToBeInRunningState = errors.New("container failed to be in running state")
errContainerFailedToBeInHealthyState = errors.New("container failed to be in healthy state")
)
// NOTE mavogel: we keep this global var for tracking
// the time in the create and read func
var creationTime time.Time
func resourceDockerContainerCreate(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
client, err := meta.(*ProviderConfig).MakeClient(ctx, d)
if err != nil {
@@ -555,7 +546,6 @@ func resourceDockerContainerCreate(ctx context.Context, d *schema.ResourceData,
}
if d.Get("start").(bool) {
creationTime = time.Now()
options := container.StartOptions{}
if err := client.ContainerStart(ctx, retContainer.ID, options); err != nil {
return diag.Errorf("Unable to start container: %s", err)
@@ -685,6 +675,8 @@ func resourceDockerContainerRead(ctx context.Context, d *schema.ResourceData, me
return diag.FromErr(fmt.Errorf("failed to create Docker client: %w", err))
}
// First check whether the container exists; for this we iterate over all containers.
// Using ContainerInspect directly would return an error if the container does not exist, but also for other failures (e.g. permission denied).
apiContainer, err := fetchDockerContainer(ctx, d.Id(), client)
if err != nil {
return diag.FromErr(err)
@@ -695,44 +687,26 @@ func resourceDockerContainerRead(ctx context.Context, d *schema.ResourceData, me
return nil
}
stateConf := &retry.StateChangeConf{
Pending: []string{"pending"},
Target: []string{"running"},
Refresh: resourceDockerContainerReadRefreshFunc(ctx, d, meta, client),
Timeout: time.Duration(containerReadRefreshTimeoutMilliseconds) * time.Millisecond,
MinTimeout: containerReadRefreshWaitBeforeRefreshes,
Delay: containerReadRefreshDelay,
}
containerRaw, err := stateConf.WaitForStateContext(ctx)
container, err := client.ContainerInspect(ctx, d.Id())
if err != nil {
if errors.Is(err, errContainerFailedToBeCreated) {
return resourceDockerContainerDelete(ctx, d, meta)
}
if errors.Is(err, errContainerExitedImmediately) {
if err := resourceDockerContainerDelete(ctx, d, meta); err != nil {
log.Printf("[ERROR] Container %s failed to be deleted: %v", apiContainer.ID, err)
return diag.FromErr(errContainerFailedToBeDeleted)
}
}
return diag.FromErr(err)
}
container := containerRaw.(container.InspectResponse)
jsonObj, _ := json.MarshalIndent(container, "", "\t")
log.Printf("[DEBUG] Docker container inspect from stateFunc: %s", jsonObj)
if !container.State.Running && d.Get("must_run").(bool) {
if err := resourceDockerContainerDelete(ctx, d, meta); err != nil {
log.Printf("[ERROR] Container %s failed to be deleted: %v", container.ID, err)
return err
}
log.Printf("[ERROR] Container %s failed to be in running state", container.ID)
return diag.FromErr(errContainerFailedToBeInRunningState)
}
// Check if container is stopped when it should be running
// If container is stopped and must_run is true in config, set must_run to false in state
// This creates state drift that Terraform will detect, triggering an update
if !container.State.Running {
d.Set("exit_code", container.State.ExitCode)
// If must_run is configured as true but container is stopped, set it to false in state
// This will show as a change in terraform plan and trigger Update on apply
if d.Get("must_run").(bool) {
log.Printf("[WARN] Container %s is stopped but must_run is configured as true. Setting must_run to false in state to trigger update.", container.ID)
d.Set("must_run", false)
}
}
// Read Network Settings
@@ -878,57 +852,31 @@ func resourceDockerContainerRead(ctx context.Context, d *schema.ResourceData, me
return nil
}
func resourceDockerContainerReadRefreshFunc(ctx context.Context,
d *schema.ResourceData, meta interface{}, client *client.Client) retry.StateRefreshFunc {
return func() (interface{}, string, error) {
containerID := d.Id()
var container container.InspectResponse
container, err := client.ContainerInspect(ctx, containerID)
if err != nil {
return container, "pending", err
}
jsonObj, _ := json.MarshalIndent(container, "", "\t")
log.Printf("[DEBUG] Docker container inspect: %s", jsonObj)
if container.State.Running ||
!container.State.Running && !d.Get("must_run").(bool) {
log.Printf("[DEBUG] Container %s is running: %v", containerID, container.State.Running)
return container, "running", nil
}
if creationTime.IsZero() { // We didn't just create it, so don't wait around
log.Printf("[DEBUG] Container %s was not created", containerID)
return container, "pending", errContainerFailedToBeCreated
}
finishTime, err := time.Parse(time.RFC3339, container.State.FinishedAt)
if err != nil {
log.Printf("[ERROR] Container %s finish time could not be parsed: %s", containerID, container.State.FinishedAt)
return container, "pending", err
}
if finishTime.After(creationTime) {
log.Printf("[INFO] Container %s exited immediately: started: %v - finished: %v", containerID, creationTime, finishTime)
return container, "pending", errContainerExitedImmediately
}
// TODO mavogel wait until all properties are exposed from the API
// dns = []
// dns_opts = []
// dns_search = []
// group_add = []
// id = "9e6d9e987923e2c3a99f17e8781c7ce3515558df0e45f8ab06f6adb2dda0de50"
// log_opts = {}
// name = "nginx"
// sysctls = {}
// tmpfs = {}
return container, "running", nil
}
}
func resourceDockerContainerUpdate(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
client, err := meta.(*ProviderConfig).MakeClient(ctx, d)
if err != nil {
return diag.FromErr(fmt.Errorf("failed to create Docker client: %w", err))
}
// Check if must_run changed from false to true (container was stopped, now should be running)
if d.HasChange("must_run") {
old, new := d.GetChange("must_run")
if !old.(bool) && new.(bool) {
// must_run changed from false to true - need to start the container
log.Printf("[INFO] must_run changed from false to true, starting container %s", d.Id())
// Check if start is enabled
if d.Get("start").(bool) {
options := container.StartOptions{}
if err := client.ContainerStart(ctx, d.Id(), options); err != nil {
return diag.Errorf("Unable to start container: %s", err)
}
log.Printf("[INFO] Successfully started container %s", d.Id())
}
}
}
// Handle other attribute updates
attrs := []string{
"restart", "max_retry_count", "cpu_shares", "memory", "memory_reservation", "cpu_set", "memory_swap",
}
@@ -966,10 +914,6 @@ func resourceDockerContainerUpdate(ctx context.Context, d *schema.ResourceData,
// QF1008: could remove embedded field "Resources" from selector
updateConfig.Resources.MemorySwap = a //nolint:staticcheck
}
client, err := meta.(*ProviderConfig).MakeClient(ctx, d)
if err != nil {
return diag.FromErr(fmt.Errorf("failed to create Docker client: %w", err))
}
_, err = client.ContainerUpdate(ctx, d.Id(), updateConfig)
if err != nil {
return diag.Errorf("Unable to update a container: %v", err)
@@ -977,6 +921,7 @@ func resourceDockerContainerUpdate(ctx context.Context, d *schema.ResourceData,
break
}
}
return nil
}


@@ -5,6 +5,7 @@ import (
"bytes"
"context"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
@@ -1562,6 +1563,44 @@ func TestAccDockerContainer_exitcode(t *testing.T) {
})
}
func TestAccDockerContainer_RecreateWhenStopped(t *testing.T) {
var c container.InspectResponse
resource.Test(t, resource.TestCase{
PreCheck: func() { testAccPreCheck(t) },
ProviderFactories: providerFactories,
Steps: []resource.TestStep{
{
Config: loadTestConfiguration(t, RESOURCE, "docker_container", "testAccDockerContainerRecreate"),
Check: resource.ComposeTestCheckFunc(
testAccContainerRunning("docker_container.redis-container-2", &c),
resource.TestCheckResourceAttr("docker_container.redis-container-2", "name", "redis-2"),
),
},
{
PreConfig: func() {
ctx := context.Background()
client, _ := testAccProvider.Meta().(*ProviderConfig).MakeClient(ctx, nil)
err := client.ContainerStop(ctx, "redis-2", container.StopOptions{})
if err != nil {
log.Fatalf("During preconfig container stopping: Error stopping container: %s", err)
}
},
Config: loadTestConfiguration(t, RESOURCE, "docker_container", "testAccDockerContainerRecreate"),
PlanOnly: true,
ExpectNonEmptyPlan: true,
},
{
Config: loadTestConfiguration(t, RESOURCE, "docker_container", "testAccDockerContainerRecreate"),
Check: resource.ComposeTestCheckFunc(
testAccContainerRunning("docker_container.redis-container-2", &c),
resource.TestCheckResourceAttr("docker_container.redis-container-2", "name", "redis-2"),
),
},
},
})
}
func TestAccDockerContainer_ipv4address(t *testing.T) {
var c container.InspectResponse


@@ -0,0 +1,18 @@
data "docker_registry_image" "redis" {
name = "redis:7-alpine"
}
resource "docker_image" "redis" {
name = "redis:7-alpine"
keep_locally = true
pull_triggers = [data.docker_registry_image.redis.sha256_digest]
}
resource "docker_container" "redis-container-2" {
image = docker_image.redis.image_id
name = "redis-2"
hostname = "redis-2"
restart = "unless-stopped"
}