AWS SD: ECS Bridge Mode

Previously the AWS SD ECS role only discovered tasks that used the
`awsvpc` network mode, which attaches a dedicated Elastic Network
Interface (ENI). This change adds logic to also discover tasks that
use the `host` and `bridge` network modes, where the target IP address
is that of the EC2 instance hosting the container. It also exposes a
number of additional labels describing the EC2 instance when the
launch type is `EC2`.

Signed-off-by: matt-gp <small_minority@hotmail.com>
This commit is contained in:
matt-gp 2025-11-21 22:23:08 +00:00
parent 041228bfcd
commit 9b6e244b83
No known key found for this signature in database
3 changed files with 766 additions and 47 deletions

View file

@ -28,6 +28,7 @@ import (
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/credentials/stscreds"
"github.com/aws/aws-sdk-go-v2/feature/ec2/imds"
"github.com/aws/aws-sdk-go-v2/service/ec2"
"github.com/aws/aws-sdk-go-v2/service/ecs"
"github.com/aws/aws-sdk-go-v2/service/ecs/types"
"github.com/aws/aws-sdk-go-v2/service/sts"
@ -44,31 +45,37 @@ import (
)
const (
ecsLabel = model.MetaLabelPrefix + "ecs_"
ecsLabelCluster = ecsLabel + "cluster"
ecsLabelClusterARN = ecsLabel + "cluster_arn"
ecsLabelService = ecsLabel + "service"
ecsLabelServiceARN = ecsLabel + "service_arn"
ecsLabelServiceStatus = ecsLabel + "service_status"
ecsLabelTaskGroup = ecsLabel + "task_group"
ecsLabelTaskARN = ecsLabel + "task_arn"
ecsLabelTaskDefinition = ecsLabel + "task_definition"
ecsLabelRegion = ecsLabel + "region"
ecsLabelAvailabilityZone = ecsLabel + "availability_zone"
ecsLabelAZID = ecsLabel + "availability_zone_id"
ecsLabelSubnetID = ecsLabel + "subnet_id"
ecsLabelIPAddress = ecsLabel + "ip_address"
ecsLabelLaunchType = ecsLabel + "launch_type"
ecsLabelDesiredStatus = ecsLabel + "desired_status"
ecsLabelLastStatus = ecsLabel + "last_status"
ecsLabelHealthStatus = ecsLabel + "health_status"
ecsLabelPlatformFamily = ecsLabel + "platform_family"
ecsLabelPlatformVersion = ecsLabel + "platform_version"
ecsLabelTag = ecsLabel + "tag_"
ecsLabelTagCluster = ecsLabelTag + "cluster_"
ecsLabelTagService = ecsLabelTag + "service_"
ecsLabelTagTask = ecsLabelTag + "task_"
ecsLabelSeparator = ","
ecsLabel = model.MetaLabelPrefix + "ecs_"
ecsLabelCluster = ecsLabel + "cluster"
ecsLabelClusterARN = ecsLabel + "cluster_arn"
ecsLabelService = ecsLabel + "service"
ecsLabelServiceARN = ecsLabel + "service_arn"
ecsLabelServiceStatus = ecsLabel + "service_status"
ecsLabelTaskGroup = ecsLabel + "task_group"
ecsLabelTaskARN = ecsLabel + "task_arn"
ecsLabelTaskDefinition = ecsLabel + "task_definition"
ecsLabelRegion = ecsLabel + "region"
ecsLabelAvailabilityZone = ecsLabel + "availability_zone"
ecsLabelSubnetID = ecsLabel + "subnet_id"
ecsLabelIPAddress = ecsLabel + "ip_address"
ecsLabelLaunchType = ecsLabel + "launch_type"
ecsLabelDesiredStatus = ecsLabel + "desired_status"
ecsLabelLastStatus = ecsLabel + "last_status"
ecsLabelHealthStatus = ecsLabel + "health_status"
ecsLabelPlatformFamily = ecsLabel + "platform_family"
ecsLabelPlatformVersion = ecsLabel + "platform_version"
ecsLabelTag = ecsLabel + "tag_"
ecsLabelTagCluster = ecsLabelTag + "cluster_"
ecsLabelTagService = ecsLabelTag + "service_"
ecsLabelTagTask = ecsLabelTag + "task_"
ecsLabelTagEC2 = ecsLabelTag + "ec2_"
ecsLabelNetworkMode = ecsLabel + "network_mode"
ecsLabelContainerInstanceARN = ecsLabel + "container_instance_arn"
ecsLabelEC2InstanceID = ecsLabel + "ec2_instance_id"
ecsLabelEC2InstanceType = ecsLabel + "ec2_instance_type"
ecsLabelEC2InstancePrivateIP = ecsLabel + "ec2_instance_private_ip"
ecsLabelEC2InstancePublicIP = ecsLabel + "ec2_instance_public_ip"
ecsLabelPublicIP = ecsLabel + "public_ip"
)
// DefaultECSSDConfig is the default ECS SD configuration.
@ -153,6 +160,12 @@ type ecsClient interface {
DescribeServices(context.Context, *ecs.DescribeServicesInput, ...func(*ecs.Options)) (*ecs.DescribeServicesOutput, error)
ListTasks(context.Context, *ecs.ListTasksInput, ...func(*ecs.Options)) (*ecs.ListTasksOutput, error)
DescribeTasks(context.Context, *ecs.DescribeTasksInput, ...func(*ecs.Options)) (*ecs.DescribeTasksOutput, error)
DescribeContainerInstances(context.Context, *ecs.DescribeContainerInstancesInput, ...func(*ecs.Options)) (*ecs.DescribeContainerInstancesOutput, error)
}
// ecsEC2Client is the set of EC2 API operations that ECS discovery calls.
// ECSDiscovery holds a value of this type in its ec2 field.
type ecsEC2Client interface {
// DescribeInstances is called with the IDs of the EC2 instances to look up.
DescribeInstances(context.Context, *ec2.DescribeInstancesInput, ...func(*ec2.Options)) (*ec2.DescribeInstancesOutput, error)
// DescribeNetworkInterfaces is called with ENI IDs to find their public IPs.
DescribeNetworkInterfaces(context.Context, *ec2.DescribeNetworkInterfacesInput, ...func(*ec2.Options)) (*ec2.DescribeNetworkInterfacesOutput, error)
}
// ECSDiscovery periodically performs ECS-SD requests. It implements
@ -162,6 +175,7 @@ type ECSDiscovery struct {
logger *slog.Logger
cfg *ECSSDConfig
ecs ecsClient
ec2 ecsEC2Client
}
// NewECSDiscovery returns a new ECSDiscovery which periodically refreshes its targets.
@ -191,7 +205,7 @@ func NewECSDiscovery(conf *ECSSDConfig, opts discovery.DiscovererOptions) (*ECSD
}
func (d *ECSDiscovery) initEcsClient(ctx context.Context) error {
if d.ecs != nil {
if d.ecs != nil && d.ec2 != nil {
return nil
}
@ -240,6 +254,10 @@ func (d *ECSDiscovery) initEcsClient(ctx context.Context) error {
options.HTTPClient = client
})
d.ec2 = ec2.NewFromConfig(cfg, func(options *ec2.Options) {
options.HTTPClient = client
})
// Test credentials by making a simple API call
testCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
@ -458,6 +476,113 @@ func (d *ECSDiscovery) describeTasks(ctx context.Context, clusterARN string, tas
return tasks, errg.Wait()
}
// describeContainerInstances returns a map of container instance ARN to EC2
// instance ID for the given cluster.
//
// The ARNs are de-duplicated before any request is made: callers pass one ARN
// per task, and several tasks can run on the same container instance, so the
// raw list may repeat the same ARN many times. The unique ARNs are then sent
// to DescribeContainerInstances in batches of at most 100 per request (the AWS
// API limit).
//
// Container instances returned without an ARN or without an EC2 instance ID
// are left out of the result. The first API error aborts the lookup and is
// returned wrapped, with a nil map. On success the map is never nil.
func (d *ECSDiscovery) describeContainerInstances(ctx context.Context, clusterARN string, containerInstanceARNs []string) (map[string]string, error) {
	containerInstToEC2 := make(map[string]string)
	if len(containerInstanceARNs) == 0 {
		return containerInstToEC2, nil
	}

	// Drop repeated ARNs (keeping first-seen order) so that tasks sharing a
	// host do not consume several slots of a batch or trigger extra requests.
	seen := make(map[string]struct{}, len(containerInstanceARNs))
	uniqueARNs := make([]string, 0, len(containerInstanceARNs))
	for _, arn := range containerInstanceARNs {
		if _, dup := seen[arn]; dup {
			continue
		}
		seen[arn] = struct{}{}
		uniqueARNs = append(uniqueARNs, arn)
	}

	const batchSize = 100 // AWS API limit

	for _, batch := range batchSlice(uniqueARNs, batchSize) {
		resp, err := d.ecs.DescribeContainerInstances(ctx, &ecs.DescribeContainerInstancesInput{
			Cluster:            aws.String(clusterARN),
			ContainerInstances: batch,
		})
		if err != nil {
			return nil, fmt.Errorf("could not describe container instances: %w", err)
		}

		for _, ci := range resp.ContainerInstances {
			if ci.ContainerInstanceArn == nil || ci.Ec2InstanceId == nil {
				continue
			}
			containerInstToEC2[*ci.ContainerInstanceArn] = *ci.Ec2InstanceId
		}
	}

	return containerInstToEC2, nil
}
// ec2InstanceInfo is the per-instance data that ECS discovery keeps from an
// EC2 DescribeInstances result.
type ec2InstanceInfo struct {
	// privateIP is the instance's private IP address. publicIP, subnetID and
	// instanceType are left empty when the API response does not carry them.
	privateIP, publicIP, subnetID, instanceType string
	// tags holds the instance's tags as tag key to tag value.
	tags map[string]string
}
// describeEC2Instances looks up the given EC2 instances with one
// DescribeInstances call and returns their details keyed by instance ID.
//
// Instances reported without an ID or without a private IP address are left
// out of the result. The public IP and subnet ID are copied when present, and
// every tag that has both a key and a value is recorded. An empty input yields
// an empty, non-nil map without calling the API. An API error is returned
// wrapped, with a nil map.
func (d *ECSDiscovery) describeEC2Instances(ctx context.Context, instanceIDs []string) (map[string]ec2InstanceInfo, error) {
	result := make(map[string]ec2InstanceInfo)
	if len(instanceIDs) == 0 {
		return result, nil
	}

	out, err := d.ec2.DescribeInstances(ctx, &ec2.DescribeInstancesInput{InstanceIds: instanceIDs})
	if err != nil {
		return nil, fmt.Errorf("could not describe EC2 instances: %w", err)
	}

	for _, res := range out.Reservations {
		for i := range res.Instances {
			inst := &res.Instances[i]
			if inst.InstanceId == nil || inst.PrivateIpAddress == nil {
				continue
			}

			entry := ec2InstanceInfo{
				privateIP: *inst.PrivateIpAddress,
				// An unset instance type converts to "", which is also the
				// field's zero value, so no emptiness check is needed.
				instanceType: string(inst.InstanceType),
				tags:         make(map[string]string),
			}
			if ip := inst.PublicIpAddress; ip != nil {
				entry.publicIP = *ip
			}
			if subnet := inst.SubnetId; subnet != nil {
				entry.subnetID = *subnet
			}

			// Collect EC2 instance tags, ignoring incomplete ones.
			for _, t := range inst.Tags {
				if t.Key == nil || t.Value == nil {
					continue
				}
				entry.tags[*t.Key] = *t.Value
			}

			result[*inst.InstanceId] = entry
		}
	}

	return result, nil
}
// describeNetworkInterfaces looks up the given ENIs with one
// DescribeNetworkInterfaces call and returns a map of ENI ID to public IP
// address.
//
// Only interfaces that report an ID and an association with a public IP are
// included. An empty input yields an empty, non-nil map without calling the
// API. An API error is returned wrapped, with a nil map.
func (d *ECSDiscovery) describeNetworkInterfaces(ctx context.Context, eniIDs []string) (map[string]string, error) {
	publicIPs := make(map[string]string)
	if len(eniIDs) == 0 {
		return publicIPs, nil
	}

	out, err := d.ec2.DescribeNetworkInterfaces(ctx, &ec2.DescribeNetworkInterfacesInput{NetworkInterfaceIds: eniIDs})
	if err != nil {
		return nil, fmt.Errorf("could not describe network interfaces: %w", err)
	}

	for i := range out.NetworkInterfaces {
		iface := &out.NetworkInterfaces[i]
		if iface.NetworkInterfaceId == nil || iface.Association == nil || iface.Association.PublicIp == nil {
			continue
		}
		publicIPs[*iface.NetworkInterfaceId] = *iface.Association.PublicIp
	}

	return publicIPs, nil
}
func batchSlice[T any](a []T, size int) [][]T {
batches := make([][]T, 0, len(a)/size+1)
for i := 0; i < len(a); i += size {
@ -554,8 +679,76 @@ func (d *ECSDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error
if tasks, exists := serviceTaskMap[serviceArn]; exists {
var serviceTargets []model.LabelSet
// Collect container instance ARNs for all EC2 tasks to get instance type
var containerInstanceARNs []string
taskToContainerInstance := make(map[string]string)
// Collect ENI IDs for awsvpc tasks to get public IPs
var eniIDs []string
taskToENI := make(map[string]string)
for _, task := range tasks {
// Find the ENI attachment to get the private IP address
// Collect container instance ARN for any task running on EC2
if task.ContainerInstanceArn != nil {
containerInstanceARNs = append(containerInstanceARNs, *task.ContainerInstanceArn)
taskToContainerInstance[*task.TaskArn] = *task.ContainerInstanceArn
}
// Collect ENI IDs from awsvpc tasks
for _, attachment := range task.Attachments {
if attachment.Type != nil && *attachment.Type == "ElasticNetworkInterface" {
for _, detail := range attachment.Details {
if detail.Name != nil && *detail.Name == "networkInterfaceId" && detail.Value != nil {
eniIDs = append(eniIDs, *detail.Value)
taskToENI[*task.TaskArn] = *detail.Value
break
}
}
break
}
}
}
// Batch describe container instances and EC2 instances to get instance type and other metadata
var containerInstToEC2 map[string]string
var ec2InstInfo map[string]ec2InstanceInfo
if len(containerInstanceARNs) > 0 {
var err error
containerInstToEC2, err = d.describeContainerInstances(ctx, clusterArn, containerInstanceARNs)
if err != nil {
d.logger.Error("Failed to describe container instances", "cluster", clusterArn, "error", err)
// Continue processing tasks
} else {
// Collect unique EC2 instance IDs
ec2InstanceIDs := make([]string, 0, len(containerInstToEC2))
for _, ec2ID := range containerInstToEC2 {
ec2InstanceIDs = append(ec2InstanceIDs, ec2ID)
}
// Batch describe EC2 instances
ec2InstInfo, err = d.describeEC2Instances(ctx, ec2InstanceIDs)
if err != nil {
d.logger.Error("Failed to describe EC2 instances", "cluster", clusterArn, "error", err)
}
}
}
// Batch describe ENIs to get public IPs for awsvpc tasks
var eniToPublicIP map[string]string
if len(eniIDs) > 0 {
var err error
eniToPublicIP, err = d.describeNetworkInterfaces(ctx, eniIDs)
if err != nil {
d.logger.Error("Failed to describe network interfaces", "cluster", clusterArn, "error", err)
// Continue processing without ENI public IPs
}
}
for _, task := range tasks {
var ipAddress, subnetID, publicIP string
var networkMode string
var ec2InstanceID, ec2InstanceType, ec2InstancePrivateIP, ec2InstancePublicIP string
// Try to get IP from ENI attachment (awsvpc mode)
var eniAttachment *types.Attachment
for _, attachment := range task.Attachments {
if attachment.Type != nil && *attachment.Type == "ElasticNetworkInterface" {
@ -563,19 +756,65 @@ func (d *ECSDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error
break
}
}
if eniAttachment == nil {
continue
}
var ipAddress, subnetID string
for _, detail := range eniAttachment.Details {
switch *detail.Name {
case "privateIPv4Address":
ipAddress = *detail.Value
case "subnetId":
subnetID = *detail.Value
if eniAttachment != nil {
// awsvpc networking mode - get IP from ENI
networkMode = "awsvpc"
for _, detail := range eniAttachment.Details {
switch *detail.Name {
case "privateIPv4Address":
ipAddress = *detail.Value
case "subnetId":
subnetID = *detail.Value
}
}
// Get public IP from ENI if available
if eniID, ok := taskToENI[*task.TaskArn]; ok {
if eniPublicIP, ok := eniToPublicIP[eniID]; ok {
publicIP = eniPublicIP
}
}
} else if task.ContainerInstanceArn != nil {
// bridge/host networking mode - need to get EC2 instance IP and subnet
networkMode = "bridge"
containerInstARN, ok := taskToContainerInstance[*task.TaskArn]
if ok {
ec2InstanceID, ok = containerInstToEC2[containerInstARN]
if ok {
info, ok := ec2InstInfo[ec2InstanceID]
if ok {
ipAddress = info.privateIP
publicIP = info.publicIP
subnetID = info.subnetID
ec2InstanceType = info.instanceType
ec2InstancePrivateIP = info.privateIP
ec2InstancePublicIP = info.publicIP
} else {
d.logger.Debug("EC2 instance info not found", "instance", ec2InstanceID, "task", *task.TaskArn)
}
} else {
d.logger.Debug("Container instance not found in map", "arn", containerInstARN, "task", *task.TaskArn)
}
}
}
// Get EC2 instance metadata for awsvpc tasks running on EC2
// We want the instance type and the host IPs for advanced use cases
if networkMode == "awsvpc" && task.ContainerInstanceArn != nil {
containerInstARN, ok := taskToContainerInstance[*task.TaskArn]
if ok {
ec2InstanceID, ok = containerInstToEC2[containerInstARN]
if ok {
info, ok := ec2InstInfo[ec2InstanceID]
if ok {
ec2InstanceType = info.instanceType
ec2InstancePrivateIP = info.privateIP
ec2InstancePublicIP = info.publicIP
}
}
}
}
if ipAddress == "" {
continue
}
@ -589,13 +828,38 @@ func (d *ECSDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error
ecsLabelTaskARN: model.LabelValue(*task.TaskArn),
ecsLabelTaskDefinition: model.LabelValue(*task.TaskDefinitionArn),
ecsLabelIPAddress: model.LabelValue(ipAddress),
ecsLabelSubnetID: model.LabelValue(subnetID),
ecsLabelRegion: model.LabelValue(d.cfg.Region),
ecsLabelLaunchType: model.LabelValue(task.LaunchType),
ecsLabelAvailabilityZone: model.LabelValue(*task.AvailabilityZone),
ecsLabelDesiredStatus: model.LabelValue(*task.DesiredStatus),
ecsLabelLastStatus: model.LabelValue(*task.LastStatus),
ecsLabelHealthStatus: model.LabelValue(task.HealthStatus),
ecsLabelNetworkMode: model.LabelValue(networkMode),
}
// Add subnet ID when available (awsvpc mode from ENI, bridge/host from EC2 instance)
if subnetID != "" {
labels[ecsLabelSubnetID] = model.LabelValue(subnetID)
}
// Add container instance and EC2 instance info for EC2 launch type
if task.ContainerInstanceArn != nil {
labels[ecsLabelContainerInstanceARN] = model.LabelValue(*task.ContainerInstanceArn)
}
if ec2InstanceID != "" {
labels[ecsLabelEC2InstanceID] = model.LabelValue(ec2InstanceID)
}
if ec2InstanceType != "" {
labels[ecsLabelEC2InstanceType] = model.LabelValue(ec2InstanceType)
}
if ec2InstancePrivateIP != "" {
labels[ecsLabelEC2InstancePrivateIP] = model.LabelValue(ec2InstancePrivateIP)
}
if ec2InstancePublicIP != "" {
labels[ecsLabelEC2InstancePublicIP] = model.LabelValue(ec2InstancePublicIP)
}
if publicIP != "" {
labels[ecsLabelPublicIP] = model.LabelValue(publicIP)
}
if task.PlatformFamily != nil {
@ -634,6 +898,15 @@ func (d *ECSDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error
}
}
// Add EC2 instance tags (if running on EC2)
if ec2InstanceID != "" {
if info, ok := ec2InstInfo[ec2InstanceID]; ok {
for tagKey, tagValue := range info.tags {
labels[model.LabelName(ecsLabelTagEC2+strutil.SanitizeLabelName(tagKey))] = model.LabelValue(tagValue)
}
}
}
serviceTargets = append(serviceTargets, labels)
}

View file

@ -17,6 +17,8 @@ import (
"context"
"testing"
"github.com/aws/aws-sdk-go-v2/service/ec2"
ec2Types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
"github.com/aws/aws-sdk-go-v2/service/ecs"
ecsTypes "github.com/aws/aws-sdk-go-v2/service/ecs/types"
"github.com/prometheus/common/model"
@ -29,9 +31,12 @@ import (
type ecsDataStore struct {
region string
clusters []ecsTypes.Cluster
services []ecsTypes.Service
tasks []ecsTypes.Task
clusters []ecsTypes.Cluster
services []ecsTypes.Service
tasks []ecsTypes.Task
containerInstances []ecsTypes.ContainerInstance
ec2Instances map[string]ec2InstanceInfo // EC2 instance ID to instance info
eniPublicIPs map[string]string // ENI ID to public IP
}
func TestECSDiscoveryListClusterARNs(t *testing.T) {
@ -716,6 +721,7 @@ func TestECSDiscoveryRefresh(t *testing.T) {
Details: []ecsTypes.KeyValuePair{
{Name: strptr("subnetId"), Value: strptr("subnet-12345")},
{Name: strptr("privateIPv4Address"), Value: strptr("10.0.1.100")},
{Name: strptr("networkInterfaceId"), Value: strptr("eni-fargate-123")},
},
},
},
@ -724,6 +730,9 @@ func TestECSDiscoveryRefresh(t *testing.T) {
},
},
},
eniPublicIPs: map[string]string{
"eni-fargate-123": "52.1.2.3",
},
},
expected: []*targetgroup.Group{
{
@ -749,6 +758,8 @@ func TestECSDiscoveryRefresh(t *testing.T) {
"__meta_ecs_health_status": model.LabelValue("HEALTHY"),
"__meta_ecs_platform_family": model.LabelValue("Linux"),
"__meta_ecs_platform_version": model.LabelValue("1.4.0"),
"__meta_ecs_network_mode": model.LabelValue("awsvpc"),
"__meta_ecs_public_ip": model.LabelValue("52.1.2.3"),
"__meta_ecs_tag_cluster_Environment": model.LabelValue("test"),
"__meta_ecs_tag_service_App": model.LabelValue("web"),
"__meta_ecs_tag_task_Version": model.LabelValue("v1.0"),
@ -825,14 +836,345 @@ func TestECSDiscoveryRefresh(t *testing.T) {
},
},
},
{
name: "TaskWithBridgeNetworking",
ecsData: &ecsDataStore{
region: "us-west-2",
clusters: []ecsTypes.Cluster{
{
ClusterName: strptr("test-cluster"),
ClusterArn: strptr("arn:aws:ecs:us-west-2:123456789012:cluster/test-cluster"),
Status: strptr("ACTIVE"),
},
},
services: []ecsTypes.Service{
{
ServiceName: strptr("bridge-service"),
ServiceArn: strptr("arn:aws:ecs:us-west-2:123456789012:service/test-cluster/bridge-service"),
ClusterArn: strptr("arn:aws:ecs:us-west-2:123456789012:cluster/test-cluster"),
Status: strptr("ACTIVE"),
},
},
tasks: []ecsTypes.Task{
{
TaskArn: strptr("arn:aws:ecs:us-west-2:123456789012:task/test-cluster/task-bridge"),
ClusterArn: strptr("arn:aws:ecs:us-west-2:123456789012:cluster/test-cluster"),
TaskDefinitionArn: strptr("arn:aws:ecs:us-west-2:123456789012:task-definition/bridge-task:1"),
Group: strptr("service:bridge-service"),
LaunchType: ecsTypes.LaunchTypeEc2,
LastStatus: strptr("RUNNING"),
DesiredStatus: strptr("RUNNING"),
HealthStatus: ecsTypes.HealthStatusHealthy,
AvailabilityZone: strptr("us-west-2a"),
ContainerInstanceArn: strptr("arn:aws:ecs:us-west-2:123456789012:container-instance/test-cluster/abc123"),
Attachments: []ecsTypes.Attachment{},
},
},
containerInstances: []ecsTypes.ContainerInstance{
{
ContainerInstanceArn: strptr("arn:aws:ecs:us-west-2:123456789012:container-instance/test-cluster/abc123"),
Ec2InstanceId: strptr("i-1234567890abcdef0"),
Status: strptr("ACTIVE"),
},
},
ec2Instances: map[string]ec2InstanceInfo{
"i-1234567890abcdef0": {
privateIP: "10.0.1.50",
publicIP: "54.1.2.3",
subnetID: "subnet-bridge-1",
instanceType: "t3.medium",
tags: map[string]string{
"Name": "ecs-host-1",
"Environment": "production",
},
},
},
},
expected: []*targetgroup.Group{
{
Source: "us-west-2",
Targets: []model.LabelSet{
{
model.AddressLabel: model.LabelValue("10.0.1.50:80"),
"__meta_ecs_cluster": model.LabelValue("test-cluster"),
"__meta_ecs_cluster_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:cluster/test-cluster"),
"__meta_ecs_service": model.LabelValue("bridge-service"),
"__meta_ecs_service_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:service/test-cluster/bridge-service"),
"__meta_ecs_service_status": model.LabelValue("ACTIVE"),
"__meta_ecs_task_group": model.LabelValue("service:bridge-service"),
"__meta_ecs_task_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:task/test-cluster/task-bridge"),
"__meta_ecs_task_definition": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:task-definition/bridge-task:1"),
"__meta_ecs_region": model.LabelValue("us-west-2"),
"__meta_ecs_availability_zone": model.LabelValue("us-west-2a"),
"__meta_ecs_ip_address": model.LabelValue("10.0.1.50"),
"__meta_ecs_subnet_id": model.LabelValue("subnet-bridge-1"),
"__meta_ecs_launch_type": model.LabelValue("EC2"),
"__meta_ecs_desired_status": model.LabelValue("RUNNING"),
"__meta_ecs_last_status": model.LabelValue("RUNNING"),
"__meta_ecs_health_status": model.LabelValue("HEALTHY"),
"__meta_ecs_network_mode": model.LabelValue("bridge"),
"__meta_ecs_container_instance_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:container-instance/test-cluster/abc123"),
"__meta_ecs_ec2_instance_id": model.LabelValue("i-1234567890abcdef0"),
"__meta_ecs_ec2_instance_type": model.LabelValue("t3.medium"),
"__meta_ecs_ec2_instance_private_ip": model.LabelValue("10.0.1.50"),
"__meta_ecs_ec2_instance_public_ip": model.LabelValue("54.1.2.3"),
"__meta_ecs_public_ip": model.LabelValue("54.1.2.3"),
"__meta_ecs_tag_ec2_Name": model.LabelValue("ecs-host-1"),
"__meta_ecs_tag_ec2_Environment": model.LabelValue("production"),
},
},
},
},
},
{
name: "MixedNetworkingModes",
ecsData: &ecsDataStore{
region: "us-west-2",
clusters: []ecsTypes.Cluster{
{
ClusterName: strptr("mixed-cluster"),
ClusterArn: strptr("arn:aws:ecs:us-west-2:123456789012:cluster/mixed-cluster"),
Status: strptr("ACTIVE"),
},
},
services: []ecsTypes.Service{
{
ServiceName: strptr("mixed-service"),
ServiceArn: strptr("arn:aws:ecs:us-west-2:123456789012:service/mixed-cluster/mixed-service"),
ClusterArn: strptr("arn:aws:ecs:us-west-2:123456789012:cluster/mixed-cluster"),
Status: strptr("ACTIVE"),
},
},
tasks: []ecsTypes.Task{
{
TaskArn: strptr("arn:aws:ecs:us-west-2:123456789012:task/mixed-cluster/task-awsvpc"),
ClusterArn: strptr("arn:aws:ecs:us-west-2:123456789012:cluster/mixed-cluster"),
TaskDefinitionArn: strptr("arn:aws:ecs:us-west-2:123456789012:task-definition/awsvpc-task:1"),
Group: strptr("service:mixed-service"),
LaunchType: ecsTypes.LaunchTypeFargate,
LastStatus: strptr("RUNNING"),
DesiredStatus: strptr("RUNNING"),
HealthStatus: ecsTypes.HealthStatusHealthy,
AvailabilityZone: strptr("us-west-2a"),
Attachments: []ecsTypes.Attachment{
{
Type: strptr("ElasticNetworkInterface"),
Details: []ecsTypes.KeyValuePair{
{Name: strptr("subnetId"), Value: strptr("subnet-12345")},
{Name: strptr("privateIPv4Address"), Value: strptr("10.0.2.100")},
{Name: strptr("networkInterfaceId"), Value: strptr("eni-mixed-awsvpc")},
},
},
},
},
{
TaskArn: strptr("arn:aws:ecs:us-west-2:123456789012:task/mixed-cluster/task-bridge"),
ClusterArn: strptr("arn:aws:ecs:us-west-2:123456789012:cluster/mixed-cluster"),
TaskDefinitionArn: strptr("arn:aws:ecs:us-west-2:123456789012:task-definition/bridge-task:1"),
Group: strptr("service:mixed-service"),
LaunchType: ecsTypes.LaunchTypeEc2,
LastStatus: strptr("RUNNING"),
DesiredStatus: strptr("RUNNING"),
HealthStatus: ecsTypes.HealthStatusHealthy,
AvailabilityZone: strptr("us-west-2b"),
ContainerInstanceArn: strptr("arn:aws:ecs:us-west-2:123456789012:container-instance/mixed-cluster/xyz789"),
Attachments: []ecsTypes.Attachment{},
},
},
containerInstances: []ecsTypes.ContainerInstance{
{
ContainerInstanceArn: strptr("arn:aws:ecs:us-west-2:123456789012:container-instance/mixed-cluster/xyz789"),
Ec2InstanceId: strptr("i-0987654321fedcba0"),
Status: strptr("ACTIVE"),
},
},
ec2Instances: map[string]ec2InstanceInfo{
"i-0987654321fedcba0": {
privateIP: "10.0.1.75",
publicIP: "54.2.3.4",
subnetID: "subnet-bridge-2",
instanceType: "t3.large",
tags: map[string]string{
"Name": "mixed-host",
"Team": "platform",
},
},
},
eniPublicIPs: map[string]string{
"eni-mixed-awsvpc": "52.2.3.4",
},
},
expected: []*targetgroup.Group{
{
Source: "us-west-2",
Targets: []model.LabelSet{
{
model.AddressLabel: model.LabelValue("10.0.2.100:80"),
"__meta_ecs_cluster": model.LabelValue("mixed-cluster"),
"__meta_ecs_cluster_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:cluster/mixed-cluster"),
"__meta_ecs_service": model.LabelValue("mixed-service"),
"__meta_ecs_service_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:service/mixed-cluster/mixed-service"),
"__meta_ecs_service_status": model.LabelValue("ACTIVE"),
"__meta_ecs_task_group": model.LabelValue("service:mixed-service"),
"__meta_ecs_task_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:task/mixed-cluster/task-awsvpc"),
"__meta_ecs_task_definition": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:task-definition/awsvpc-task:1"),
"__meta_ecs_region": model.LabelValue("us-west-2"),
"__meta_ecs_availability_zone": model.LabelValue("us-west-2a"),
"__meta_ecs_ip_address": model.LabelValue("10.0.2.100"),
"__meta_ecs_subnet_id": model.LabelValue("subnet-12345"),
"__meta_ecs_launch_type": model.LabelValue("FARGATE"),
"__meta_ecs_desired_status": model.LabelValue("RUNNING"),
"__meta_ecs_last_status": model.LabelValue("RUNNING"),
"__meta_ecs_health_status": model.LabelValue("HEALTHY"),
"__meta_ecs_network_mode": model.LabelValue("awsvpc"),
"__meta_ecs_public_ip": model.LabelValue("52.2.3.4"),
},
{
model.AddressLabel: model.LabelValue("10.0.1.75:80"),
"__meta_ecs_cluster": model.LabelValue("mixed-cluster"),
"__meta_ecs_cluster_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:cluster/mixed-cluster"),
"__meta_ecs_service": model.LabelValue("mixed-service"),
"__meta_ecs_service_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:service/mixed-cluster/mixed-service"),
"__meta_ecs_service_status": model.LabelValue("ACTIVE"),
"__meta_ecs_task_group": model.LabelValue("service:mixed-service"),
"__meta_ecs_task_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:task/mixed-cluster/task-bridge"),
"__meta_ecs_task_definition": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:task-definition/bridge-task:1"),
"__meta_ecs_region": model.LabelValue("us-west-2"),
"__meta_ecs_availability_zone": model.LabelValue("us-west-2b"),
"__meta_ecs_ip_address": model.LabelValue("10.0.1.75"),
"__meta_ecs_subnet_id": model.LabelValue("subnet-bridge-2"),
"__meta_ecs_launch_type": model.LabelValue("EC2"),
"__meta_ecs_desired_status": model.LabelValue("RUNNING"),
"__meta_ecs_last_status": model.LabelValue("RUNNING"),
"__meta_ecs_health_status": model.LabelValue("HEALTHY"),
"__meta_ecs_network_mode": model.LabelValue("bridge"),
"__meta_ecs_container_instance_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:container-instance/mixed-cluster/xyz789"),
"__meta_ecs_ec2_instance_id": model.LabelValue("i-0987654321fedcba0"),
"__meta_ecs_ec2_instance_type": model.LabelValue("t3.large"),
"__meta_ecs_ec2_instance_private_ip": model.LabelValue("10.0.1.75"),
"__meta_ecs_ec2_instance_public_ip": model.LabelValue("54.2.3.4"),
"__meta_ecs_public_ip": model.LabelValue("54.2.3.4"),
"__meta_ecs_tag_ec2_Name": model.LabelValue("mixed-host"),
"__meta_ecs_tag_ec2_Team": model.LabelValue("platform"),
},
},
},
},
},
{
name: "EC2WithAwsvpcNetworking",
ecsData: &ecsDataStore{
region: "us-west-2",
clusters: []ecsTypes.Cluster{
{
ClusterName: strptr("ec2-awsvpc-cluster"),
ClusterArn: strptr("arn:aws:ecs:us-west-2:123456789012:cluster/ec2-awsvpc-cluster"),
Status: strptr("ACTIVE"),
},
},
services: []ecsTypes.Service{
{
ServiceName: strptr("ec2-awsvpc-service"),
ServiceArn: strptr("arn:aws:ecs:us-west-2:123456789012:service/ec2-awsvpc-cluster/ec2-awsvpc-service"),
ClusterArn: strptr("arn:aws:ecs:us-west-2:123456789012:cluster/ec2-awsvpc-cluster"),
Status: strptr("ACTIVE"),
},
},
tasks: []ecsTypes.Task{
{
TaskArn: strptr("arn:aws:ecs:us-west-2:123456789012:task/ec2-awsvpc-cluster/task-ec2-awsvpc"),
ClusterArn: strptr("arn:aws:ecs:us-west-2:123456789012:cluster/ec2-awsvpc-cluster"),
TaskDefinitionArn: strptr("arn:aws:ecs:us-west-2:123456789012:task-definition/ec2-awsvpc-task:1"),
Group: strptr("service:ec2-awsvpc-service"),
LaunchType: ecsTypes.LaunchTypeEc2,
LastStatus: strptr("RUNNING"),
DesiredStatus: strptr("RUNNING"),
HealthStatus: ecsTypes.HealthStatusHealthy,
AvailabilityZone: strptr("us-west-2c"),
ContainerInstanceArn: strptr("arn:aws:ecs:us-west-2:123456789012:container-instance/ec2-awsvpc-cluster/def456"),
// Has BOTH ENI attachment AND container instance ARN - should use ENI
Attachments: []ecsTypes.Attachment{
{
Type: strptr("ElasticNetworkInterface"),
Details: []ecsTypes.KeyValuePair{
{Name: strptr("subnetId"), Value: strptr("subnet-99999")},
{Name: strptr("privateIPv4Address"), Value: strptr("10.0.3.200")},
{Name: strptr("networkInterfaceId"), Value: strptr("eni-ec2-awsvpc")},
},
},
},
},
},
eniPublicIPs: map[string]string{
"eni-ec2-awsvpc": "52.3.4.5",
},
// Container instance data - IP should NOT be used, but instance type SHOULD be used
containerInstances: []ecsTypes.ContainerInstance{
{
ContainerInstanceArn: strptr("arn:aws:ecs:us-west-2:123456789012:container-instance/ec2-awsvpc-cluster/def456"),
Ec2InstanceId: strptr("i-ec2awsvpcinstance"),
Status: strptr("ACTIVE"),
},
},
ec2Instances: map[string]ec2InstanceInfo{
"i-ec2awsvpcinstance": {
privateIP: "10.0.9.99", // This IP should NOT be used (ENI IP is used instead)
publicIP: "54.3.4.5", // This public IP SHOULD be exposed
subnetID: "subnet-wrong", // This subnet should NOT be used (ENI subnet is used instead)
instanceType: "c5.2xlarge", // This instance type SHOULD be used
tags: map[string]string{
"Name": "ec2-awsvpc-host",
"Owner": "team-a",
},
},
},
},
expected: []*targetgroup.Group{
{
Source: "us-west-2",
Targets: []model.LabelSet{
{
model.AddressLabel: model.LabelValue("10.0.3.200:80"),
"__meta_ecs_cluster": model.LabelValue("ec2-awsvpc-cluster"),
"__meta_ecs_cluster_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:cluster/ec2-awsvpc-cluster"),
"__meta_ecs_service": model.LabelValue("ec2-awsvpc-service"),
"__meta_ecs_service_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:service/ec2-awsvpc-cluster/ec2-awsvpc-service"),
"__meta_ecs_service_status": model.LabelValue("ACTIVE"),
"__meta_ecs_task_group": model.LabelValue("service:ec2-awsvpc-service"),
"__meta_ecs_task_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:task/ec2-awsvpc-cluster/task-ec2-awsvpc"),
"__meta_ecs_task_definition": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:task-definition/ec2-awsvpc-task:1"),
"__meta_ecs_region": model.LabelValue("us-west-2"),
"__meta_ecs_availability_zone": model.LabelValue("us-west-2c"),
"__meta_ecs_ip_address": model.LabelValue("10.0.3.200"),
"__meta_ecs_subnet_id": model.LabelValue("subnet-99999"),
"__meta_ecs_launch_type": model.LabelValue("EC2"),
"__meta_ecs_desired_status": model.LabelValue("RUNNING"),
"__meta_ecs_last_status": model.LabelValue("RUNNING"),
"__meta_ecs_health_status": model.LabelValue("HEALTHY"),
"__meta_ecs_network_mode": model.LabelValue("awsvpc"),
"__meta_ecs_container_instance_arn": model.LabelValue("arn:aws:ecs:us-west-2:123456789012:container-instance/ec2-awsvpc-cluster/def456"),
"__meta_ecs_ec2_instance_id": model.LabelValue("i-ec2awsvpcinstance"),
"__meta_ecs_ec2_instance_type": model.LabelValue("c5.2xlarge"),
"__meta_ecs_ec2_instance_private_ip": model.LabelValue("10.0.9.99"),
"__meta_ecs_ec2_instance_public_ip": model.LabelValue("54.3.4.5"),
"__meta_ecs_public_ip": model.LabelValue("52.3.4.5"),
"__meta_ecs_tag_ec2_Name": model.LabelValue("ec2-awsvpc-host"),
"__meta_ecs_tag_ec2_Owner": model.LabelValue("team-a"),
},
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
client := newMockECSClient(tt.ecsData)
ecsClient := newMockECSClient(tt.ecsData)
ec2Client := newMockECSEC2Client(tt.ecsData.ec2Instances, tt.ecsData.eniPublicIPs)
d := &ECSDiscovery{
ecs: client,
ecs: ecsClient,
ec2: ec2Client,
cfg: &ECSSDConfig{
Region: tt.ecsData.region,
Port: 80,
@ -951,3 +1293,91 @@ func (m *mockECSClient) DescribeTasks(_ context.Context, input *ecs.DescribeTask
Tasks: tasks,
}, nil
}
// DescribeContainerInstances returns, for each requested ARN in request order,
// the first container instance in the mock's data whose ARN matches. Requested
// ARNs with no match contribute nothing to the output. It never returns an
// error.
func (m *mockECSClient) DescribeContainerInstances(_ context.Context, input *ecs.DescribeContainerInstancesInput, _ ...func(*ecs.Options)) (*ecs.DescribeContainerInstancesOutput, error) {
	out := &ecs.DescribeContainerInstancesOutput{}

	for _, wanted := range input.ContainerInstances {
		for i := range m.ecsData.containerInstances {
			candidate := m.ecsData.containerInstances[i]
			if *candidate.ContainerInstanceArn != wanted {
				continue
			}
			out.ContainerInstances = append(out.ContainerInstances, candidate)
			break
		}
	}

	return out, nil
}
// mockECSEC2Client is a mock EC2 client for the ECS discovery tests. Its
// DescribeInstances and DescribeNetworkInterfaces methods build their
// responses from the fixture maps held here.
type mockECSEC2Client struct {
// ec2Instances holds the instance fixtures, looked up by EC2 instance ID.
ec2Instances map[string]ec2InstanceInfo
// eniPublicIPs is looked up by network interface ID and holds that
// interface's public IP; an empty value produces an interface without a
// public IP association.
eniPublicIPs map[string]string
}
// newMockECSEC2Client builds a mockECSEC2Client backed by the given fixture
// maps. The maps are stored as passed in; they are not copied.
func newMockECSEC2Client(ec2Instances map[string]ec2InstanceInfo, eniPublicIPs map[string]string) *mockECSEC2Client {
	client := new(mockECSEC2Client)
	client.ec2Instances = ec2Instances
	client.eniPublicIPs = eniPublicIPs
	return client
}
// DescribeInstances mocks the EC2 DescribeInstances call. Every ID in
// input.InstanceIds that has a fixture in m.ec2Instances yields one
// reservation holding a single instance; IDs without a fixture are left out.
// The instance ID and private IP are always set, while the public IP, subnet
// ID and instance type are only set when the fixture value is non-empty.
// Every fixture tag becomes an EC2 tag. The returned error is always nil.
func (m *mockECSEC2Client) DescribeInstances(_ context.Context, input *ec2.DescribeInstancesInput, _ ...func(*ec2.Options)) (*ec2.DescribeInstancesOutput, error) {
	// nonEmpty maps an empty string to a nil pointer so that optional
	// fields stay unset on the instance.
	nonEmpty := func(s *string) *string {
		if *s == "" {
			return nil
		}
		return s
	}

	var found []ec2Types.Reservation
	for _, id := range input.InstanceIds {
		fixture, known := m.ec2Instances[id]
		if !known {
			continue
		}

		// NOTE(review): &id, &key and &value below are addresses of range
		// variables; the pointers are distinct per iteration only with
		// Go 1.22+ loop semantics — confirm against go.mod.
		inst := ec2Types.Instance{
			InstanceId:       &id,
			PrivateIpAddress: &fixture.privateIP,
			PublicIpAddress:  nonEmpty(&fixture.publicIP),
			SubnetId:         nonEmpty(&fixture.subnetID),
			// Converting an empty string gives the zero InstanceType,
			// which is the same as leaving the field unset.
			InstanceType: ec2Types.InstanceType(fixture.instanceType),
		}
		for key, value := range fixture.tags {
			inst.Tags = append(inst.Tags, ec2Types.Tag{Key: &key, Value: &value})
		}

		found = append(found, ec2Types.Reservation{
			Instances: []ec2Types.Instance{inst},
		})
	}

	return &ec2.DescribeInstancesOutput{Reservations: found}, nil
}
// DescribeNetworkInterfaces mocks the EC2 DescribeNetworkInterfaces call.
// Every ID in input.NetworkInterfaceIds that is a key of m.eniPublicIPs
// yields one network interface; other IDs are left out. The interface
// carries a public IP association only when the mapped IP is non-empty.
// The returned error is always nil.
func (m *mockECSEC2Client) DescribeNetworkInterfaces(_ context.Context, input *ec2.DescribeNetworkInterfacesInput, _ ...func(*ec2.Options)) (*ec2.DescribeNetworkInterfacesOutput, error) {
	var matched []ec2Types.NetworkInterface
	for _, id := range input.NetworkInterfaceIds {
		ip, known := m.eniPublicIPs[id]
		if !known {
			continue
		}

		// A nil association stands for an ENI without a public IP.
		var assoc *ec2Types.NetworkInterfaceAssociation
		if ip != "" {
			assoc = &ec2Types.NetworkInterfaceAssociation{PublicIp: &ip}
		}

		matched = append(matched, ec2Types.NetworkInterface{
			NetworkInterfaceId: &id,
			Association:        assoc,
		})
	}

	return &ec2.DescribeNetworkInterfacesOutput{NetworkInterfaces: matched}, nil
}

View file

@ -919,11 +919,16 @@ The following meta labels are available on targets during [relabeling](#relabel_
#### `ecs`
The `ecs` role discovers targets from AWS ECS containers. The private IP address is used by default, but may be changed to
the public IP address with relabeling.
The `ecs` role discovers targets from AWS ECS containers.
The IAM credentials used must have the following permissions to discover
scrape targets:
ECS service discovery supports all ECS networking modes:
- **awsvpc mode** (Fargate and EC2 with ENI): Uses the task's private IP address from its elastic network interface
- **bridge mode** (EC2): Uses the EC2 host instance's private IP address
- **host mode** (EC2): Uses the EC2 host instance's private IP address
The private IP address is used by default, but may be changed to the public IP address with relabeling.
The IAM credentials used must have the following permissions to discover scrape targets:
- `ecs:ListClusters`
- `ecs:DescribeClusters`
@ -931,6 +936,9 @@ scrape targets:
- `ecs:DescribeServices`
- `ecs:ListTasks`
- `ecs:DescribeTasks`
- `ecs:DescribeContainerInstances` (required for EC2 launch type tasks)
- `ec2:DescribeInstances` (required for EC2 launch type tasks)
- `ec2:DescribeNetworkInterfaces` (required to get the public IP address of awsvpc mode tasks)
The following meta labels are available on targets during [relabeling](#relabel_config):
@ -952,9 +960,17 @@ The following meta labels are available on targets during [relabeling](#relabel_
* `__meta_ecs_subnet_id`: the subnet ID where the task is running
* `__meta_ecs_availability_zone`: the availability zone where the task is running
* `__meta_ecs_region`: the AWS region
* `__meta_ecs_public_ip`: the public IP address (from ENI for awsvpc mode, from EC2 instance for bridge/host mode), if available
* `__meta_ecs_network_mode`: the network mode of the task (`awsvpc` or `bridge`)
* `__meta_ecs_container_instance_arn`: the ARN of the container instance (EC2 launch type only)
* `__meta_ecs_ec2_instance_id`: the EC2 instance ID (EC2 launch type only)
* `__meta_ecs_ec2_instance_type`: the EC2 instance type (EC2 launch type only)
* `__meta_ecs_ec2_instance_private_ip`: the private IP address of the EC2 instance (EC2 launch type only)
* `__meta_ecs_ec2_instance_public_ip`: the public IP address of the EC2 instance, if available (EC2 launch type only)
* `__meta_ecs_tag_cluster_<tagkey>`: each cluster tag value, keyed by tag name
* `__meta_ecs_tag_service_<tagkey>`: each service tag value, keyed by tag name
* `__meta_ecs_tag_task_<tagkey>`: each task tag value, keyed by tag name
* `__meta_ecs_tag_ec2_<tagkey>`: each EC2 instance tag value, keyed by tag name (EC2 launch type only)
See below for the configuration options for AWS discovery: