diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e399132cf..beeca084ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +## 1.3.0-beta.0 / 2016-10-18 + +This is a breaking change to the Kubernetes service discovery. + +* [CHANGE] Rework Kubernetes SD. +* [FEATURE] Add support for interpolating `target_label`. +* [FEATURE] Add GCE metadata as Prometheus meta labels. +* [ENHANCEMENT] Add EC2 SD metrics. +* [ENHANCEMENT] Add Azure SD metrics. +* [ENHANCEMENT] Add fuzzy search to `/graph` textarea. +* [ENHANCEMENT] Always show instance labels on target page. +* [BUGFIX] Correctly handle on() in alerts. + ## 1.2.1 / 2016-10-10 * [BUGFIX] Count chunk evictions properly so that the server doesn't diff --git a/VERSION b/VERSION index 6085e94650..6989533d78 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.2.1 +1.3.0-beta.0 diff --git a/code-of-conduct.md b/code-of-conduct.md new file mode 100644 index 0000000000..9a1aff4127 --- /dev/null +++ b/code-of-conduct.md @@ -0,0 +1,3 @@ +## Prometheus Community Code of Conduct + +Prometheus follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/master/code-of-conduct.md). diff --git a/config/config.go b/config/config.go index 4dd6578bcb..b02fdb5a69 100644 --- a/config/config.go +++ b/config/config.go @@ -31,6 +31,7 @@ var ( patFileSDName = regexp.MustCompile(`^[^*]*(\*[^/]*)?\.(json|yml|yaml|JSON|YML|YAML)$`) patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`) patAuthLine = regexp.MustCompile(`((?:password|bearer_token|secret_key|client_secret):\s+)(".+"|'.+'|[^\s]+)`) + relabelTarget = regexp.MustCompile(`^(?:(?:[a-zA-Z_]|\$(?:\{\w+\}|\w+))+\w*)+$`) ) // Load parses the YAML input s into a Config. @@ -362,7 +363,6 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { gc.EvaluationInterval = DefaultGlobalConfig.EvaluationInterval } *c = *gc - return nil } @@ -907,6 +907,7 @@ type EC2SDConfig struct { SecretKey string `yaml:"secret_key,omitempty"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` Port int `yaml:"port"` + // Catches all undefined fields and must be empty after parsing. XXX map[string]interface{} `yaml:",inline"` } @@ -936,6 +937,7 @@ type AzureSDConfig struct { ClientID string `yaml:"client_id,omitempty"` ClientSecret string `yaml:"client_secret,omitempty"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` + // Catches all undefined fields and must be empty after parsing. XXX map[string]interface{} `yaml:",inline"` } @@ -993,8 +995,9 @@ type RelabelConfig struct { Regex Regexp `yaml:"regex,omitempty"` // Modulus to take of the hash of concatenated values from the source labels. Modulus uint64 `yaml:"modulus,omitempty"` - // The label to which the resulting string is written in a replacement. - TargetLabel model.LabelName `yaml:"target_label,omitempty"` + // TargetLabel is the label to which the resulting string is written in a replacement. + // Regexp interpolation is allowed for the replace action. + TargetLabel string `yaml:"target_label,omitempty"` // Replacement is the regex replacement pattern to be used. Replacement string `yaml:"replacement,omitempty"` // Action is the action to be performed for the relabeling. @@ -1020,6 +1023,12 @@ func (c *RelabelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { if (c.Action == RelabelReplace || c.Action == RelabelHashMod) && c.TargetLabel == "" { return fmt.Errorf("relabel configuration for %s action requires 'target_label' value", c.Action) } + if c.Action == RelabelReplace && !relabelTarget.MatchString(c.TargetLabel) { + return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action) + } + if c.Action == RelabelHashMod && !model.LabelName(c.TargetLabel).IsValid() { + return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action) + } return nil } diff --git a/config/config_test.go b/config/config_test.go index 44d5da4b2d..03fbfe79ee 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -207,6 +207,17 @@ var expectedConf = &Config{ Scheme: DefaultConsulSDConfig.Scheme, }, }, + + RelabelConfigs: []*RelabelConfig{ + { + SourceLabels: model.LabelNames{"__meta_sd_consul_tags"}, + Regex: MustNewRegexp("label:([^=]+)=([^,]+)"), + Separator: ",", + TargetLabel: "${1}", + Replacement: "${2}", + Action: RelabelReplace, + }, + }, }, { JobName: "service-z", @@ -364,7 +375,7 @@ func TestLoadConfig(t *testing.T) { // Parse a valid file that sets a global scrape timeout. This tests whether parsing // an overwritten default field in the global config permanently changes the default. if _, err := LoadFile("testdata/global_timeout.good.yml"); err != nil { - t.Errorf("Error parsing %s: %s", "testdata/conf.good.yml", err) + t.Errorf("Error parsing %s: %s", "testdata/global_timeout.good.yml", err) } c, err := LoadFile("testdata/conf.good.yml") @@ -505,6 +516,34 @@ func TestEmptyGlobalBlock(t *testing.T) { } } +func TestTargetLabelValidity(t *testing.T) { + tests := []struct { + str string + valid bool + }{ + {"-label", false}, + {"label", true}, + {"label${1}", true}, + {"${1}label", true}, + {"${1}", true}, + {"${1}label", true}, + {"${", false}, + {"$", false}, + {"${}", false}, + {"foo${", false}, + {"$1", true}, + {"asd$2asd", true}, + {"-foo${1}bar-", false}, + {"_${1}_", true}, + {"foo${bar}foo", true}, + } + for _, test := range tests { + if relabelTarget.Match([]byte(test.str)) != test.valid { + t.Fatalf("Expected %q to be %v", test.str, test.valid) + } + } +} + func kubernetesSDHostURL() URL { tURL, _ := url.Parse("https://localhost:1234") return URL{URL: tURL} diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index 8c2b5f4b67..e2387f074c 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -104,6 +104,13 @@ scrape_configs: - server: 'localhost:1234' services: ['nginx', 'cache', 'mysql'] + relabel_configs: + - source_labels: [__meta_sd_consul_tags] + separator: ',' + regex: label:([^=]+)=([^,]+) + target_label: ${1} + replacement: ${2} + - job_name: service-z tls_config: diff --git a/notifier/notifier.go b/notifier/notifier.go index a577e027e3..8f37cc8a52 100644 --- a/notifier/notifier.go +++ b/notifier/notifier.go @@ -290,6 +290,7 @@ func (n *Notifier) sendAll(alerts ...*model.Alert) int { } defer resp.Body.Close() + // Any HTTP status 2xx is OK. if resp.StatusCode/100 != 2 { return fmt.Errorf("bad response status %v", resp.Status) } diff --git a/promql/printer.go b/promql/printer.go index 21678a7d52..79141e06ef 100644 --- a/promql/printer.go +++ b/promql/printer.go @@ -163,7 +163,7 @@ func (node *BinaryExpr) String() string { matching := "" vm := node.VectorMatching - if vm != nil && len(vm.MatchingLabels) > 0 { + if vm != nil && (len(vm.MatchingLabels) > 0 || vm.On) { if vm.On { matching = fmt.Sprintf(" ON(%s)", vm.MatchingLabels) } else { diff --git a/promql/printer_test.go b/promql/printer_test.go index 52e53245bc..1715b54fcc 100644 --- a/promql/printer_test.go +++ b/promql/printer_test.go @@ -59,6 +59,10 @@ func TestExprString(t *testing.T) { inputs := []struct { in, out string }{ + { + in: `sum(task:errors:rate10s{job="s"}) BY ()`, + out: `sum(task:errors:rate10s{job="s"})`, + }, { in: `sum(task:errors:rate10s{job="s"}) BY (code)`, }, @@ -77,6 +81,9 @@ func TestExprString(t *testing.T) { { in: `count_values("value", task:errors:rate10s{job="s"})`, }, + { + in: `a - ON() c`, + }, { in: `a - ON(b) c`, }, @@ -92,6 +99,10 @@ func TestExprString(t *testing.T) { { in: `a - IGNORING(b) c`, }, + { + in: `a - IGNORING() c`, + out: `a - c`, + }, { in: `up > BOOL 0`, }, diff --git a/relabel/relabel.go b/relabel/relabel.go index 6ce37296c3..ff59045965 100644 --- a/relabel/relabel.go +++ b/relabel/relabel.go @@ -61,20 +61,20 @@ func relabel(labels model.LabelSet, cfg *config.RelabelConfig) model.LabelSet { if indexes == nil { break } - target := model.LabelName(cfg.Regex.ExpandString([]byte{}, string(cfg.TargetLabel), val, indexes)) + target := model.LabelName(cfg.Regex.ExpandString([]byte{}, cfg.TargetLabel, val, indexes)) if !target.IsValid() { - delete(labels, cfg.TargetLabel) + delete(labels, model.LabelName(cfg.TargetLabel)) break } res := cfg.Regex.ExpandString([]byte{}, cfg.Replacement, val, indexes) if len(res) == 0 { - delete(labels, cfg.TargetLabel) + delete(labels, model.LabelName(cfg.TargetLabel)) break } labels[target] = model.LabelValue(res) case config.RelabelHashMod: mod := sum64(md5.Sum([]byte(val))) % cfg.Modulus - labels[cfg.TargetLabel] = model.LabelValue(fmt.Sprintf("%d", mod)) + labels[model.LabelName(cfg.TargetLabel)] = model.LabelValue(fmt.Sprintf("%d", mod)) case config.RelabelLabelMap: out := make(model.LabelSet, len(labels)) // Take a copy to avoid infinite loops. diff --git a/relabel/relabel_test.go b/relabel/relabel_test.go index 4004fa96ff..28fa6e901b 100644 --- a/relabel/relabel_test.go +++ b/relabel/relabel_test.go @@ -38,7 +38,7 @@ func TestRelabel(t *testing.T) { { SourceLabels: model.LabelNames{"a"}, Regex: config.MustNewRegexp("f(.*)"), - TargetLabel: model.LabelName("d"), + TargetLabel: "d", Separator: ";", Replacement: "ch${1}-ch${1}", Action: config.RelabelReplace, @@ -61,7 +61,7 @@ func TestRelabel(t *testing.T) { { SourceLabels: model.LabelNames{"a", "b"}, Regex: config.MustNewRegexp("f(.*);(.*)r"), - TargetLabel: model.LabelName("a"), + TargetLabel: "a", Separator: ";", Replacement: "b${1}${2}m", // boobam Action: config.RelabelReplace, @@ -69,7 +69,7 @@ func TestRelabel(t *testing.T) { { SourceLabels: model.LabelNames{"c", "a"}, Regex: config.MustNewRegexp("(b).*b(.*)ba(.*)"), - TargetLabel: model.LabelName("d"), + TargetLabel: "d", Separator: ";", Replacement: "$1$2$2$3", Action: config.RelabelReplace, @@ -94,7 +94,7 @@ func TestRelabel(t *testing.T) { }, { SourceLabels: model.LabelNames{"a"}, Regex: config.MustNewRegexp("f(.*)"), - TargetLabel: model.LabelName("d"), + TargetLabel: "d", Separator: ";", Replacement: "ch$1-ch$1", Action: config.RelabelReplace, @@ -124,7 +124,7 @@ func TestRelabel(t *testing.T) { { SourceLabels: model.LabelNames{"a"}, Regex: config.MustNewRegexp(".*(b).*"), - TargetLabel: model.LabelName("d"), + TargetLabel: "d", Separator: ";", Replacement: "$1", Action: config.RelabelReplace, @@ -202,7 +202,7 @@ func TestRelabel(t *testing.T) { { SourceLabels: model.LabelNames{"a"}, Regex: config.MustNewRegexp("f"), - TargetLabel: model.LabelName("b"), + TargetLabel: "b", Replacement: "bar", Action: config.RelabelReplace, }, @@ -220,7 +220,7 @@ func TestRelabel(t *testing.T) { relabel: []*config.RelabelConfig{ { SourceLabels: model.LabelNames{"c"}, - TargetLabel: model.LabelName("d"), + TargetLabel: "d", Separator: ";", Action: config.RelabelHashMod, Modulus: 1000, @@ -287,7 +287,7 @@ func TestRelabel(t *testing.T) { Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Action: config.RelabelReplace, Replacement: "${2}", - TargetLabel: model.LabelName("${1}"), + TargetLabel: "${1}", }, }, output: model.LabelSet{ @@ -305,7 +305,7 @@ func TestRelabel(t *testing.T) { Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Action: config.RelabelReplace, Replacement: "${3}", - TargetLabel: model.LabelName("${1}"), + TargetLabel: "${1}", }, }, output: model.LabelSet{ @@ -322,21 +322,21 @@ func TestRelabel(t *testing.T) { Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Action: config.RelabelReplace, Replacement: "${1}", - TargetLabel: model.LabelName("${3}"), + TargetLabel: "${3}", }, { SourceLabels: model.LabelNames{"a"}, Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Action: config.RelabelReplace, Replacement: "${1}", - TargetLabel: model.LabelName("0${3}"), + TargetLabel: "0${3}", }, { SourceLabels: model.LabelNames{"a"}, Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Action: config.RelabelReplace, Replacement: "${1}", - TargetLabel: model.LabelName("-${3}"), + TargetLabel: "-${3}", }, }, output: model.LabelSet{ @@ -353,21 +353,21 @@ func TestRelabel(t *testing.T) { Regex: config.MustNewRegexp("(?:.+,|^)path:(/[^,]+).*"), Action: config.RelabelReplace, Replacement: "${1}", - TargetLabel: model.LabelName("__metrics_path__"), + TargetLabel: "__metrics_path__", }, { SourceLabels: model.LabelNames{"__meta_sd_tags"}, Regex: config.MustNewRegexp("(?:.+,|^)job:([^,]+).*"), Action: config.RelabelReplace, Replacement: "${1}", - TargetLabel: model.LabelName("job"), + TargetLabel: "job", }, { SourceLabels: model.LabelNames{"__meta_sd_tags"}, Regex: config.MustNewRegexp("(?:.+,|^)label:([^=]+)=([^,]+).*"), Action: config.RelabelReplace, Replacement: "${2}", - TargetLabel: model.LabelName("${1}"), + TargetLabel: "${1}", }, }, output: model.LabelSet{ diff --git a/retrieval/discovery/azure.go b/retrieval/discovery/azure.go index 0f1334eb8f..a35f489b2e 100644 --- a/retrieval/discovery/azure.go +++ b/retrieval/discovery/azure.go @@ -23,6 +23,7 @@ import ( "github.com/Azure/azure-sdk-for-go/arm/network" "github.com/Azure/go-autorest/autorest/azure" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/common/model" "golang.org/x/net/context" @@ -41,6 +42,26 @@ const ( azureLabelMachineTag = azureLabel + "machine_tag_" ) +var ( + azureSDScrapeFailuresCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_azure_scrape_failures_total", + Help: "Number of Azure-SD scrape failures.", + }) + azureSDScrapeDuration = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "sd_azure_scrape_duration_seconds", + Help: "The duration of a Azure-SD scrape in seconds.", + }) +) + +func init() { + prometheus.MustRegister(azureSDScrapeDuration) + prometheus.MustRegister(azureSDScrapeFailuresCount) +} + // AzureDiscovery periodically performs Azure-SD requests. It implements // the TargetProvider interface. type AzureDiscovery struct { @@ -135,8 +156,15 @@ func newAzureResourceFromID(id string) (azureResource, error) { }, nil } -func (ad *AzureDiscovery) refresh() (*config.TargetGroup, error) { - tg := &config.TargetGroup{} +func (ad *AzureDiscovery) refresh() (tg *config.TargetGroup, err error) { + t0 := time.Now() + defer func() { + azureSDScrapeDuration.Observe(time.Since(t0).Seconds()) + if err != nil { + azureSDScrapeFailuresCount.Inc() + } + }() + tg = &config.TargetGroup{} client, err := createAzureClient(*ad.cfg) if err != nil { return tg, fmt.Errorf("could not create Azure client: %s", err) diff --git a/retrieval/discovery/consul/consul.go b/retrieval/discovery/consul/consul.go index 75d81704b6..58b2da806c 100644 --- a/retrieval/discovery/consul/consul.go +++ b/retrieval/discovery/consul/consul.go @@ -21,6 +21,7 @@ import ( "time" consul "github.com/hashicorp/consul/api" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/common/model" "golang.org/x/net/context" @@ -48,8 +49,37 @@ const ( datacenterLabel = model.MetaLabelPrefix + "consul_dc" // serviceIDLabel is the name of the label containing the service ID. serviceIDLabel = model.MetaLabelPrefix + "consul_service_id" + + // Constants for instrumentation. + namespace = "prometheus" ) +var ( + rpcFailuresCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_consul_rpc_failures_total", + Help: "The number of Consul RPC call failures.", + }) + rpcDuration = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "sd_consul_rpc_duration_seconds", + Help: "The duration of a Consul RPC call in seconds.", + }, + []string{"endpoint", "call"}, + ) +) + +func init() { + prometheus.MustRegister(rpcFailuresCount) + prometheus.MustRegister(rpcDuration) + + // Initialize metric vectors. + rpcDuration.WithLabelValues("catalog", "service") + rpcDuration.WithLabelValues("catalog", "services") +} + // Discovery retrieves target information from a Consul server // and updates them via watches. type Discovery struct { @@ -110,10 +140,12 @@ func (cd *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) { var lastIndex uint64 for { catalog := cd.client.Catalog() + t0 := time.Now() srvs, meta, err := catalog.Services(&consul.QueryOptions{ WaitIndex: lastIndex, WaitTime: watchTimeout, }) + rpcDuration.WithLabelValues("catalog", "services").Observe(time.Since(t0).Seconds()) // We have to check the context at least once. The checks during channel sends // do not guarantee that. @@ -125,6 +157,7 @@ func (cd *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) { if err != nil { log.Errorf("Error refreshing service list: %s", err) + rpcFailuresCount.Inc() time.Sleep(retryInterval) continue } @@ -202,10 +235,13 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*config.TargetG lastIndex := uint64(0) for { + t0 := time.Now() nodes, meta, err := catalog.Service(srv.name, "", &consul.QueryOptions{ WaitIndex: lastIndex, WaitTime: watchTimeout, }) + rpcDuration.WithLabelValues("catalog", "service").Observe(time.Since(t0).Seconds()) + // Check the context before potentially falling in a continue-loop. select { case <-ctx.Done(): @@ -216,6 +252,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*config.TargetG if err != nil { log.Errorf("Error refreshing service %s: %s", srv.name, err) + rpcFailuresCount.Inc() time.Sleep(retryInterval) continue } diff --git a/retrieval/discovery/dns/dns.go b/retrieval/discovery/dns/dns.go index 960507a0c3..0869354ba5 100644 --- a/retrieval/discovery/dns/dns.go +++ b/retrieval/discovery/dns/dns.go @@ -42,13 +42,13 @@ var ( dnsSDLookupsCount = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: namespace, - Name: "dns_sd_lookups_total", + Name: "sd_dns_lookups_total", Help: "The number of DNS-SD lookups.", }) dnsSDLookupFailuresCount = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: namespace, - Name: "dns_sd_lookup_failures_total", + Name: "sd_dns_lookup_failures_total", Help: "The number of DNS-SD lookup failures.", }) ) diff --git a/retrieval/discovery/ec2.go b/retrieval/discovery/ec2.go index 1c376b5e3d..0d4d0fc2f9 100644 --- a/retrieval/discovery/ec2.go +++ b/retrieval/discovery/ec2.go @@ -22,6 +22,7 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/defaults" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/common/model" "golang.org/x/net/context" @@ -36,6 +37,7 @@ const ( ec2LabelAZ = ec2Label + "availability_zone" ec2LabelInstanceID = ec2Label + "instance_id" ec2LabelInstanceState = ec2Label + "instance_state" + ec2LabelInstanceType = ec2Label + "instance_type" ec2LabelPublicDNS = ec2Label + "public_dns_name" ec2LabelPublicIP = ec2Label + "public_ip" ec2LabelPrivateIP = ec2Label + "private_ip" @@ -45,6 +47,26 @@ const ( subnetSeparator = "," ) +var ( + ec2SDScrapeFailuresCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_ec2_scrape_failures_total", + Help: "The number of EC2-SD scrape failures.", + }) + ec2SDScrapeDuration = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "sd_ec2_scrape_duration_seconds", + Help: "The duration of a EC2-SD scrape in seconds.", + }) +) + +func init() { + prometheus.MustRegister(ec2SDScrapeFailuresCount) + prometheus.MustRegister(ec2SDScrapeDuration) +} + // EC2Discovery periodically performs EC2-SD requests. It implements // the TargetProvider interface. type EC2Discovery struct { @@ -99,12 +121,20 @@ func (ed *EC2Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup } } -func (ed *EC2Discovery) refresh() (*config.TargetGroup, error) { +func (ed *EC2Discovery) refresh() (tg *config.TargetGroup, err error) { + t0 := time.Now() + defer func() { + ec2SDScrapeDuration.Observe(time.Since(t0).Seconds()) + if err != nil { + ec2SDScrapeFailuresCount.Inc() + } + }() + ec2s := ec2.New(ed.aws) - tg := &config.TargetGroup{ + tg = &config.TargetGroup{ Source: *ed.aws.Region, } - if err := ec2s.DescribeInstancesPages(nil, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool { + if err = ec2s.DescribeInstancesPages(nil, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool { for _, r := range p.Reservations { for _, inst := range r.Instances { if inst.PrivateIpAddress == nil { @@ -124,6 +154,7 @@ func (ed *EC2Discovery) refresh() (*config.TargetGroup, error) { labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone) labels[ec2LabelInstanceState] = model.LabelValue(*inst.State.Name) + labels[ec2LabelInstanceType] = model.LabelValue(*inst.InstanceType) if inst.VpcId != nil { labels[ec2LabelVPCID] = model.LabelValue(*inst.VpcId) diff --git a/retrieval/discovery/file.go b/retrieval/discovery/file.go index 40034944ad..ba7b9be916 100644 --- a/retrieval/discovery/file.go +++ b/retrieval/discovery/file.go @@ -21,6 +21,7 @@ import ( "strings" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/common/model" "golang.org/x/net/context" @@ -32,6 +33,26 @@ import ( const fileSDFilepathLabel = model.MetaLabelPrefix + "filepath" +var ( + fileSDScanDuration = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "sd_file_scan_duration_seconds", + Help: "The duration of the File-SD scan in seconds.", + }) + fileSDReadErrorsCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_file_read_errors_total", + Help: "The number of File-SD read errors.", + }) +) + +func init() { + prometheus.MustRegister(fileSDScanDuration) + prometheus.MustRegister(fileSDReadErrorsCount) +} + // FileDiscovery provides service discovery functionality based // on files that contain target groups in JSON or YAML format. Refreshing // happens using file watches and periodic refreshes. @@ -173,10 +194,16 @@ func (fd *FileDiscovery) stop() { // refresh reads all files matching the discovery's patterns and sends the respective // updated target groups through the channel. func (fd *FileDiscovery) refresh(ch chan<- []*config.TargetGroup) { + t0 := time.Now() + defer func() { + fileSDScanDuration.Observe(time.Since(t0).Seconds()) + }() + ref := map[string]int{} for _, p := range fd.listFiles() { tgroups, err := readFile(p) if err != nil { + fileSDReadErrorsCount.Inc() log.Errorf("Error reading file %q: %s", p, err) // Prevent deletion down below. ref[p] = fd.lastRefresh[p] diff --git a/retrieval/discovery/gce.go b/retrieval/discovery/gce.go index 16359ae316..23d19d1421 100644 --- a/retrieval/discovery/gce.go +++ b/retrieval/discovery/gce.go @@ -50,28 +50,21 @@ const ( ) var ( - gceSDScrapesCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Namespace: namespace, - Name: "gce_sd_scrapes_total", - Help: "The number of GCE-SD scrapes.", - }) gceSDScrapeFailuresCount = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: namespace, - Name: "gce_sd_scrape_failures_total", + Name: "sd_gce_scrape_failures_total", Help: "The number of GCE-SD scrape failures.", }) gceSDScrapeDuration = prometheus.NewSummary( prometheus.SummaryOpts{ Namespace: namespace, - Name: "gce_sd_scrape_duration", + Name: "sd_gce_scrape_duration", Help: "The duration of a GCE-SD scrape in seconds.", }) ) func init() { - prometheus.MustRegister(gceSDScrapesCount) prometheus.MustRegister(gceSDScrapeFailuresCount) prometheus.MustRegister(gceSDScrapeDuration) } @@ -147,7 +140,6 @@ func (gd *GCEDiscovery) refresh() (tg *config.TargetGroup, err error) { t0 := time.Now() defer func() { gceSDScrapeDuration.Observe(time.Since(t0).Seconds()) - gceSDScrapesCount.Inc() if err != nil { gceSDScrapeFailuresCount.Inc() } diff --git a/retrieval/discovery/kubernetes/kubernetes.go b/retrieval/discovery/kubernetes/kubernetes.go index 9b9da790d4..ed228a5ad5 100644 --- a/retrieval/discovery/kubernetes/kubernetes.go +++ b/retrieval/discovery/kubernetes/kubernetes.go @@ -110,7 +110,7 @@ func (k *Kubernetes) Run(ctx context.Context, ch chan<- []*config.TargetGroup) { rclient := k.client.Core().GetRESTClient() switch k.role { - case "endpoint": + case "endpoints": elw := cache.NewListWatchFromClient(rclient, "endpoints", api.NamespaceAll, nil) slw := cache.NewListWatchFromClient(rclient, "services", api.NamespaceAll, nil) plw := cache.NewListWatchFromClient(rclient, "pods", api.NamespaceAll, nil) diff --git a/retrieval/discovery/marathon/marathon.go b/retrieval/discovery/marathon/marathon.go index 55cfd30295..a1c8c6181c 100644 --- a/retrieval/discovery/marathon/marathon.go +++ b/retrieval/discovery/marathon/marathon.go @@ -24,6 +24,7 @@ import ( "golang.org/x/net/context" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" @@ -42,8 +43,31 @@ const ( imageLabel model.LabelName = metaLabelPrefix + "image" // taskLabel contains the mesos task name of the app instance. taskLabel model.LabelName = metaLabelPrefix + "task" + + // Constants for instrumentation. + namespace = "prometheus" ) +var ( + scrapeFailuresCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_marathon_scrape_failures_total", + Help: "The number of Marathon-SD scrape failures.", + }) + scrapeDuration = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "sd_marathon_scrape_duration_seconds", + Help: "The duration of a Marathon-SD scrape in seconds.", + }) +) + +func init() { + prometheus.MustRegister(scrapeFailuresCount) + prometheus.MustRegister(scrapeDuration) +} + const appListPath string = "/v2/apps/?embed=apps.tasks" // Discovery provides service discovery based on a Marathon instance. @@ -93,7 +117,15 @@ func (md *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) { } } -func (md *Discovery) updateServices(ctx context.Context, ch chan<- []*config.TargetGroup) error { +func (md *Discovery) updateServices(ctx context.Context, ch chan<- []*config.TargetGroup) (err error) { + t0 := time.Now() + defer func() { + scrapeDuration.Observe(time.Since(t0).Seconds()) + if err != nil { + scrapeFailuresCount.Inc() + } + }() + targetMap, err := md.fetchTargetGroups() if err != nil { return err diff --git a/storage/local/chunk/varbit.go b/storage/local/chunk/varbit.go index f9d135e738..42de4adb21 100644 --- a/storage/local/chunk/varbit.go +++ b/storage/local/chunk/varbit.go @@ -518,7 +518,7 @@ func (c *varbitChunk) addSecondSample(s model.SamplePair) ([]Chunk, error) { return []Chunk{c}, nil } -// addLastSample isa a helper method only used by c.add() and in other helper +// addLastSample is a helper method only used by c.add() and in other helper // methods called by c.add(). It simply sets the given sample as the last sample // in the heador and declares the chunk closed. In other words, addLastSample // adds the very last sample added to this chunk ever, while setLastSample sets