From 3c8140f2e6e3b84b58e5787a4e7a6250c01e2177 Mon Sep 17 00:00:00 2001 From: Fabian Reinartz Date: Tue, 18 Oct 2016 16:20:26 +0200 Subject: [PATCH 01/19] kubernetes: fix typo in endpoint switch case --- retrieval/discovery/kubernetes/kubernetes.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retrieval/discovery/kubernetes/kubernetes.go b/retrieval/discovery/kubernetes/kubernetes.go index 9b9da790d4..ed228a5ad5 100644 --- a/retrieval/discovery/kubernetes/kubernetes.go +++ b/retrieval/discovery/kubernetes/kubernetes.go @@ -110,7 +110,7 @@ func (k *Kubernetes) Run(ctx context.Context, ch chan<- []*config.TargetGroup) { rclient := k.client.Core().GetRESTClient() switch k.role { - case "endpoint": + case "endpoints": elw := cache.NewListWatchFromClient(rclient, "endpoints", api.NamespaceAll, nil) slw := cache.NewListWatchFromClient(rclient, "services", api.NamespaceAll, nil) plw := cache.NewListWatchFromClient(rclient, "pods", api.NamespaceAll, nil) From 182e17958ab53c9f27d4f01692d3abfd2e25de61 Mon Sep 17 00:00:00 2001 From: Dominik Schulz Date: Tue, 18 Oct 2016 20:14:38 +0200 Subject: [PATCH 02/19] Trivial spelling corrections and a small comment. --- config/config.go | 2 ++ config/config_test.go | 2 +- notifier/notifier.go | 1 + storage/local/chunk/varbit.go | 2 +- 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/config/config.go b/config/config.go index a340e0fed4..43fce96e9d 100644 --- a/config/config.go +++ b/config/config.go @@ -910,6 +910,7 @@ type EC2SDConfig struct { SecretKey string `yaml:"secret_key,omitempty"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` Port int `yaml:"port"` + // Catches all undefined fields and must be empty after parsing. XXX map[string]interface{} `yaml:",inline"` } @@ -939,6 +940,7 @@ type AzureSDConfig struct { ClientID string `yaml:"client_id,omitempty"` ClientSecret string `yaml:"client_secret,omitempty"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` + // Catches all undefined fields and must be empty after parsing. XXX map[string]interface{} `yaml:",inline"` } diff --git a/config/config_test.go b/config/config_test.go index 17642f76ce..af456e5037 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -362,7 +362,7 @@ func TestLoadConfig(t *testing.T) { // Parse a valid file that sets a global scrape timeout. This tests whether parsing // an overwritten default field in the global config permanently changes the default. if _, err := LoadFile("testdata/global_timeout.good.yml"); err != nil { - t.Errorf("Error parsing %s: %s", "testdata/conf.good.yml", err) + t.Errorf("Error parsing %s: %s", "testdata/global_timeout.good.yml", err) } c, err := LoadFile("testdata/conf.good.yml") diff --git a/notifier/notifier.go b/notifier/notifier.go index a577e027e3..8f37cc8a52 100644 --- a/notifier/notifier.go +++ b/notifier/notifier.go @@ -290,6 +290,7 @@ func (n *Notifier) sendAll(alerts ...*model.Alert) int { } defer resp.Body.Close() + // Any HTTP status 2xx is OK. if resp.StatusCode/100 != 2 { return fmt.Errorf("bad response status %v", resp.Status) } diff --git a/storage/local/chunk/varbit.go b/storage/local/chunk/varbit.go index f9d135e738..42de4adb21 100644 --- a/storage/local/chunk/varbit.go +++ b/storage/local/chunk/varbit.go @@ -518,7 +518,7 @@ func (c *varbitChunk) addSecondSample(s model.SamplePair) ([]Chunk, error) { return []Chunk{c}, nil } -// addLastSample isa a helper method only used by c.add() and in other helper +// addLastSample is a helper method only used by c.add() and in other helper // methods called by c.add(). It simply sets the given sample as the last sample // in the heador and declares the chunk closed. In other words, addLastSample // adds the very last sample added to this chunk ever, while setLastSample sets From 5a1e909b5d8a63421556e98347d3cbeba4384d26 Mon Sep 17 00:00:00 2001 From: Matti Savolainen Date: Wed, 19 Oct 2016 00:33:22 +0300 Subject: [PATCH 03/19] Make TargetLabel in RelabelConfig a string --- config/config.go | 10 +++++-- config/testdata/relabel_target_label.good.yml | 0 relabel/relabel.go | 8 ++--- relabel/relabel_test.go | 30 +++++++++---------- 4 files changed, 27 insertions(+), 21 deletions(-) create mode 100644 config/testdata/relabel_target_label.good.yml diff --git a/config/config.go b/config/config.go index ee37ae1265..8b13aeecf4 100644 --- a/config/config.go +++ b/config/config.go @@ -31,6 +31,7 @@ var ( patFileSDName = regexp.MustCompile(`^[^*]*(\*[^/]*)?\.(json|yml|yaml|JSON|YML|YAML)$`) patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`) patAuthLine = regexp.MustCompile(`((?:password|bearer_token|secret_key|client_secret):\s+)(".+"|'.+'|[^\s]+)`) + relabelTarget = regexp.MustCompile(`^(?:(?:[a-zA-Z_]|\$\{?[\w]+}?)+\w*)+$`) ) // Load parses the YAML input s into a Config. @@ -355,7 +356,6 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { gc.EvaluationInterval = DefaultGlobalConfig.EvaluationInterval } *c = *gc - return nil } @@ -986,7 +986,7 @@ type RelabelConfig struct { // Modulus to take of the hash of concatenated values from the source labels. Modulus uint64 `yaml:"modulus,omitempty"` // The label to which the resulting string is written in a replacement. - TargetLabel model.LabelName `yaml:"target_label,omitempty"` + TargetLabel string `yaml:"target_label,omitempty"` // Replacement is the regex replacement pattern to be used. Replacement string `yaml:"replacement,omitempty"` // Action is the action to be performed for the relabeling. @@ -1012,6 +1012,12 @@ func (c *RelabelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { if (c.Action == RelabelReplace || c.Action == RelabelHashMod) && c.TargetLabel == "" { return fmt.Errorf("relabel configuration for %s action requires 'target_label' value", c.Action) } + if c.Action == RelabelReplace && !relabelTarget.Match([]byte(c.TargetLabel)) { + return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action) + } + if c.Action == RelabelHashMod && !model.LabelNameRE.Match([]byte(c.TargetLabel)) { + return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action) + } return nil } diff --git a/config/testdata/relabel_target_label.good.yml b/config/testdata/relabel_target_label.good.yml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/relabel/relabel.go b/relabel/relabel.go index 6ce37296c3..ff59045965 100644 --- a/relabel/relabel.go +++ b/relabel/relabel.go @@ -61,20 +61,20 @@ func relabel(labels model.LabelSet, cfg *config.RelabelConfig) model.LabelSet { if indexes == nil { break } - target := model.LabelName(cfg.Regex.ExpandString([]byte{}, string(cfg.TargetLabel), val, indexes)) + target := model.LabelName(cfg.Regex.ExpandString([]byte{}, cfg.TargetLabel, val, indexes)) if !target.IsValid() { - delete(labels, cfg.TargetLabel) + delete(labels, model.LabelName(cfg.TargetLabel)) break } res := cfg.Regex.ExpandString([]byte{}, cfg.Replacement, val, indexes) if len(res) == 0 { - delete(labels, cfg.TargetLabel) + delete(labels, model.LabelName(cfg.TargetLabel)) break } labels[target] = model.LabelValue(res) case config.RelabelHashMod: mod := sum64(md5.Sum([]byte(val))) % cfg.Modulus - labels[cfg.TargetLabel] = model.LabelValue(fmt.Sprintf("%d", mod)) + labels[model.LabelName(cfg.TargetLabel)] = model.LabelValue(fmt.Sprintf("%d", mod)) case config.RelabelLabelMap: out := make(model.LabelSet, len(labels)) // Take a copy to avoid infinite loops. diff --git a/relabel/relabel_test.go b/relabel/relabel_test.go index 4004fa96ff..28fa6e901b 100644 --- a/relabel/relabel_test.go +++ b/relabel/relabel_test.go @@ -38,7 +38,7 @@ func TestRelabel(t *testing.T) { { SourceLabels: model.LabelNames{"a"}, Regex: config.MustNewRegexp("f(.*)"), - TargetLabel: model.LabelName("d"), + TargetLabel: "d", Separator: ";", Replacement: "ch${1}-ch${1}", Action: config.RelabelReplace, @@ -61,7 +61,7 @@ func TestRelabel(t *testing.T) { { SourceLabels: model.LabelNames{"a", "b"}, Regex: config.MustNewRegexp("f(.*);(.*)r"), - TargetLabel: model.LabelName("a"), + TargetLabel: "a", Separator: ";", Replacement: "b${1}${2}m", // boobam Action: config.RelabelReplace, @@ -69,7 +69,7 @@ func TestRelabel(t *testing.T) { { SourceLabels: model.LabelNames{"c", "a"}, Regex: config.MustNewRegexp("(b).*b(.*)ba(.*)"), - TargetLabel: model.LabelName("d"), + TargetLabel: "d", Separator: ";", Replacement: "$1$2$2$3", Action: config.RelabelReplace, @@ -94,7 +94,7 @@ func TestRelabel(t *testing.T) { }, { SourceLabels: model.LabelNames{"a"}, Regex: config.MustNewRegexp("f(.*)"), - TargetLabel: model.LabelName("d"), + TargetLabel: "d", Separator: ";", Replacement: "ch$1-ch$1", Action: config.RelabelReplace, @@ -124,7 +124,7 @@ func TestRelabel(t *testing.T) { { SourceLabels: model.LabelNames{"a"}, Regex: config.MustNewRegexp(".*(b).*"), - TargetLabel: model.LabelName("d"), + TargetLabel: "d", Separator: ";", Replacement: "$1", Action: config.RelabelReplace, @@ -202,7 +202,7 @@ func TestRelabel(t *testing.T) { { SourceLabels: model.LabelNames{"a"}, Regex: config.MustNewRegexp("f"), - TargetLabel: model.LabelName("b"), + TargetLabel: "b", Replacement: "bar", Action: config.RelabelReplace, }, @@ -220,7 +220,7 @@ func TestRelabel(t *testing.T) { relabel: []*config.RelabelConfig{ { SourceLabels: model.LabelNames{"c"}, - TargetLabel: model.LabelName("d"), + TargetLabel: "d", Separator: ";", Action: config.RelabelHashMod, Modulus: 1000, @@ -287,7 +287,7 @@ func TestRelabel(t *testing.T) { Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Action: config.RelabelReplace, Replacement: "${2}", - TargetLabel: model.LabelName("${1}"), + TargetLabel: "${1}", }, }, output: model.LabelSet{ @@ -305,7 +305,7 @@ func TestRelabel(t *testing.T) { Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Action: config.RelabelReplace, Replacement: "${3}", - TargetLabel: model.LabelName("${1}"), + TargetLabel: "${1}", }, }, output: model.LabelSet{ @@ -322,21 +322,21 @@ func TestRelabel(t *testing.T) { Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Action: config.RelabelReplace, Replacement: "${1}", - TargetLabel: model.LabelName("${3}"), + TargetLabel: "${3}", }, { SourceLabels: model.LabelNames{"a"}, Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Action: config.RelabelReplace, Replacement: "${1}", - TargetLabel: model.LabelName("0${3}"), + TargetLabel: "0${3}", }, { SourceLabels: model.LabelNames{"a"}, Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Action: config.RelabelReplace, Replacement: "${1}", - TargetLabel: model.LabelName("-${3}"), + TargetLabel: "-${3}", }, }, output: model.LabelSet{ @@ -353,21 +353,21 @@ func TestRelabel(t *testing.T) { Regex: config.MustNewRegexp("(?:.+,|^)path:(/[^,]+).*"), Action: config.RelabelReplace, Replacement: "${1}", - TargetLabel: model.LabelName("__metrics_path__"), + TargetLabel: "__metrics_path__", }, { SourceLabels: model.LabelNames{"__meta_sd_tags"}, Regex: config.MustNewRegexp("(?:.+,|^)job:([^,]+).*"), Action: config.RelabelReplace, Replacement: "${1}", - TargetLabel: model.LabelName("job"), + TargetLabel: "job", }, { SourceLabels: model.LabelNames{"__meta_sd_tags"}, Regex: config.MustNewRegexp("(?:.+,|^)label:([^=]+)=([^,]+).*"), Action: config.RelabelReplace, Replacement: "${2}", - TargetLabel: model.LabelName("${1}"), + TargetLabel: "${1}", }, }, output: model.LabelSet{ From 7a36af1c852777f265e125459639318f61717196 Mon Sep 17 00:00:00 2001 From: Matti Savolainen Date: Wed, 19 Oct 2016 00:42:49 +0300 Subject: [PATCH 04/19] add comment about interpolation --- config/config.go | 1 + 1 file changed, 1 insertion(+) diff --git a/config/config.go b/config/config.go index 5770f50217..efd0b1f584 100644 --- a/config/config.go +++ b/config/config.go @@ -988,6 +988,7 @@ type RelabelConfig struct { // Modulus to take of the hash of concatenated values from the source labels. Modulus uint64 `yaml:"modulus,omitempty"` // The label to which the resulting string is written in a replacement. + // regex interpolation is allowed for the replace action. TargetLabel string `yaml:"target_label,omitempty"` // Replacement is the regex replacement pattern to be used. Replacement string `yaml:"replacement,omitempty"` From 56907ba6e33646d6db9d2b76fbbf229879d8e4b4 Mon Sep 17 00:00:00 2001 From: Matti Savolainen Date: Wed, 19 Oct 2016 01:19:19 +0300 Subject: [PATCH 05/19] Add interpolation to good test config. Fix regex --- config/config.go | 2 +- config/config_test.go | 11 +++++++++++ config/testdata/conf.good.yml | 7 +++++++ config/testdata/relabel_target_label.good.yml | 0 4 files changed, 19 insertions(+), 1 deletion(-) delete mode 100644 config/testdata/relabel_target_label.good.yml diff --git a/config/config.go b/config/config.go index efd0b1f584..7b63c40617 100644 --- a/config/config.go +++ b/config/config.go @@ -31,7 +31,7 @@ var ( patFileSDName = regexp.MustCompile(`^[^*]*(\*[^/]*)?\.(json|yml|yaml|JSON|YML|YAML)$`) patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`) patAuthLine = regexp.MustCompile(`((?:password|bearer_token|secret_key|client_secret):\s+)(".+"|'.+'|[^\s]+)`) - relabelTarget = regexp.MustCompile(`^(?:(?:[a-zA-Z_]|\$\{?[\w]+}?)+\w*)+$`) + relabelTarget = regexp.MustCompile(`^(?:(?:[a-zA-Z_]|\$\{?[\w]+\}?)+\w*)+$`) ) // Load parses the YAML input s into a Config. diff --git a/config/config_test.go b/config/config_test.go index 38d885f335..a8cff5ddf2 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -207,6 +207,17 @@ var expectedConf = &Config{ Scheme: DefaultConsulSDConfig.Scheme, }, }, + + RelabelConfigs: []*RelabelConfig{ + { + SourceLabels: model.LabelNames{"__meta_sd_consul_tags"}, + Regex: MustNewRegexp("label:([^=]+)=([^,]+)"), + Separator: ",", + TargetLabel: "${1}", + Replacement: "${2}", + Action: RelabelReplace, + }, + }, }, { JobName: "service-z", diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index a18deeddbb..ab38a56fac 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -104,6 +104,13 @@ scrape_configs: - server: 'localhost:1234' services: ['nginx', 'cache', 'mysql'] + relabel_configs: + - source_labels: [__meta_sd_consul_tags] + separator: ',' + regex: label:([^=]+)=([^,]+) + target_label: ${1} + replacement: ${2} + - job_name: service-z tls_config: diff --git a/config/testdata/relabel_target_label.good.yml b/config/testdata/relabel_target_label.good.yml deleted file mode 100644 index e69de29bb2..0000000000 From 163d5a897707066add272e57b0b55c4b862d6d12 Mon Sep 17 00:00:00 2001 From: Dominik Schulz Date: Wed, 19 Oct 2016 11:20:00 +0200 Subject: [PATCH 06/19] Add EC2 SD metrics (#2095) * Add EC2 SD metrics * Address review comments --- retrieval/discovery/ec2.go | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/retrieval/discovery/ec2.go b/retrieval/discovery/ec2.go index 1c376b5e3d..4050f6f419 100644 --- a/retrieval/discovery/ec2.go +++ b/retrieval/discovery/ec2.go @@ -22,6 +22,7 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/defaults" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/common/model" "golang.org/x/net/context" @@ -45,6 +46,26 @@ const ( subnetSeparator = "," ) +var ( + ec2SDScrapeFailuresCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "ec2_sd_scape_failures_total", + Help: "The number of EC2-SD scrape failures.", + }) + ec2SDScrapeDuration = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "ec2_sd_scrape_duration_seconds", + Help: "The duration of a EC2-SD scrape in seconds.", + }) +) + +func init() { + prometheus.MustRegister(ec2SDScrapeFailuresCount) + prometheus.MustRegister(ec2SDScrapeDuration) +} + // EC2Discovery periodically performs EC2-SD requests. It implements // the TargetProvider interface. type EC2Discovery struct { @@ -99,12 +120,20 @@ func (ed *EC2Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup } } -func (ed *EC2Discovery) refresh() (*config.TargetGroup, error) { +func (ed *EC2Discovery) refresh() (tg *config.TargetGroup, err error) { + t0 := time.Now() + defer func() { + ec2SDScrapeDuration.Observe(time.Since(t0).Seconds()) + if err != nil { + ec2SDScrapeFailuresCount.Inc() + } + }() + ec2s := ec2.New(ed.aws) - tg := &config.TargetGroup{ + tg = &config.TargetGroup{ Source: *ed.aws.Region, } - if err := ec2s.DescribeInstancesPages(nil, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool { + if err = ec2s.DescribeInstancesPages(nil, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool { for _, r := range p.Reservations { for _, inst := range r.Instances { if inst.PrivateIpAddress == nil { From f867c1fd58c6ef0d8061d71089193de7b9598c9f Mon Sep 17 00:00:00 2001 From: Matti Savolainen Date: Wed, 19 Oct 2016 13:31:55 +0300 Subject: [PATCH 07/19] formating and text fixes, adjust regexp --- config/config.go | 6 +++--- config/testdata/conf.good.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/config.go b/config/config.go index 7b63c40617..2a99187121 100644 --- a/config/config.go +++ b/config/config.go @@ -31,7 +31,7 @@ var ( patFileSDName = regexp.MustCompile(`^[^*]*(\*[^/]*)?\.(json|yml|yaml|JSON|YML|YAML)$`) patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`) patAuthLine = regexp.MustCompile(`((?:password|bearer_token|secret_key|client_secret):\s+)(".+"|'.+'|[^\s]+)`) - relabelTarget = regexp.MustCompile(`^(?:(?:[a-zA-Z_]|\$\{?[\w]+\}?)+\w*)+$`) + relabelTarget = regexp.MustCompile(`^(?:(?:[a-zA-Z_]|\$(?:\{\w+\}|\w+))+\w*)+$`) ) // Load parses the YAML input s into a Config. @@ -987,8 +987,8 @@ type RelabelConfig struct { Regex Regexp `yaml:"regex,omitempty"` // Modulus to take of the hash of concatenated values from the source labels. Modulus uint64 `yaml:"modulus,omitempty"` - // The label to which the resulting string is written in a replacement. - // regex interpolation is allowed for the replace action. + // TargetLabel is the label to which the resulting string is written in a replacement. + // Regexp interpolation is allowed for the replace action. TargetLabel string `yaml:"target_label,omitempty"` // Replacement is the regex replacement pattern to be used. Replacement string `yaml:"replacement,omitempty"` diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index ab38a56fac..a075a3dfe9 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -106,7 +106,7 @@ scrape_configs: relabel_configs: - source_labels: [__meta_sd_consul_tags] - separator: ',' + separator: ',' regex: label:([^=]+)=([^,]+) target_label: ${1} replacement: ${2} From ec6524ce747f2424daae04a7f46ed98461d24bb2 Mon Sep 17 00:00:00 2001 From: Matti Savolainen Date: Wed, 19 Oct 2016 13:32:42 +0300 Subject: [PATCH 08/19] test the labelTarget regex to make sure it properly validates pre-interpolated label names. --- config/config_test.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/config/config_test.go b/config/config_test.go index a8cff5ddf2..4bbb3e9b43 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -512,6 +512,34 @@ func TestEmptyGlobalBlock(t *testing.T) { } } +func TestTargetLabelValidity(t *testing.T) { + tests := []struct { + str string + valid bool + }{ + {"-label", false}, + {"label", true}, + {"label${1}", true}, + {"${1}label", true}, + {"${1}", true}, + {"${1}label", true}, + {"${", false}, + {"$", false}, + {"${}", false}, + {"foo${", false}, + {"$1", true}, + {"asd$2asd", true}, + {"-foo${1}bar-", false}, + {"_${1}_", true}, + {"foo${bar}foo", true}, + } + for _, test := range tests { + if relabelTarget.Match([]byte(test.str)) != test.valid { + t.Fatalf("Expected %q to be %v", test.str, test.valid) + } + } +} + func kubernetesSDHostURL() URL { tURL, _ := url.Parse("https://localhost:1234") return URL{URL: tURL} From aabf4a419bfbc95112b437b7ba0be426c83d8a09 Mon Sep 17 00:00:00 2001 From: Matti Savolainen Date: Wed, 19 Oct 2016 16:30:52 +0300 Subject: [PATCH 09/19] use LabelNam.IsValid() instead of LabelNameRE and MatchString instead of Match --- config/config.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/config.go b/config/config.go index 2a99187121..61775f6e7a 100644 --- a/config/config.go +++ b/config/config.go @@ -1015,10 +1015,10 @@ func (c *RelabelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { if (c.Action == RelabelReplace || c.Action == RelabelHashMod) && c.TargetLabel == "" { return fmt.Errorf("relabel configuration for %s action requires 'target_label' value", c.Action) } - if c.Action == RelabelReplace && !relabelTarget.Match([]byte(c.TargetLabel)) { + if c.Action == RelabelReplace && !relabelTarget.MatchString(c.TargetLabel) { return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action) } - if c.Action == RelabelHashMod && !model.LabelNameRE.Match([]byte(c.TargetLabel)) { + if c.Action == RelabelHashMod && !model.LabelName(c.TargetLabel).IsValid() { return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action) } return nil From c4b4a58e3ac3ffd0b579ad8d9086431f0a2e1278 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Wed, 19 Oct 2016 18:38:26 +0100 Subject: [PATCH 10/19] Correctly handle on() in alerts. (#2096) Fixes #2082 --- promql/printer.go | 2 +- promql/printer_test.go | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/promql/printer.go b/promql/printer.go index 21678a7d52..79141e06ef 100644 --- a/promql/printer.go +++ b/promql/printer.go @@ -163,7 +163,7 @@ func (node *BinaryExpr) String() string { matching := "" vm := node.VectorMatching - if vm != nil && len(vm.MatchingLabels) > 0 { + if vm != nil && (len(vm.MatchingLabels) > 0 || vm.On) { if vm.On { matching = fmt.Sprintf(" ON(%s)", vm.MatchingLabels) } else { diff --git a/promql/printer_test.go b/promql/printer_test.go index 52e53245bc..1715b54fcc 100644 --- a/promql/printer_test.go +++ b/promql/printer_test.go @@ -59,6 +59,10 @@ func TestExprString(t *testing.T) { inputs := []struct { in, out string }{ + { + in: `sum(task:errors:rate10s{job="s"}) BY ()`, + out: `sum(task:errors:rate10s{job="s"})`, + }, { in: `sum(task:errors:rate10s{job="s"}) BY (code)`, }, @@ -77,6 +81,9 @@ func TestExprString(t *testing.T) { { in: `count_values("value", task:errors:rate10s{job="s"})`, }, + { + in: `a - ON() c`, + }, { in: `a - ON(b) c`, }, @@ -92,6 +99,10 @@ func TestExprString(t *testing.T) { { in: `a - IGNORING(b) c`, }, + { + in: `a - IGNORING() c`, + out: `a - c`, + }, { in: `up > BOOL 0`, }, From 9eefce6811b4137f322869db565d71bb69d92257 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Wed, 19 Oct 2016 21:39:19 +0200 Subject: [PATCH 11/19] Add CNCF code of conduct as the Prometheus code of conduct --- code-of-conduct.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 code-of-conduct.md diff --git a/code-of-conduct.md b/code-of-conduct.md new file mode 100644 index 0000000000..9a1aff4127 --- /dev/null +++ b/code-of-conduct.md @@ -0,0 +1,3 @@ +## Prometheus Community Code of Conduct + +Prometheus follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/master/code-of-conduct.md). From 00e486a05b2e8ac4b73c4fc99e317a12eeab8693 Mon Sep 17 00:00:00 2001 From: Dominik Schulz Date: Thu, 20 Oct 2016 09:23:50 +0200 Subject: [PATCH 12/19] Add Azure-SD metrics (#2099) --- retrieval/discovery/azure.go | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/retrieval/discovery/azure.go b/retrieval/discovery/azure.go index 0f1334eb8f..0ffae172a2 100644 --- a/retrieval/discovery/azure.go +++ b/retrieval/discovery/azure.go @@ -23,6 +23,7 @@ import ( "github.com/Azure/azure-sdk-for-go/arm/network" "github.com/Azure/go-autorest/autorest/azure" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/common/model" "golang.org/x/net/context" @@ -41,6 +42,26 @@ const ( azureLabelMachineTag = azureLabel + "machine_tag_" ) +var ( + azureSDScrapeFailuresCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "azure_sd_scrape_failures_total", + Help: "Number of Azure-SD scrape failures.", + }) + azureSDScrapeDuration = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "azure_sd_scrape_duration_seconds", + Help: "The duration of a Azure-SD scrape in seconds.", + }) +) + +func init() { + prometheus.MustRegister(azureSDScrapeDuration) + prometheus.MustRegister(azureSDScrapeFailuresCount) +} + // AzureDiscovery periodically performs Azure-SD requests. It implements // the TargetProvider interface. type AzureDiscovery struct { @@ -135,8 +156,15 @@ func newAzureResourceFromID(id string) (azureResource, error) { }, nil } -func (ad *AzureDiscovery) refresh() (*config.TargetGroup, error) { - tg := &config.TargetGroup{} +func (ad *AzureDiscovery) refresh() (tg *config.TargetGroup, err error) { + t0 := time.Now() + defer func() { + azureSDScrapeDuration.Observe(time.Since(t0).Seconds()) + if err != nil { + azureSDScrapeFailuresCount.Inc() + } + }() + tg = &config.TargetGroup{} client, err := createAzureClient(*ad.cfg) if err != nil { return tg, fmt.Errorf("could not create Azure client: %s", err) From 255a8c8b4cdcb2b55b9b785bd41cfb7169075f85 Mon Sep 17 00:00:00 2001 From: Dominik Schulz Date: Thu, 20 Oct 2016 10:01:00 +0200 Subject: [PATCH 13/19] Fix small typo in EC2 SD metric name (#2100) --- retrieval/discovery/ec2.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retrieval/discovery/ec2.go b/retrieval/discovery/ec2.go index 4050f6f419..68c1dd886d 100644 --- a/retrieval/discovery/ec2.go +++ b/retrieval/discovery/ec2.go @@ -50,7 +50,7 @@ var ( ec2SDScrapeFailuresCount = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: namespace, - Name: "ec2_sd_scape_failures_total", + Name: "ec2_sd_scrape_failures_total", Help: "The number of EC2-SD scrape failures.", }) ec2SDScrapeDuration = prometheus.NewSummary( From 26a0fa9f98508b494ceeb1c8a33af2f740f7e8ce Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Tue, 18 Oct 2016 15:12:32 +0200 Subject: [PATCH 14/19] Cut v1.3.0-beta.0 --- CHANGELOG.md | 13 +++++++++++++ VERSION | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e399132cf..beeca084ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +## 1.3.0-beta.0 / 2016-10-18 + +This is a breaking change to the Kubernetes service discovery. + +* [CHANGE] Rework Kubernetes SD. +* [FEATURE] Add support for interpolating `target_label`. +* [FEATURE] Add GCE metadata as Prometheus meta labels. +* [ENHANCEMENT] Add EC2 SD metrics. +* [ENHANCEMENT] Add Azure SD metrics. +* [ENHANCEMENT] Add fuzzy search to `/graph` textarea. +* [ENHANCEMENT] Always show instance labels on target page. +* [BUGFIX] Correctly handle on() in alerts. + ## 1.2.1 / 2016-10-10 * [BUGFIX] Count chunk evictions properly so that the server doesn't diff --git a/VERSION b/VERSION index 6085e94650..6989533d78 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.2.1 +1.3.0-beta.0 From 0c692276160a1a345ad837a2834dbe188dbc3ca7 Mon Sep 17 00:00:00 2001 From: Dominik Schulz Date: Fri, 21 Oct 2016 09:59:43 +0200 Subject: [PATCH 15/19] Add Consul-SD metrics (#2097) * Add Consul-SD metrics * Remove unnecessary metric and add labels to summary. * Do not stutter --- retrieval/discovery/consul/consul.go | 37 ++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/retrieval/discovery/consul/consul.go b/retrieval/discovery/consul/consul.go index 75d81704b6..58b2da806c 100644 --- a/retrieval/discovery/consul/consul.go +++ b/retrieval/discovery/consul/consul.go @@ -21,6 +21,7 @@ import ( "time" consul "github.com/hashicorp/consul/api" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/common/model" "golang.org/x/net/context" @@ -48,8 +49,37 @@ const ( datacenterLabel = model.MetaLabelPrefix + "consul_dc" // serviceIDLabel is the name of the label containing the service ID. serviceIDLabel = model.MetaLabelPrefix + "consul_service_id" + + // Constants for instrumentation. + namespace = "prometheus" ) +var ( + rpcFailuresCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_consul_rpc_failures_total", + Help: "The number of Consul RPC call failures.", + }) + rpcDuration = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "sd_consul_rpc_duration_seconds", + Help: "The duration of a Consul RPC call in seconds.", + }, + []string{"endpoint", "call"}, + ) +) + +func init() { + prometheus.MustRegister(rpcFailuresCount) + prometheus.MustRegister(rpcDuration) + + // Initialize metric vectors. + rpcDuration.WithLabelValues("catalog", "service") + rpcDuration.WithLabelValues("catalog", "services") +} + // Discovery retrieves target information from a Consul server // and updates them via watches. type Discovery struct { @@ -110,10 +140,12 @@ func (cd *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) { var lastIndex uint64 for { catalog := cd.client.Catalog() + t0 := time.Now() srvs, meta, err := catalog.Services(&consul.QueryOptions{ WaitIndex: lastIndex, WaitTime: watchTimeout, }) + rpcDuration.WithLabelValues("catalog", "services").Observe(time.Since(t0).Seconds()) // We have to check the context at least once. The checks during channel sends // do not guarantee that. @@ -125,6 +157,7 @@ func (cd *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) { if err != nil { log.Errorf("Error refreshing service list: %s", err) + rpcFailuresCount.Inc() time.Sleep(retryInterval) continue } @@ -202,10 +235,13 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*config.TargetG lastIndex := uint64(0) for { + t0 := time.Now() nodes, meta, err := catalog.Service(srv.name, "", &consul.QueryOptions{ WaitIndex: lastIndex, WaitTime: watchTimeout, }) + rpcDuration.WithLabelValues("catalog", "service").Observe(time.Since(t0).Seconds()) + // Check the context before potentially falling in a continue-loop. select { case <-ctx.Done(): @@ -216,6 +252,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*config.TargetG if err != nil { log.Errorf("Error refreshing service %s: %s", srv.name, err) + rpcFailuresCount.Inc() time.Sleep(retryInterval) continue } From 552ab61fa1f1598408ca891d39ac957f1c61d9d4 Mon Sep 17 00:00:00 2001 From: Dominik Schulz Date: Fri, 21 Oct 2016 10:18:28 +0200 Subject: [PATCH 16/19] Change SD metric names to make logical grouping more visible. (#2102) --- retrieval/discovery/azure.go | 4 ++-- retrieval/discovery/dns/dns.go | 4 ++-- retrieval/discovery/ec2.go | 4 ++-- retrieval/discovery/gce.go | 12 ++---------- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/retrieval/discovery/azure.go b/retrieval/discovery/azure.go index 0ffae172a2..a35f489b2e 100644 --- a/retrieval/discovery/azure.go +++ b/retrieval/discovery/azure.go @@ -46,13 +46,13 @@ var ( azureSDScrapeFailuresCount = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: namespace, - Name: "azure_sd_scrape_failures_total", + Name: "sd_azure_scrape_failures_total", Help: "Number of Azure-SD scrape failures.", }) azureSDScrapeDuration = prometheus.NewSummary( prometheus.SummaryOpts{ Namespace: namespace, - Name: "azure_sd_scrape_duration_seconds", + Name: "sd_azure_scrape_duration_seconds", Help: "The duration of a Azure-SD scrape in seconds.", }) ) diff --git a/retrieval/discovery/dns/dns.go b/retrieval/discovery/dns/dns.go index 960507a0c3..0869354ba5 100644 --- a/retrieval/discovery/dns/dns.go +++ b/retrieval/discovery/dns/dns.go @@ -42,13 +42,13 @@ var ( dnsSDLookupsCount = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: namespace, - Name: "dns_sd_lookups_total", + Name: "sd_dns_lookups_total", Help: "The number of DNS-SD lookups.", }) dnsSDLookupFailuresCount = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: namespace, - Name: "dns_sd_lookup_failures_total", + Name: "sd_dns_lookup_failures_total", Help: "The number of DNS-SD lookup failures.", }) ) diff --git a/retrieval/discovery/ec2.go b/retrieval/discovery/ec2.go index 68c1dd886d..63635e1f23 100644 --- a/retrieval/discovery/ec2.go +++ b/retrieval/discovery/ec2.go @@ -50,13 +50,13 @@ var ( ec2SDScrapeFailuresCount = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: namespace, - Name: "ec2_sd_scrape_failures_total", + Name: "sd_ec2_scrape_failures_total", Help: "The number of EC2-SD scrape failures.", }) ec2SDScrapeDuration = prometheus.NewSummary( prometheus.SummaryOpts{ Namespace: namespace, - Name: "ec2_sd_scrape_duration_seconds", + Name: "sd_ec2_scrape_duration_seconds", Help: "The duration of a EC2-SD scrape in seconds.", }) ) diff --git a/retrieval/discovery/gce.go b/retrieval/discovery/gce.go index 16359ae316..23d19d1421 100644 --- a/retrieval/discovery/gce.go +++ b/retrieval/discovery/gce.go @@ -50,28 +50,21 @@ const ( ) var ( - gceSDScrapesCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Namespace: namespace, - Name: "gce_sd_scrapes_total", - Help: "The number of GCE-SD scrapes.", - }) gceSDScrapeFailuresCount = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: namespace, - Name: "gce_sd_scrape_failures_total", + Name: "sd_gce_scrape_failures_total", Help: "The number of GCE-SD scrape failures.", }) gceSDScrapeDuration = prometheus.NewSummary( prometheus.SummaryOpts{ Namespace: namespace, - Name: "gce_sd_scrape_duration", + Name: "sd_gce_scrape_duration", Help: "The duration of a GCE-SD scrape in seconds.", }) ) func init() { - prometheus.MustRegister(gceSDScrapesCount) prometheus.MustRegister(gceSDScrapeFailuresCount) prometheus.MustRegister(gceSDScrapeDuration) } @@ -147,7 +140,6 @@ func (gd *GCEDiscovery) refresh() (tg *config.TargetGroup, err error) { t0 := time.Now() defer func() { gceSDScrapeDuration.Observe(time.Since(t0).Seconds()) - gceSDScrapesCount.Inc() if err != nil { gceSDScrapeFailuresCount.Inc() } From 36de16390027b6615bc6855e8baf6811a763baee Mon Sep 17 00:00:00 2001 From: Dominik Schulz Date: Fri, 21 Oct 2016 12:12:19 +0200 Subject: [PATCH 17/19] Add File-SD metrics (#2103) * Add File-SD metrics * Count read errors, not scan errors. --- retrieval/discovery/file.go | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/retrieval/discovery/file.go b/retrieval/discovery/file.go index 40034944ad..ba7b9be916 100644 --- a/retrieval/discovery/file.go +++ b/retrieval/discovery/file.go @@ -21,6 +21,7 @@ import ( "strings" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/common/model" "golang.org/x/net/context" @@ -32,6 +33,26 @@ import ( const fileSDFilepathLabel = model.MetaLabelPrefix + "filepath" +var ( + fileSDScanDuration = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "sd_file_scan_duration_seconds", + Help: "The duration of the File-SD scan in seconds.", + }) + fileSDReadErrorsCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_file_read_errors_total", + Help: "The number of File-SD read errors.", + }) +) + +func init() { + prometheus.MustRegister(fileSDScanDuration) + prometheus.MustRegister(fileSDReadErrorsCount) +} + // FileDiscovery provides service discovery functionality based // on files that contain target groups in JSON or YAML format. Refreshing // happens using file watches and periodic refreshes. @@ -173,10 +194,16 @@ func (fd *FileDiscovery) stop() { // refresh reads all files matching the discovery's patterns and sends the respective // updated target groups through the channel. func (fd *FileDiscovery) refresh(ch chan<- []*config.TargetGroup) { + t0 := time.Now() + defer func() { + fileSDScanDuration.Observe(time.Since(t0).Seconds()) + }() + ref := map[string]int{} for _, p := range fd.listFiles() { tgroups, err := readFile(p) if err != nil { + fileSDReadErrorsCount.Inc() log.Errorf("Error reading file %q: %s", p, err) // Prevent deletion down below. ref[p] = fd.lastRefresh[p] From 296644adeb9a73b1439ee7858a26608668ca3bd6 Mon Sep 17 00:00:00 2001 From: Mitsuhiro Tanda Date: Fri, 21 Oct 2016 19:13:47 +0900 Subject: [PATCH 18/19] Expose ec2_instance_type (#2107) --- retrieval/discovery/ec2.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/retrieval/discovery/ec2.go b/retrieval/discovery/ec2.go index 63635e1f23..0d4d0fc2f9 100644 --- a/retrieval/discovery/ec2.go +++ b/retrieval/discovery/ec2.go @@ -37,6 +37,7 @@ const ( ec2LabelAZ = ec2Label + "availability_zone" ec2LabelInstanceID = ec2Label + "instance_id" ec2LabelInstanceState = ec2Label + "instance_state" + ec2LabelInstanceType = ec2Label + "instance_type" ec2LabelPublicDNS = ec2Label + "public_dns_name" ec2LabelPublicIP = ec2Label + "public_ip" ec2LabelPrivateIP = ec2Label + "private_ip" @@ -153,6 +154,7 @@ func (ed *EC2Discovery) refresh() (tg *config.TargetGroup, err error) { labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone) labels[ec2LabelInstanceState] = model.LabelValue(*inst.State.Name) + labels[ec2LabelInstanceType] = model.LabelValue(*inst.InstanceType) if inst.VpcId != nil { labels[ec2LabelVPCID] = model.LabelValue(*inst.VpcId) From f002fe186abd04ecd9cd8961c24bc039f134ea13 Mon Sep 17 00:00:00 2001 From: Dominik Schulz Date: Fri, 21 Oct 2016 12:14:53 +0200 Subject: [PATCH 19/19] Add Marathon-SD metrics. (#2106) --- retrieval/discovery/marathon/marathon.go | 34 +++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/retrieval/discovery/marathon/marathon.go b/retrieval/discovery/marathon/marathon.go index d6bf7b4449..a5961516d8 100644 --- a/retrieval/discovery/marathon/marathon.go +++ b/retrieval/discovery/marathon/marathon.go @@ -24,6 +24,7 @@ import ( "golang.org/x/net/context" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" @@ -41,8 +42,31 @@ const ( imageLabel model.LabelName = metaLabelPrefix + "image" // taskLabel contains the mesos task name of the app instance. taskLabel model.LabelName = metaLabelPrefix + "task" + + // Constants for instrumentation. + namespace = "prometheus" ) +var ( + scrapeFailuresCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_marathon_scrape_failures_total", + Help: "The number of Marathon-SD scrape failures.", + }) + scrapeDuration = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "sd_marathon_scrape_duration_seconds", + Help: "The duration of a Marathon-SD scrape in seconds.", + }) +) + +func init() { + prometheus.MustRegister(scrapeFailuresCount) + prometheus.MustRegister(scrapeDuration) +} + const appListPath string = "/v2/apps/?embed=apps.tasks" // Discovery provides service discovery based on a Marathon instance. @@ -70,7 +94,15 @@ func (md *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) { } } -func (md *Discovery) updateServices(ctx context.Context, ch chan<- []*config.TargetGroup) error { +func (md *Discovery) updateServices(ctx context.Context, ch chan<- []*config.TargetGroup) (err error) { + t0 := time.Now() + defer func() { + scrapeDuration.Observe(time.Since(t0).Seconds()) + if err != nil { + scrapeFailuresCount.Inc() + } + }() + targetMap, err := md.fetchTargetGroups() if err != nil { return err