From 96bdd68589bc7aaa3b3766498f81527bc37904a2 Mon Sep 17 00:00:00 2001 From: bwplotka Date: Thu, 15 Jan 2026 10:01:28 +0000 Subject: [PATCH] refactor(appenderV2): add AppendableV2 support for scrape Signed-off-by: bwplotka --- scrape/helpers_test.go | 13 +- scrape/manager.go | 5 +- scrape/scrape.go | 63 +- scrape/scrape_append_v2.go | 2022 ++----------------------------- scrape/scrape_append_v2_test.go | 1798 ++++----------------------- scrape/scrape_test.go | 52 +- scrape/target.go | 99 ++ util/teststorage/appender.go | 12 +- 8 files changed, 507 insertions(+), 3557 deletions(-) diff --git a/scrape/helpers_test.go b/scrape/helpers_test.go index dd5179b360..cd5eabb3af 100644 --- a/scrape/helpers_test.go +++ b/scrape/helpers_test.go @@ -50,13 +50,19 @@ func withAppendable(appendable storage.Appendable) func(sl *scrapeLoop) { } } +func withAppendableV2(appendableV2 storage.AppendableV2) func(sl *scrapeLoop) { + return func(sl *scrapeLoop) { + sl.appendableV2 = appendableV2 + } +} + // newTestScrapeLoop is the initial scrape loop for all tests. // It returns scrapeLoop and mock scraper you can customize. // // It's recommended to use withXYZ functions for simple option customizations, e.g: // // appTest := teststorage.NewAppendable() -// sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) +// sl, _ := newTestScrapeLoop(t, withAppendableV2(appTest)) // // However, when changing more than one scrapeLoop options it's more readable to have one explicit opt function: // @@ -64,7 +70,7 @@ func withAppendable(appendable storage.Appendable) func(sl *scrapeLoop) { // appTest := teststorage.NewAppendable() // sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { // sl.ctx = ctx -// sl.appendable = appTest +// sl.appendableV2 = appTest // // Since we're writing samples directly below we need to provide a protocol fallback. 
// sl.fallbackScrapeProtocol = "text/plain" // }) @@ -84,8 +90,6 @@ func newTestScrapeLoop(t testing.TB, opts ...func(sl *scrapeLoop)) (_ *scrapeLoo timeout: 1 * time.Hour, sampleMutator: nopMutator, reportSampleMutator: nopMutator, - - appendable: teststorage.NewAppendable(), buffers: pool.New(1e3, 1e6, 3, func(sz int) any { return make([]byte, 0, sz) }), metrics: metrics, maxSchema: histogram.ExponentialSchemaMax, @@ -98,6 +102,7 @@ func newTestScrapeLoop(t testing.TB, opts ...func(sl *scrapeLoop)) (_ *scrapeLoo for _, o := range opts { o(sl) } + // Validate user opts for convenience. require.Nil(t, sl.parentCtx, "newTestScrapeLoop does not support injecting non-nil parent context") require.Nil(t, sl.appenderCtx, "newTestScrapeLoop does not support injecting non-nil appender context") diff --git a/scrape/manager.go b/scrape/manager.go index a2297aa824..ef226ad507 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -114,7 +114,8 @@ type Manager struct { opts *Options logger *slog.Logger - appendable storage.Appendable + appendable storage.Appendable + appendableV2 storage.AppendableV2 graceShut chan struct{} @@ -196,7 +197,7 @@ func (m *Manager) reload() { continue } m.metrics.targetScrapePools.Inc() - sp, err := newScrapePool(scrapeConfig, m.appendable, m.offsetSeed, m.logger.With("scrape_pool", setName), m.buffers, m.opts, m.metrics) + sp, err := newScrapePool(scrapeConfig, m.appendable, m.appendableV2, m.offsetSeed, m.logger.With("scrape_pool", setName), m.buffers, m.opts, m.metrics) if err != nil { m.metrics.targetScrapePoolsFailed.Inc() m.logger.Error("error creating new scrape pool", "err", err, "scrape_pool", setName) diff --git a/scrape/scrape.go b/scrape/scrape.go index 58df858b3d..2d298c2ac5 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -82,11 +82,12 @@ type FailureLogger interface { // scrapePool manages scrapes for sets of targets. 
type scrapePool struct { - appendable storage.Appendable - logger *slog.Logger - ctx context.Context - cancel context.CancelFunc - options *Options + appendable storage.Appendable + appendableV2 storage.AppendableV2 + logger *slog.Logger + ctx context.Context + cancel context.CancelFunc + options *Options // mtx must not be taken after targetMtx. mtx sync.Mutex @@ -139,6 +140,7 @@ type scrapeLoopAppendAdapter interface { func newScrapePool( cfg *config.ScrapeConfig, appendable storage.Appendable, + appendableV2 storage.AppendableV2, offsetSeed uint64, logger *slog.Logger, buffers *pool.Pool, @@ -171,6 +173,7 @@ func newScrapePool( ctx, cancel := context.WithCancel(context.Background()) sp := &scrapePool{ appendable: appendable, + appendableV2: appendableV2, logger: logger, ctx: ctx, cancel: cancel, @@ -842,11 +845,12 @@ type scrapeLoop struct { scraper scraper // Static params per scrapePool. - appendable storage.Appendable - buffers *pool.Pool - offsetSeed uint64 - symbolTable *labels.SymbolTable - metrics *scrapeMetrics + appendable storage.Appendable + appendableV2 storage.AppendableV2 + buffers *pool.Pool + offsetSeed uint64 + symbolTable *labels.SymbolTable + metrics *scrapeMetrics // Options from config.ScrapeConfig. sampleLimit int @@ -1190,11 +1194,12 @@ func newScrapeLoop(opts scrapeLoopOptions) *scrapeLoop { scraper: opts.scraper, // Static params per scrapePool. - appendable: opts.sp.appendable, - buffers: opts.sp.buffers, - offsetSeed: opts.sp.offsetSeed, - symbolTable: opts.sp.symbolTable, - metrics: opts.sp.metrics, + appendable: opts.sp.appendable, + appendableV2: opts.sp.appendableV2, + buffers: opts.sp.buffers, + offsetSeed: opts.sp.offsetSeed, + symbolTable: opts.sp.symbolTable, + metrics: opts.sp.metrics, // config.ScrapeConfig. 
sampleLimit: int(opts.sp.config.SampleLimit), @@ -1303,7 +1308,9 @@ mainLoop: } func (sl *scrapeLoop) appender() scrapeLoopAppendAdapter { - // NOTE(bwplotka): Add AppenderV2 implementation, see https://github.com/prometheus/prometheus/issues/17632. + if sl.appendableV2 != nil { + return &scrapeLoopAppenderV2{scrapeLoop: sl, AppenderV2: sl.appendableV2.AppenderV2(sl.appenderCtx)} + } return &scrapeLoopAppender{scrapeLoop: sl, Appender: sl.appendable.Appender(sl.appenderCtx)} } @@ -1637,7 +1644,7 @@ loop: break } switch et { - // TODO(bwplotka): Consider changing parser to give metadata at once instead of type, help and unit in separation, ideally on `Series()/Histogram() + // TODO(bwplotka): Consider changing parser to give metadata at once instead of type, help and unit in separation, ideally on `Series()/Histogram()` // otherwise we can expose metadata without series on metadata API. case textparse.EntryType: // TODO(bwplotka): Build meta entry directly instead of locking and updating the map. This will @@ -1753,7 +1760,7 @@ loop: } } - sampleAdded, err = sl.checkAddError(met, err, &sampleLimitErr, &bucketLimitErr, &appErrs) + sampleAdded, err = sl.checkAddError(met, nil, err, &sampleLimitErr, &bucketLimitErr, &appErrs) if err != nil { if !errors.Is(err, storage.ErrNotFound) { sl.l.Debug("Unexpected error", "series", string(met), "err", err) @@ -1942,7 +1949,8 @@ func isSeriesPartOfFamily(mName string, mfName []byte, typ model.MetricType) boo // during normal operation (e.g., accidental cardinality explosion, sudden traffic spikes). // Current case ordering prevents exercising other cases when limits are exceeded. // Remaining error cases typically occur only a few times, often during initial setup. 
-func (sl *scrapeLoop) checkAddError(met []byte, err error, sampleLimitErr, bucketLimitErr *error, appErrs *appendErrors) (sampleAdded bool, _ error) { +func (sl *scrapeLoop) checkAddError(met []byte, exemplars []exemplar.Exemplar, err error, sampleLimitErr, bucketLimitErr *error, appErrs *appendErrors) (sampleAdded bool, _ error) { + var pErr *storage.AppendPartialError switch { case err == nil: return true, nil @@ -1973,6 +1981,23 @@ func (sl *scrapeLoop) checkAddError(met []byte, err error, sampleLimitErr, bucke return false, nil case errors.Is(err, storage.ErrNotFound): return false, storage.ErrNotFound + case errors.As(err, &pErr): + outOfOrderExemplars := 0 + for _, e := range pErr.ExemplarErrors { + if errors.Is(e, storage.ErrOutOfOrderExemplar) { + outOfOrderExemplars++ + } + // Since exemplar storage is still experimental, we don't fail or check other errors. + // Debug log is emitted in TSDB already. + } + if outOfOrderExemplars > 0 && outOfOrderExemplars == len(exemplars) { + // Only report out of order exemplars if all are out of order, otherwise this was a partial update + // to some existing set of exemplars. 
+ appErrs.numExemplarOutOfOrder += outOfOrderExemplars + sl.l.Debug("Out of order exemplars", "count", outOfOrderExemplars, "latest", fmt.Sprintf("%+v", exemplars[len(exemplars)-1])) + sl.metrics.targetScrapeExemplarOutOfOrder.Add(float64(outOfOrderExemplars)) + } + return true, nil default: return false, err } diff --git a/scrape/scrape_append_v2.go b/scrape/scrape_append_v2.go index 58df858b3d..8b33b2397c 100644 --- a/scrape/scrape_append_v2.go +++ b/scrape/scrape_append_v2.go @@ -14,1534 +14,60 @@ package scrape import ( - "bufio" - "bytes" - "context" "errors" "fmt" "io" - "log/slog" "math" - "net/http" - "net/http/httptrace" - "reflect" "slices" - "strconv" - "strings" - "sync" "time" - "unsafe" - "github.com/klauspost/compress/gzip" - config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" - "github.com/prometheus/common/promslog" - "github.com/prometheus/common/version" - "go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace" - "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" - "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/trace" - "go.uber.org/atomic" - "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/metadata" - "github.com/prometheus/prometheus/model/relabel" "github.com/prometheus/prometheus/model/textparse" "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/storage" - "github.com/prometheus/prometheus/util/logging" - "github.com/prometheus/prometheus/util/namevalidationutil" - "github.com/prometheus/prometheus/util/pool" ) -var aOptionRejectEarlyOOO = storage.AppendOptions{DiscardOutOfOrder: true} - -// ScrapeTimestampTolerance is the tolerance for scrape 
appends timestamps -// alignment, to enable better compression at the TSDB level. -// See https://github.com/prometheus/prometheus/issues/7846 -var ScrapeTimestampTolerance = 2 * time.Millisecond - -// AlignScrapeTimestamps enables the tolerance for scrape appends timestamps described above. -var AlignScrapeTimestamps = true - -var errNameLabelMandatory = fmt.Errorf("missing metric name (%s label)", model.MetricNameLabel) - -var _ FailureLogger = (*logging.JSONFileLogger)(nil) - -// FailureLogger is an interface that can be used to log all failed -// scrapes. -type FailureLogger interface { - slog.Handler - io.Closer -} - -// scrapePool manages scrapes for sets of targets. -type scrapePool struct { - appendable storage.Appendable - logger *slog.Logger - ctx context.Context - cancel context.CancelFunc - options *Options - - // mtx must not be taken after targetMtx. - mtx sync.Mutex - config *config.ScrapeConfig - client *http.Client - loops map[uint64]loop - - symbolTable *labels.SymbolTable - lastSymbolTableCheck time.Time - initialSymbolTableLen int - - targetMtx sync.Mutex - // activeTargets and loops must always be synchronized to have the same - // set of hashes. - activeTargets map[uint64]*Target - droppedTargets []*Target // Subject to KeepDroppedTargets limit. - droppedTargetsCount int // Count of all dropped targets. - - // newLoop injection for testing purposes. - injectTestNewLoop func(scrapeLoopOptions) loop - - metrics *scrapeMetrics - buffers *pool.Pool - offsetSeed uint64 - - scrapeFailureLogger FailureLogger - scrapeFailureLoggerMtx sync.RWMutex -} - -type labelLimits struct { - labelLimit int - labelNameLengthLimit int - labelValueLengthLimit int -} - -const maxAheadTime = 10 * time.Minute - -// returning an empty label set is interpreted as "drop". -type labelsMutator func(labels.Labels) labels.Labels - -// scrapeLoopAppendAdapter allows support for multiple storage.Appender versions. 
-type scrapeLoopAppendAdapter interface { - Commit() error - Rollback() error - - addReportSample(s reportSample, t int64, v float64, b *labels.Builder, rejectOOO bool) error - append(b []byte, contentType string, ts time.Time) (total, added, seriesAdded int, err error) -} - -func newScrapePool( - cfg *config.ScrapeConfig, - appendable storage.Appendable, - offsetSeed uint64, - logger *slog.Logger, - buffers *pool.Pool, - options *Options, - metrics *scrapeMetrics, -) (*scrapePool, error) { - if logger == nil { - logger = promslog.NewNopLogger() - } - if buffers == nil { - buffers = pool.New(1e3, 1e6, 3, func(sz int) any { return make([]byte, 0, sz) }) - } - - client, err := newScrapeClient(cfg.HTTPClientConfig, cfg.JobName, options.HTTPClientOptions...) - if err != nil { - return nil, err - } - - // Validate scheme so we don't need to do it later. - // We also do it on scrapePool.reload(...) - // TODO(bwplotka): Can we move it to scrape config validation? - if err := namevalidationutil.CheckNameValidationScheme(cfg.MetricNameValidationScheme); err != nil { - return nil, errors.New("newScrapePool: MetricNameValidationScheme must be set in scrape configuration") - } - if _, err = config.ToEscapingScheme(cfg.MetricNameEscapingScheme, cfg.MetricNameValidationScheme); err != nil { - return nil, fmt.Errorf("invalid metric name escaping scheme, %w", err) - } - - symbols := labels.NewSymbolTable() - ctx, cancel := context.WithCancel(context.Background()) - sp := &scrapePool{ - appendable: appendable, - logger: logger, - ctx: ctx, - cancel: cancel, - options: options, - config: cfg, - client: client, - loops: map[uint64]loop{}, - symbolTable: symbols, - lastSymbolTableCheck: time.Now(), - activeTargets: map[uint64]*Target{}, - metrics: metrics, - buffers: buffers, - offsetSeed: offsetSeed, - } - sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit)) - return sp, nil -} - -func (sp *scrapePool) newLoop(opts 
scrapeLoopOptions) loop { - if sp.injectTestNewLoop != nil { - return sp.injectTestNewLoop(opts) - } - return newScrapeLoop(opts) -} - -func (sp *scrapePool) ActiveTargets() []*Target { - sp.targetMtx.Lock() - defer sp.targetMtx.Unlock() - - var tActive []*Target - for _, t := range sp.activeTargets { - tActive = append(tActive, t) - } - return tActive -} - -// Return dropped targets, subject to KeepDroppedTargets limit. -func (sp *scrapePool) DroppedTargets() []*Target { - sp.targetMtx.Lock() - defer sp.targetMtx.Unlock() - return sp.droppedTargets -} - -func (sp *scrapePool) DroppedTargetsCount() int { - sp.targetMtx.Lock() - defer sp.targetMtx.Unlock() - return sp.droppedTargetsCount -} - -func (sp *scrapePool) SetScrapeFailureLogger(l FailureLogger) { - sp.scrapeFailureLoggerMtx.Lock() - defer sp.scrapeFailureLoggerMtx.Unlock() - if l != nil { - l = slog.New(l).With("job_name", sp.config.JobName).Handler().(FailureLogger) - } - sp.scrapeFailureLogger = l - - sp.targetMtx.Lock() - defer sp.targetMtx.Unlock() - for _, s := range sp.loops { - s.setScrapeFailureLogger(sp.scrapeFailureLogger) - } -} - -func (sp *scrapePool) getScrapeFailureLogger() FailureLogger { - sp.scrapeFailureLoggerMtx.RLock() - defer sp.scrapeFailureLoggerMtx.RUnlock() - return sp.scrapeFailureLogger -} - -// stop terminates all scrape loops and returns after they all terminated. 
-func (sp *scrapePool) stop() { - sp.mtx.Lock() - defer sp.mtx.Unlock() - sp.cancel() - var wg sync.WaitGroup - - sp.targetMtx.Lock() - - for fp, l := range sp.loops { - wg.Add(1) - - go func(l loop) { - l.stop() - wg.Done() - }(l) - - delete(sp.loops, fp) - delete(sp.activeTargets, fp) - } - - sp.targetMtx.Unlock() - - wg.Wait() - sp.client.CloseIdleConnections() - - if sp.config != nil { - sp.metrics.targetScrapePoolSyncsCounter.DeleteLabelValues(sp.config.JobName) - sp.metrics.targetScrapePoolTargetLimit.DeleteLabelValues(sp.config.JobName) - sp.metrics.targetScrapePoolTargetsAdded.DeleteLabelValues(sp.config.JobName) - sp.metrics.targetScrapePoolSymbolTableItems.DeleteLabelValues(sp.config.JobName) - sp.metrics.targetSyncIntervalLength.DeleteLabelValues(sp.config.JobName) - sp.metrics.targetSyncIntervalLengthHistogram.DeleteLabelValues(sp.config.JobName) - sp.metrics.targetSyncFailed.DeleteLabelValues(sp.config.JobName) - } -} - -// reload the scrape pool with the given scrape configuration. The target state is preserved -// but all scrape loops are restarted with the new scrape configuration. -// This method returns after all scrape loops that were stopped have stopped scraping. -func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error { - sp.mtx.Lock() - defer sp.mtx.Unlock() - sp.metrics.targetScrapePoolReloads.Inc() - start := time.Now() - - client, err := newScrapeClient(cfg.HTTPClientConfig, cfg.JobName, sp.options.HTTPClientOptions...) - if err != nil { - sp.metrics.targetScrapePoolReloadsFailed.Inc() - return err - } - - reuseCache := reusableCache(sp.config, cfg) - sp.config = cfg - oldClient := sp.client - sp.client = client - - // Validate scheme so we don't need to do it later. 
- if err := namevalidationutil.CheckNameValidationScheme(cfg.MetricNameValidationScheme); err != nil { - return errors.New("scrapePool.reload: MetricNameValidationScheme must be set in scrape configuration") - } - if _, err = config.ToEscapingScheme(cfg.MetricNameEscapingScheme, cfg.MetricNameValidationScheme); err != nil { - return fmt.Errorf("scrapePool.reload: invalid metric name escaping scheme, %w", err) - } - sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit)) - - sp.restartLoops(reuseCache) - oldClient.CloseIdleConnections() - sp.metrics.targetReloadIntervalLength.WithLabelValues(time.Duration(sp.config.ScrapeInterval).String()).Observe( - time.Since(start).Seconds(), - ) - return nil -} - -func (sp *scrapePool) restartLoops(reuseCache bool) { - var wg sync.WaitGroup - sp.targetMtx.Lock() - - forcedErr := sp.refreshTargetLimitErr() - for fp, oldLoop := range sp.loops { - var cache *scrapeCache - if oc := oldLoop.getCache(); reuseCache && oc != nil { - oldLoop.disableEndOfRunStalenessMarkers() - cache = oc - } else { - cache = newScrapeCache(sp.metrics) - } - - t := sp.activeTargets[fp] - targetInterval, targetTimeout, err := t.intervalAndTimeout( - time.Duration(sp.config.ScrapeInterval), - time.Duration(sp.config.ScrapeTimeout), - ) - escapingScheme, _ := config.ToEscapingScheme(sp.config.MetricNameEscapingScheme, sp.config.MetricNameValidationScheme) - newLoop := sp.newLoop(scrapeLoopOptions{ - target: t, - scraper: &targetScraper{ - Target: t, - client: sp.client, - timeout: targetTimeout, - bodySizeLimit: int64(sp.config.BodySizeLimit), - acceptHeader: acceptHeader(sp.config.ScrapeProtocols, escapingScheme), - acceptEncodingHeader: acceptEncodingHeader(sp.config.EnableCompression), - metrics: sp.metrics, - }, - cache: cache, - interval: targetInterval, - timeout: targetTimeout, - sp: sp, - }) - if err != nil { - newLoop.setForcedError(err) - } - wg.Add(1) - - go func(oldLoop, newLoop loop) { - 
oldLoop.stop() - wg.Done() - - newLoop.setForcedError(forcedErr) - newLoop.setScrapeFailureLogger(sp.getScrapeFailureLogger()) - newLoop.run(nil) - }(oldLoop, newLoop) - - sp.loops[fp] = newLoop - } - - sp.targetMtx.Unlock() - - wg.Wait() -} - -// Must be called with sp.mtx held. -func (sp *scrapePool) checkSymbolTable() { - // Here we take steps to clear out the symbol table if it has grown a lot. - // After waiting some time for things to settle, we take the size of the symbol-table. - // If, after some more time, the table has grown to twice that size, we start a new one. - const minTimeToCleanSymbolTable = 5 * time.Minute - if time.Since(sp.lastSymbolTableCheck) > minTimeToCleanSymbolTable { - if sp.initialSymbolTableLen == 0 { - sp.initialSymbolTableLen = sp.symbolTable.Len() - } else if sp.symbolTable.Len() > 2*sp.initialSymbolTableLen { - sp.symbolTable = labels.NewSymbolTable() - sp.initialSymbolTableLen = 0 - sp.restartLoops(false) // To drop all caches. - } - sp.lastSymbolTableCheck = time.Now() - } -} - -// Sync converts target groups into actual scrape targets and synchronizes -// the currently running scraper with the resulting set and returns all scraped and dropped targets. -func (sp *scrapePool) Sync(tgs []*targetgroup.Group) { - sp.mtx.Lock() - defer sp.mtx.Unlock() - start := time.Now() - - sp.targetMtx.Lock() - var all []*Target - var targets []*Target - lb := labels.NewBuilderWithSymbolTable(sp.symbolTable) - sp.droppedTargets = []*Target{} - sp.droppedTargetsCount = 0 - for _, tg := range tgs { - targets, failures := TargetsFromGroup(tg, sp.config, targets, lb) - for _, err := range failures { - sp.logger.Error("Creating target failed", "err", err) - } - sp.metrics.targetSyncFailed.WithLabelValues(sp.config.JobName).Add(float64(len(failures))) - for _, t := range targets { - // Replicate .Labels().IsEmpty() with a loop here to avoid generating garbage. 
- nonEmpty := false - t.LabelsRange(func(labels.Label) { nonEmpty = true }) - switch { - case nonEmpty: - all = append(all, t) - default: - if sp.config.KeepDroppedTargets == 0 || uint(len(sp.droppedTargets)) < sp.config.KeepDroppedTargets { - sp.droppedTargets = append(sp.droppedTargets, t) - } - sp.droppedTargetsCount++ - } - } - } - sp.metrics.targetScrapePoolSymbolTableItems.WithLabelValues(sp.config.JobName).Set(float64(sp.symbolTable.Len())) - sp.targetMtx.Unlock() - sp.sync(all) - sp.checkSymbolTable() - - sp.metrics.targetSyncIntervalLength.WithLabelValues(sp.config.JobName).Observe( - time.Since(start).Seconds(), - ) - sp.metrics.targetSyncIntervalLengthHistogram.WithLabelValues(sp.config.JobName).Observe( - time.Since(start).Seconds(), - ) - sp.metrics.targetScrapePoolSyncsCounter.WithLabelValues(sp.config.JobName).Inc() -} - -// sync takes a list of potentially duplicated targets, deduplicates them, starts -// scrape loops for new targets, and stops scrape loops for disappeared targets. -// It returns after all stopped scrape loops terminated. -func (sp *scrapePool) sync(targets []*Target) { - uniqueLoops := make(map[uint64]loop) - - sp.targetMtx.Lock() - escapingScheme, _ := config.ToEscapingScheme(sp.config.MetricNameEscapingScheme, sp.config.MetricNameValidationScheme) - for _, t := range targets { - hash := t.hash() - - if _, ok := sp.activeTargets[hash]; !ok { - // The scrape interval and timeout labels are set to the config's values initially, - // so whether changed via relabeling or not, they'll exist and hold the correct values - // for every target. 
- var err error - targetInterval, targetTimeout, err := t.intervalAndTimeout( - time.Duration(sp.config.ScrapeInterval), - time.Duration(sp.config.ScrapeTimeout), - ) - l := sp.newLoop(scrapeLoopOptions{ - target: t, - scraper: &targetScraper{ - Target: t, - client: sp.client, - timeout: targetTimeout, - bodySizeLimit: int64(sp.config.BodySizeLimit), - acceptHeader: acceptHeader(sp.config.ScrapeProtocols, escapingScheme), - acceptEncodingHeader: acceptEncodingHeader(sp.config.EnableCompression), - metrics: sp.metrics, - }, - cache: newScrapeCache(sp.metrics), - interval: targetInterval, - timeout: targetTimeout, - sp: sp, - }) - if err != nil { - l.setForcedError(err) - } - l.setScrapeFailureLogger(sp.scrapeFailureLogger) - - sp.activeTargets[hash] = t - sp.loops[hash] = l - - uniqueLoops[hash] = l - } else { - // This might be a duplicated target. - if _, ok := uniqueLoops[hash]; !ok { - uniqueLoops[hash] = nil - } - // Need to keep the most updated ScrapeConfig for - // displaying labels in the Service Discovery web page. - sp.activeTargets[hash].SetScrapeConfig(sp.config, t.tLabels, t.tgLabels) - } - } - - var wg sync.WaitGroup - - // Stop and remove old targets and scraper loops. - for hash := range sp.activeTargets { - if _, ok := uniqueLoops[hash]; !ok { - wg.Add(1) - go func(l loop) { - l.stop() - wg.Done() - }(sp.loops[hash]) - - delete(sp.loops, hash) - delete(sp.activeTargets, hash) - } - } - - sp.targetMtx.Unlock() - - sp.metrics.targetScrapePoolTargetsAdded.WithLabelValues(sp.config.JobName).Set(float64(len(uniqueLoops))) - forcedErr := sp.refreshTargetLimitErr() - for _, l := range sp.loops { - l.setForcedError(forcedErr) - } - for _, l := range uniqueLoops { - if l != nil { - go l.run(nil) - } - } - // Wait for all potentially stopped scrapers to terminate. - // This covers the case of flapping targets. If the server is under high load, a new scraper - // may be active and tries to insert. 
The old scraper that didn't terminate yet could still - // be inserting a previous sample set. - wg.Wait() -} - -// refreshTargetLimitErr returns an error that can be passed to the scrape loops -// if the number of targets exceeds the configured limit. -func (sp *scrapePool) refreshTargetLimitErr() error { - if sp.config == nil || sp.config.TargetLimit == 0 { - return nil - } - if l := len(sp.activeTargets); l > int(sp.config.TargetLimit) { - sp.metrics.targetScrapePoolExceededTargetLimit.Inc() - return fmt.Errorf("target_limit exceeded (number of targets: %d, limit: %d)", l, sp.config.TargetLimit) - } - return nil -} - -func (sp *scrapePool) disableEndOfRunStalenessMarkers(targets []*Target) { - sp.mtx.Lock() - defer sp.mtx.Unlock() - for i := range targets { - if l, ok := sp.loops[targets[i].hash()]; ok { - l.disableEndOfRunStalenessMarkers() - } - } -} - -func verifyLabelLimits(lset labels.Labels, limits *labelLimits) error { - if limits == nil { - return nil - } - - met := lset.Get(model.MetricNameLabel) - if limits.labelLimit > 0 { - nbLabels := lset.Len() - if nbLabels > limits.labelLimit { - return fmt.Errorf("label_limit exceeded (metric: %.50s, number of labels: %d, limit: %d)", met, nbLabels, limits.labelLimit) - } - } - - if limits.labelNameLengthLimit == 0 && limits.labelValueLengthLimit == 0 { - return nil - } - - return lset.Validate(func(l labels.Label) error { - if limits.labelNameLengthLimit > 0 { - nameLength := len(l.Name) - if nameLength > limits.labelNameLengthLimit { - return fmt.Errorf("label_name_length_limit exceeded (metric: %.50s, label name: %.50s, length: %d, limit: %d)", met, l.Name, nameLength, limits.labelNameLengthLimit) - } - } - - if limits.labelValueLengthLimit > 0 { - valueLength := len(l.Value) - if valueLength > limits.labelValueLengthLimit { - return fmt.Errorf("label_value_length_limit exceeded (metric: %.50s, label name: %.50s, value: %.50q, length: %d, limit: %d)", met, l.Name, l.Value, valueLength, 
limits.labelValueLengthLimit) - } - } - return nil - }) -} - -func mutateSampleLabels(lset labels.Labels, target *Target, honor bool, rc []*relabel.Config) labels.Labels { - lb := labels.NewBuilder(lset) - - if honor { - target.LabelsRange(func(l labels.Label) { - if !lset.Has(l.Name) { - lb.Set(l.Name, l.Value) - } - }) - } else { - var conflictingExposedLabels []labels.Label - target.LabelsRange(func(l labels.Label) { - existingValue := lset.Get(l.Name) - if existingValue != "" { - conflictingExposedLabels = append(conflictingExposedLabels, labels.Label{Name: l.Name, Value: existingValue}) - } - // It is now safe to set the target label. - lb.Set(l.Name, l.Value) - }) - - if len(conflictingExposedLabels) > 0 { - resolveConflictingExposedLabels(lb, conflictingExposedLabels) - } - } - - if keep := relabel.ProcessBuilder(lb, rc...); !keep { - return labels.EmptyLabels() - } - - return lb.Labels() -} - -func resolveConflictingExposedLabels(lb *labels.Builder, conflictingExposedLabels []labels.Label) { - slices.SortStableFunc(conflictingExposedLabels, func(a, b labels.Label) int { - return len(a.Name) - len(b.Name) - }) - - for _, l := range conflictingExposedLabels { - newName := l.Name - for { - newName = model.ExportedLabelPrefix + newName - if lb.Get(newName) == "" { - lb.Set(newName, l.Value) - break - } - } - } -} - -func mutateReportSampleLabels(lset labels.Labels, target *Target) labels.Labels { - lb := labels.NewBuilder(lset) - - target.LabelsRange(func(l labels.Label) { - lb.Set(model.ExportedLabelPrefix+l.Name, lset.Get(l.Name)) - lb.Set(l.Name, l.Value) - }) - - return lb.Labels() -} - // appenderWithLimits returns an appender with additional validation. 
-func appenderWithLimits(app storage.Appender, sampleLimit, bucketLimit int, maxSchema int32) storage.Appender { - app = &timeLimitAppender{ - Appender: app, - maxTime: timestamp.FromTime(time.Now().Add(maxAheadTime)), +func appenderV2WithLimits(app storage.AppenderV2, sampleLimit, bucketLimit int, maxSchema int32) storage.AppenderV2 { + app = &timeLimitAppenderV2{ + AppenderV2: app, + maxTime: timestamp.FromTime(time.Now().Add(maxAheadTime)), } // The sampleLimit is applied after metrics are potentially dropped via relabeling. if sampleLimit > 0 { - app = &limitAppender{ - Appender: app, - limit: sampleLimit, + app = &limitAppenderV2{ + AppenderV2: app, + limit: sampleLimit, } } if bucketLimit > 0 { - app = &bucketLimitAppender{ - Appender: app, - limit: bucketLimit, + app = &bucketLimitAppenderV2{ + AppenderV2: app, + limit: bucketLimit, } } if maxSchema < histogram.ExponentialSchemaMax { - app = &maxSchemaAppender{ - Appender: app, - maxSchema: maxSchema, + app = &maxSchemaAppenderV2{ + AppenderV2: app, + maxSchema: maxSchema, } } return app } -// A scraper retrieves samples and accepts a status report at the end. -type scraper interface { - scrape(ctx context.Context) (*http.Response, error) - readResponse(ctx context.Context, resp *http.Response, w io.Writer) (string, error) - Report(start time.Time, dur time.Duration, err error) - offset(interval time.Duration, offsetSeed uint64) time.Duration -} - -// targetScraper implements the scraper interface for a target. -type targetScraper struct { - *Target - - client *http.Client - req *http.Request - timeout time.Duration - - gzipr *gzip.Reader - buf *bufio.Reader - - bodySizeLimit int64 - acceptHeader string - acceptEncodingHeader string - - metrics *scrapeMetrics -} - -var errBodySizeLimit = errors.New("body size limit exceeded") - -// acceptHeader transforms preference from the options into specific header values as -// https://www.rfc-editor.org/rfc/rfc9110.html#name-accept defines. 
-// No validation is here, we expect scrape protocols to be validated already. -func acceptHeader(sps []config.ScrapeProtocol, scheme model.EscapingScheme) string { - var vals []string - weight := len(config.ScrapeProtocolsHeaders) + 1 - for _, sp := range sps { - val := config.ScrapeProtocolsHeaders[sp] - // Escaping header is only valid for newer versions of the text formats. - if sp == config.PrometheusText1_0_0 || sp == config.OpenMetricsText1_0_0 { - val += ";" + model.EscapingKey + "=" + scheme.String() - } - val += fmt.Sprintf(";q=0.%d", weight) - vals = append(vals, val) - weight-- - } - // Default match anything. - vals = append(vals, fmt.Sprintf("*/*;q=0.%d", weight)) - return strings.Join(vals, ",") -} - -func acceptEncodingHeader(enableCompression bool) string { - if enableCompression { - return "gzip" - } - return "identity" -} - -var UserAgent = version.PrometheusUserAgent() - -func (s *targetScraper) scrape(ctx context.Context) (*http.Response, error) { - if s.req == nil { - req, err := http.NewRequest(http.MethodGet, s.URL().String(), nil) - if err != nil { - return nil, err - } - req.Header.Add("Accept", s.acceptHeader) - req.Header.Add("Accept-Encoding", s.acceptEncodingHeader) - req.Header.Set("User-Agent", UserAgent) - req.Header.Set("X-Prometheus-Scrape-Timeout-Seconds", strconv.FormatFloat(s.timeout.Seconds(), 'f', -1, 64)) - - s.req = req - } - ctx, span := otel.Tracer("").Start(ctx, "Scrape", trace.WithSpanKind(trace.SpanKindClient)) - defer span.End() - - return s.client.Do(s.req.WithContext(ctx)) -} - -func (s *targetScraper) readResponse(_ context.Context, resp *http.Response, w io.Writer) (string, error) { - defer func() { - io.Copy(io.Discard, resp.Body) - resp.Body.Close() - }() - - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("server returned HTTP status %s", resp.Status) - } - - if s.bodySizeLimit <= 0 { - s.bodySizeLimit = math.MaxInt64 - } - if resp.Header.Get("Content-Encoding") != "gzip" { - n, err := io.Copy(w, 
io.LimitReader(resp.Body, s.bodySizeLimit)) - if err != nil { - return "", err - } - if n >= s.bodySizeLimit { - s.metrics.targetScrapeExceededBodySizeLimit.Inc() - return "", errBodySizeLimit - } - return resp.Header.Get("Content-Type"), nil - } - - if s.gzipr == nil { - s.buf = bufio.NewReader(resp.Body) - var err error - s.gzipr, err = gzip.NewReader(s.buf) - if err != nil { - return "", err - } - } else { - s.buf.Reset(resp.Body) - if err := s.gzipr.Reset(s.buf); err != nil { - return "", err - } - } - - n, err := io.Copy(w, io.LimitReader(s.gzipr, s.bodySizeLimit)) - s.gzipr.Close() - if err != nil { - return "", err - } - if n >= s.bodySizeLimit { - s.metrics.targetScrapeExceededBodySizeLimit.Inc() - return "", errBodySizeLimit - } - return resp.Header.Get("Content-Type"), nil -} - -// A loop can run and be stopped again. It must not be reused after it was stopped. -type loop interface { - run(errc chan<- error) - setForcedError(err error) - setScrapeFailureLogger(FailureLogger) - stop() - getCache() *scrapeCache - disableEndOfRunStalenessMarkers() -} - -type cacheEntry struct { - ref storage.SeriesRef - lastIter uint64 - hash uint64 - lset labels.Labels -} - -type scrapeLoop struct { - // Parameters. - ctx context.Context - cancel func() - stopped chan struct{} - parentCtx context.Context - appenderCtx context.Context - l *slog.Logger - cache *scrapeCache - - interval time.Duration - timeout time.Duration - sampleMutator labelsMutator - reportSampleMutator labelsMutator - scraper scraper - - // Static params per scrapePool. - appendable storage.Appendable - buffers *pool.Pool - offsetSeed uint64 - symbolTable *labels.SymbolTable - metrics *scrapeMetrics - - // Options from config.ScrapeConfig. 
- sampleLimit int - bucketLimit int - maxSchema int32 - labelLimits *labelLimits - honorLabels bool - honorTimestamps bool - trackTimestampsStaleness bool - enableNativeHistogramScraping bool - alwaysScrapeClassicHist bool - convertClassicHistToNHCB bool - fallbackScrapeProtocol string - enableCompression bool - mrc []*relabel.Config - validationScheme model.ValidationScheme - - // Options from scrape.Options. - enableSTZeroIngestion bool - enableTypeAndUnitLabels bool - reportExtraMetrics bool - appendMetadataToWAL bool - passMetadataInContext bool - skipOffsetting bool // For testability. - - // error injection through setForcedError. - forcedErr error - forcedErrMtx sync.Mutex - - // Special logger set on setScrapeFailureLogger - scrapeFailureLoggerMtx sync.RWMutex - scrapeFailureLogger FailureLogger - - // Locally cached data. - lastScrapeSize int - disabledEndOfRunStalenessMarkers atomic.Bool -} - -// scrapeCache tracks mappings of exposed metric strings to label sets and -// storage references. Additionally, it tracks staleness of series between -// scrapes. -// Cache is meant to be used per a single target. -type scrapeCache struct { - iter uint64 // Current scrape iteration. - - // How many series and metadata entries there were at the last success. - successfulCount int - - // Parsed string to an entry with information about the actual label set - // and its storage reference. - series map[string]*cacheEntry - - // Cache of dropped metric strings and their iteration. The iteration must - // be a pointer so we can update it. - droppedSeries map[string]*uint64 - - // Series that were seen in the current and previous scrape, for staleness detection. - seriesCur map[storage.SeriesRef]*cacheEntry - seriesPrev map[storage.SeriesRef]*cacheEntry - - // TODO(bwplotka): Consider moving metadata caching to head. See - // https://github.com/prometheus/prometheus/issues/17619. - metaMtx sync.Mutex // Mutex is needed due to api touching it when metadata is queried. 
- metadata map[string]*metaEntry // metadata by metric family name. - - metrics *scrapeMetrics -} - -// metaEntry holds meta information about a metric. -type metaEntry struct { - metadata.Metadata - - lastIter uint64 // Last scrape iteration the entry was observed at. - lastIterChange uint64 // Last scrape iteration the entry was changed at. -} - -func (m *metaEntry) size() int { - // The attribute lastIter although part of the struct it is not metadata. - return len(m.Help) + len(m.Unit) + len(m.Type) -} - -func newScrapeCache(metrics *scrapeMetrics) *scrapeCache { - return &scrapeCache{ - series: map[string]*cacheEntry{}, - droppedSeries: map[string]*uint64{}, - seriesCur: map[storage.SeriesRef]*cacheEntry{}, - seriesPrev: map[storage.SeriesRef]*cacheEntry{}, - metadata: map[string]*metaEntry{}, - metrics: metrics, - } -} - -func (c *scrapeCache) iterDone(flushCache bool) { - c.metaMtx.Lock() - count := len(c.series) + len(c.droppedSeries) + len(c.metadata) - c.metaMtx.Unlock() - - switch { - case flushCache: - c.successfulCount = count - case count > c.successfulCount*2+1000: - // If a target had varying labels in scrapes that ultimately failed, - // the caches would grow indefinitely. Force a flush when this happens. - // We use the heuristic that this is a doubling of the cache size - // since the last scrape, and allow an additional 1000 in case - // initial scrapes all fail. - flushCache = true - c.metrics.targetScrapeCacheFlushForced.Inc() - } - - if flushCache { - // All caches may grow over time through series churn - // or multiple string representations of the same metric. Clean up entries - // that haven't appeared in the last scrape. - for s, e := range c.series { - if c.iter != e.lastIter { - delete(c.series, s) - } - } - for s, iter := range c.droppedSeries { - if c.iter != *iter { - delete(c.droppedSeries, s) - } - } - c.metaMtx.Lock() - for m, e := range c.metadata { - // Keep metadata around for 10 scrapes after its metric disappeared. 
- if c.iter-e.lastIter > 10 { - delete(c.metadata, m) - } - } - c.metaMtx.Unlock() - } - - // Swap current and previous series then clear the new current, to save allocations. - c.seriesPrev, c.seriesCur = c.seriesCur, c.seriesPrev - clear(c.seriesCur) - - c.iter++ -} - -func (c *scrapeCache) get(met []byte) (*cacheEntry, bool, bool) { - e, ok := c.series[string(met)] - if !ok { - return nil, false, false - } - alreadyScraped := e.lastIter == c.iter - e.lastIter = c.iter - return e, true, alreadyScraped -} - -func (c *scrapeCache) addRef(met []byte, ref storage.SeriesRef, lset labels.Labels, hash uint64) (ce *cacheEntry) { - if ref == 0 { - return nil - } - ce = &cacheEntry{ref: ref, lastIter: c.iter, lset: lset, hash: hash} - c.series[string(met)] = ce - return ce -} - -func (c *scrapeCache) addDropped(met []byte) { - iter := c.iter - c.droppedSeries[string(met)] = &iter -} - -func (c *scrapeCache) getDropped(met []byte) bool { - iterp, ok := c.droppedSeries[string(met)] - if ok { - *iterp = c.iter - } - return ok -} - -func (c *scrapeCache) trackStaleness(ref storage.SeriesRef, ce *cacheEntry) { - c.seriesCur[ref] = ce -} - -func (c *scrapeCache) forEachStale(f func(storage.SeriesRef, labels.Labels) bool) { - for ref, ce := range c.seriesPrev { - if _, ok := c.seriesCur[ref]; !ok { - if !f(ce.ref, ce.lset) { - break - } - } - } -} - -func yoloString(b []byte) string { - return unsafe.String(unsafe.SliceData(b), len(b)) -} - -func (c *scrapeCache) setType(mfName []byte, t model.MetricType) ([]byte, *metaEntry) { - c.metaMtx.Lock() - defer c.metaMtx.Unlock() - - e, ok := c.metadata[string(mfName)] - if !ok { - e = &metaEntry{Metadata: metadata.Metadata{Type: model.MetricTypeUnknown}} - c.metadata[string(mfName)] = e - } - if e.Type != t { - e.Type = t - e.lastIterChange = c.iter - } - e.lastIter = c.iter - return mfName, e -} - -func (c *scrapeCache) setHelp(mfName, help []byte) ([]byte, *metaEntry) { - c.metaMtx.Lock() - defer c.metaMtx.Unlock() - - e, ok := 
c.metadata[string(mfName)] - if !ok { - e = &metaEntry{Metadata: metadata.Metadata{Type: model.MetricTypeUnknown}} - c.metadata[string(mfName)] = e - } - if e.Help != string(help) { - e.Help = string(help) - e.lastIterChange = c.iter - } - e.lastIter = c.iter - return mfName, e -} - -func (c *scrapeCache) setUnit(mfName, unit []byte) ([]byte, *metaEntry) { - c.metaMtx.Lock() - defer c.metaMtx.Unlock() - - e, ok := c.metadata[string(mfName)] - if !ok { - e = &metaEntry{Metadata: metadata.Metadata{Type: model.MetricTypeUnknown}} - c.metadata[string(mfName)] = e - } - if e.Unit != string(unit) { - e.Unit = string(unit) - e.lastIterChange = c.iter - } - e.lastIter = c.iter - return mfName, e -} - -// GetMetadata returns metadata given the metric family name. -func (c *scrapeCache) GetMetadata(mfName string) (MetricMetadata, bool) { - c.metaMtx.Lock() - defer c.metaMtx.Unlock() - - m, ok := c.metadata[mfName] - if !ok { - return MetricMetadata{}, false - } - return MetricMetadata{ - MetricFamily: mfName, - Type: m.Type, - Help: m.Help, - Unit: m.Unit, - }, true -} - -// ListMetadata lists metadata. -func (c *scrapeCache) ListMetadata() []MetricMetadata { - c.metaMtx.Lock() - defer c.metaMtx.Unlock() - - res := make([]MetricMetadata, 0, len(c.metadata)) - - for m, e := range c.metadata { - res = append(res, MetricMetadata{ - MetricFamily: m, - Type: e.Type, - Help: e.Help, - Unit: e.Unit, - }) - } - return res -} - -// SizeMetadata returns the size of the metadata cache. -func (c *scrapeCache) SizeMetadata() (s int) { - c.metaMtx.Lock() - defer c.metaMtx.Unlock() - for _, e := range c.metadata { - s += e.size() - } - - return s -} - -// LengthMetadata returns the number of metadata entries in the cache. -func (c *scrapeCache) LengthMetadata() int { - c.metaMtx.Lock() - defer c.metaMtx.Unlock() - - return len(c.metadata) -} - -// scrapeLoopOptions contains static options that do not change per scrapePool lifecycle. 
-type scrapeLoopOptions struct { - target *Target - scraper scraper - cache *scrapeCache - interval, timeout time.Duration - - sp *scrapePool -} - -// newScrapeLoop constructs new scrapeLoop. -// NOTE: Technically this could be a scrapePool method, but it's a standalone function to make it clear scrapeLoop -// can be used outside scrapePool lifecycle (e.g. in tests). -func newScrapeLoop(opts scrapeLoopOptions) *scrapeLoop { - // Update the targets retrieval function for metadata to a new target. - opts.target.SetMetadataStore(opts.cache) - - appenderCtx := opts.sp.ctx - if opts.sp.options.PassMetadataInContext { - // Store the cache and target in the context. This is then used by downstream OTel Collector - // to lookup the metadata required to process the samples. Not used by Prometheus itself. - // TODO(gouthamve) We're using a dedicated context because using the parentCtx caused a memory - // leak. We should ideally fix the main leak. See: https://github.com/prometheus/prometheus/pull/10590 - // TODO(bwplotka): Remove once OpenTelemetry collector uses AppenderV2 (add issue) - appenderCtx = ContextWithMetricMetadataStore(appenderCtx, opts.cache) - appenderCtx = ContextWithTarget(appenderCtx, opts.target) - } - - ctx, cancel := context.WithCancel(opts.sp.ctx) - return &scrapeLoop{ - ctx: ctx, - cancel: cancel, - stopped: make(chan struct{}), - parentCtx: opts.sp.ctx, - appenderCtx: appenderCtx, - l: opts.sp.logger.With("target", opts.target), - cache: opts.cache, - - interval: opts.interval, - timeout: opts.timeout, - sampleMutator: func(l labels.Labels) labels.Labels { - return mutateSampleLabels(l, opts.target, opts.sp.config.HonorLabels, opts.sp.config.MetricRelabelConfigs) - }, - reportSampleMutator: func(l labels.Labels) labels.Labels { return mutateReportSampleLabels(l, opts.target) }, - scraper: opts.scraper, - - // Static params per scrapePool. 
- appendable: opts.sp.appendable, - buffers: opts.sp.buffers, - offsetSeed: opts.sp.offsetSeed, - symbolTable: opts.sp.symbolTable, - metrics: opts.sp.metrics, - - // config.ScrapeConfig. - sampleLimit: int(opts.sp.config.SampleLimit), - bucketLimit: int(opts.sp.config.NativeHistogramBucketLimit), - maxSchema: pickSchema(opts.sp.config.NativeHistogramMinBucketFactor), - labelLimits: &labelLimits{ - labelLimit: int(opts.sp.config.LabelLimit), - labelNameLengthLimit: int(opts.sp.config.LabelNameLengthLimit), - labelValueLengthLimit: int(opts.sp.config.LabelValueLengthLimit), - }, - honorLabels: opts.sp.config.HonorLabels, - honorTimestamps: opts.sp.config.HonorTimestamps, - trackTimestampsStaleness: opts.sp.config.TrackTimestampsStaleness, - enableNativeHistogramScraping: opts.sp.config.ScrapeNativeHistogramsEnabled(), - alwaysScrapeClassicHist: opts.sp.config.AlwaysScrapeClassicHistogramsEnabled(), - convertClassicHistToNHCB: opts.sp.config.ConvertClassicHistogramsToNHCBEnabled(), - fallbackScrapeProtocol: opts.sp.config.ScrapeFallbackProtocol.HeaderMediaType(), - enableCompression: opts.sp.config.EnableCompression, - mrc: opts.sp.config.MetricRelabelConfigs, - reportExtraMetrics: opts.sp.config.ExtraScrapeMetricsEnabled(), - validationScheme: opts.sp.config.MetricNameValidationScheme, - - // scrape.Options. 
- enableSTZeroIngestion: opts.sp.options.EnableStartTimestampZeroIngestion, - enableTypeAndUnitLabels: opts.sp.options.EnableTypeAndUnitLabels, - appendMetadataToWAL: opts.sp.options.AppendMetadata, - passMetadataInContext: opts.sp.options.PassMetadataInContext, - skipOffsetting: opts.sp.options.skipOffsetting, - } -} - -func (sl *scrapeLoop) setScrapeFailureLogger(l FailureLogger) { - sl.scrapeFailureLoggerMtx.Lock() - defer sl.scrapeFailureLoggerMtx.Unlock() - if ts, ok := sl.scraper.(fmt.Stringer); ok && l != nil { - l = slog.New(l).With("target", ts.String()).Handler().(FailureLogger) - } - sl.scrapeFailureLogger = l -} - -func (sl *scrapeLoop) run(errc chan<- error) { - if !sl.skipOffsetting { - select { - case <-time.After(sl.scraper.offset(sl.interval, sl.offsetSeed)): - // Continue after a scraping offset. - case <-sl.ctx.Done(): - close(sl.stopped) - return - } - } - - var last time.Time - - alignedScrapeTime := time.Now().Round(0) - ticker := time.NewTicker(sl.interval) - defer ticker.Stop() - -mainLoop: - for { - select { - case <-sl.parentCtx.Done(): - close(sl.stopped) - return - case <-sl.ctx.Done(): - break mainLoop - default: - } - - // Temporary workaround for a jitter in go timers that causes disk space - // increase in TSDB. - // See https://github.com/prometheus/prometheus/issues/7846 - // Calling Round ensures the time used is the wall clock, as otherwise .Sub - // and .Add on time.Time behave differently (see time package docs). - scrapeTime := time.Now().Round(0) - if AlignScrapeTimestamps { - // Tolerance is clamped to maximum 1% of the scrape interval. - tolerance := min(sl.interval/100, ScrapeTimestampTolerance) - // For some reason, a tick might have been skipped, in which case we - // would call alignedScrapeTime.Add(interval) multiple times. - for scrapeTime.Sub(alignedScrapeTime) >= sl.interval { - alignedScrapeTime = alignedScrapeTime.Add(sl.interval) - } - // Align the scrape time if we are in the tolerance boundaries. 
- if scrapeTime.Sub(alignedScrapeTime) <= tolerance { - scrapeTime = alignedScrapeTime - } - } - - last = sl.scrapeAndReport(last, scrapeTime, errc) - - select { - case <-sl.parentCtx.Done(): - close(sl.stopped) - return - case <-sl.ctx.Done(): - break mainLoop - case <-ticker.C: - } - } - - close(sl.stopped) - - if !sl.disabledEndOfRunStalenessMarkers.Load() { - sl.endOfRunStaleness(last, ticker, sl.interval) - } -} - -func (sl *scrapeLoop) appender() scrapeLoopAppendAdapter { - // NOTE(bwplotka): Add AppenderV2 implementation, see https://github.com/prometheus/prometheus/issues/17632. - return &scrapeLoopAppender{scrapeLoop: sl, Appender: sl.appendable.Appender(sl.appenderCtx)} -} - -// scrapeAndReport performs a scrape and then appends the result to the storage -// together with reporting metrics, by using as few appenders as possible. -// In the happy scenario, a single appender is used. -// This function uses sl.appenderCtx instead of sl.ctx on purpose. A scrape should -// only be cancelled on shutdown, not on reloads. -func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- error) time.Time { - start := time.Now() - - // Only record after the first scrape. - if !last.IsZero() { - sl.metrics.targetIntervalLength.WithLabelValues(sl.interval.String()).Observe( - time.Since(last).Seconds(), - ) - sl.metrics.targetIntervalLengthHistogram.WithLabelValues(sl.interval.String()).Observe( - time.Since(last).Seconds(), - ) - } - - var total, added, seriesAdded, bytesRead int - var err, appErr, scrapeErr error - - app := sl.appender() - defer func() { - if err != nil { - _ = app.Rollback() - return - } - err = app.Commit() - if sl.reportExtraMetrics { - totalDuration := time.Since(start) - // Record total scrape duration metric. 
- sl.metrics.targetScrapeDuration.Observe(totalDuration.Seconds()) - } - if err != nil { - sl.l.Error("Scrape commit failed", "err", err) - } - }() - - defer func() { - if err = sl.report(app, appendTime, time.Since(start), total, added, seriesAdded, bytesRead, scrapeErr); err != nil { - sl.l.Warn("Appending scrape report failed", "err", err) - } - }() - - if forcedErr := sl.getForcedError(); forcedErr != nil { - scrapeErr = forcedErr - // Add stale markers. - if _, _, _, err := app.append([]byte{}, "", appendTime); err != nil { - _ = app.Rollback() - app = sl.appender() - sl.l.Warn("Append failed", "err", err) - } - if errc != nil { - select { - case errc <- forcedErr: - case <-sl.ctx.Done(): - } - } - - return start - } - - var contentType string - var resp *http.Response - var b []byte - var buf *bytes.Buffer - scrapeCtx, cancel := context.WithTimeout(sl.parentCtx, sl.timeout) - resp, scrapeErr = sl.scraper.scrape(scrapeCtx) - if scrapeErr == nil { - b = sl.buffers.Get(sl.lastScrapeSize).([]byte) - defer sl.buffers.Put(b) - buf = bytes.NewBuffer(b) - contentType, scrapeErr = sl.scraper.readResponse(scrapeCtx, resp, buf) - } - cancel() - - if scrapeErr == nil { - b = buf.Bytes() - // NOTE: There were issues with misbehaving clients in the past - // that occasionally returned empty results. We don't want those - // to falsely reset our buffer size. - if len(b) > 0 { - sl.lastScrapeSize = len(b) - } - bytesRead = len(b) - } else { - sl.l.Debug("Scrape failed", "err", scrapeErr) - sl.scrapeFailureLoggerMtx.RLock() - if sl.scrapeFailureLogger != nil { - slog.New(sl.scrapeFailureLogger).Error(scrapeErr.Error()) - } - sl.scrapeFailureLoggerMtx.RUnlock() - if errc != nil { - select { - case errc <- scrapeErr: - case <-sl.ctx.Done(): - } - } - if errors.Is(scrapeErr, errBodySizeLimit) { - bytesRead = -1 - } - } - - // A failed scrape is the same as an empty scrape, - // we still call sl.append to trigger stale markers. 
- total, added, seriesAdded, appErr = app.append(b, contentType, appendTime) - if appErr != nil { - _ = app.Rollback() - app = sl.appender() - sl.l.Debug("Append failed", "err", appErr) - // The append failed, probably due to a parse error or sample limit. - // Call sl.append again with an empty scrape to trigger stale markers. - if _, _, _, err := app.append([]byte{}, "", appendTime); err != nil { - _ = app.Rollback() - app = sl.appender() - sl.l.Warn("Append failed", "err", err) - } - } - - if scrapeErr == nil { - scrapeErr = appErr - } - - return start -} - -func (sl *scrapeLoop) setForcedError(err error) { - sl.forcedErrMtx.Lock() - defer sl.forcedErrMtx.Unlock() - sl.forcedErr = err -} - -func (sl *scrapeLoop) getForcedError() error { - sl.forcedErrMtx.Lock() - defer sl.forcedErrMtx.Unlock() - return sl.forcedErr -} - -func (sl *scrapeLoop) endOfRunStaleness(last time.Time, ticker *time.Ticker, interval time.Duration) { - // Scraping has stopped. We want to write stale markers but - // the target may be recreated, so we wait just over 2 scrape intervals - // before creating them. - // If the context is canceled, we presume the server is shutting down - // and will restart where is was. We do not attempt to write stale markers - // in this case. - - if last.IsZero() { - // There never was a scrape, so there will be no stale markers. - return - } - - // Wait for when the next scrape would have been, record its timestamp. - var staleTime time.Time - select { - case <-sl.parentCtx.Done(): - return - case <-ticker.C: - staleTime = time.Now() - } - - // Wait for when the next scrape would have been, if the target was recreated - // samples should have been ingested by now. - select { - case <-sl.parentCtx.Done(): - return - case <-ticker.C: - } - - // Wait for an extra 10% of the interval, just to be safe. 
- select { - case <-sl.parentCtx.Done(): - return - case <-time.After(interval / 10): - } - - // Check if end-of-run staleness markers have been disabled while we were waiting. - if sl.disabledEndOfRunStalenessMarkers.Load() { - return - } - - // Call sl.append again with an empty scrape to trigger stale markers. - // If the target has since been recreated and scraped, the - // stale markers will be out of order and ignored. - // sl.context would have been cancelled, hence using sl.appenderCtx. - app := sl.appender() - var err error - defer func() { - if err != nil { - _ = app.Rollback() - return - } - err = app.Commit() - if err != nil { - sl.l.Warn("Stale commit failed", "err", err) - } - }() - if _, _, _, err = app.append([]byte{}, "", staleTime); err != nil { - _ = app.Rollback() - app = sl.appender() - sl.l.Warn("Stale append failed", "err", err) - } - if err = sl.reportStale(app, staleTime); err != nil { - sl.l.Warn("Stale report failed", "err", err) - } -} - -// Stop the scraping. May still write data and stale markers after it has -// returned. Cancel the context to stop all writes. -func (sl *scrapeLoop) stop() { - sl.cancel() - <-sl.stopped -} - -func (sl *scrapeLoop) disableEndOfRunStalenessMarkers() { - sl.disabledEndOfRunStalenessMarkers.Store(true) -} - -func (sl *scrapeLoop) getCache() *scrapeCache { - return sl.cache -} - -type appendErrors struct { - numOutOfOrder int - numDuplicates int - numOutOfBounds int - numExemplarOutOfOrder int -} - -// Update the stale markers. -func (sl *scrapeLoop) updateStaleMarkers(app storage.Appender, defTime int64) (err error) { +func (sl *scrapeLoop) updateStaleMarkersV2(app storage.AppenderV2, defTime int64) (err error) { sl.cache.forEachStale(func(ref storage.SeriesRef, lset labels.Labels) bool { // Series no longer exposed, mark it stale. 
- app.SetOptions(&aOptionRejectEarlyOOO) - _, err = app.Append(ref, lset, defTime, math.Float64frombits(value.StaleNaN)) - app.SetOptions(nil) + _, err = app.Append(ref, lset, 0, defTime, math.Float64frombits(value.StaleNaN), nil, nil, storage.AOptions{RejectOutOfOrder: true}) switch { case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrDuplicateSampleForTimestamp): // Do not count these in logging, as this is expected if a target @@ -1553,20 +79,20 @@ func (sl *scrapeLoop) updateStaleMarkers(app storage.Appender, defTime int64) (e return err } -type scrapeLoopAppender struct { +type scrapeLoopAppenderV2 struct { *scrapeLoop - storage.Appender + storage.AppenderV2 } -var _ scrapeLoopAppendAdapter = &scrapeLoopAppender{} +var _ scrapeLoopAppendAdapter = &scrapeLoopAppenderV2{} -func (sl *scrapeLoopAppender) append(b []byte, contentType string, ts time.Time) (total, added, seriesAdded int, err error) { +func (sl *scrapeLoopAppenderV2) append(b []byte, contentType string, ts time.Time) (total, added, seriesAdded int, err error) { defTime := timestamp.FromTime(ts) if len(b) == 0 { // Empty scrape. Just update the stale makers and swap the cache (but don't flush it). - err = sl.updateStaleMarkers(sl.Appender, defTime) + err = sl.updateStaleMarkersV2(sl.AppenderV2, defTime) sl.cache.iterDone(false) return total, added, seriesAdded, err } @@ -1609,7 +135,7 @@ func (sl *scrapeLoopAppender) append(b []byte, contentType string, ts time.Time) exemplars := make([]exemplar.Exemplar, 0, 1) // Take an appender with limits. - app := appenderWithLimits(sl.Appender, sl.sampleLimit, sl.bucketLimit, sl.maxSchema) + app := appenderV2WithLimits(sl.AppenderV2, sl.sampleLimit, sl.bucketLimit, sl.maxSchema) defer func() { if err != nil { @@ -1714,56 +240,86 @@ loop: } } + exemplars = exemplars[:0] // Reset and reuse the exemplar slice. 
+ if seriesAlreadyScraped && parsedTimestamp == nil { err = storage.ErrDuplicateSampleForTimestamp } else { + st := int64(0) if sl.enableSTZeroIngestion { - if stMs := p.StartTimestamp(); stMs != 0 { + // p.StartTimestamp() tend to be expensive (e.g. OM1). Do it only if we care. + st = p.StartTimestamp() + } + + for hasExemplar := p.Exemplar(&e); hasExemplar; hasExemplar = p.Exemplar(&e) { + if !e.HasTs { if isHistogram { - if h != nil { - ref, err = app.AppendHistogramSTZeroSample(ref, lset, t, stMs, h, nil) - } else { - ref, err = app.AppendHistogramSTZeroSample(ref, lset, t, stMs, nil, fh) - } - } else { - ref, err = app.AppendSTZeroSample(ref, lset, t, stMs) + // We drop exemplars for native histograms if they don't have a timestamp. + // Missing timestamps are deliberately not supported as we want to start + // enforcing timestamps for exemplars as otherwise proper deduplication + // is inefficient and purely based on heuristics: we cannot distinguish + // between repeated exemplars and new instances with the same values. + // This is done silently without logs as it is not an error but out of spec. + // This does not affect classic histograms so that behaviour is unchanged. + e = exemplar.Exemplar{} // Reset for the next fetch. + continue } - if err != nil && !errors.Is(err, storage.ErrOutOfOrderST) { // OOO is a common case, ignoring completely for now. - // ST is an experimental feature. For now, we don't need to fail the - // scrape on errors updating the created timestamp, log debug. - sl.l.Debug("Error when appending ST in scrape loop", "series", string(met), "ct", stMs, "t", t, "err", err) + e.Ts = t + } + exemplars = append(exemplars, e) + e = exemplar.Exemplar{} // Reset for the next fetch. + } + + // Prepare append call. + appOpts := storage.AOptions{ + MetricFamilyName: yoloString(lastMFName), + } + if len(exemplars) > 0 { + // Sort so that checking for duplicates / out of order is more efficient during validation. 
+ // TODO(bwplotka): Double check if this is even true now. + slices.SortFunc(exemplars, exemplar.Compare) + appOpts.Exemplars = exemplars + } + + // TODO(bwplotka): This mimicks the scrape appender v1 flow. Once we remove v1 + // flow we should rename "appendMetadataToWAL" flag to "passMetadata" because for v2 flow + // the metadata storage detail is behind the appendableV2 contract. + // + // When passing metadata now, we no longer need to check if metadata changed as per v2 contract. We could + // consider no flag and always attach the metadata. Unfortunately because of the limitation of the OpenMetrics 1.0 + // (hopefully fixed in OpenMetrics 2.0) there are edge cases for known to unknown metadata series switch that + // is expensive to detect. As a result we keep this opt-in for now (could be also scrape option). + if sl.appendMetadataToWAL && lastMeta != nil { + if !seriesCached || lastMeta.lastIterChange != sl.cache.iter { + // In majority cases we can trust that the current series/histogram is matching the lastMeta and lastMFName. + // However, optional TYPE etc metadata and broken OM text can break this, detect those cases here. + // TODO(bwplotka): Consider moving this to parser as many parser users end up doing this (e.g. ST and NHCB parsing). + if !isSeriesPartOfFamily(lset.Get(model.MetricNameLabel), lastMFName, lastMeta.Type) { + lastMeta = nil } } + if lastMeta != nil { + appOpts.Metadata = lastMeta.Metadata + } } - if isHistogram { - if h != nil { - ref, err = app.AppendHistogram(ref, lset, t, h, nil) - } else { - ref, err = app.AppendHistogram(ref, lset, t, nil, fh) - } - } else { - ref, err = app.Append(ref, lset, t, val) - } + // Append sample to the storage. 
+ ref, err = app.Append(ref, lset, st, t, val, h, fh, appOpts) } - - if err == nil { - if (parsedTimestamp == nil || sl.trackTimestampsStaleness) && ce != nil { - sl.cache.trackStaleness(ce.ref, ce) - } - } - - sampleAdded, err = sl.checkAddError(met, err, &sampleLimitErr, &bucketLimitErr, &appErrs) + sampleAdded, err = sl.checkAddError(met, exemplars, err, &sampleLimitErr, &bucketLimitErr, &appErrs) if err != nil { if !errors.Is(err, storage.ErrNotFound) { sl.l.Debug("Unexpected error", "series", string(met), "err", err) } break loop } + if (parsedTimestamp == nil || sl.trackTimestampsStaleness) && ce != nil { + sl.cache.trackStaleness(ce.ref, ce) + } - // If series wasn't cached (is new, not seen on previous scrape) we need need to add it to the scrape cache. + // If series wasn't cached (is new, not seen on previous scrape) we need to add it to the scrape cache. // But we only do this for series that were appended to TSDB without errors. - // If a series was new but we didn't append it due to sample_limit or other errors then we don't need + // If a series was new, but we didn't append it due to sample_limit or other errors then we don't need // it in the scrape cache because we don't need to emit StaleNaNs for it when it disappears. if !seriesCached && sampleAdded { ce = sl.cache.addRef(met, ref, lset, hash) @@ -1782,62 +338,6 @@ loop: // We still report duplicated samples here since this number should be the exact number // of time series exposed on a scrape after relabelling. added++ - exemplars = exemplars[:0] // Reset and reuse the exemplar slice. - for hasExemplar := p.Exemplar(&e); hasExemplar; hasExemplar = p.Exemplar(&e) { - if !e.HasTs { - if isHistogram { - // We drop exemplars for native histograms if they don't have a timestamp. 
- // Missing timestamps are deliberately not supported as we want to start - // enforcing timestamps for exemplars as otherwise proper deduplication - // is inefficient and purely based on heuristics: we cannot distinguish - // between repeated exemplars and new instances with the same values. - // This is done silently without logs as it is not an error but out of spec. - // This does not affect classic histograms so that behaviour is unchanged. - e = exemplar.Exemplar{} // Reset for next time round loop. - continue - } - e.Ts = t - } - exemplars = append(exemplars, e) - e = exemplar.Exemplar{} // Reset for next time round loop. - } - // Sort so that checking for duplicates / out of order is more efficient during validation. - slices.SortFunc(exemplars, exemplar.Compare) - outOfOrderExemplars := 0 - for _, e := range exemplars { - _, exemplarErr := app.AppendExemplar(ref, lset, e) - switch { - case exemplarErr == nil: - // Do nothing. - case errors.Is(exemplarErr, storage.ErrOutOfOrderExemplar): - outOfOrderExemplars++ - default: - // Since exemplar storage is still experimental, we don't fail the scrape on ingestion errors. - sl.l.Debug("Error while adding exemplar in AddExemplar", "exemplar", fmt.Sprintf("%+v", e), "err", exemplarErr) - } - } - if outOfOrderExemplars > 0 && outOfOrderExemplars == len(exemplars) { - // Only report out of order exemplars if all are out of order, otherwise this was a partial update - // to some existing set of exemplars. - appErrs.numExemplarOutOfOrder += outOfOrderExemplars - sl.l.Debug("Out of order exemplars", "count", outOfOrderExemplars, "latest", fmt.Sprintf("%+v", exemplars[len(exemplars)-1])) - sl.metrics.targetScrapeExemplarOutOfOrder.Add(float64(outOfOrderExemplars)) - } - - if sl.appendMetadataToWAL && lastMeta != nil { - // Is it new series OR did metadata change for this family? 
- if !seriesCached || lastMeta.lastIterChange == sl.cache.iter { - // In majority cases we can trust that the current series/histogram is matching the lastMeta and lastMFName. - // However, optional TYPE etc metadata and broken OM text can break this, detect those cases here. - // TODO(bwplotka): Consider moving this to parser as many parser users end up doing this (e.g. ST and NHCB parsing). - if isSeriesPartOfFamily(lset.Get(model.MetricNameLabel), lastMFName, lastMeta.Type) { - if _, merr := app.UpdateMetadata(ref, lset, lastMeta.Metadata); merr != nil { - // No need to fail the scrape on errors appending metadata. - sl.l.Debug("Error when appending metadata in scrape loop", "ref", fmt.Sprintf("%d", ref), "metadata", fmt.Sprintf("%+v", lastMeta.Metadata), "err", merr) - } - } - } - } } if sampleLimitErr != nil { if err == nil { @@ -1866,269 +366,12 @@ loop: sl.l.Warn("Error on ingesting out-of-order exemplars", "num_dropped", appErrs.numExemplarOutOfOrder) } if err == nil { - err = sl.updateStaleMarkers(app, defTime) + err = sl.updateStaleMarkersV2(app, defTime) } return total, added, seriesAdded, err } -func isSeriesPartOfFamily(mName string, mfName []byte, typ model.MetricType) bool { - mfNameStr := yoloString(mfName) - if !strings.HasPrefix(mName, mfNameStr) { // Fast path. - return false - } - - var ( - gotMFName string - ok bool - ) - switch typ { - case model.MetricTypeCounter: - // Prometheus allows _total, cut it from mf name to support this case. 
- mfNameStr, _ = strings.CutSuffix(mfNameStr, "_total") - - gotMFName, ok = strings.CutSuffix(mName, "_total") - if !ok { - gotMFName = mName - } - case model.MetricTypeHistogram: - gotMFName, ok = strings.CutSuffix(mName, "_bucket") - if !ok { - gotMFName, ok = strings.CutSuffix(mName, "_sum") - if !ok { - gotMFName, ok = strings.CutSuffix(mName, "_count") - if !ok { - gotMFName = mName - } - } - } - case model.MetricTypeGaugeHistogram: - gotMFName, ok = strings.CutSuffix(mName, "_bucket") - if !ok { - gotMFName, ok = strings.CutSuffix(mName, "_gsum") - if !ok { - gotMFName, ok = strings.CutSuffix(mName, "_gcount") - if !ok { - gotMFName = mName - } - } - } - case model.MetricTypeSummary: - gotMFName, ok = strings.CutSuffix(mName, "_sum") - if !ok { - gotMFName, ok = strings.CutSuffix(mName, "_count") - if !ok { - gotMFName = mName - } - } - case model.MetricTypeInfo: - // Technically prometheus text does not support info type, but we might - // accidentally allow info type in prom parse, so support metric family names - // with the _info explicitly too. - mfNameStr, _ = strings.CutSuffix(mfNameStr, "_info") - - gotMFName, ok = strings.CutSuffix(mName, "_info") - if !ok { - gotMFName = mName - } - default: - gotMFName = mName - } - return mfNameStr == gotMFName -} - -// Adds samples to the appender, checking the error, and then returns the # of samples added, -// whether the caller should continue to process more samples, and any sample or bucket limit errors. -// Switch error cases for Sample and Bucket limits are checked first since they're more common -// during normal operation (e.g., accidental cardinality explosion, sudden traffic spikes). -// Current case ordering prevents exercising other cases when limits are exceeded. -// Remaining error cases typically occur only a few times, often during initial setup. 
-func (sl *scrapeLoop) checkAddError(met []byte, err error, sampleLimitErr, bucketLimitErr *error, appErrs *appendErrors) (sampleAdded bool, _ error) { - switch { - case err == nil: - return true, nil - case errors.Is(err, errSampleLimit): - // Keep on parsing output if we hit the limit, so we report the correct - // total number of samples scraped. - *sampleLimitErr = err - return false, nil - case errors.Is(err, errBucketLimit): - // Keep on parsing output if we hit the limit, so we report the bucket - // total number of samples scraped. - *bucketLimitErr = err - return false, nil - case errors.Is(err, storage.ErrOutOfOrderSample): - appErrs.numOutOfOrder++ - sl.l.Debug("Out of order sample", "series", string(met)) - sl.metrics.targetScrapeSampleOutOfOrder.Inc() - return false, nil - case errors.Is(err, storage.ErrDuplicateSampleForTimestamp): - appErrs.numDuplicates++ - sl.l.Debug("Duplicate sample for timestamp", "series", string(met)) - sl.metrics.targetScrapeSampleDuplicate.Inc() - return false, nil - case errors.Is(err, storage.ErrOutOfBounds): - appErrs.numOutOfBounds++ - sl.l.Debug("Out of bounds metric", "series", string(met)) - sl.metrics.targetScrapeSampleOutOfBounds.Inc() - return false, nil - case errors.Is(err, storage.ErrNotFound): - return false, storage.ErrNotFound - default: - return false, err - } -} - -// reportSample represents automatically generated timeseries documented in -// https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series -type reportSample struct { - metadata.Metadata - name []byte -} - -// The constants are suffixed with the invalid \xff unicode rune to avoid collisions -// with scraped metrics in the cache. -var ( - scrapeHealthMetric = reportSample{ - name: []byte("up" + "\xff"), - Metadata: metadata.Metadata{ - Type: model.MetricTypeGauge, - Help: "Health of the scrape target. 
1 means the target is healthy, 0 if the scrape failed.", - Unit: "targets", - }, - } - scrapeDurationMetric = reportSample{ - name: []byte("scrape_duration_seconds" + "\xff"), - Metadata: metadata.Metadata{ - Type: model.MetricTypeGauge, - Help: "Duration of the last scrape in seconds.", - Unit: "seconds", - }, - } - scrapeSamplesMetric = reportSample{ - name: []byte("scrape_samples_scraped" + "\xff"), - Metadata: metadata.Metadata{ - Type: model.MetricTypeGauge, - Help: "Number of samples last scraped.", - Unit: "samples", - }, - } - samplesPostRelabelMetric = reportSample{ - name: []byte("scrape_samples_post_metric_relabeling" + "\xff"), - Metadata: metadata.Metadata{ - Type: model.MetricTypeGauge, - Help: "Number of samples remaining after metric relabeling was applied.", - Unit: "samples", - }, - } - scrapeSeriesAddedMetric = reportSample{ - name: []byte("scrape_series_added" + "\xff"), - Metadata: metadata.Metadata{ - Type: model.MetricTypeGauge, - Help: "Number of series in the last scrape.", - Unit: "series", - }, - } - scrapeTimeoutMetric = reportSample{ - name: []byte("scrape_timeout_seconds" + "\xff"), - Metadata: metadata.Metadata{ - Type: model.MetricTypeGauge, - Help: "The configured scrape timeout for a target.", - Unit: "seconds", - }, - } - scrapeSampleLimitMetric = reportSample{ - name: []byte("scrape_sample_limit" + "\xff"), - Metadata: metadata.Metadata{ - Type: model.MetricTypeGauge, - Help: "The configured sample limit for a target. Returns zero if there is no limit configured.", - Unit: "samples", - }, - } - scrapeBodySizeBytesMetric = reportSample{ - name: []byte("scrape_body_size_bytes" + "\xff"), - Metadata: metadata.Metadata{ - Type: model.MetricTypeGauge, - Help: "The uncompressed size of the last scrape response, if successful. 
Scrapes failing because body_size_limit is exceeded report -1, other scrape failures report 0.", - Unit: "bytes", - }, - } -) - -func (sl *scrapeLoop) report(app scrapeLoopAppendAdapter, start time.Time, duration time.Duration, scraped, added, seriesAdded, bytes int, scrapeErr error) (err error) { - sl.scraper.Report(start, duration, scrapeErr) - - ts := timestamp.FromTime(start) - - var health float64 - if scrapeErr == nil { - health = 1 - } - b := labels.NewBuilderWithSymbolTable(sl.symbolTable) - - if err = app.addReportSample(scrapeHealthMetric, ts, health, b, false); err != nil { - return err - } - if err = app.addReportSample(scrapeDurationMetric, ts, duration.Seconds(), b, false); err != nil { - return err - } - if err = app.addReportSample(scrapeSamplesMetric, ts, float64(scraped), b, false); err != nil { - return err - } - if err = app.addReportSample(samplesPostRelabelMetric, ts, float64(added), b, false); err != nil { - return err - } - if err = app.addReportSample(scrapeSeriesAddedMetric, ts, float64(seriesAdded), b, false); err != nil { - return err - } - if sl.reportExtraMetrics { - if err = app.addReportSample(scrapeTimeoutMetric, ts, sl.timeout.Seconds(), b, false); err != nil { - return err - } - if err = app.addReportSample(scrapeSampleLimitMetric, ts, float64(sl.sampleLimit), b, false); err != nil { - return err - } - if err = app.addReportSample(scrapeBodySizeBytesMetric, ts, float64(bytes), b, false); err != nil { - return err - } - } - return err -} - -func (sl *scrapeLoop) reportStale(app scrapeLoopAppendAdapter, start time.Time) (err error) { - ts := timestamp.FromTime(start) - stale := math.Float64frombits(value.StaleNaN) - b := labels.NewBuilder(labels.EmptyLabels()) - - if err = app.addReportSample(scrapeHealthMetric, ts, stale, b, true); err != nil { - return err - } - if err = app.addReportSample(scrapeDurationMetric, ts, stale, b, true); err != nil { - return err - } - if err = app.addReportSample(scrapeSamplesMetric, ts, stale, b, 
true); err != nil { - return err - } - if err = app.addReportSample(samplesPostRelabelMetric, ts, stale, b, true); err != nil { - return err - } - if err = app.addReportSample(scrapeSeriesAddedMetric, ts, stale, b, true); err != nil { - return err - } - if sl.reportExtraMetrics { - if err = app.addReportSample(scrapeTimeoutMetric, ts, stale, b, true); err != nil { - return err - } - if err = app.addReportSample(scrapeSampleLimitMetric, ts, stale, b, true); err != nil { - return err - } - if err = app.addReportSample(scrapeBodySizeBytesMetric, ts, stale, b, true); err != nil { - return err - } - } - return err -} - -func (sl *scrapeLoopAppender) addReportSample(s reportSample, t int64, v float64, b *labels.Builder, rejectOOO bool) (err error) { +func (sl *scrapeLoopAppenderV2) addReportSample(s reportSample, t int64, v float64, b *labels.Builder, rejectOOO bool) (err error) { ce, ok, _ := sl.cache.get(s.name) var ref storage.SeriesRef var lset labels.Labels @@ -2144,25 +387,15 @@ func (sl *scrapeLoopAppender) addReportSample(s reportSample, t int64, v float64 lset = sl.reportSampleMutator(b.Labels()) } - // This will be improved in AppenderV2. - if rejectOOO { - sl.SetOptions(&aOptionRejectEarlyOOO) - ref, err = sl.Append(ref, lset, t, v) - sl.SetOptions(nil) - } else { - ref, err = sl.Append(ref, lset, t, v) - } - + ref, err = sl.Append(ref, lset, 0, t, v, nil, nil, storage.AOptions{ + MetricFamilyName: yoloString(s.name), + Metadata: s.Metadata, + RejectOutOfOrder: rejectOOO, + }) switch { case err == nil: if !ok { sl.cache.addRef(s.name, ref, lset, lset.Hash()) - // We only need to add metadata once a scrape target appears. 
- if sl.appendMetadataToWAL { - if _, merr := sl.UpdateMetadata(ref, lset, s.Metadata); merr != nil { - sl.l.Debug("Error when appending metadata in addReportSample", "ref", fmt.Sprintf("%d", ref), "metadata", fmt.Sprintf("%+v", s.Metadata), "err", merr) - } - } } return nil case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrDuplicateSampleForTimestamp): @@ -2173,80 +406,3 @@ func (sl *scrapeLoopAppender) addReportSample(s reportSample, t int64, v float64 return err } } - -// zeroConfig returns a new scrape config that only contains configuration items -// that alter metrics. -func zeroConfig(c *config.ScrapeConfig) *config.ScrapeConfig { - z := *c - // We zero out the fields that for sure don't affect scrape. - z.ScrapeInterval = 0 - z.ScrapeTimeout = 0 - z.SampleLimit = 0 - z.HTTPClientConfig = config_util.HTTPClientConfig{} - return &z -} - -// reusableCache compares two scrape config and tells whether the cache is still -// valid. -func reusableCache(r, l *config.ScrapeConfig) bool { - if r == nil || l == nil { - return false - } - return reflect.DeepEqual(zeroConfig(r), zeroConfig(l)) -} - -// CtxKey is a dedicated type for keys of context-embedded values propagated -// with the scrape context. -type ctxKey int - -// Valid CtxKey values. 
-const ( - ctxKeyMetadata ctxKey = iota + 1 - ctxKeyTarget -) - -func ContextWithMetricMetadataStore(ctx context.Context, s MetricMetadataStore) context.Context { - return context.WithValue(ctx, ctxKeyMetadata, s) -} - -func MetricMetadataStoreFromContext(ctx context.Context) (MetricMetadataStore, bool) { - s, ok := ctx.Value(ctxKeyMetadata).(MetricMetadataStore) - return s, ok -} - -func ContextWithTarget(ctx context.Context, t *Target) context.Context { - return context.WithValue(ctx, ctxKeyTarget, t) -} - -func TargetFromContext(ctx context.Context) (*Target, bool) { - t, ok := ctx.Value(ctxKeyTarget).(*Target) - return t, ok -} - -func pickSchema(bucketFactor float64) int32 { - if bucketFactor <= 1 { - bucketFactor = 1.00271 - } - floor := math.Floor(-math.Log2(math.Log2(bucketFactor))) - switch { - case floor >= float64(histogram.ExponentialSchemaMax): - return histogram.ExponentialSchemaMax - case floor <= float64(histogram.ExponentialSchemaMin): - return histogram.ExponentialSchemaMin - default: - return int32(floor) - } -} - -func newScrapeClient(cfg config_util.HTTPClientConfig, name string, optFuncs ...config_util.HTTPClientOption) (*http.Client, error) { - client, err := config_util.NewClientFromConfig(cfg, name, optFuncs...) 
- if err != nil { - return nil, fmt.Errorf("error creating HTTP client: %w", err) - } - client.Transport = otelhttp.NewTransport( - client.Transport, - otelhttp.WithClientTrace(func(ctx context.Context) *httptrace.ClientTrace { - return otelhttptrace.NewClientTrace(ctx, otelhttptrace.WithoutSubSpans()) - })) - return client, nil -} diff --git a/scrape/scrape_append_v2_test.go b/scrape/scrape_append_v2_test.go index c2b2ae132c..e5f799a7fc 100644 --- a/scrape/scrape_append_v2_test.go +++ b/scrape/scrape_append_v2_test.go @@ -15,20 +15,14 @@ package scrape import ( "bytes" - "compress/gzip" "context" - "encoding/binary" "errors" "fmt" "io" - "maps" "math" "net/http" "net/http/httptest" "net/url" - "os" - "sort" - "strconv" "strings" "sync" "testing" @@ -36,22 +30,14 @@ import ( "time" "github.com/gogo/protobuf/proto" - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" dto "github.com/prometheus/client_model/go" config_util "github.com/prometheus/common/config" - "github.com/prometheus/common/expfmt" "github.com/prometheus/common/model" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" - "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/propagation" - sdktrace "go.opentelemetry.io/otel/sdk/trace" "go.uber.org/atomic" - "go.uber.org/goleak" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" @@ -71,46 +57,30 @@ import ( "github.com/prometheus/prometheus/util/testutil" ) -func TestMain(m *testing.M) { - testutil.TolerantVerifyLeak(m) -} - -func newTestRegistryAndScrapeMetrics(t testing.TB) (*prometheus.Registry, *scrapeMetrics) { - reg := prometheus.NewRegistry() - metrics, err := newScrapeMetrics(reg) - require.NoError(t, err) - return reg, metrics -} - -func newTestScrapeMetrics(t testing.TB) *scrapeMetrics { - _, metrics := 
newTestRegistryAndScrapeMetrics(t) - return metrics -} - -func TestNewScrapePool(t *testing.T) { +func TestNewScrapePool_AppendV2(t *testing.T) { var ( app = teststorage.NewAppendable() cfg = &config.ScrapeConfig{ MetricNameValidationScheme: model.UTF8Validation, MetricNameEscapingScheme: model.AllowUTF8, } - sp, err = newScrapePool(cfg, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, err = newScrapePool(cfg, nil, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) ) require.NoError(t, err) - a, ok := sp.appendable.(*teststorage.Appendable) + a, ok := sp.appendableV2.(*teststorage.Appendable) require.True(t, ok, "Failure to append.") - require.Equal(t, app, a, "Wrong sample appender.") + require.Equal(t, app, a, "Wrong sample AppenderV2.") require.Equal(t, cfg, sp.config, "Wrong scrape config.") } -func TestStorageHandlesOutOfOrderTimestamps(t *testing.T) { +func TestStorageHandlesOutOfOrderTimestamps_AppendV2(t *testing.T) { // Test with default OutOfOrderTimeWindow (0) t.Run("Out-Of-Order Sample Disabled", func(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) - runScrapeLoopTest(t, s, false) + runScrapeLoopTestAppendV2(t, s, false) }) // Test with specific OutOfOrderTimeWindow (600000) @@ -118,12 +88,12 @@ func TestStorageHandlesOutOfOrderTimestamps(t *testing.T) { s := teststorage.New(t, 600000) t.Cleanup(func() { _ = s.Close() }) - runScrapeLoopTest(t, s, true) + runScrapeLoopTestAppendV2(t, s, true) }) } -func runScrapeLoopTest(t *testing.T, s *teststorage.TestStorage, expectOutOfOrder bool) { - sl, _ := newTestScrapeLoop(t, withAppendable(s)) +func runScrapeLoopTestAppendV2(t *testing.T, s *teststorage.TestStorage, expectOutOfOrder bool) { + sl, _ := newTestScrapeLoop(t, withAppendableV2(s)) // Current time for generating timestamps. 
now := time.Now() @@ -191,7 +161,7 @@ func runScrapeLoopTest(t *testing.T, s *teststorage.TestStorage, expectOutOfOrde } // Regression test against https://github.com/prometheus/prometheus/issues/15831. -func TestScrapeAppend_MetadataUpdate(t *testing.T) { +func TestScrapeAppendV2_MetadataPassed(t *testing.T) { const ( scrape1 = `# TYPE test_metric counter # HELP test_metric some help text @@ -215,7 +185,7 @@ test_metric2{foo="bar"} 22 ) appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendableV2(appTest)) now := time.Now() app := sl.appender() @@ -228,12 +198,15 @@ test_metric2{foo="bar"} 22 }, appTest.ResultMetadata()) appTest.ResultReset() - // Next (the same) scrape should not new metadata entries. + // Next (the same) scrape should pass new metadata entries as per always-on metadata Appendable V2 contract. app = sl.appender() _, _, _, err = app.append([]byte(scrape1), "application/openmetrics-text", now.Add(15*time.Second)) require.NoError(t, err) require.NoError(t, app.Commit()) - require.Empty(t, appTest.ResultMetadata()) + testutil.RequireEqual(t, []sample{ + {L: labels.FromStrings("__name__", "test_metric_total"), M: metadata.Metadata{Type: "counter", Unit: "metric", Help: "some help text"}}, + {L: labels.FromStrings("__name__", "test_metric2", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}}, + }, appTest.ResultMetadata()) appTest.ResultReset() app = sl.appender() @@ -247,9 +220,9 @@ test_metric2{foo="bar"} 22 appTest.ResultReset() } -func TestScrapeReportMetadata(t *testing.T) { +func TestScrapeReportMetadata_AppendV2(t *testing.T) { appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendableV2(appTest)) app := sl.appender() now := time.Now() @@ -264,7 +237,7 @@ func TestScrapeReportMetadata(t *testing.T) { }, appTest.ResultMetadata()) } -func 
TestIsSeriesPartOfFamily(t *testing.T) { +func TestIsSeriesPartOfFamily_AppendV2(t *testing.T) { t.Run("counter", func(t *testing.T) { require.True(t, isSeriesPartOfFamily("http_requests_total", []byte("http_requests_total"), model.MetricTypeCounter)) // Prometheus text style. require.True(t, isSeriesPartOfFamily("http_requests_total", []byte("http_requests"), model.MetricTypeCounter)) // OM text style. @@ -312,7 +285,7 @@ func TestIsSeriesPartOfFamily(t *testing.T) { }) } -func TestDroppedTargetsList(t *testing.T) { +func TestDroppedTargetsList_AppendV2(t *testing.T) { var ( app = teststorage.NewAppendable() cfg = &config.ScrapeConfig{ @@ -337,7 +310,7 @@ func TestDroppedTargetsList(t *testing.T) { }, }, } - sp, _ = newScrapePool(cfg, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, _ = newScrapePool(cfg, nil, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) expectedLabelSetString = "{__address__=\"127.0.0.1:9090\", __scrape_interval__=\"0s\", __scrape_timeout__=\"0s\", job=\"dropMe\"}" expectedLength = 2 ) @@ -355,461 +328,65 @@ func TestDroppedTargetsList(t *testing.T) { require.Equal(t, expectedLength, sp.droppedTargetsCount) } -// TestDiscoveredLabelsUpdate checks that DiscoveredLabels are updated -// even when new labels don't affect the target `hash`. -func TestDiscoveredLabelsUpdate(t *testing.T) { - sp := newTestScrapePool(t, nil) - - // These are used when syncing so need this to avoid a panic. 
- sp.config = &config.ScrapeConfig{ - ScrapeInterval: model.Duration(1), - ScrapeTimeout: model.Duration(1), - MetricNameValidationScheme: model.UTF8Validation, - MetricNameEscapingScheme: model.AllowUTF8, - } - sp.activeTargets = make(map[uint64]*Target) - t1 := &Target{ - tLabels: model.LabelSet{"label": "name"}, - scrapeConfig: sp.config, - } - sp.activeTargets[t1.hash()] = t1 - - t2 := &Target{ - tLabels: model.LabelSet{"labelNew": "nameNew"}, - scrapeConfig: sp.config, - } - sp.sync([]*Target{t2}) - - lb := labels.NewBuilder(labels.EmptyLabels()) - require.Equal(t, t2.DiscoveredLabels(lb), sp.activeTargets[t1.hash()].DiscoveredLabels(lb)) -} - -type testLoop struct { - startFunc func(interval, timeout time.Duration, errc chan<- error) - stopFunc func() - forcedErr error - forcedErrMtx sync.Mutex - runOnce bool - interval time.Duration - timeout time.Duration -} - -func (*testLoop) setScrapeFailureLogger(FailureLogger) { -} - -func (l *testLoop) run(errc chan<- error) { - if l.runOnce { - panic("loop must be started only once") - } - l.runOnce = true - l.startFunc(l.interval, l.timeout, errc) -} - -func (*testLoop) disableEndOfRunStalenessMarkers() { -} - -func (l *testLoop) setForcedError(err error) { - l.forcedErrMtx.Lock() - defer l.forcedErrMtx.Unlock() - l.forcedErr = err -} - -func (l *testLoop) getForcedError() error { - l.forcedErrMtx.Lock() - defer l.forcedErrMtx.Unlock() - return l.forcedErr -} - -func (l *testLoop) stop() { - l.stopFunc() -} - -func (*testLoop) getCache() *scrapeCache { - return nil -} - -func TestScrapePoolStop(t *testing.T) { - t.Parallel() - sp := newTestScrapePool(t, nil) - - var mtx sync.Mutex - stopped := map[uint64]bool{} - numTargets := 20 - - // Stopping the scrape pool must call stop() on all scrape loops, - // clean them and the respective targets up. It must wait until each loop's - // stop function returned before returning itself. 
- - for i := range numTargets { - t := &Target{ - labels: labels.FromStrings(model.AddressLabel, fmt.Sprintf("example.com:%d", i)), - scrapeConfig: &config.ScrapeConfig{}, - } - l := &testLoop{} - d := time.Duration((i+1)*20) * time.Millisecond - l.stopFunc = func() { - time.Sleep(d) - - mtx.Lock() - stopped[t.hash()] = true - mtx.Unlock() - } - - sp.activeTargets[t.hash()] = t - sp.loops[t.hash()] = l - } - - done := make(chan struct{}) - stopTime := time.Now() - - go func() { - sp.stop() - close(done) - }() - - select { - case <-time.After(5 * time.Second): - require.Fail(t, "scrapeLoop.stop() did not return as expected") - case <-done: - // This should have taken at least as long as the last target slept. - require.GreaterOrEqual(t, time.Since(stopTime), time.Duration(numTargets*20)*time.Millisecond, "scrapeLoop.stop() exited before all targets stopped") - } - - mtx.Lock() - require.Len(t, stopped, numTargets, "Unexpected number of stopped loops") - mtx.Unlock() - - require.Empty(t, sp.activeTargets, "Targets were not cleared on stopping: %d left", len(sp.activeTargets)) - require.Empty(t, sp.loops, "Loops were not cleared on stopping: %d left", len(sp.loops)) -} - -// TestScrapePoolReload tests reloading logic, so: -// * all loops are reloaded, reusing cache if scrape config changed. -// * reloaded loops are stopped before new ones are started. -// * new scrapeLoops are configured with the updated scrape config. -func TestScrapePoolReload(t *testing.T) { - t.Parallel() - - var ( - mtx sync.Mutex - numTargets = 20 - stopped = map[uint64]bool{} - ) - - cfg0 := &config.ScrapeConfig{} - cfg1 := &config.ScrapeConfig{ - ScrapeInterval: model.Duration(3 * time.Second), - ScrapeTimeout: model.Duration(2 * time.Second), - MetricNameValidationScheme: model.UTF8Validation, - MetricNameEscapingScheme: model.AllowUTF8, - - // Test a few example options. 
- SampleLimit: 123, - ScrapeFallbackProtocol: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited", - } - newLoopCfg1 := func(opts scrapeLoopOptions) loop { - // Test cfg1 is being used. - require.Equal(t, cfg1, opts.sp.config) - - // Inject out testLoop that allows mocking start and stop. - l := &testLoop{interval: opts.interval, timeout: opts.timeout} - - // On start, expect previous loop instances for the same target to be stopped. - l.startFunc = func(interval, timeout time.Duration, _ chan<- error) { - // Ensure cfg1 interval and timeout are correctly configured. - require.Equal(t, time.Duration(cfg1.ScrapeInterval), interval, "Unexpected scrape interval") - require.Equal(t, time.Duration(cfg1.ScrapeTimeout), timeout, "Unexpected scrape timeout") - - mtx.Lock() - targetScraper := opts.scraper.(*targetScraper) - require.True(t, stopped[targetScraper.hash()], "Scrape loop for %v not stopped yet", targetScraper) - mtx.Unlock() - } - return l - } - - // Create test pool. - reg, metrics := newTestRegistryAndScrapeMetrics(t) - sp := newTestScrapePool(t, newLoopCfg1) - sp.metrics = metrics - - // Prefill pool with 20 loops, simulating 20 scrape targets. - for i := range numTargets { - t := &Target{ - labels: labels.FromStrings(model.AddressLabel, fmt.Sprintf("example.com:%d", i)), - scrapeConfig: cfg0, - } - l := &testLoop{} - d := time.Duration((i+1)*20) * time.Millisecond - l.stopFunc = func() { - time.Sleep(d) // Sleep uneven time on stop. - - mtx.Lock() - stopped[t.hash()] = true - mtx.Unlock() - } - - sp.activeTargets[t.hash()] = t - sp.loops[t.hash()] = l - } - - beforeTargets := map[uint64]*Target{} - maps.Copy(beforeTargets, sp.activeTargets) - - // Reloading a scrape pool with a new scrape configuration must stop all scrape - // loops and start new ones. A new loop must not be started before the preceding - // one terminated. 
- require.NoError(t, sp.reload(cfg1)) - var stoppedCount int - mtx.Lock() - stoppedCount = len(stopped) - mtx.Unlock() - require.Equal(t, numTargets, stoppedCount, "Unexpected number of stopped loops") - require.Equal(t, sp.activeTargets, beforeTargets, "Reloading affected target states unexpectedly") - require.Len(t, sp.loops, numTargets, "Unexpected number of loops after reload") - - // Check if prometheus_target_reload_length_seconds points to cfg1.ScrapeInterval. - got, err := gatherLabels(reg, "prometheus_target_reload_length_seconds") - require.NoError(t, err) - expectedName, expectedValue := "interval", cfg1.ScrapeInterval.String() - require.Equal(t, [][]*dto.LabelPair{{{Name: &expectedName, Value: &expectedValue}}}, got) - require.Equal(t, 1.0, prom_testutil.ToFloat64(sp.metrics.targetScrapePoolReloads)) -} - -func TestScrapePoolReloadPreserveRelabeledIntervalTimeout(t *testing.T) { - reloadCfg := &config.ScrapeConfig{ - ScrapeInterval: model.Duration(3 * time.Second), - ScrapeTimeout: model.Duration(2 * time.Second), - MetricNameValidationScheme: model.UTF8Validation, - MetricNameEscapingScheme: model.AllowUTF8, - } - newLoop := func(opts scrapeLoopOptions) loop { - l := &testLoop{interval: opts.interval, timeout: opts.timeout} - l.startFunc = func(interval, timeout time.Duration, _ chan<- error) { - require.Equal(t, 5*time.Second, interval, "Unexpected scrape interval") - require.Equal(t, 3*time.Second, timeout, "Unexpected scrape timeout") - } - return l - } - reg, metrics := newTestRegistryAndScrapeMetrics(t) - sp := newTestScrapePool(t, newLoop) - sp.activeTargets[1] = &Target{ - labels: labels.FromStrings(model.ScrapeIntervalLabel, "5s", model.ScrapeTimeoutLabel, "3s"), - } - sp.metrics = metrics - sp.loops[1] = noopLoop() - - err := sp.reload(reloadCfg) - if err != nil { - t.Fatalf("unable to reload configuration: %s", err) - } - // Check that the reload metric is labeled with the pool interval, not the overridden interval. 
- got, err := gatherLabels(reg, "prometheus_target_reload_length_seconds") - require.NoError(t, err) - expectedName, expectedValue := "interval", "3s" - require.Equal(t, [][]*dto.LabelPair{{{Name: &expectedName, Value: &expectedValue}}}, got) -} - -// Gather metrics from the provided Gatherer with specified familyName, -// and return all sets of name/value pairs. -func gatherLabels(g prometheus.Gatherer, familyName string) ([][]*dto.LabelPair, error) { - families, err := g.Gather() - if err != nil { - return nil, err - } - ret := make([][]*dto.LabelPair, 0) - for _, f := range families { - if f.GetName() == familyName { - for _, m := range f.GetMetric() { - ret = append(ret, m.GetLabel()) - } - break - } - } - return ret, nil -} - -func TestScrapePoolTargetLimit(t *testing.T) { - var wg sync.WaitGroup - // On starting to run, new loops created on reload check whether their preceding - // equivalents have been stopped. - newLoop := func(scrapeLoopOptions) loop { - wg.Add(1) - l := &testLoop{ - startFunc: func(_, _ time.Duration, _ chan<- error) { - wg.Done() - }, - stopFunc: func() {}, - } - return l - } - - sp := newTestScrapePool(t, newLoop) - - var tgs []*targetgroup.Group - for i := range 50 { - tgs = append(tgs, - &targetgroup.Group{ - Targets: []model.LabelSet{ - {model.AddressLabel: model.LabelValue(fmt.Sprintf("127.0.0.1:%d", 9090+i))}, - }, - }, - ) - } - - var limit uint - reloadWithLimit := func(l uint) { - limit = l - require.NoError(t, sp.reload(&config.ScrapeConfig{ - ScrapeInterval: model.Duration(3 * time.Second), - ScrapeTimeout: model.Duration(2 * time.Second), - MetricNameValidationScheme: model.UTF8Validation, - MetricNameEscapingScheme: model.AllowUTF8, - TargetLimit: l, - })) - } - - var targets int - loadTargets := func(n int) { - targets = n - sp.Sync(tgs[:n]) - } - - validateIsRunning := func() { - wg.Wait() - for _, l := range sp.loops { - require.True(t, l.(*testLoop).runOnce, "loop should be running") - } - } - - validateErrorMessage := 
func(shouldErr bool) { - for _, l := range sp.loops { - lerr := l.(*testLoop).getForcedError() - if shouldErr { - require.Error(t, lerr, "error was expected for %d targets with a limit of %d", targets, limit) - require.EqualError(t, lerr, fmt.Sprintf("target_limit exceeded (number of targets: %d, limit: %d)", targets, limit)) - } else { - require.NoError(t, lerr) - } - } - } - - reloadWithLimit(0) - loadTargets(50) - validateIsRunning() - - // Simulate an initial config with a limit. - sp.config.TargetLimit = 30 - limit = 30 - loadTargets(50) - validateIsRunning() - validateErrorMessage(true) - - reloadWithLimit(50) - validateIsRunning() - validateErrorMessage(false) - - reloadWithLimit(40) - validateIsRunning() - validateErrorMessage(true) - - loadTargets(30) - validateIsRunning() - validateErrorMessage(false) - - loadTargets(40) - validateIsRunning() - validateErrorMessage(false) - - loadTargets(41) - validateIsRunning() - validateErrorMessage(true) - - reloadWithLimit(0) - validateIsRunning() - validateErrorMessage(false) - - reloadWithLimit(51) - validateIsRunning() - validateErrorMessage(false) - - tgs = append(tgs, - &targetgroup.Group{ - Targets: []model.LabelSet{ - {model.AddressLabel: "127.0.0.1:1090"}, - }, - }, - &targetgroup.Group{ - Targets: []model.LabelSet{ - {model.AddressLabel: "127.0.0.1:1090"}, - }, - }, - ) - - sp.Sync(tgs) - validateIsRunning() - validateErrorMessage(false) -} - -func TestScrapePoolAppenderWithLimits(t *testing.T) { +func TestScrapePoolAppenderWithLimits_AppendV2(t *testing.T) { // Create a unique value, to validate the correct chain of appenders. 
- baseAppender := struct{ storage.Appender }{} - appendable := appendableFunc(func(context.Context) storage.Appender { return baseAppender }) + baseAppender := struct{ storage.AppenderV2 }{} + appendable := appendableV2Func(func(context.Context) storage.AppenderV2 { return baseAppender }) - sl, _ := newTestScrapeLoop(t, withAppendable(appendable)) - wrapped := appenderWithLimits(sl.appendable.Appender(context.Background()), 0, 0, histogram.ExponentialSchemaMax) + sl, _ := newTestScrapeLoop(t, withAppendableV2(appendable)) + wrapped := appenderV2WithLimits(sl.appendableV2.AppenderV2(context.Background()), 0, 0, histogram.ExponentialSchemaMax) - tl, ok := wrapped.(*timeLimitAppender) - require.True(t, ok, "Expected timeLimitAppender but got %T", wrapped) + tl, ok := wrapped.(*timeLimitAppenderV2) + require.True(t, ok, "Expected timeLimitAppenderV2 but got %T", wrapped) - require.Equal(t, baseAppender, tl.Appender, "Expected base appender but got %T", tl.Appender) + require.Equal(t, baseAppender, tl.AppenderV2, "Expected base AppenderV2 but got %T", tl.AppenderV2) sampleLimit := 100 sl, _ = newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appendable + sl.appendableV2 = appendable sl.sampleLimit = sampleLimit }) - wrapped = appenderWithLimits(sl.appendable.Appender(context.Background()), sampleLimit, 0, histogram.ExponentialSchemaMax) + wrapped = appenderV2WithLimits(sl.appendableV2.AppenderV2(context.Background()), sampleLimit, 0, histogram.ExponentialSchemaMax) - la, ok := wrapped.(*limitAppender) - require.True(t, ok, "Expected limitAppender but got %T", wrapped) + la, ok := wrapped.(*limitAppenderV2) + require.True(t, ok, "Expected limitAppenderV2 but got %T", wrapped) - tl, ok = la.Appender.(*timeLimitAppender) - require.True(t, ok, "Expected timeLimitAppender but got %T", la.Appender) + tl, ok = la.AppenderV2.(*timeLimitAppenderV2) + require.True(t, ok, "Expected timeLimitAppenderV2 but got %T", la.AppenderV2) - require.Equal(t, baseAppender, 
tl.Appender, "Expected base appender but got %T", tl.Appender) + require.Equal(t, baseAppender, tl.AppenderV2, "Expected base AppenderV2 but got %T", tl.AppenderV2) - wrapped = appenderWithLimits(sl.appendable.Appender(context.Background()), sampleLimit, 100, histogram.ExponentialSchemaMax) + wrapped = appenderV2WithLimits(sl.appendableV2.AppenderV2(context.Background()), sampleLimit, 100, histogram.ExponentialSchemaMax) - bl, ok := wrapped.(*bucketLimitAppender) - require.True(t, ok, "Expected bucketLimitAppender but got %T", wrapped) + bl, ok := wrapped.(*bucketLimitAppenderV2) + require.True(t, ok, "Expected bucketLimitAppenderV2 but got %T", wrapped) - la, ok = bl.Appender.(*limitAppender) - require.True(t, ok, "Expected limitAppender but got %T", bl) + la, ok = bl.AppenderV2.(*limitAppenderV2) + require.True(t, ok, "Expected limitAppenderV2 but got %T", bl) - tl, ok = la.Appender.(*timeLimitAppender) - require.True(t, ok, "Expected timeLimitAppender but got %T", la.Appender) + tl, ok = la.AppenderV2.(*timeLimitAppenderV2) + require.True(t, ok, "Expected timeLimitAppenderV2 but got %T", la.AppenderV2) - require.Equal(t, baseAppender, tl.Appender, "Expected base appender but got %T", tl.Appender) + require.Equal(t, baseAppender, tl.AppenderV2, "Expected base AppenderV2 but got %T", tl.AppenderV2) - wrapped = appenderWithLimits(sl.appendable.Appender(context.Background()), sampleLimit, 100, 0) + wrapped = appenderV2WithLimits(sl.appendableV2.AppenderV2(context.Background()), sampleLimit, 100, 0) - ml, ok := wrapped.(*maxSchemaAppender) - require.True(t, ok, "Expected maxSchemaAppender but got %T", wrapped) + ml, ok := wrapped.(*maxSchemaAppenderV2) + require.True(t, ok, "Expected maxSchemaAppenderV2 but got %T", wrapped) - bl, ok = ml.Appender.(*bucketLimitAppender) - require.True(t, ok, "Expected bucketLimitAppender but got %T", wrapped) + bl, ok = ml.AppenderV2.(*bucketLimitAppenderV2) + require.True(t, ok, "Expected bucketLimitAppenderV2 but got %T", wrapped) 
- la, ok = bl.Appender.(*limitAppender) - require.True(t, ok, "Expected limitAppender but got %T", bl) + la, ok = bl.AppenderV2.(*limitAppenderV2) + require.True(t, ok, "Expected limitAppenderV2 but got %T", bl) - tl, ok = la.Appender.(*timeLimitAppender) - require.True(t, ok, "Expected timeLimitAppender but got %T", la.Appender) + tl, ok = la.AppenderV2.(*timeLimitAppenderV2) + require.True(t, ok, "Expected timeLimitAppenderV2 but got %T", la.AppenderV2) - require.Equal(t, baseAppender, tl.Appender, "Expected base appender but got %T", tl.Appender) + require.Equal(t, baseAppender, tl.AppenderV2, "Expected base AppenderV2 but got %T", tl.AppenderV2) } -func TestScrapePoolRaces(t *testing.T) { +func TestScrapePoolRaces_AppendV2(t *testing.T) { t.Parallel() interval, _ := model.ParseDuration("1s") timeout, _ := model.ParseDuration("500ms") @@ -821,7 +398,7 @@ func TestScrapePoolRaces(t *testing.T) { MetricNameEscapingScheme: model.AllowUTF8, } } - sp, _ := newScrapePool(newConfig(), teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, _ := newScrapePool(newConfig(), nil, teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) tgts := []*targetgroup.Group{ { Targets: []model.LabelSet{ @@ -852,7 +429,7 @@ func TestScrapePoolRaces(t *testing.T) { sp.stop() } -func TestScrapePoolScrapeLoopsStarted(t *testing.T) { +func TestScrapePoolScrapeLoopsStarted_AppendV2(t *testing.T) { var wg sync.WaitGroup newLoop := func(scrapeLoopOptions) loop { wg.Add(1) @@ -865,6 +442,9 @@ func TestScrapePoolScrapeLoopsStarted(t *testing.T) { return l } sp := newTestScrapePool(t, newLoop) + // Force it to use V2 for this single test purpose. 
+ sp.appendable = nil + sp.appendableV2 = teststorage.NewAppendable() tgs := []*targetgroup.Group{ { @@ -895,7 +475,7 @@ func TestScrapePoolScrapeLoopsStarted(t *testing.T) { } } -func TestScrapeLoopStopBeforeRun(t *testing.T) { +func TestScrapeLoopStopBeforeRun_AppendV2(t *testing.T) { t.Parallel() sl, scraper := newTestScrapeLoop(t) @@ -943,14 +523,12 @@ func TestScrapeLoopStopBeforeRun(t *testing.T) { } } -func nopMutator(l labels.Labels) labels.Labels { return l } - -func TestScrapeLoopStop(t *testing.T) { +func TestScrapeLoopStop_AppendV2(t *testing.T) { signal := make(chan struct{}, 1) appTest := teststorage.NewAppendable() sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl.appendableV2 = appTest // Since we're writing samples directly below we need to provide a protocol fallback. sl.fallbackScrapeProtocol = "text/plain" }) @@ -999,157 +577,8 @@ func TestScrapeLoopStop(t *testing.T) { } } -func TestScrapeLoopRun(t *testing.T) { - t.Parallel() - var ( - signal = make(chan struct{}, 1) - errc = make(chan error) - ) - - ctx, cancel := context.WithCancel(t.Context()) - sl, scraper := newTestScrapeLoop(t, withCtx(ctx)) - // The loop must terminate during the initial offset if the context - // is canceled. - scraper.offsetDur = time.Hour - - go func() { - sl.run(errc) - signal <- struct{}{} - }() - - // Wait to make sure we are actually waiting on the offset. - time.Sleep(1 * time.Second) - - cancel() - select { - case <-signal: - case <-time.After(5 * time.Second): - require.FailNow(t, "Cancellation during initial offset failed.") - case err := <-errc: - require.FailNow(t, "Unexpected error", "err: %s", err) - } - - ctx, cancel = context.WithCancel(t.Context()) - sl, scraper = newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.ctx = ctx - sl.timeout = 100 * time.Millisecond - }) - // The provided timeout must cause cancellation of the context passed down to the - // scraper. The scraper has to respect the context. 
- scraper.offsetDur = 0 - - blockCtx, blockCancel := context.WithCancel(t.Context()) - scraper.scrapeFunc = func(ctx context.Context, _ io.Writer) error { - select { - case <-blockCtx.Done(): - cancel() - case <-ctx.Done(): - return ctx.Err() - } - return nil - } - - go func() { - sl.run(errc) - signal <- struct{}{} - }() - - select { - case err := <-errc: - require.ErrorIs(t, err, context.DeadlineExceeded) - case <-time.After(3 * time.Second): - require.FailNow(t, "Expected timeout error but got none.") - } - - // We already caught the timeout error and are certainly in the loop. - // Let the scrapes returns immediately to cause no further timeout errors - // and check whether canceling the parent context terminates the loop. - blockCancel() - select { - case <-signal: - // Loop terminated as expected. - case err := <-errc: - require.FailNow(t, "Unexpected error", "err: %s", err) - case <-time.After(3 * time.Second): - require.FailNow(t, "Loop did not terminate on context cancellation") - } -} - -func TestScrapeLoopForcedErr(t *testing.T) { - var ( - signal = make(chan struct{}, 1) - errc = make(chan error) - ) - - ctx, cancel := context.WithCancel(t.Context()) - sl, scraper := newTestScrapeLoop(t, withCtx(ctx)) - - forcedErr := errors.New("forced err") - sl.setForcedError(forcedErr) - - scraper.scrapeFunc = func(context.Context, io.Writer) error { - require.FailNow(t, "Should not be scraped.") - return nil - } - - go func() { - sl.run(errc) - signal <- struct{}{} - }() - - select { - case err := <-errc: - require.ErrorIs(t, err, forcedErr) - case <-time.After(3 * time.Second): - require.FailNow(t, "Expected forced error but got none.") - } - cancel() - - select { - case <-signal: - case <-time.After(5 * time.Second): - require.FailNow(t, "Scrape not stopped.") - } -} - -func TestScrapeLoopRun_ContextCancelTerminatesBlockedSend(t *testing.T) { - // Regression test for issue #17553 - defer goleak.VerifyNone(t) - - var ( - signal = make(chan struct{}) - errc = 
make(chan error) - ) - - ctx, cancel := context.WithCancel(t.Context()) - sl, scraper := newTestScrapeLoop(t, withCtx(ctx)) - - forcedErr := errors.New("forced err") - sl.setForcedError(forcedErr) - - scraper.scrapeFunc = func(context.Context, io.Writer) error { - return nil - } - - go func() { - sl.run(errc) - close(signal) - }() - - time.Sleep(50 * time.Millisecond) - - cancel() - - select { - case <-signal: - // success case - case <-time.After(3 * time.Second): - require.FailNow(t, "Scrape loop failed to exit on context cancellation (goroutine leak detected)") - } -} - -func TestScrapeLoopMetadata(t *testing.T) { - sl, _ := newTestScrapeLoop(t) +func TestScrapeLoopMetadata_AppendV2(t *testing.T) { + sl, _ := newTestScrapeLoop(t, withAppendableV2(teststorage.NewAppendable())) app := sl.appender() total, _, _, err := app.append([]byte(`# TYPE test_metric counter @@ -1182,8 +611,8 @@ test_metric_total 1 require.Empty(t, md.Unit) } -func TestScrapeLoopSeriesAdded(t *testing.T) { - sl, _ := newTestScrapeLoop(t) +func TestScrapeLoopSeriesAdded_AppendV2(t *testing.T) { + sl, _ := newTestScrapeLoop(t, withAppendableV2(teststorage.NewAppendable())) app := sl.appender() total, added, seriesAdded, err := app.append([]byte("test_metric 1\n"), "text/plain", time.Time{}) @@ -1202,7 +631,7 @@ func TestScrapeLoopSeriesAdded(t *testing.T) { require.Equal(t, 0, seriesAdded) } -func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { +func TestScrapeLoopFailWithInvalidLabelsAfterRelabel_AppendV2(t *testing.T) { target := &Target{ labels: labels.FromStrings("pod_label_invalid_012\xff", "test"), } @@ -1214,6 +643,7 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { NameValidationScheme: model.UTF8Validation, }} sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendableV2 = teststorage.NewAppendable() sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, target, true, relabelConfig) } @@ -1228,8 +658,9 @@ 
func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { require.Equal(t, 0, seriesAdded) } -func TestScrapeLoopFailLegacyUnderUTF8(t *testing.T) { +func TestScrapeLoopFailLegacyUnderUTF8_AppendV2(t *testing.T) { sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendableV2 = teststorage.NewAppendable() sl.validationScheme = model.LegacyValidation }) @@ -1243,6 +674,7 @@ func TestScrapeLoopFailLegacyUnderUTF8(t *testing.T) { // When scrapeloop has validation set to UTF-8, the metric is allowed. sl, _ = newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendableV2 = teststorage.NewAppendable() sl.validationScheme = model.UTF8Validation }) @@ -1254,84 +686,6 @@ func TestScrapeLoopFailLegacyUnderUTF8(t *testing.T) { require.Equal(t, 1, seriesAdded) } -func readTextParseTestMetrics(t testing.TB) []byte { - t.Helper() - - b, err := os.ReadFile("../model/textparse/testdata/alltypes.237mfs.prom.txt") - if err != nil { - t.Fatal(err) - } - return b -} - -func makeTestGauges(n int) []byte { - sb := bytes.Buffer{} - sb.WriteString("# TYPE metric_a gauge\n") - sb.WriteString("# HELP metric_a help text\n") - for i := range n { - _, _ = fmt.Fprintf(&sb, "metric_a{foo=\"%d\",bar=\"%d\"} 1\n", i, i*100) - } - sb.WriteString("# EOF\n") - return sb.Bytes() -} - -func promTextToProto(tb testing.TB, text []byte) []byte { - tb.Helper() - - p := expfmt.NewTextParser(model.UTF8Validation) - fams, err := p.TextToMetricFamilies(bytes.NewReader(text)) - if err != nil { - tb.Fatal(err) - } - // Order by name for the deterministic tests. - var names []string - for n := range fams { - names = append(names, n) - } - sort.Strings(names) - - buf := bytes.Buffer{} - for _, n := range names { - o, err := proto.Marshal(fams[n]) - if err != nil { - tb.Fatal(err) - } - - // Write first length, then binary protobuf. 
- varintBuf := binary.AppendUvarint(nil, uint64(len(o))) - buf.Write(varintBuf) - buf.Write(o) - } - return buf.Bytes() -} - -func TestPromTextToProto(t *testing.T) { - metricsText := readTextParseTestMetrics(t) - // TODO(bwplotka): Windows adds \r for new lines which is - // not handled correctly in the expfmt parser, fix it. - metricsText = bytes.ReplaceAll(metricsText, []byte("\r"), nil) - - metricsProto := promTextToProto(t, metricsText) - d := expfmt.NewDecoder(bytes.NewReader(metricsProto), expfmt.NewFormat(expfmt.TypeProtoDelim)) - - var got []string - for { - mf := &dto.MetricFamily{} - if err := d.Decode(mf); err != nil { - if errors.Is(err, io.EOF) { - break - } - t.Fatal(err) - } - got = append(got, mf.GetName()) - } - require.Len(t, got, 237) - // Check a few to see if those are not dups. - require.Equal(t, "go_gc_cycles_automatic_gc_cycles_total", got[0]) - require.Equal(t, "prometheus_sd_kuma_fetch_duration_seconds", got[128]) - require.Equal(t, "promhttp_metric_handler_requests_total", got[236]) -} - // BenchmarkScrapeLoopAppend benchmarks a core append function in a scrapeLoop // that creates a new parser and goes through a byte slice from a single scrape. 
// Benchmark compares append function run across 2 dimensions: @@ -1347,7 +701,7 @@ func TestPromTextToProto(t *testing.T) { -benchtime 5s -count 6 -cpu 2 -timeout 999m \ | tee ${bench}.txt */ -func BenchmarkScrapeLoopAppend(b *testing.B) { +func BenchmarkScrapeLoopAppend_AppendV2(b *testing.B) { for _, data := range []struct { name string parsableText []byte @@ -1372,7 +726,7 @@ func BenchmarkScrapeLoopAppend(b *testing.B) { s := teststorage.New(b) b.Cleanup(func() { _ = s.Close() }) - sl, _ := newTestScrapeLoop(b, withAppendable(s)) + sl, _ := newTestScrapeLoop(b, withAppendableV2(s)) app := sl.appender() ts := time.Time{} @@ -1391,14 +745,14 @@ func BenchmarkScrapeLoopAppend(b *testing.B) { } } -func TestScrapeLoopScrapeAndReport(t *testing.T) { +func TestScrapeLoopScrapeAndReport_AppendV2(t *testing.T) { parsableText := readTextParseTestMetrics(t) // On windows \r is added when reading, but parsers do not support this. Kill it. parsableText = bytes.ReplaceAll(parsableText, []byte("\r"), nil) appTest := teststorage.NewAppendable() sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl.appendableV2 = appTest sl.fallbackScrapeProtocol = "application/openmetrics-text" }) scraper.scrapeFunc = func(_ context.Context, writer io.Writer) error { @@ -1422,14 +776,14 @@ func TestScrapeLoopScrapeAndReport(t *testing.T) { -benchtime 5s -count 6 -cpu 2 -timeout 999m \ | tee ${bench}.txt */ -func BenchmarkScrapeLoopScrapeAndReport(b *testing.B) { +func BenchmarkScrapeLoopScrapeAndReport_AppendV2(b *testing.B) { parsableText := readTextParseTestMetrics(b) s := teststorage.New(b) b.Cleanup(func() { _ = s.Close() }) sl, scraper := newTestScrapeLoop(b, func(sl *scrapeLoop) { - sl.appendable = s + sl.appendableV2 = s sl.fallbackScrapeProtocol = "application/openmetrics-text" }) scraper.scrapeFunc = func(_ context.Context, writer io.Writer) error { @@ -1448,7 +802,7 @@ func BenchmarkScrapeLoopScrapeAndReport(b *testing.B) { } } -func 
TestSetOptionsHandlingStaleness(t *testing.T) { +func TestSetOptionsHandlingStaleness_AppendV2(t *testing.T) { s := teststorage.New(t, 600000) t.Cleanup(func() { _ = s.Close() }) @@ -1460,7 +814,7 @@ func TestSetOptionsHandlingStaleness(t *testing.T) { runScrapeLoop := func(ctx context.Context, t *testing.T, cue int, action func(*scrapeLoop)) { sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = s + sl.appendableV2 = s }) numScrapes := 0 @@ -1527,14 +881,14 @@ func TestSetOptionsHandlingStaleness(t *testing.T) { require.Equal(t, 0, c, "invalid count of staleness markers after stopping the engine") } -func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { +func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape_AppendV2(t *testing.T) { signal := make(chan struct{}, 1) ctx, cancel := context.WithCancel(t.Context()) appTest := teststorage.NewAppendable() sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = appTest + sl.appendableV2 = appTest // Since we're writing samples directly below we need to provide a protocol fallback. sl.fallbackScrapeProtocol = "text/plain" }) @@ -1575,14 +929,14 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { "Appended second sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(got[6].V)) } -func TestScrapeLoopRunCreatesStaleMarkersOnParseFailure(t *testing.T) { +func TestScrapeLoopRunCreatesStaleMarkersOnParseFailure_AppendV2(t *testing.T) { signal := make(chan struct{}, 1) ctx, cancel := context.WithCancel(t.Context()) appTest := teststorage.NewAppendable() sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = appTest + sl.appendableV2 = appTest // Since we're writing samples directly below we need to provide a protocol fallback. 
sl.fallbackScrapeProtocol = "text/plain" }) @@ -1629,14 +983,14 @@ func TestScrapeLoopRunCreatesStaleMarkersOnParseFailure(t *testing.T) { // If we have a target with sample_limit set and scrape initially works, but then we hit the sample_limit error, // then we don't expect to see any StaleNaNs appended for the series that disappeared due to sample_limit error. -func TestScrapeLoopRunCreatesStaleMarkersOnSampleLimit(t *testing.T) { +func TestScrapeLoopRunCreatesStaleMarkersOnSampleLimit_AppendV2(t *testing.T) { signal := make(chan struct{}, 1) ctx, cancel := context.WithCancel(t.Context()) appTest := teststorage.NewAppendable() sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = appTest + sl.appendableV2 = appTest // Since we're writing samples directly below we need to provide a protocol fallback. sl.fallbackScrapeProtocol = "text/plain" sl.sampleLimit = 4 @@ -1699,18 +1053,18 @@ func TestScrapeLoopRunCreatesStaleMarkersOnSampleLimit(t *testing.T) { } } -func TestScrapeLoopCache(t *testing.T) { +func TestScrapeLoopCache_AppendV2(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) signal := make(chan struct{}, 1) ctx, cancel := context.WithCancel(t.Context()) - appTest := teststorage.NewAppendable().Then(s) + appTest := teststorage.NewAppendable().ThenV2(s) sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { sl.ctx = ctx sl.l = promslog.New(&promslog.Config{}) - sl.appendable = appTest + sl.appendableV2 = appTest // Since we're writing samples directly below we need to provide a protocol fallback. sl.fallbackScrapeProtocol = "text/plain" // Decreasing the scrape interval could make the test fail, as multiple scrapes might be initiated at identical millisecond timestamps. 
@@ -1764,7 +1118,7 @@ func TestScrapeLoopCache(t *testing.T) { require.Len(t, appTest.ResultSamples(), 26, "Appended samples not as expected:\n%s", appTest) } -func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { +func TestScrapeLoopCacheMemoryExhaustionProtection_AppendV2(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -1772,6 +1126,7 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendableV2 = teststorage.NewAppendable().ThenV2(s) sl.ctx = ctx }) numScrapes := 0 @@ -1803,7 +1158,7 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { require.LessOrEqual(t, len(sl.cache.series), 2000, "More than 2000 series cached.") } -func TestScrapeLoopAppend(t *testing.T) { +func TestScrapeLoopAppend_AppendV2(t *testing.T) { tests := []struct { title string honorLabels bool @@ -1863,7 +1218,7 @@ func TestScrapeLoopAppend(t *testing.T) { appTest := teststorage.NewAppendable() sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl.appendableV2 = appTest sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, discoveryLabels, test.honorLabels, nil) } @@ -1892,22 +1247,7 @@ func TestScrapeLoopAppend(t *testing.T) { } } -func requireEqual(t *testing.T, expected, actual any, msgAndArgs ...any) { - t.Helper() - testutil.RequireEqualWithOptions(t, expected, actual, - []cmp.Option{ - cmp.Comparer(func(a, b sample) bool { return a.Equals(b) }), - // StaleNaN samples are generated by iterating over a map, which means that the order - // of samples might be different on every test run. Sort series by label to avoid - // test failures because of that. - cmpopts.SortSlices(func(a, b sample) int { - return labels.Compare(a.L, b.L) - }), - }, - msgAndArgs...) 
-} - -func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) { +func TestScrapeLoopAppendForConflictingPrefixedLabels_AppendV2(t *testing.T) { testcases := map[string]struct { targetLabels []string exposedLabels string @@ -1961,7 +1301,7 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) { t.Run(name, func(t *testing.T) { appTest := teststorage.NewAppendable() sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl.appendableV2 = appTest sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, &Target{labels: labels.FromStrings(tc.targetLabels...)}, false, nil) } @@ -1984,9 +1324,9 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) { } } -func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { +func TestScrapeLoopAppendCacheEntryButErrNotFound_AppendV2(t *testing.T) { appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendableV2(appTest)) fakeRef := storage.SeriesRef(1) expValue := float64(1) @@ -2021,16 +1361,16 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { require.Equal(t, expected, appTest.ResultSamples()) } -type appendableFunc func(ctx context.Context) storage.Appender +type appendableV2Func func(ctx context.Context) storage.AppenderV2 -func (a appendableFunc) Appender(ctx context.Context) storage.Appender { return a(ctx) } +func (a appendableV2Func) AppenderV2(ctx context.Context) storage.AppenderV2 { return a(ctx) } -func TestScrapeLoopAppendSampleLimit(t *testing.T) { +func TestScrapeLoopAppendSampleLimit_AppendV2(t *testing.T) { appTest := teststorage.NewAppendable() sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appendableFunc(func(ctx context.Context) storage.Appender { + sl.appendableV2 = appendableV2Func(func(ctx context.Context) storage.AppenderV2 { // Chain appTest to verify what samples passed through. 
- return &limitAppender{Appender: appTest.Appender(ctx), limit: 1} + return &limitAppenderV2{AppenderV2: appTest.AppenderV2(ctx), limit: 1} }) sl.sampleMutator = func(l labels.Labels) labels.Labels { if l.Has("deleteme") { @@ -2038,7 +1378,7 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) { } return l } - sl.sampleLimit = 1 // Same as limitAppender.limit + sl.sampleLimit = 1 // Same as limitAppenderV2.limit }) // Get the value of the Counter before performing append. @@ -2087,10 +1427,10 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) { require.Equal(t, 1, seriesAdded) } -func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { +func TestScrapeLoop_HistogramBucketLimit_AppendV2(t *testing.T) { sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appendableFunc(func(ctx context.Context) storage.Appender { - return &bucketLimitAppender{Appender: teststorage.NewAppendable().Appender(ctx), limit: 2} + sl.appendableV2 = appendableV2Func(func(ctx context.Context) storage.AppenderV2 { + return &bucketLimitAppenderV2{AppenderV2: teststorage.NewAppendable().AppenderV2(ctx), limit: 2} }) sl.enableNativeHistogramScraping = true sl.sampleMutator = func(l labels.Labels) labels.Labels { @@ -2196,12 +1536,12 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { require.Equal(t, beforeMetricValue+1, metricValue) } -func TestScrapeLoop_ChangingMetricString(t *testing.T) { +func TestScrapeLoop_ChangingMetricString_AppendV2(t *testing.T) { // This is a regression test for the scrape loop cache not properly maintaining // IDs when the string representation of a metric changes across a scrape. Thus, - // we use a real storage appender here. + // we use a real storage AppenderV2 here. 
appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendableV2(appTest)) now := time.Now() app := sl.appender() @@ -2229,8 +1569,9 @@ func TestScrapeLoop_ChangingMetricString(t *testing.T) { require.Equal(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } -func TestScrapeLoopAppendFailsWithNoContentType(t *testing.T) { +func TestScrapeLoopAppendFailsWithNoContentType_AppendV2(t *testing.T) { sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendableV2 = teststorage.NewAppendable() // Explicitly setting the lack of fallback protocol here to make it obvious. sl.fallbackScrapeProtocol = "" }) @@ -2243,8 +1584,9 @@ func TestScrapeLoopAppendFailsWithNoContentType(t *testing.T) { } // TestScrapeLoopAppendEmptyWithNoContentType ensures we there are no errors when we get a blank scrape or just want to append a stale marker. -func TestScrapeLoopAppendEmptyWithNoContentType(t *testing.T) { +func TestScrapeLoopAppendEmptyWithNoContentType_AppendV2(t *testing.T) { sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendableV2 = teststorage.NewAppendable() // Explicitly setting the lack of fallback protocol here to make it obvious. 
sl.fallbackScrapeProtocol = "" }) @@ -2256,9 +1598,9 @@ func TestScrapeLoopAppendEmptyWithNoContentType(t *testing.T) { require.NoError(t, app.Commit()) } -func TestScrapeLoopAppendStaleness(t *testing.T) { +func TestScrapeLoopAppendStaleness_AppendV2(t *testing.T) { appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendableV2(appTest)) now := time.Now() app := sl.appender() @@ -2286,9 +1628,9 @@ func TestScrapeLoopAppendStaleness(t *testing.T) { requireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } -func TestScrapeLoopAppendNoStalenessIfTimestamp(t *testing.T) { +func TestScrapeLoopAppendNoStalenessIfTimestamp_AppendV2(t *testing.T) { appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendableV2(appTest)) now := time.Now() app := sl.appender() _, _, _, err := app.append([]byte("metric_a 1 1000\n"), "text/plain", now) @@ -2310,10 +1652,10 @@ func TestScrapeLoopAppendNoStalenessIfTimestamp(t *testing.T) { require.Equal(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } -func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { +func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness_AppendV2(t *testing.T) { appTest := teststorage.NewAppendable() sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl.appendableV2 = appTest sl.trackTimestampsStaleness = true }) @@ -2343,7 +1685,7 @@ func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { requireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } -func TestScrapeLoopAppendExemplar(t *testing.T) { +func TestScrapeLoopAppendExemplar_AppendV2(t *testing.T) { tests := []struct { title string alwaysScrapeClassicHist bool @@ -2494,8 +1836,9 @@ metric: < `, contentType: 
"application/vnd.google.protobuf", samples: []sample{{ - T: 1234568, - L: labels.FromStrings("__name__", "test_histogram"), + MF: "test_histogram", + T: 1234568, + L: labels.FromStrings("__name__", "test_histogram"), H: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2612,25 +1955,29 @@ metric: < alwaysScrapeClassicHist: true, contentType: "application/vnd.google.protobuf", samples: []sample{ - {L: labels.FromStrings("__name__", "test_histogram_count"), T: 1234568, V: 175}, - {L: labels.FromStrings("__name__", "test_histogram_sum"), T: 1234568, V: 0.0008280461746287094}, - {L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"), T: 1234568, V: 2}, + {MF: "test_histogram", L: labels.FromStrings("__name__", "test_histogram_count"), T: 1234568, V: 175}, + {MF: "test_histogram", L: labels.FromStrings("__name__", "test_histogram_sum"), T: 1234568, V: 0.0008280461746287094}, + {MF: "test_histogram", L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"), T: 1234568, V: 2}, { - L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"), T: 1234568, V: 4, + MF: "test_histogram", + L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"), T: 1234568, V: 4, ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true}}, }, { - L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"), T: 1234568, V: 16, + MF: "test_histogram", + L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"), T: 1234568, V: 16, ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, Ts: 1234568, HasTs: false}}, }, { - L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0001899999999999998"), T: 1234568, V: 32, + MF: "test_histogram", + L: labels.FromStrings("__name__", 
"test_histogram_bucket", "le", "-0.0001899999999999998"), T: 1234568, V: 32, ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "58215"), Value: -0.00019, Ts: 1625851055146, HasTs: true}}, }, - {L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), T: 1234568, V: 175}, + {MF: "test_histogram", L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), T: 1234568, V: 175}, { - T: 1234568, - L: labels.FromStrings("__name__", "test_histogram"), + MF: "test_histogram", + T: 1234568, + L: labels.FromStrings("__name__", "test_histogram"), H: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2731,8 +2078,9 @@ metric: < `, samples: []sample{{ - T: 1234568, - L: labels.FromStrings("__name__", "test_histogram"), + MF: "test_histogram", + T: 1234568, + L: labels.FromStrings("__name__", "test_histogram"), H: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2831,9 +2179,9 @@ metric: < `, samples: []sample{ - {L: labels.FromStrings("__name__", "test_histogram_count"), T: 1234568, V: 175}, - {L: labels.FromStrings("__name__", "test_histogram_sum"), T: 1234568, V: 0.0008280461746287094}, - {L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), T: 1234568, V: 175}, + {MF: "test_histogram", L: labels.FromStrings("__name__", "test_histogram_count"), T: 1234568, V: 175}, + {MF: "test_histogram", L: labels.FromStrings("__name__", "test_histogram_sum"), T: 1234568, V: 0.0008280461746287094}, + {MF: "test_histogram", L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), T: 1234568, V: 175}, }, }, } @@ -2846,7 +2194,7 @@ metric: < appTest := teststorage.NewAppendable() sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl.appendableV2 = appTest sl.enableNativeHistogramScraping = test.enableNativeHistogramsIngestion sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, discoveryLabels, false, nil) @@ -2892,27 +2240,7 @@ metric: < } } 
-func textToProto(text string, buf *bytes.Buffer) error { - // In case of protobuf, we have to create the binary representation. - pb := &dto.MetricFamily{} - // From text to proto message. - err := proto.UnmarshalText(text, pb) - if err != nil { - return err - } - // From proto message to binary protobuf. - protoBuf, err := proto.Marshal(pb) - if err != nil { - return err - } - // Write first length, then binary protobuf. - varintBuf := binary.AppendUvarint(nil, uint64(len(protoBuf))) - buf.Write(varintBuf) - buf.Write(protoBuf) - return nil -} - -func TestScrapeLoopAppendExemplarSeries(t *testing.T) { +func TestScrapeLoopAppendExemplarSeries_AppendV2(t *testing.T) { scrapeText := []string{`metric_total{n="1"} 1 # {t="1"} 1.0 10000 # EOF`, `metric_total{n="1"} 2 # {t="2"} 2.0 20000 # EOF`} @@ -2935,7 +2263,7 @@ func TestScrapeLoopAppendExemplarSeries(t *testing.T) { appTest := teststorage.NewAppendable() sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl.appendableV2 = appTest sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, discoveryLabels, false, nil) } @@ -2963,12 +2291,12 @@ func TestScrapeLoopAppendExemplarSeries(t *testing.T) { requireEqual(t, samples, appTest.ResultSamples()) } -func TestScrapeLoopRunReportsTargetDownOnScrapeError(t *testing.T) { +func TestScrapeLoopRunReportsTargetDownOnScrapeError_AppendV2(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) appTest := teststorage.NewAppendable() sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = appTest + sl.appendableV2 = appTest }) scraper.scrapeFunc = func(context.Context, io.Writer) error { cancel() @@ -2979,12 +2307,12 @@ func TestScrapeLoopRunReportsTargetDownOnScrapeError(t *testing.T) { require.Equal(t, 0.0, appTest.ResultSamples()[0].V, "bad 'up' value") } -func TestScrapeLoopRunReportsTargetDownOnInvalidUTF8(t *testing.T) { +func 
TestScrapeLoopRunReportsTargetDownOnInvalidUTF8_AppendV2(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) appTest := teststorage.NewAppendable() sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = appTest + sl.appendableV2 = appTest }) scraper.scrapeFunc = func(_ context.Context, w io.Writer) error { cancel() @@ -2996,7 +2324,7 @@ func TestScrapeLoopRunReportsTargetDownOnInvalidUTF8(t *testing.T) { require.Equal(t, 0.0, appTest.ResultSamples()[0].V, "bad 'up' value") } -func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T) { +func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds_AppendV2(t *testing.T) { appTest := teststorage.NewAppendable().WithErrs( func(ls labels.Labels) error { switch ls.Get(model.MetricNameLabel) { @@ -3010,7 +2338,7 @@ func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T return nil } }, nil, nil) - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendableV2(appTest)) now := time.Unix(1, 0) app := sl.appender() @@ -3031,12 +2359,12 @@ func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T require.Equal(t, 1, seriesAdded) } -func TestScrapeLoopOutOfBoundsTimeError(t *testing.T) { - sl, _ := newTestScrapeLoop(t, withAppendable( - appendableFunc(func(ctx context.Context) storage.Appender { - return &timeLimitAppender{ - Appender: teststorage.NewAppendable().Appender(ctx), - maxTime: timestamp.FromTime(time.Now().Add(10 * time.Minute)), +func TestScrapeLoopOutOfBoundsTimeError_AppendV2(t *testing.T) { + sl, _ := newTestScrapeLoop(t, withAppendableV2( + appendableV2Func(func(ctx context.Context) storage.AppenderV2 { + return &timeLimitAppenderV2{ + AppenderV2: teststorage.NewAppendable().AppenderV2(ctx), + maxTime: timestamp.FromTime(time.Now().Add(10 * time.Minute)), } }), )) @@ -3051,421 +2379,12 @@ func TestScrapeLoopOutOfBoundsTimeError(t *testing.T) { 
require.Equal(t, 0, seriesAdded) } -func TestAcceptHeader(t *testing.T) { - tests := []struct { - name string - scrapeProtocols []config.ScrapeProtocol - scheme model.EscapingScheme - expectedHeader string - }{ - { - name: "default scrape protocols with underscore escaping", - scrapeProtocols: config.DefaultScrapeProtocols, - scheme: model.UnderscoreEscaping, - expectedHeader: "application/openmetrics-text;version=1.0.0;escaping=underscores;q=0.6,application/openmetrics-text;version=0.0.1;q=0.5,text/plain;version=1.0.0;escaping=underscores;q=0.4,text/plain;version=0.0.4;q=0.3,*/*;q=0.2", - }, - { - name: "default proto first scrape protocols with underscore escaping", - scrapeProtocols: config.DefaultProtoFirstScrapeProtocols, - scheme: model.DotsEscaping, - expectedHeader: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.6,application/openmetrics-text;version=1.0.0;escaping=dots;q=0.5,application/openmetrics-text;version=0.0.1;q=0.4,text/plain;version=1.0.0;escaping=dots;q=0.3,text/plain;version=0.0.4;q=0.2,*/*;q=0.1", - }, - { - name: "default scrape protocols with no escaping", - scrapeProtocols: config.DefaultScrapeProtocols, - scheme: model.NoEscaping, - expectedHeader: "application/openmetrics-text;version=1.0.0;escaping=allow-utf-8;q=0.6,application/openmetrics-text;version=0.0.1;q=0.5,text/plain;version=1.0.0;escaping=allow-utf-8;q=0.4,text/plain;version=0.0.4;q=0.3,*/*;q=0.2", - }, - { - name: "default proto first scrape protocols with no escaping", - scrapeProtocols: config.DefaultProtoFirstScrapeProtocols, - scheme: model.NoEscaping, - expectedHeader: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.6,application/openmetrics-text;version=1.0.0;escaping=allow-utf-8;q=0.5,application/openmetrics-text;version=0.0.1;q=0.4,text/plain;version=1.0.0;escaping=allow-utf-8;q=0.3,text/plain;version=0.0.4;q=0.2,*/*;q=0.1", - }, - } - - for _, tc := range tests { - 
t.Run(tc.name, func(t *testing.T) { - header := acceptHeader(tc.scrapeProtocols, tc.scheme) - require.Equal(t, tc.expectedHeader, header) - }) - } -} - -// setupTracing temporarily sets the global TracerProvider and Propagator -// and restores the original state after the test completes. -func setupTracing(t *testing.T) { - t.Helper() - - origTracerProvider := otel.GetTracerProvider() - origPropagator := otel.GetTextMapPropagator() - - tp := sdktrace.NewTracerProvider(sdktrace.WithSampler(sdktrace.AlwaysSample())) - otel.SetTracerProvider(tp) - otel.SetTextMapPropagator(propagation.TraceContext{}) - - t.Cleanup(func() { - otel.SetTracerProvider(origTracerProvider) - otel.SetTextMapPropagator(origPropagator) - }) -} - -// TestRequestTraceparentHeader verifies that the HTTP client used by the target scraper -// propagates the OpenTelemetry "traceparent" header correctly. -func TestRequestTraceparentHeader(t *testing.T) { - setupTracing(t) - - server := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, r *http.Request) { - // the traceparent header is sent. 
- require.NotEmpty(t, r.Header.Get("traceparent")) - })) - defer server.Close() - serverURL, err := url.Parse(server.URL) - require.NoError(t, err) - - client, err := newScrapeClient(config_util.DefaultHTTPClientConfig, "test") - require.NoError(t, err) - - ts := &targetScraper{ - Target: &Target{ - labels: labels.FromStrings( - model.SchemeLabel, serverURL.Scheme, - model.AddressLabel, serverURL.Host, - ), - scrapeConfig: &config.ScrapeConfig{}, - }, - client: client, - } - - resp, err := ts.scrape(context.Background()) - require.NoError(t, err) - require.NotNil(t, resp) - t.Cleanup(func() { _ = resp.Body.Close() }) -} - -func TestTargetScraperScrapeOK(t *testing.T) { - const ( - configTimeout = 1500 * time.Millisecond - expectedTimeout = "1.5" - ) - - var ( - protobufParsing bool - allowUTF8 bool - qValuePattern = regexp.MustCompile(`q=([0-9]+(\.\d+)?)`) - ) - - server := httptest.NewServer( - http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - accept := r.Header.Get("Accept") - if allowUTF8 { - require.Containsf(t, accept, "escaping=allow-utf-8", "Expected Accept header to allow utf8, got %q", accept) - } else { - require.NotContainsf(t, accept, "escaping=allow-utf-8", "Expected Accept header to not allow utf8, got %q", accept) - } - if protobufParsing { - require.True(t, strings.HasPrefix(accept, "application/vnd.google.protobuf;"), - "Expected Accept header to prefer application/vnd.google.protobuf.") - } - - contentTypes := strings.SplitSeq(accept, ",") - for st := range contentTypes { - match := qValuePattern.FindStringSubmatch(st) - require.Len(t, match, 3) - qValue, err := strconv.ParseFloat(match[1], 64) - require.NoError(t, err, "Error parsing q value") - require.GreaterOrEqual(t, qValue, float64(0)) - require.LessOrEqual(t, qValue, float64(1)) - require.LessOrEqual(t, len(strings.Split(match[1], ".")[1]), 3, "q value should have at most 3 decimal places") - } - - timeout := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds") - 
require.Equal(t, expectedTimeout, timeout, "Expected scrape timeout header.") - - if allowUTF8 { - w.Header().Set("Content-Type", `text/plain; version=1.0.0; escaping=allow-utf-8`) - } else { - w.Header().Set("Content-Type", `text/plain; version=0.0.4`) - } - _, _ = w.Write([]byte("metric_a 1\nmetric_b 2\n")) - }), - ) - defer server.Close() - - serverURL, err := url.Parse(server.URL) - if err != nil { - panic(err) - } - - runTest := func(t *testing.T, acceptHeader string) { - ts := &targetScraper{ - Target: &Target{ - labels: labels.FromStrings( - model.SchemeLabel, serverURL.Scheme, - model.AddressLabel, serverURL.Host, - ), - scrapeConfig: &config.ScrapeConfig{}, - }, - client: http.DefaultClient, - timeout: configTimeout, - acceptHeader: acceptHeader, - } - var buf bytes.Buffer - - resp, err := ts.scrape(context.Background()) - require.NoError(t, err) - contentType, err := ts.readResponse(context.Background(), resp, &buf) - require.NoError(t, err) - if allowUTF8 { - require.Equal(t, "text/plain; version=1.0.0; escaping=allow-utf-8", contentType) - } else { - require.Equal(t, "text/plain; version=0.0.4", contentType) - } - require.Equal(t, "metric_a 1\nmetric_b 2\n", buf.String()) - } - - for _, tc := range []struct { - scrapeProtocols []config.ScrapeProtocol - scheme model.EscapingScheme - protobufParsing bool - allowUTF8 bool - }{ - { - scrapeProtocols: config.DefaultScrapeProtocols, - scheme: model.UnderscoreEscaping, - protobufParsing: false, - allowUTF8: false, - }, - { - scrapeProtocols: config.DefaultProtoFirstScrapeProtocols, - scheme: model.UnderscoreEscaping, - protobufParsing: true, - allowUTF8: false, - }, - { - scrapeProtocols: config.DefaultScrapeProtocols, - scheme: model.NoEscaping, - protobufParsing: false, - allowUTF8: true, - }, - { - scrapeProtocols: config.DefaultProtoFirstScrapeProtocols, - scheme: model.NoEscaping, - protobufParsing: true, - allowUTF8: true, - }, - } { - t.Run(fmt.Sprintf("%+v", tc), func(t *testing.T) { - protobufParsing 
= tc.protobufParsing - allowUTF8 = tc.allowUTF8 - runTest(t, acceptHeader(tc.scrapeProtocols, tc.scheme)) - }) - } -} - -func TestTargetScrapeScrapeCancel(t *testing.T) { - block := make(chan struct{}) - - server := httptest.NewServer( - http.HandlerFunc(func(http.ResponseWriter, *http.Request) { - <-block - }), - ) - defer server.Close() - - serverURL, err := url.Parse(server.URL) - if err != nil { - panic(err) - } - - ts := &targetScraper{ - Target: &Target{ - labels: labels.FromStrings( - model.SchemeLabel, serverURL.Scheme, - model.AddressLabel, serverURL.Host, - ), - scrapeConfig: &config.ScrapeConfig{}, - }, - client: http.DefaultClient, - acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.UnderscoreEscaping), - } - ctx, cancel := context.WithCancel(context.Background()) - - errc := make(chan error, 1) - - go func() { - time.Sleep(1 * time.Second) - cancel() - }() - - go func() { - _, err := ts.scrape(ctx) - switch { - case err == nil: - errc <- errors.New("expected error but got nil") - case !errors.Is(ctx.Err(), context.Canceled): - errc <- fmt.Errorf("expected context cancellation error but got: %w", ctx.Err()) - default: - close(errc) - } - }() - - select { - case <-time.After(5 * time.Second): - require.FailNow(t, "Scrape function did not return unexpectedly.") - case err := <-errc: - require.NoError(t, err) - } - // If this is closed in a defer above the function the test server - // doesn't terminate and the test doesn't complete. 
- close(block) -} - -func TestTargetScrapeScrapeNotFound(t *testing.T) { - server := httptest.NewServer( - http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusNotFound) - }), - ) - defer server.Close() - - serverURL, err := url.Parse(server.URL) - if err != nil { - panic(err) - } - - ts := &targetScraper{ - Target: &Target{ - labels: labels.FromStrings( - model.SchemeLabel, serverURL.Scheme, - model.AddressLabel, serverURL.Host, - ), - scrapeConfig: &config.ScrapeConfig{}, - }, - client: http.DefaultClient, - acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.UnderscoreEscaping), - } - - resp, err := ts.scrape(context.Background()) - require.NoError(t, err) - _, err = ts.readResponse(context.Background(), resp, io.Discard) - require.ErrorContains(t, err, "404", "Expected \"404 NotFound\" error but got: %s", err) -} - -func TestTargetScraperBodySizeLimit(t *testing.T) { - const ( - bodySizeLimit = 15 - responseBody = "metric_a 1\nmetric_b 2\n" - ) - var gzipResponse bool - server := httptest.NewServer( - http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.Header().Set("Content-Type", `text/plain; version=0.0.4`) - if gzipResponse { - w.Header().Set("Content-Encoding", "gzip") - gw := gzip.NewWriter(w) - defer func() { _ = gw.Close() }() - _, _ = gw.Write([]byte(responseBody)) - return - } - _, _ = w.Write([]byte(responseBody)) - }), - ) - defer server.Close() - - serverURL, err := url.Parse(server.URL) - if err != nil { - panic(err) - } - - ts := &targetScraper{ - Target: &Target{ - labels: labels.FromStrings( - model.SchemeLabel, serverURL.Scheme, - model.AddressLabel, serverURL.Host, - ), - scrapeConfig: &config.ScrapeConfig{}, - }, - client: http.DefaultClient, - bodySizeLimit: bodySizeLimit, - acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.UnderscoreEscaping), - metrics: newTestScrapeMetrics(t), - } - var buf bytes.Buffer - - // Target response 
uncompressed body, scrape with body size limit. - resp, err := ts.scrape(context.Background()) - require.NoError(t, err) - _, err = ts.readResponse(context.Background(), resp, &buf) - require.ErrorIs(t, err, errBodySizeLimit) - require.Equal(t, bodySizeLimit, buf.Len()) - // Target response gzip compressed body, scrape with body size limit. - gzipResponse = true - buf.Reset() - resp, err = ts.scrape(context.Background()) - require.NoError(t, err) - _, err = ts.readResponse(context.Background(), resp, &buf) - require.ErrorIs(t, err, errBodySizeLimit) - require.Equal(t, bodySizeLimit, buf.Len()) - // Target response uncompressed body, scrape without body size limit. - gzipResponse = false - buf.Reset() - ts.bodySizeLimit = 0 - resp, err = ts.scrape(context.Background()) - require.NoError(t, err) - _, err = ts.readResponse(context.Background(), resp, &buf) - require.NoError(t, err) - require.Len(t, responseBody, buf.Len()) - // Target response gzip compressed body, scrape without body size limit. - gzipResponse = true - buf.Reset() - resp, err = ts.scrape(context.Background()) - require.NoError(t, err) - _, err = ts.readResponse(context.Background(), resp, &buf) - require.NoError(t, err) - require.Len(t, responseBody, buf.Len()) -} - -// testScraper implements the scraper interface and allows setting values -// returned by its methods. It also allows setting a custom scrape function. 
-type testScraper struct { - offsetDur time.Duration - - lastStart time.Time - lastDuration time.Duration - lastError error - - scrapeErr error - scrapeFunc func(context.Context, io.Writer) error -} - -func (ts *testScraper) offset(time.Duration, uint64) time.Duration { - return ts.offsetDur -} - -func (ts *testScraper) Report(start time.Time, duration time.Duration, err error) { - ts.lastStart = start - ts.lastDuration = duration - ts.lastError = err -} - -func (ts *testScraper) scrape(context.Context) (*http.Response, error) { - return nil, ts.scrapeErr -} - -func (ts *testScraper) readResponse(ctx context.Context, _ *http.Response, w io.Writer) (string, error) { - if ts.scrapeFunc != nil { - return "", ts.scrapeFunc(ctx, w) - } - return "", ts.scrapeErr -} - -func TestScrapeLoop_RespectTimestamps(t *testing.T) { +func TestScrapeLoop_RespectTimestamps_AppendV2(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) - appTest := teststorage.NewAppendable().Then(s) - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + appTest := teststorage.NewAppendable().ThenV2(s) + sl, _ := newTestScrapeLoop(t, withAppendableV2(appTest)) now := time.Now() app := sl.appender() @@ -3483,13 +2402,13 @@ func TestScrapeLoop_RespectTimestamps(t *testing.T) { require.Equal(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } -func TestScrapeLoop_DiscardTimestamps(t *testing.T) { +func TestScrapeLoop_DiscardTimestamps_AppendV2(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) - appTest := teststorage.NewAppendable().Then(s) + appTest := teststorage.NewAppendable().ThenV2(s) sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl.appendableV2 = appTest sl.honorTimestamps = false }) @@ -3509,12 +2428,12 @@ func TestScrapeLoop_DiscardTimestamps(t *testing.T) { require.Equal(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } -func 
TestScrapeLoopDiscardDuplicateLabels(t *testing.T) { +func TestScrapeLoopDiscardDuplicateLabels_AppendV2(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) - appTest := teststorage.NewAppendable().Then(s) - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + appTest := teststorage.NewAppendable().ThenV2(s) + sl, _ := newTestScrapeLoop(t, withAppendableV2(appTest)) // We add a good and a bad metric to check that both are discarded. app := sl.appender() @@ -3545,13 +2464,13 @@ func TestScrapeLoopDiscardDuplicateLabels(t *testing.T) { require.False(t, series.Next(), "more than one series found in tsdb") } -func TestScrapeLoopDiscardUnnamedMetrics(t *testing.T) { +func TestScrapeLoopDiscardUnnamedMetrics_AppendV2(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) - appTest := teststorage.NewAppendable().Then(s) + appTest := teststorage.NewAppendable().ThenV2(s) sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl.appendableV2 = appTest sl.sampleMutator = func(l labels.Labels) labels.Labels { if l.Has("drop") { return labels.FromStrings("no", "name") // This label set will trigger an error. 
@@ -3573,74 +2492,7 @@ func TestScrapeLoopDiscardUnnamedMetrics(t *testing.T) { require.NoError(t, series.Err()) } -func TestReusableConfig(t *testing.T) { - variants := []*config.ScrapeConfig{ - { - JobName: "prometheus", - ScrapeTimeout: model.Duration(15 * time.Second), - }, - { - JobName: "httpd", - ScrapeTimeout: model.Duration(15 * time.Second), - }, - { - JobName: "prometheus", - ScrapeTimeout: model.Duration(5 * time.Second), - }, - { - JobName: "prometheus", - MetricsPath: "/metrics", - }, - { - JobName: "prometheus", - MetricsPath: "/metrics2", - }, - { - JobName: "prometheus", - ScrapeTimeout: model.Duration(5 * time.Second), - MetricsPath: "/metrics2", - }, - { - JobName: "prometheus", - ScrapeInterval: model.Duration(5 * time.Second), - MetricsPath: "/metrics2", - }, - { - JobName: "prometheus", - ScrapeInterval: model.Duration(5 * time.Second), - SampleLimit: 1000, - MetricsPath: "/metrics2", - }, - } - - match := [][]int{ - {0, 2}, - {4, 5}, - {4, 6}, - {4, 7}, - {5, 6}, - {5, 7}, - {6, 7}, - } - noMatch := [][]int{ - {1, 2}, - {0, 4}, - {3, 4}, - } - - for i, m := range match { - require.True(t, reusableCache(variants[m[0]], variants[m[1]]), "match test %d", i) - require.True(t, reusableCache(variants[m[1]], variants[m[0]]), "match test %d", i) - require.True(t, reusableCache(variants[m[1]], variants[m[1]]), "match test %d", i) - require.True(t, reusableCache(variants[m[0]], variants[m[0]]), "match test %d", i) - } - for i, m := range noMatch { - require.False(t, reusableCache(variants[m[0]], variants[m[1]]), "not match test %d", i) - require.False(t, reusableCache(variants[m[1]], variants[m[0]]), "not match test %d", i) - } -} - -func TestReuseScrapeCache(t *testing.T) { +func TestReuseScrapeCache_AppendV2(t *testing.T) { var ( app = teststorage.NewAppendable() cfg = &config.ScrapeConfig{ @@ -3651,7 +2503,7 @@ func TestReuseScrapeCache(t *testing.T) { MetricNameValidationScheme: model.UTF8Validation, MetricNameEscapingScheme: model.AllowUTF8, } - 
sp, _ = newScrapePool(cfg, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, _ = newScrapePool(cfg, nil, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) t1 = &Target{ labels: labels.FromStrings("labelNew", "nameNew", "labelNew1", "nameNew1", "labelNew2", "nameNew2"), scrapeConfig: &config.ScrapeConfig{ @@ -3824,11 +2676,11 @@ func TestReuseScrapeCache(t *testing.T) { } } -func TestScrapeAddFast(t *testing.T) { +func TestScrapeAddFast_AppendV2(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) - sl, _ := newTestScrapeLoop(t, withAppendable(s)) + sl, _ := newTestScrapeLoop(t, withAppendableV2(s)) app := sl.appender() _, _, _, err := app.append([]byte("up 1\n"), "text/plain", time.Time{}) @@ -3847,7 +2699,7 @@ func TestScrapeAddFast(t *testing.T) { require.NoError(t, app.Commit()) } -func TestReuseCacheRace(t *testing.T) { +func TestReuseCacheRace_AppendV2(t *testing.T) { var ( cfg = &config.ScrapeConfig{ JobName: "Prometheus", @@ -3858,7 +2710,7 @@ func TestReuseCacheRace(t *testing.T) { MetricNameEscapingScheme: model.AllowUTF8, } buffers = pool.New(1e3, 100e6, 3, func(sz int) any { return make([]byte, 0, sz) }) - sp, _ = newScrapePool(cfg, teststorage.NewAppendable(), 0, nil, buffers, &Options{}, newTestScrapeMetrics(t)) + sp, _ = newScrapePool(cfg, nil, teststorage.NewAppendable(), 0, nil, buffers, &Options{}, newTestScrapeMetrics(t)) t1 = &Target{ labels: labels.FromStrings("labelNew", "nameNew"), scrapeConfig: &config.ScrapeConfig{}, @@ -3884,17 +2736,7 @@ func TestReuseCacheRace(t *testing.T) { } } -func TestCheckAddError(t *testing.T) { - var appErrs appendErrors - sl, _ := newTestScrapeLoop(t) - // TODO: Check err etc - _, _ = sl.checkAddError(nil, storage.ErrOutOfOrderSample, nil, nil, &appErrs) - require.Equal(t, 1, appErrs.numOutOfOrder) - - // TODO(bwplotka): Test partial error check and other cases -} - -func TestScrapeReportSingleAppender(t *testing.T) { +func TestScrapeReportSingleAppender_AppendV2(t 
*testing.T) { t.Parallel() s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -3904,7 +2746,7 @@ func TestScrapeReportSingleAppender(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = s + sl.appendableV2 = s // Since we're writing samples directly below we need to provide a protocol fallback. sl.fallbackScrapeProtocol = "text/plain" }) @@ -3950,7 +2792,7 @@ func TestScrapeReportSingleAppender(t *testing.T) { } } -func TestScrapeReportLimit(t *testing.T) { +func TestScrapeReportLimit_AppendV2(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -3967,7 +2809,7 @@ func TestScrapeReportLimit(t *testing.T) { ts, scrapedTwice := newScrapableServer("metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n") defer ts.Close() - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, nil, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -4006,7 +2848,7 @@ func TestScrapeReportLimit(t *testing.T) { require.True(t, found) } -func TestScrapeUTF8(t *testing.T) { +func TestScrapeUTF8_AppendV2(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -4021,7 +2863,7 @@ func TestScrapeUTF8(t *testing.T) { ts, scrapedTwice := newScrapableServer("{\"with.dots\"} 42\n") defer ts.Close() - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, nil, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -4050,7 +2892,7 @@ func TestScrapeUTF8(t *testing.T) { require.True(t, series.Next(), "series not found in tsdb") } -func TestScrapeLoopLabelLimit(t *testing.T) { +func TestScrapeLoopLabelLimit_AppendV2(t *testing.T) { tests := []struct { title string scrapeLabels string @@ -4121,6 +2963,7 @@ func TestScrapeLoopLabelLimit(t 
*testing.T) { } sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendableV2 = teststorage.NewAppendable() sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, discoveryLabels, false, nil) } @@ -4143,7 +2986,7 @@ func TestScrapeLoopLabelLimit(t *testing.T) { } } -func TestTargetScrapeIntervalAndTimeoutRelabel(t *testing.T) { +func TestTargetScrapeIntervalAndTimeoutRelabel_AppendV2(t *testing.T) { interval, _ := model.ParseDuration("2s") timeout, _ := model.ParseDuration("500ms") cfg := &config.ScrapeConfig{ @@ -4170,7 +3013,7 @@ func TestTargetScrapeIntervalAndTimeoutRelabel(t *testing.T) { }, }, } - sp, _ := newScrapePool(cfg, teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, _ := newScrapePool(cfg, nil, teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) tgts := []*targetgroup.Group{ { Targets: []model.LabelSet{{model.AddressLabel: "127.0.0.1:9090"}}, @@ -4185,7 +3028,7 @@ func TestTargetScrapeIntervalAndTimeoutRelabel(t *testing.T) { } // Testing whether we can remove trailing .0 from histogram 'le' and summary 'quantile' labels. -func TestLeQuantileReLabel(t *testing.T) { +func TestLeQuantileReLabel_AppendV2(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -4256,7 +3099,7 @@ test_summary_count 199 ts, scrapedTwice := newScrapableServer(metricsText) defer ts.Close() - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, nil, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -4304,7 +3147,7 @@ test_summary_count 199 } // Testing whether we can automatically convert scraped classic histograms into native histograms with custom buckets. 
-func TestConvertClassicHistogramsToNHCB(t *testing.T) { +func TestConvertClassicHistogramsToNHCB_AppendV2(t *testing.T) { t.Parallel() genTestCounterText := func(name string) string { @@ -4710,7 +3553,7 @@ metric: < t.Cleanup(func() { _ = s.Close() }) sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = s + sl.appendableV2 = s sl.alwaysScrapeClassicHist = tc.alwaysScrapeClassicHistograms sl.convertClassicHistToNHCB = tc.convertClassicHistToNHCB sl.enableNativeHistogramScraping = true @@ -4788,7 +3631,7 @@ metric: < } } -func TestTypeUnitReLabel(t *testing.T) { +func TestTypeUnitReLabel_AppendV2(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -4837,7 +3680,7 @@ disk_usage_bytes 456 ts, scrapedTwice := newScrapableServer(metricsText) defer ts.Close() - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, nil, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -4874,14 +3717,14 @@ disk_usage_bytes 456 } } -func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t *testing.T) { +func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics_AppendV2(t *testing.T) { signal := make(chan struct{}, 1) ctx, cancel := context.WithCancel(t.Context()) appTest := teststorage.NewAppendable() sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = appTest // Since we're writing samples directly below we need to provide a protocol fallback. + sl.appendableV2 = appTest // Since we're writing samples directly below we need to provide a protocol fallback. sl.fallbackScrapeProtocol = "text/plain" sl.trackTimestampsStaleness = true }) @@ -4921,7 +3764,7 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t * "Appended second sample not as expected. 
Wanted: stale NaN Got: %x", math.Float64bits(got[6].V)) } -func TestScrapeLoopCompression(t *testing.T) { +func TestScrapeLoopCompression_AppendV2(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -4961,7 +3804,7 @@ func TestScrapeLoopCompression(t *testing.T) { MetricNameEscapingScheme: model.AllowUTF8, } - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, nil, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -4983,157 +3826,10 @@ func TestScrapeLoopCompression(t *testing.T) { } } -func TestPickSchema(t *testing.T) { - tcs := []struct { - factor float64 - schema int32 - }{ - { - factor: 65536, - schema: -4, - }, - { - factor: 256, - schema: -3, - }, - { - factor: 16, - schema: -2, - }, - { - factor: 4, - schema: -1, - }, - { - factor: 2, - schema: 0, - }, - { - factor: 1.4, - schema: 1, - }, - { - factor: 1.1, - schema: 2, - }, - { - factor: 1.09, - schema: 3, - }, - { - factor: 1.04, - schema: 4, - }, - { - factor: 1.02, - schema: 5, - }, - { - factor: 1.01, - schema: 6, - }, - { - factor: 1.005, - schema: 7, - }, - { - factor: 1.002, - schema: 8, - }, - // The default value of native_histogram_min_bucket_factor - { - factor: 0, - schema: 8, - }, - } - - for _, tc := range tcs { - schema := pickSchema(tc.factor) - require.Equal(t, tc.schema, schema) - } -} - -func BenchmarkTargetScraperGzip(b *testing.B) { - scenarios := []struct { - metricsCount int - body []byte - }{ - {metricsCount: 1}, - {metricsCount: 100}, - {metricsCount: 1000}, - {metricsCount: 10000}, - {metricsCount: 100000}, - } - - for i := range scenarios { - var buf bytes.Buffer - var name string - gw := gzip.NewWriter(&buf) - for j := 0; j < scenarios[i].metricsCount; j++ { - name = fmt.Sprintf("go_memstats_alloc_bytes_total_%d", j) - _, _ = fmt.Fprintf(gw, "# HELP %s Total number of bytes allocated, even if freed.\n", name) - _, _ = fmt.Fprintf(gw, "# TYPE %s 
counter\n", name) - _, _ = fmt.Fprintf(gw, "%s %d\n", name, i*j) - } - require.NoError(b, gw.Close()) - scenarios[i].body = buf.Bytes() - } - - handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", `text/plain; version=0.0.4`) - w.Header().Set("Content-Encoding", "gzip") - for _, scenario := range scenarios { - if strconv.Itoa(scenario.metricsCount) == r.URL.Query()["count"][0] { - _, _ = w.Write(scenario.body) - return - } - } - w.WriteHeader(http.StatusBadRequest) - }) - - server := httptest.NewServer(handler) - defer server.Close() - - serverURL, err := url.Parse(server.URL) - if err != nil { - panic(err) - } - - client, err := config_util.NewClientFromConfig(config_util.DefaultHTTPClientConfig, "test_job") - if err != nil { - panic(err) - } - - for _, scenario := range scenarios { - b.Run(fmt.Sprintf("metrics=%d", scenario.metricsCount), func(b *testing.B) { - ts := &targetScraper{ - Target: &Target{ - labels: labels.FromStrings( - model.SchemeLabel, serverURL.Scheme, - model.AddressLabel, serverURL.Host, - ), - scrapeConfig: &config.ScrapeConfig{ - MetricNameValidationScheme: model.UTF8Validation, - MetricNameEscapingScheme: model.AllowUTF8, - Params: url.Values{"count": []string{strconv.Itoa(scenario.metricsCount)}}, - }, - }, - client: client, - timeout: time.Second, - } - b.ResetTimer() - for b.Loop() { - _, err = ts.scrape(context.Background()) - require.NoError(b, err) - } - }) - } -} - // When a scrape contains multiple instances for the same time series we should increment // prometheus_target_scrapes_sample_duplicate_timestamp_total metric. 
-func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) { - sl, _ := newTestScrapeLoop(t) +func TestScrapeLoopSeriesAddedDuplicates_AppendV2(t *testing.T) { + sl, _ := newTestScrapeLoop(t, withAppendableV2(teststorage.NewAppendable())) app := sl.appender() total, added, seriesAdded, err := app.append([]byte("test_metric 1\ntest_metric 2\ntest_metric 3\n"), "text/plain", time.Time{}) @@ -5167,7 +3863,7 @@ func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) { // This tests running a full scrape loop and checking that the scrape option // `native_histogram_min_bucket_factor` is used correctly. -func TestNativeHistogramMaxSchemaSet(t *testing.T) { +func TestNativeHistogramMaxSchemaSet_AppendV2(t *testing.T) { testcases := map[string]struct { minBucketFactor string expectedSchema int32 @@ -5188,12 +3884,12 @@ func TestNativeHistogramMaxSchemaSet(t *testing.T) { for name, tc := range testcases { t.Run(name, func(t *testing.T) { t.Parallel() - testNativeHistogramMaxSchemaSet(t, tc.minBucketFactor, tc.expectedSchema) + testNativeHistogramMaxSchemaSetAppendV2(t, tc.minBucketFactor, tc.expectedSchema) }) } } -func testNativeHistogramMaxSchemaSet(t *testing.T, minBucketFactor string, expectedSchema int32) { +func testNativeHistogramMaxSchemaSetAppendV2(t *testing.T, minBucketFactor string, expectedSchema int32) { // Create a ProtoBuf message to serve as a Prometheus metric. nativeHistogram := prometheus.NewHistogram( prometheus.HistogramOpts{ @@ -5246,6 +3942,11 @@ scrape_configs: mng, err := NewManager(&Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond)}, nil, nil, s, reg) require.NoError(t, err) + + // TODO(bwplotka): Switch public constructor to support AppendableV2 in separate PR. 
+ mng.appendable = nil + mng.appendableV2 = s + cfg, err := config.Load(configStr, promslog.NewNopLogger()) require.NoError(t, err) require.NoError(t, mng.ApplyConfig(cfg)) @@ -5300,7 +4001,7 @@ scrape_configs: } } -func TestTargetScrapeConfigWithLabels(t *testing.T) { +func TestTargetScrapeConfigWithLabels_AppendV2(t *testing.T) { t.Parallel() const ( configTimeout = 1500 * time.Millisecond @@ -5346,7 +4047,7 @@ func TestTargetScrapeConfigWithLabels(t *testing.T) { } } - sp, err := newScrapePool(cfg, teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, nil, teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) t.Cleanup(sp.stop) @@ -5469,21 +4170,8 @@ func TestTargetScrapeConfigWithLabels(t *testing.T) { } } -func newScrapableServer(scrapeText string) (s *httptest.Server, scrapedTwice chan bool) { - var scrapes int - scrapedTwice = make(chan bool) - - return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - _, _ = fmt.Fprint(w, scrapeText) - scrapes++ - if scrapes == 2 { - close(scrapedTwice) - } - })), scrapedTwice -} - // Regression test for the panic fixed in https://github.com/prometheus/prometheus/pull/15523. 
-func TestScrapePoolScrapeAfterReload(t *testing.T) { +func TestScrapePoolScrapeAfterReload_AppendV2(t *testing.T) { h := httptest.NewServer(http.HandlerFunc( func(w http.ResponseWriter, _ *http.Request) { _, _ = w.Write([]byte{0x42, 0x42}) @@ -5509,7 +4197,7 @@ func TestScrapePoolScrapeAfterReload(t *testing.T) { }, } - p, err := newScrapePool(cfg, teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + p, err := newScrapePool(cfg, nil, teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) t.Cleanup(p.stop) @@ -5527,8 +4215,8 @@ func TestScrapePoolScrapeAfterReload(t *testing.T) { // Regression test against https://github.com/prometheus/prometheus/issues/16160. // The first scrape fails with a parsing error, but the second should -// succeed and cause `metric_1=11` to appear in the appender. -func TestScrapeAppendWithParseError(t *testing.T) { +// succeed and cause `metric_1=11` to appear in the AppenderV2. +func TestScrapeAppendWithParseError_AppendV2(t *testing.T) { const ( scrape1 = `metric_a 1 ` @@ -5537,7 +4225,7 @@ func TestScrapeAppendWithParseError(t *testing.T) { ) appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendableV2(appTest)) now := time.Now() app := sl.appender() @@ -5568,12 +4256,12 @@ func TestScrapeAppendWithParseError(t *testing.T) { // This test covers a case where there's a target with sample_limit set and some samples // changes between scrapes. 
-func TestScrapeLoopAppendSampleLimitWithDisappearingSeries(t *testing.T) { +func TestScrapeLoopAppendSampleLimitWithDisappearingSeries_AppendV2(t *testing.T) { const sampleLimit = 4 appTest := teststorage.NewAppendable() sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl.appendableV2 = appTest sl.sampleLimit = sampleLimit }) @@ -5671,12 +4359,12 @@ func TestScrapeLoopAppendSampleLimitWithDisappearingSeries(t *testing.T) { // This test covers a case where there's a target with sample_limit set and each scrape sees a completely // different set of samples. -func TestScrapeLoopAppendSampleLimitReplaceAllSamples(t *testing.T) { +func TestScrapeLoopAppendSampleLimitReplaceAllSamples_AppendV2(t *testing.T) { const sampleLimit = 4 appTest := teststorage.NewAppendable() sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl.appendableV2 = appTest sl.sampleLimit = sampleLimit }) @@ -5779,11 +4467,11 @@ func TestScrapeLoopAppendSampleLimitReplaceAllSamples(t *testing.T) { requireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", app) } -func TestScrapeLoopDisableStalenessMarkerInjection(t *testing.T) { +func TestScrapeLoopDisableStalenessMarkerInjection_AppendV2(t *testing.T) { loopDone := atomic.NewBool(false) appTest := teststorage.NewAppendable() - sl, scraper := newTestScrapeLoop(t, withAppendable(appTest)) + sl, scraper := newTestScrapeLoop(t, withAppendableV2(appTest)) scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { if _, err := w.Write([]byte("metric_a 42\n")); err != nil { return err @@ -5817,46 +4505,9 @@ func TestScrapeLoopDisableStalenessMarkerInjection(t *testing.T) { } } -// Recommended CLI invocation: -/* - export bench=restartLoops && go test ./scrape/... 
\ - -run '^$' -bench '^BenchmarkScrapePoolRestartLoops' \ - -benchtime 5s -count 6 -cpu 2 -timeout 999m \ - | tee ${bench}.txt -*/ -func BenchmarkScrapePoolRestartLoops(b *testing.B) { - sp, err := newScrapePool( - &config.ScrapeConfig{ - MetricNameValidationScheme: model.UTF8Validation, - ScrapeInterval: model.Duration(1 * time.Hour), - ScrapeTimeout: model.Duration(1 * time.Hour), - }, - nil, - 0, - nil, - nil, - &Options{}, - newTestScrapeMetrics(b), - ) - require.NoError(b, err) - b.Cleanup(sp.stop) - - for i := range 1000 { - sp.activeTargets[uint64(i)] = &Target{scrapeConfig: &config.ScrapeConfig{}} - sp.loops[uint64(i)] = noopLoop() // First restart will supplement those with proper scrapeLoops. - } - sp.restartLoops(true) - - b.ReportAllocs() - b.ResetTimer() - for b.Loop() { - sp.restartLoops(true) - } -} - -// TestNewScrapeLoopHonorLabelsWiring verifies that newScrapeLoop correctly wires +// TestNewScrapeLoopHonorLabelsWiring_AppendV2 verifies that newScrapeLoop correctly wires // HonorLabels (not HonorTimestamps) to the sampleMutator. -func TestNewScrapeLoopHonorLabelsWiring(t *testing.T) { +func TestNewScrapeLoopHonorLabelsWiring_AppendV2(t *testing.T) { // Scraped metric has label "lbl" with value "scraped". // Discovery target has label "lbl" with value "discovery". // With honor_labels=true, the scraped value should win. 
@@ -5888,7 +4539,7 @@ func TestNewScrapeLoopHonorLabelsWiring(t *testing.T) { require.NoError(t, err) s := teststorage.New(t) - defer s.Close() + t.Cleanup(func() { _ = s.Close() }) cfg := &config.ScrapeConfig{ JobName: "test", @@ -5900,7 +4551,7 @@ func TestNewScrapeLoopHonorLabelsWiring(t *testing.T) { MetricNameValidationScheme: model.UTF8Validation, } - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{skipOffsetting: true}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, nil, s, 0, nil, nil, &Options{skipOffsetting: true}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -5923,7 +4574,7 @@ func TestNewScrapeLoopHonorLabelsWiring(t *testing.T) { // Query the storage to verify label values. q, err := s.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) require.NoError(t, err) - defer q.Close() + t.Cleanup(func() { _ = q.Close() }) series := q.Select(t.Context(), false, nil, labels.MustNewMatcher(labels.MatchEqual, "__name__", "metric")) require.True(t, series.Next(), "metric series not found") @@ -5933,7 +4584,7 @@ func TestNewScrapeLoopHonorLabelsWiring(t *testing.T) { } } -func TestDropsSeriesFromMetricRelabeling(t *testing.T) { +func TestDropsSeriesFromMetricRelabeling_AppendV2(t *testing.T) { target := &Target{} relabelConfig := []*relabel.Config{ { @@ -5950,6 +4601,7 @@ func TestDropsSeriesFromMetricRelabeling(t *testing.T) { }, } sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendableV2 = teststorage.NewAppendable() sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, target, true, relabelConfig) } diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index c2b2ae132c..a28b770155 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -94,7 +94,7 @@ func TestNewScrapePool(t *testing.T) { MetricNameValidationScheme: model.UTF8Validation, MetricNameEscapingScheme: model.AllowUTF8, } - sp, err = newScrapePool(cfg, app, 0, nil, nil, &Options{}, 
newTestScrapeMetrics(t)) + sp, err = newScrapePool(cfg, app, nil, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) ) require.NoError(t, err) @@ -337,7 +337,7 @@ func TestDroppedTargetsList(t *testing.T) { }, }, } - sp, _ = newScrapePool(cfg, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, _ = newScrapePool(cfg, app, nil, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) expectedLabelSetString = "{__address__=\"127.0.0.1:9090\", __scrape_interval__=\"0s\", __scrape_timeout__=\"0s\", job=\"dropMe\"}" expectedLength = 2 ) @@ -821,7 +821,7 @@ func TestScrapePoolRaces(t *testing.T) { MetricNameEscapingScheme: model.AllowUTF8, } } - sp, _ := newScrapePool(newConfig(), teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, _ := newScrapePool(newConfig(), teststorage.NewAppendable(), nil, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) tgts := []*targetgroup.Group{ { Targets: []model.LabelSet{ @@ -1007,7 +1007,7 @@ func TestScrapeLoopRun(t *testing.T) { ) ctx, cancel := context.WithCancel(t.Context()) - sl, scraper := newTestScrapeLoop(t, withCtx(ctx)) + sl, scraper := newTestScrapeLoop(t, withCtx(ctx), withAppendable(teststorage.NewAppendable())) // The loop must terminate during the initial offset if the context // is canceled. 
scraper.offsetDur = time.Hour @@ -1031,6 +1031,7 @@ func TestScrapeLoopRun(t *testing.T) { ctx, cancel = context.WithCancel(t.Context()) sl, scraper = newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendable = teststorage.NewAppendable() sl.ctx = ctx sl.timeout = 100 * time.Millisecond }) @@ -1082,7 +1083,7 @@ func TestScrapeLoopForcedErr(t *testing.T) { ) ctx, cancel := context.WithCancel(t.Context()) - sl, scraper := newTestScrapeLoop(t, withCtx(ctx)) + sl, scraper := newTestScrapeLoop(t, withCtx(ctx), withAppendable(teststorage.NewAppendable())) forcedErr := errors.New("forced err") sl.setForcedError(forcedErr) @@ -1122,7 +1123,7 @@ func TestScrapeLoopRun_ContextCancelTerminatesBlockedSend(t *testing.T) { ) ctx, cancel := context.WithCancel(t.Context()) - sl, scraper := newTestScrapeLoop(t, withCtx(ctx)) + sl, scraper := newTestScrapeLoop(t, withCtx(ctx), withAppendable(teststorage.NewAppendable())) forcedErr := errors.New("forced err") sl.setForcedError(forcedErr) @@ -1149,7 +1150,7 @@ func TestScrapeLoopRun_ContextCancelTerminatesBlockedSend(t *testing.T) { } func TestScrapeLoopMetadata(t *testing.T) { - sl, _ := newTestScrapeLoop(t) + sl, _ := newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable())) app := sl.appender() total, _, _, err := app.append([]byte(`# TYPE test_metric counter @@ -1183,7 +1184,7 @@ test_metric_total 1 } func TestScrapeLoopSeriesAdded(t *testing.T) { - sl, _ := newTestScrapeLoop(t) + sl, _ := newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable())) app := sl.appender() total, added, seriesAdded, err := app.append([]byte("test_metric 1\n"), "text/plain", time.Time{}) @@ -1214,6 +1215,7 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { NameValidationScheme: model.UTF8Validation, }} sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendable = teststorage.NewAppendable() sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, target, true, relabelConfig) } 
@@ -1230,6 +1232,7 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { func TestScrapeLoopFailLegacyUnderUTF8(t *testing.T) { sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendable = teststorage.NewAppendable() sl.validationScheme = model.LegacyValidation }) @@ -1243,6 +1246,7 @@ func TestScrapeLoopFailLegacyUnderUTF8(t *testing.T) { // When scrapeloop has validation set to UTF-8, the metric is allowed. sl, _ = newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendable = teststorage.NewAppendable() sl.validationScheme = model.UTF8Validation }) @@ -1772,6 +1776,7 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendable = teststorage.NewAppendable().Then(s) sl.ctx = ctx }) numScrapes := 0 @@ -2231,6 +2236,7 @@ func TestScrapeLoop_ChangingMetricString(t *testing.T) { func TestScrapeLoopAppendFailsWithNoContentType(t *testing.T) { sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendable = teststorage.NewAppendable() // Explicitly setting the lack of fallback protocol here to make it obvious. sl.fallbackScrapeProtocol = "" }) @@ -2245,6 +2251,7 @@ func TestScrapeLoopAppendFailsWithNoContentType(t *testing.T) { // TestScrapeLoopAppendEmptyWithNoContentType ensures we there are no errors when we get a blank scrape or just want to append a stale marker. func TestScrapeLoopAppendEmptyWithNoContentType(t *testing.T) { sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendable = teststorage.NewAppendable() // Explicitly setting the lack of fallback protocol here to make it obvious. 
sl.fallbackScrapeProtocol = "" }) @@ -3651,7 +3658,7 @@ func TestReuseScrapeCache(t *testing.T) { MetricNameValidationScheme: model.UTF8Validation, MetricNameEscapingScheme: model.AllowUTF8, } - sp, _ = newScrapePool(cfg, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, _ = newScrapePool(cfg, app, nil, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) t1 = &Target{ labels: labels.FromStrings("labelNew", "nameNew", "labelNew1", "nameNew1", "labelNew2", "nameNew2"), scrapeConfig: &config.ScrapeConfig{ @@ -3858,7 +3865,7 @@ func TestReuseCacheRace(t *testing.T) { MetricNameEscapingScheme: model.AllowUTF8, } buffers = pool.New(1e3, 100e6, 3, func(sz int) any { return make([]byte, 0, sz) }) - sp, _ = newScrapePool(cfg, teststorage.NewAppendable(), 0, nil, buffers, &Options{}, newTestScrapeMetrics(t)) + sp, _ = newScrapePool(cfg, teststorage.NewAppendable(), nil, 0, nil, buffers, &Options{}, newTestScrapeMetrics(t)) t1 = &Target{ labels: labels.FromStrings("labelNew", "nameNew"), scrapeConfig: &config.ScrapeConfig{}, @@ -3888,7 +3895,7 @@ func TestCheckAddError(t *testing.T) { var appErrs appendErrors sl, _ := newTestScrapeLoop(t) // TODO: Check err etc - _, _ = sl.checkAddError(nil, storage.ErrOutOfOrderSample, nil, nil, &appErrs) + _, _ = sl.checkAddError(nil, nil, storage.ErrOutOfOrderSample, nil, nil, &appErrs) require.Equal(t, 1, appErrs.numOutOfOrder) // TODO(bwplotka): Test partial error check and other cases @@ -3967,7 +3974,7 @@ func TestScrapeReportLimit(t *testing.T) { ts, scrapedTwice := newScrapableServer("metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n") defer ts.Close() - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, s, nil, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -4021,7 +4028,7 @@ func TestScrapeUTF8(t *testing.T) { ts, scrapedTwice := newScrapableServer("{\"with.dots\"} 42\n") defer ts.Close() - sp, err := 
newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, s, nil, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -4121,6 +4128,7 @@ func TestScrapeLoopLabelLimit(t *testing.T) { } sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendable = teststorage.NewAppendable() sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, discoveryLabels, false, nil) } @@ -4170,7 +4178,7 @@ func TestTargetScrapeIntervalAndTimeoutRelabel(t *testing.T) { }, }, } - sp, _ := newScrapePool(cfg, teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, _ := newScrapePool(cfg, teststorage.NewAppendable(), nil, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) tgts := []*targetgroup.Group{ { Targets: []model.LabelSet{{model.AddressLabel: "127.0.0.1:9090"}}, @@ -4256,7 +4264,7 @@ test_summary_count 199 ts, scrapedTwice := newScrapableServer(metricsText) defer ts.Close() - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, s, nil, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -4837,7 +4845,7 @@ disk_usage_bytes 456 ts, scrapedTwice := newScrapableServer(metricsText) defer ts.Close() - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, s, nil, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -4961,7 +4969,7 @@ func TestScrapeLoopCompression(t *testing.T) { MetricNameEscapingScheme: model.AllowUTF8, } - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, s, nil, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -5133,7 +5141,7 @@ func BenchmarkTargetScraperGzip(b *testing.B) { // When a scrape contains multiple 
instances for the same time series we should increment // prometheus_target_scrapes_sample_duplicate_timestamp_total metric. func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) { - sl, _ := newTestScrapeLoop(t) + sl, _ := newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable())) app := sl.appender() total, added, seriesAdded, err := app.append([]byte("test_metric 1\ntest_metric 2\ntest_metric 3\n"), "text/plain", time.Time{}) @@ -5346,7 +5354,7 @@ func TestTargetScrapeConfigWithLabels(t *testing.T) { } } - sp, err := newScrapePool(cfg, teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, teststorage.NewAppendable(), nil, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) t.Cleanup(sp.stop) @@ -5509,7 +5517,7 @@ func TestScrapePoolScrapeAfterReload(t *testing.T) { }, } - p, err := newScrapePool(cfg, teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + p, err := newScrapePool(cfg, teststorage.NewAppendable(), nil, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) t.Cleanup(p.stop) @@ -5832,6 +5840,7 @@ func BenchmarkScrapePoolRestartLoops(b *testing.B) { ScrapeTimeout: model.Duration(1 * time.Hour), }, nil, + nil, 0, nil, nil, @@ -5900,7 +5909,7 @@ func TestNewScrapeLoopHonorLabelsWiring(t *testing.T) { MetricNameValidationScheme: model.UTF8Validation, } - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{skipOffsetting: true}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(cfg, s, nil, 0, nil, nil, &Options{skipOffsetting: true}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -5950,6 +5959,7 @@ func TestDropsSeriesFromMetricRelabeling(t *testing.T) { }, } sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendable = teststorage.NewAppendable() sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, target, true, relabelConfig) } diff --git 
a/scrape/target.go b/scrape/target.go index 4265f9e782..1040241bd3 100644 --- a/scrape/target.go +++ b/scrape/target.go @@ -454,6 +454,105 @@ func (app *maxSchemaAppender) AppendHistogram(ref storage.SeriesRef, lset labels return ref, nil } +// limitAppenderV2 limits the number of total appended samples in a batch. +type limitAppenderV2 struct { + storage.AppenderV2 + + limit int + i int +} + +func (app *limitAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, opts storage.AOptions) (storage.SeriesRef, error) { + // Bypass sample_limit checks only if we have a staleness marker for a known series (ref value is non-zero). + // This ensures that if a series is already in TSDB then we always write the marker. + if ref == 0 || !value.IsStaleNaN(v) { + app.i++ + if app.i > app.limit { + return 0, errSampleLimit + } + } + return app.AppenderV2.Append(ref, ls, st, t, v, h, fh, opts) +} + +type timeLimitAppenderV2 struct { + storage.AppenderV2 + + maxTime int64 +} + +func (app *timeLimitAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, opts storage.AOptions) (storage.SeriesRef, error) { + if t > app.maxTime { + return 0, storage.ErrOutOfBounds + } + + return app.AppenderV2.Append(ref, ls, st, t, v, h, fh, opts) +} + +// bucketLimitAppenderV2 limits the number of buckets in each appended histogram, reducing its resolution where possible. +type bucketLimitAppenderV2 struct { + storage.AppenderV2 + + limit int +} + +func (app *bucketLimitAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, opts storage.AOptions) (_ storage.SeriesRef, err error) { + if h != nil { + // Return with an early error if the histogram has too many buckets and the + // schema is not exponential, in which case we can't reduce the resolution.
+ if len(h.PositiveBuckets)+len(h.NegativeBuckets) > app.limit && !histogram.IsExponentialSchema(h.Schema) { + return 0, errBucketLimit + } + for len(h.PositiveBuckets)+len(h.NegativeBuckets) > app.limit { + if h.Schema <= histogram.ExponentialSchemaMin { + return 0, errBucketLimit + } + if err = h.ReduceResolution(h.Schema - 1); err != nil { + return 0, err + } + } + } + if fh != nil { + // Return with an early error if the histogram has too many buckets and the + // schema is not exponential, in which case we can't reduce the resolution. + if len(fh.PositiveBuckets)+len(fh.NegativeBuckets) > app.limit && !histogram.IsExponentialSchema(fh.Schema) { + return 0, errBucketLimit + } + for len(fh.PositiveBuckets)+len(fh.NegativeBuckets) > app.limit { + if fh.Schema <= histogram.ExponentialSchemaMin { + return 0, errBucketLimit + } + if err = fh.ReduceResolution(fh.Schema - 1); err != nil { + return 0, err + } + } + } + return app.AppenderV2.Append(ref, ls, st, t, v, h, fh, opts) +} + +type maxSchemaAppenderV2 struct { + storage.AppenderV2 + + maxSchema int32 +} + +func (app *maxSchemaAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, opts storage.AOptions) (_ storage.SeriesRef, err error) { + if h != nil { + if histogram.IsExponentialSchemaReserved(h.Schema) && h.Schema > app.maxSchema { + if err = h.ReduceResolution(app.maxSchema); err != nil { + return 0, err + } + } + } + if fh != nil { + if histogram.IsExponentialSchemaReserved(fh.Schema) && fh.Schema > app.maxSchema { + if err = fh.ReduceResolution(app.maxSchema); err != nil { + return 0, err + } + } + } + return app.AppenderV2.Append(ref, ls, st, t, v, h, fh, opts) +} + // PopulateDiscoveredLabels sets base labels on lb from target and group labels and scrape configuration, before relabeling. 
func PopulateDiscoveredLabels(lb *labels.Builder, cfg *config.ScrapeConfig, tLabels, tgLabels model.LabelSet) { lb.Reset(labels.EmptyLabels()) diff --git a/util/teststorage/appender.go b/util/teststorage/appender.go index 55cb727ee0..90ab0fbc63 100644 --- a/util/teststorage/appender.go +++ b/util/teststorage/appender.go @@ -332,7 +332,8 @@ func computeOrCheckRef(ref storage.SeriesRef, ls labels.Labels) (storage.SeriesR } if storage.SeriesRef(h) != ref { - // Check for buggy ref while we at it. + // Check for buggy ref while we are at it. This only makes sense for cases without .Then*, because further appendable + // might have a different ref computation logic e.g. TSDB uses atomic increments. return 0, errors.New("teststorage.appender: found input ref not matching labels; potential bug in Appendable usage") } return ref, nil @@ -498,13 +499,14 @@ func (a *appenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64 if a.next != nil { ref, err = a.next.Append(ref, ls, st, t, v, h, fh, opts) + if err != nil { + return 0, err + } + } else { + ref, err = computeOrCheckRef(ref, ls) if err != nil { return ref, err } } - ref, err = computeOrCheckRef(ref, ls) - if err != nil { - return ref, err - } return ref, partialErr }