diff --git a/command/commands.go b/command/commands.go index 76e3852abb..9b2cbc7377 100644 --- a/command/commands.go +++ b/command/commands.go @@ -686,6 +686,15 @@ func initCommands(ui, serverCmdUi cli.Ui, runOpts *RunOptions) { }, nil }, } + + // Disabled by default until functional + if os.Getenv(OperatorDiagnoseEnableEnv) != "" { + Commands["operator diagnose"] = func() (cli.Command, error) { + return &OperatorDiagnoseCommand{ + BaseCommand: getBaseCommand(), + }, nil + } + } } // MakeShutdownCh returns a channel that can be used for shutdown diff --git a/command/operator_diagnose.go b/command/operator_diagnose.go new file mode 100644 index 0000000000..c81fc57020 --- /dev/null +++ b/command/operator_diagnose.go @@ -0,0 +1,160 @@ +package command + +import ( + "strings" + + log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/vault/sdk/version" + "github.com/mitchellh/cli" + "github.com/posener/complete" +) + +const OperatorDiagnoseEnableEnv = "VAULT_DIAGNOSE" + +var _ cli.Command = (*OperatorDiagnoseCommand)(nil) +var _ cli.CommandAutocomplete = (*OperatorDiagnoseCommand)(nil) + +type OperatorDiagnoseCommand struct { + *BaseCommand + + flagDebug bool + flagSkips []string + flagConfigs []string +} + +func (c *OperatorDiagnoseCommand) Synopsis() string { + return "Troubleshoot problems starting Vault" +} + +func (c *OperatorDiagnoseCommand) Help() string { + helpText := ` +Usage: vault operator diagnose + + This command troubleshoots Vault startup issues, such as TLS configuration or + auto-unseal. It should be run using the same environment variables and configuration + files as the "vault server" command, so that startup problems can be accurately + reproduced. + + Start diagnose with a configuration file: + + $ vault operator diagnose -config=/etc/vault/config.hcl + + Perform a diagnostic check while Vault is still running: + + $ vault operator diagnose -config=/etc/vault/config.hcl -skip=listener + +` + c.Flags().Help() + return strings.TrimSpace(helpText) +} + +func (c *OperatorDiagnoseCommand) Flags() *FlagSets { + set := NewFlagSets(c.UI) + f := set.NewFlagSet("Command Options") + + f.StringSliceVar(&StringSliceVar{ + Name: "config", + Target: &c.flagConfigs, + Completion: complete.PredictOr( + complete.PredictFiles("*.hcl"), + complete.PredictFiles("*.json"), + complete.PredictDirs("*"), + ), + Usage: "Path to a Vault configuration file or directory of configuration " + + "files. This flag can be specified multiple times to load multiple " + + "configurations. If the path is a directory, all files which end in " + + ".hcl or .json are loaded.", + }) + + f.StringSliceVar(&StringSliceVar{ + Name: "skip", + Target: &c.flagSkips, + Usage: "Skip the health checks named as arguments. May be 'listener', 'storage', or 'autounseal'.", + }) + + f.BoolVar(&BoolVar{ + Name: "debug", + Target: &c.flagDebug, + Default: false, + Usage: "Dump all information collected by Diagnose.", + }) + return set +} + +func (c *OperatorDiagnoseCommand) AutocompleteArgs() complete.Predictor { + return complete.PredictNothing +} + +func (c *OperatorDiagnoseCommand) AutocompleteFlags() complete.Flags { + return c.Flags().Completions() +} + +const status_unknown = "[ ] " +const status_ok = "\u001b[32m[ ok ]\u001b[0m " +const status_failed = "\u001b[31m[failed]\u001b[0m " +const status_warn = "\u001b[33m[ warn ]\u001b[0m " +const same_line = "\u001b[F" + +func (c *OperatorDiagnoseCommand) Run(args []string) int { + f := c.Flags() + if err := f.Parse(args); err != nil { + c.UI.Error(err.Error()) + return 1 + } + return c.RunWithParsedFlags() +} + +func (c *OperatorDiagnoseCommand) RunWithParsedFlags() int { + if len(c.flagConfigs) == 0 { + c.UI.Error("Must specify a configuration file using -config.") + return 1 + } + + c.UI.Output(version.GetVersion().FullVersionNumber(true)) + + server := &ServerCommand{ + // TODO: set up a different one? + // In particular, a UI instance that won't output? + BaseCommand: c.BaseCommand, + + // TODO: refactor to a common place? + AuditBackends: auditBackends, + CredentialBackends: credentialBackends, + LogicalBackends: logicalBackends, + PhysicalBackends: physicalBackends, + ServiceRegistrations: serviceRegistrations, + + // TODO: other ServerCommand options? + + logger: log.NewInterceptLogger(nil), + allLoggers: []log.Logger{}, + } + + phase := "Parse configuration" + c.UI.Output(status_unknown + phase) + server.flagConfigs = c.flagConfigs + config, err := server.parseConfig() + if err != nil { + c.UI.Output(same_line + status_failed + phase) + c.UI.Output("Error while reading configuration files:") + c.UI.Output(err.Error()) + return 1 + } + + // Errors in these items could stop Vault from starting but are not yet covered: + // TODO: logging configuration + // TODO: SetupTelemetry + // TODO: check for storage backend + c.UI.Output(same_line + status_ok + phase) + + phase = "Access storage" + c.UI.Output(status_unknown + phase) + _, err = server.setupStorage(config) + if err != nil { + c.UI.Output(same_line + status_failed + phase) + c.UI.Output(err.Error()) + return 1 + } + c.UI.Output(same_line + status_ok + phase) + + return 0 +} diff --git a/command/server.go b/command/server.go index a7b652bec4..c9235396e1 100644 --- a/command/server.go +++ b/command/server.go @@ -100,6 +100,8 @@ type ServerCommand struct { startedCh chan (struct{}) // for tests reloadedCh chan (struct{}) // for tests + allLoggers []log.Logger + // new stuff flagConfigs []string flagLogLevel string @@ -126,6 +128,7 @@ type ServerCommand struct { flagTestServerConfig bool flagDevConsul bool flagExitOnCoreShutdown bool + flagDiagnose string } func (c *ServerCommand) Synopsis() string { @@ -210,6 +213,19 @@ func (c *ServerCommand) Flags() *FlagSets { "Using a recovery operation token, \"sys/raw\" API can be used to manipulate the storage.", }) + // Disabled by default until functional + if os.Getenv(OperatorDiagnoseEnableEnv) != "" { + f.StringVar(&StringVar{ + Name: "diagnose", + Target: &c.flagDiagnose, + Default: notSetValue, + Usage: "Run diagnostics before starting Vault. Specify a filename to direct output to that file.", + }) + } else { + // Ensure diagnose is *not* run when feature flag is off. + c.flagDiagnose = notSetValue + } + f = set.NewFlagSet("Dev Options") f.BoolVar(&BoolVar{ @@ -812,6 +828,49 @@ func (q quiescenceSink) Accept(name string, level log.Level, msg string, args .. q.t.Reset(100 * time.Millisecond) } +func (c *ServerCommand) setupStorage(config *server.Config) (physical.Backend, error) { + // Ensure that a backend is provided + if config.Storage == nil { + return nil, fmt.Errorf("A storage backend must be specified") + } + + // Initialize the backend + factory, exists := c.PhysicalBackends[config.Storage.Type] + if !exists { + return nil, fmt.Errorf("Unknown storage type %s", config.Storage.Type) + } + + // Do any custom configuration needed per backend + switch config.Storage.Type { + case storageTypeConsul: + if config.ServiceRegistration == nil { + // If Consul is configured for storage and service registration is unconfigured, + // use Consul for service registration without requiring additional configuration. + // This maintains backward-compatibility. + config.ServiceRegistration = &server.ServiceRegistration{ + Type: "consul", + Config: config.Storage.Config, + } + } + case storageTypeRaft: + if envCA := os.Getenv("VAULT_CLUSTER_ADDR"); envCA != "" { + config.ClusterAddr = envCA + } + if len(config.ClusterAddr) == 0 { + return nil, fmt.Errorf("Cluster address must be set when using raft storage") + } + } + + namedStorageLogger := c.logger.Named("storage." + config.Storage.Type) + c.allLoggers = append(c.allLoggers, namedStorageLogger) + backend, err := factory(config.Storage.Config, namedStorageLogger) + if err != nil { + return nil, fmt.Errorf("Error initializing storage of type %s: %w", config.Storage.Type, err) + } + + return backend, nil +} + func (c *ServerCommand) Run(args []string) int { f := c.Flags() @@ -843,6 +902,21 @@ func (c *ServerCommand) Run(args []string) int { } } + if c.flagDiagnose != notSetValue { + if c.flagDev { + c.UI.Error("Cannot run diagnose on Vault in dev mode.") + return 1 + } + // TODO: add a file output flag to Diagnose + diagnose := &OperatorDiagnoseCommand{ + BaseCommand: c.BaseCommand, + flagDebug: false, + flagSkips: []string{}, + flagConfigs: c.flagConfigs, + } + diagnose.RunWithParsedFlags() + } + // Load the configuration var config *server.Config var err error @@ -918,7 +992,7 @@ func (c *ServerCommand) Run(args []string) int { // Ensure logging is flushed if initialization fails defer c.flushLog() - allLoggers := []log.Logger{c.logger} + c.allLoggers = []log.Logger{c.logger} logLevelStr, err := c.adjustLogLevel(config, logLevelWasNotSet) if err != nil { @@ -931,7 +1005,7 @@ func (c *ServerCommand) Run(args []string) int { // create GRPC logger namedGRPCLogFaker := c.logger.Named("grpclogfaker") - allLoggers = append(allLoggers, namedGRPCLogFaker) + c.allLoggers = append(c.allLoggers, namedGRPCLogFaker) grpclog.SetLogger(&grpclogFaker{ logger: namedGRPCLogFaker, log: os.Getenv("VAULT_GRPC_LOGGING") != "", @@ -941,12 +1015,6 @@ func (c *ServerCommand) Run(args []string) int { c.startMemProfiler() } - // Ensure that a backend is provided - if config.Storage == nil { - c.UI.Output("A storage backend must be specified") - return 1 - } - if config.DefaultMaxRequestDuration != 0 { vault.DefaultMaxRequestDuration = config.DefaultMaxRequestDuration } @@ -988,44 +1056,15 @@ func (c *ServerCommand) Run(args []string) int { } metricsHelper := metricsutil.NewMetricsHelper(inmemMetrics, prometheusEnabled) - // Initialize the backend - factory, exists := c.PhysicalBackends[config.Storage.Type] - if !exists { - c.UI.Error(fmt.Sprintf("Unknown storage type %s", config.Storage.Type)) - return 1 - } - - // Do any custom configuration needed per backend - switch config.Storage.Type { - case storageTypeConsul: - if config.ServiceRegistration == nil { - // If Consul is configured for storage and service registration is unconfigured, - // use Consul for service registration without requiring additional configuration. - // This maintains backward-compatibility. - config.ServiceRegistration = &server.ServiceRegistration{ - Type: "consul", - Config: config.Storage.Config, - } - } - case storageTypeRaft: - if envCA := os.Getenv("VAULT_CLUSTER_ADDR"); envCA != "" { - config.ClusterAddr = envCA - } - if len(config.ClusterAddr) == 0 { - c.UI.Error("Cluster address must be set when using raft storage") - return 1 - } - } - - namedStorageLogger := c.logger.Named("storage." + config.Storage.Type) - allLoggers = append(allLoggers, namedStorageLogger) - backend, err := factory(config.Storage.Config, namedStorageLogger) + // Initialize the storage backend + backend, err := c.setupStorage(config) if err != nil { - c.UI.Error(fmt.Sprintf("Error initializing storage of type %s: %s", config.Storage.Type, err)) + c.UI.Error(err.Error()) return 1 } // Prevent server startup if migration is active + // TODO: how to incorporate this check into Diagnose? if c.storageMigrationActive(backend) { return 1 } @@ -1040,7 +1079,7 @@ func (c *ServerCommand) Run(args []string) int { } namedSDLogger := c.logger.Named("service_registration." + config.ServiceRegistration.Type) - allLoggers = append(allLoggers, namedSDLogger) + c.allLoggers = append(c.allLoggers, namedSDLogger) // Since we haven't even begun starting Vault's core yet, // we know that Vault is in its pre-running state. @@ -1094,7 +1133,7 @@ func (c *ServerCommand) Run(args []string) int { var seal vault.Seal sealLogger := c.logger.ResetNamed(fmt.Sprintf("seal.%s", sealType)) - allLoggers = append(allLoggers, sealLogger) + c.allLoggers = append(c.allLoggers, sealLogger) defaultSeal := vault.NewDefaultSeal(&vaultseal.Access{ Wrapper: aeadwrapper.NewShamirWrapper(&wrapping.WrapperOptions{ Logger: c.logger.Named("shamir"), @@ -1180,7 +1219,7 @@ func (c *ServerCommand) Run(args []string) int { DisableSealWrap: config.DisableSealWrap, DisablePerformanceStandby: config.DisablePerformanceStandby, DisableIndexing: config.DisableIndexing, - AllLoggers: allLoggers, + AllLoggers: c.allLoggers, BuiltinRegistry: builtinplugins.Registry, DisableKeyEncodingChecks: config.DisablePrintableCheck, MetricsHelper: metricsHelper, @@ -1237,7 +1276,7 @@ func (c *ServerCommand) Run(args []string) int { } namedHALogger := c.logger.Named("ha." + config.HAStorage.Type) - allLoggers = append(allLoggers, namedHALogger) + c.allLoggers = append(c.allLoggers, namedHALogger) habackend, err := factory(config.HAStorage.Config, namedHALogger) if err != nil { c.UI.Error(fmt.Sprintf(