vault operator diagnose stub command (#10819)

* Stub "operator diagnose" command.
* Parse configuration files.
* Refactor storage setup to call from diagnose.
* Add the ability to run Diagnose as a preliminary step before server start.
This commit is contained in:
Mark Gritter 2021-02-02 14:15:10 -06:00 committed by GitHub
parent 4038b0bbcd
commit 7a66647ca6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 253 additions and 45 deletions

View file

@ -686,6 +686,15 @@ func initCommands(ui, serverCmdUi cli.Ui, runOpts *RunOptions) {
}, nil
},
}
// Disabled by default until functional
if os.Getenv(OperatorDiagnoseEnableEnv) != "" {
Commands["operator diagnose"] = func() (cli.Command, error) {
return &OperatorDiagnoseCommand{
BaseCommand: getBaseCommand(),
}, nil
}
}
}
// MakeShutdownCh returns a channel that can be used for shutdown

View file

@ -0,0 +1,160 @@
package command
import (
"strings"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/vault/sdk/version"
"github.com/mitchellh/cli"
"github.com/posener/complete"
)
// OperatorDiagnoseEnableEnv names the environment variable that acts as the
// feature flag for Diagnose: when it is set to a non-empty value the
// "operator diagnose" command is registered and the server "-diagnose" flag
// is defined. The feature stays disabled by default until it is functional.
const OperatorDiagnoseEnableEnv = "VAULT_DIAGNOSE"
// Compile-time checks that *OperatorDiagnoseCommand satisfies both the CLI
// command interface and the autocompletion interface.
var (
	_ cli.Command             = (*OperatorDiagnoseCommand)(nil)
	_ cli.CommandAutocomplete = (*OperatorDiagnoseCommand)(nil)
)
// OperatorDiagnoseCommand implements the "vault operator diagnose" command,
// which troubleshoots problems that prevent Vault from starting. It can be
// run on its own through Run, or with pre-populated flag fields through
// RunWithParsedFlags.
type OperatorDiagnoseCommand struct {
*BaseCommand
// flagDebug is bound to the -debug flag ("Dump all information collected
// by Diagnose."). It is not yet consulted by RunWithParsedFlags.
flagDebug bool
// flagSkips is bound to the -skip flag and lists health checks to skip.
// It is not yet consulted by RunWithParsedFlags.
flagSkips []string
// flagConfigs is bound to the -config flag and holds the configuration
// file or directory paths; at least one entry is required to run.
flagConfigs []string
}
// Synopsis returns the one-line summary of the diagnose command.
func (c *OperatorDiagnoseCommand) Synopsis() string {
	const synopsis = "Troubleshoot problems starting Vault"
	return synopsis
}
// Help returns the long-form usage text for the diagnose command: a fixed
// description with examples, followed by the help generated for the flags
// defined in Flags. Surrounding whitespace is trimmed from the result.
func (c *OperatorDiagnoseCommand) Help() string {
	const usage = `
Usage: vault operator diagnose
This command troubleshoots Vault startup issues, such as TLS configuration or
auto-unseal. It should be run using the same environment variables and configuration
files as the "vault server" command, so that startup problems can be accurately
reproduced.
Start diagnose with a configuration file:
$ vault operator diagnose -config=/etc/vault/config.hcl
Perform a diagnostic check while Vault is still running:
$ vault operator diagnose -config=/etc/vault/config.hcl -skip=listener
`
	flagHelp := c.Flags().Help()
	return strings.TrimSpace(usage + flagHelp)
}
// Flags builds the flag sets for the diagnose command. A single
// "Command Options" set is defined with three flags:
//
//   - config: repeatable; stored in c.flagConfigs, with file/directory
//     completion for *.hcl and *.json.
//   - skip:   repeatable; stored in c.flagSkips.
//   - debug:  boolean, default false; stored in c.flagDebug.
func (c *OperatorDiagnoseCommand) Flags() *FlagSets {
	const (
		configUsage = "Path to a Vault configuration file or directory of configuration " +
			"files. This flag can be specified multiple times to load multiple " +
			"configurations. If the path is a directory, all files which end in " +
			".hcl or .json are loaded."
		skipUsage  = "Skip the health checks named as arguments. May be 'listener', 'storage', or 'autounseal'."
		debugUsage = "Dump all information collected by Diagnose."
	)

	flagSets := NewFlagSets(c.UI)
	opts := flagSets.NewFlagSet("Command Options")

	opts.StringSliceVar(&StringSliceVar{
		Name:   "config",
		Target: &c.flagConfigs,
		Completion: complete.PredictOr(
			complete.PredictFiles("*.hcl"),
			complete.PredictFiles("*.json"),
			complete.PredictDirs("*"),
		),
		Usage: configUsage,
	})

	opts.StringSliceVar(&StringSliceVar{
		Name:   "skip",
		Target: &c.flagSkips,
		Usage:  skipUsage,
	})

	opts.BoolVar(&BoolVar{
		Name:    "debug",
		Target:  &c.flagDebug,
		Default: false,
		Usage:   debugUsage,
	})

	return flagSets
}
// AutocompleteArgs reports that the diagnose command offers no completion
// for positional arguments.
func (c *OperatorDiagnoseCommand) AutocompleteArgs() complete.Predictor {
	predictor := complete.PredictNothing
	return predictor
}
// AutocompleteFlags returns the completions derived from the flag sets
// built by Flags.
func (c *OperatorDiagnoseCommand) AutocompleteFlags() complete.Flags {
	flagSets := c.Flags()
	return flagSets.Completions()
}
// Status markers printed in front of each diagnose phase. The ok, failed and
// warn markers are wrapped in ANSI colour escapes (green, red and yellow) and
// followed by a reset sequence. same_line is the ANSI "cursor to previous
// line" escape, used to overwrite the pending marker once a phase finishes.
const (
	status_unknown = "[ ] "
	status_ok      = "\u001b[32m[ ok ]\u001b[0m "
	status_failed  = "\u001b[31m[failed]\u001b[0m "
	status_warn    = "\u001b[33m[ warn ]\u001b[0m "
	same_line      = "\u001b[F"
)
// Run is the CLI entry point for "operator diagnose". It parses args into
// the command's flag fields and then hands off to RunWithParsedFlags. A flag
// parsing error is reported on the UI error stream and yields exit code 1.
func (c *OperatorDiagnoseCommand) Run(args []string) int {
	if err := c.Flags().Parse(args); err != nil {
		c.UI.Error(err.Error())
		return 1
	}
	return c.RunWithParsedFlags()
}
// RunWithParsedFlags executes the diagnose phases using flag values already
// stored on the command, so callers that populate the fields directly can
// skip flag parsing.
//
// It requires at least one entry in c.flagConfigs, prints the Vault version,
// and then runs two phases in order: parsing the configuration files and
// setting up the storage backend. Each phase first prints a pending marker
// and then overwrites it with an ok/failed marker. It returns 0 when both
// phases succeed and 1 on the first failure.
func (c *OperatorDiagnoseCommand) RunWithParsedFlags() int {
	if len(c.flagConfigs) == 0 {
		c.UI.Error("Must specify a configuration file using -config.")
		return 1
	}

	c.UI.Output(version.GetVersion().FullVersionNumber(true))

	// begin prints the pending marker for a phase; finish moves the cursor
	// back up one line and replaces it with the final status.
	begin := func(name string) {
		c.UI.Output(status_unknown + name)
	}
	finish := func(status, name string) {
		c.UI.Output(same_line + status + name)
	}

	// The checks reuse ServerCommand so that diagnose exercises the same
	// code paths as a real server start.
	server := &ServerCommand{
		// TODO: set up a different one?
		// In particular, a UI instance that won't output?
		BaseCommand: c.BaseCommand,

		// TODO: refactor to a common place?
		AuditBackends:        auditBackends,
		CredentialBackends:   credentialBackends,
		LogicalBackends:      logicalBackends,
		PhysicalBackends:     physicalBackends,
		ServiceRegistrations: serviceRegistrations,

		// TODO: other ServerCommand options?
		logger:      log.NewInterceptLogger(nil),
		allLoggers:  []log.Logger{},
		flagConfigs: c.flagConfigs,
	}

	const parsePhase = "Parse configuration"
	begin(parsePhase)
	config, err := server.parseConfig()
	if err != nil {
		finish(status_failed, parsePhase)
		c.UI.Output("Error while reading configuration files:")
		c.UI.Output(err.Error())
		return 1
	}

	// Errors in these items could stop Vault from starting but are not yet covered:
	// TODO: logging configuration
	// TODO: SetupTelemetry
	// TODO: check for storage backend
	finish(status_ok, parsePhase)

	const storagePhase = "Access storage"
	begin(storagePhase)
	if _, err := server.setupStorage(config); err != nil {
		finish(status_failed, storagePhase)
		c.UI.Output(err.Error())
		return 1
	}
	finish(status_ok, storagePhase)

	return 0
}

View file

@ -100,6 +100,8 @@ type ServerCommand struct {
startedCh chan (struct{}) // for tests
reloadedCh chan (struct{}) // for tests
allLoggers []log.Logger
// new stuff
flagConfigs []string
flagLogLevel string
@ -126,6 +128,7 @@ type ServerCommand struct {
flagTestServerConfig bool
flagDevConsul bool
flagExitOnCoreShutdown bool
flagDiagnose string
}
func (c *ServerCommand) Synopsis() string {
@ -210,6 +213,19 @@ func (c *ServerCommand) Flags() *FlagSets {
"Using a recovery operation token, \"sys/raw\" API can be used to manipulate the storage.",
})
// Disabled by default until functional
if os.Getenv(OperatorDiagnoseEnableEnv) != "" {
f.StringVar(&StringVar{
Name: "diagnose",
Target: &c.flagDiagnose,
Default: notSetValue,
Usage: "Run diagnostics before starting Vault. Specify a filename to direct output to that file.",
})
} else {
// Ensure diagnose is *not* run when feature flag is off.
c.flagDiagnose = notSetValue
}
f = set.NewFlagSet("Dev Options")
f.BoolVar(&BoolVar{
@ -812,6 +828,49 @@ func (q quiescenceSink) Accept(name string, level log.Level, msg string, args ..
q.t.Reset(100 * time.Millisecond)
}
// setupStorage validates the storage stanza of config and constructs the
// physical backend it describes.
//
// It returns an error when no storage stanza is present, when the storage
// type has no factory registered in c.PhysicalBackends, when raft storage is
// used without a cluster address, or when the factory itself fails.
//
// Side effects: for Consul storage with no service registration configured,
// config.ServiceRegistration is filled in from the storage settings; for raft
// storage, a non-empty VAULT_CLUSTER_ADDR environment variable overrides
// config.ClusterAddr. The named storage logger is appended to c.allLoggers.
func (c *ServerCommand) setupStorage(config *server.Config) (physical.Backend, error) {
	storage := config.Storage

	// A storage backend is mandatory.
	if storage == nil {
		return nil, fmt.Errorf("A storage backend must be specified")
	}

	// Look up the factory for the configured backend type.
	newBackend, known := c.PhysicalBackends[storage.Type]
	if !known {
		return nil, fmt.Errorf("Unknown storage type %s", storage.Type)
	}

	// Backend-specific configuration tweaks.
	if storage.Type == storageTypeConsul {
		// If Consul is configured for storage and service registration is unconfigured,
		// use Consul for service registration without requiring additional configuration.
		// This maintains backward-compatibility.
		if config.ServiceRegistration == nil {
			config.ServiceRegistration = &server.ServiceRegistration{
				Type:   "consul",
				Config: storage.Config,
			}
		}
	} else if storage.Type == storageTypeRaft {
		// The environment variable takes precedence over the config file.
		if addr := os.Getenv("VAULT_CLUSTER_ADDR"); addr != "" {
			config.ClusterAddr = addr
		}
		if config.ClusterAddr == "" {
			return nil, fmt.Errorf("Cluster address must be set when using raft storage")
		}
	}

	storageLogger := c.logger.Named("storage." + storage.Type)
	c.allLoggers = append(c.allLoggers, storageLogger)

	backend, err := newBackend(storage.Config, storageLogger)
	if err != nil {
		return nil, fmt.Errorf("Error initializing storage of type %s: %w", storage.Type, err)
	}
	return backend, nil
}
func (c *ServerCommand) Run(args []string) int {
f := c.Flags()
@ -843,6 +902,21 @@ func (c *ServerCommand) Run(args []string) int {
}
}
if c.flagDiagnose != notSetValue {
if c.flagDev {
c.UI.Error("Cannot run diagnose on Vault in dev mode.")
return 1
}
// TODO: add a file output flag to Diagnose
diagnose := &OperatorDiagnoseCommand{
BaseCommand: c.BaseCommand,
flagDebug: false,
flagSkips: []string{},
flagConfigs: c.flagConfigs,
}
diagnose.RunWithParsedFlags()
}
// Load the configuration
var config *server.Config
var err error
@ -918,7 +992,7 @@ func (c *ServerCommand) Run(args []string) int {
// Ensure logging is flushed if initialization fails
defer c.flushLog()
allLoggers := []log.Logger{c.logger}
c.allLoggers = []log.Logger{c.logger}
logLevelStr, err := c.adjustLogLevel(config, logLevelWasNotSet)
if err != nil {
@ -931,7 +1005,7 @@ func (c *ServerCommand) Run(args []string) int {
// create GRPC logger
namedGRPCLogFaker := c.logger.Named("grpclogfaker")
allLoggers = append(allLoggers, namedGRPCLogFaker)
c.allLoggers = append(c.allLoggers, namedGRPCLogFaker)
grpclog.SetLogger(&grpclogFaker{
logger: namedGRPCLogFaker,
log: os.Getenv("VAULT_GRPC_LOGGING") != "",
@ -941,12 +1015,6 @@ func (c *ServerCommand) Run(args []string) int {
c.startMemProfiler()
}
// Ensure that a backend is provided
if config.Storage == nil {
c.UI.Output("A storage backend must be specified")
return 1
}
if config.DefaultMaxRequestDuration != 0 {
vault.DefaultMaxRequestDuration = config.DefaultMaxRequestDuration
}
@ -988,44 +1056,15 @@ func (c *ServerCommand) Run(args []string) int {
}
metricsHelper := metricsutil.NewMetricsHelper(inmemMetrics, prometheusEnabled)
// Initialize the backend
factory, exists := c.PhysicalBackends[config.Storage.Type]
if !exists {
c.UI.Error(fmt.Sprintf("Unknown storage type %s", config.Storage.Type))
return 1
}
// Do any custom configuration needed per backend
switch config.Storage.Type {
case storageTypeConsul:
if config.ServiceRegistration == nil {
// If Consul is configured for storage and service registration is unconfigured,
// use Consul for service registration without requiring additional configuration.
// This maintains backward-compatibility.
config.ServiceRegistration = &server.ServiceRegistration{
Type: "consul",
Config: config.Storage.Config,
}
}
case storageTypeRaft:
if envCA := os.Getenv("VAULT_CLUSTER_ADDR"); envCA != "" {
config.ClusterAddr = envCA
}
if len(config.ClusterAddr) == 0 {
c.UI.Error("Cluster address must be set when using raft storage")
return 1
}
}
namedStorageLogger := c.logger.Named("storage." + config.Storage.Type)
allLoggers = append(allLoggers, namedStorageLogger)
backend, err := factory(config.Storage.Config, namedStorageLogger)
// Initialize the storage backend
backend, err := c.setupStorage(config)
if err != nil {
c.UI.Error(fmt.Sprintf("Error initializing storage of type %s: %s", config.Storage.Type, err))
c.UI.Error(err.Error())
return 1
}
// Prevent server startup if migration is active
// TODO: how to incorporate this check into Diagnose?
if c.storageMigrationActive(backend) {
return 1
}
@ -1040,7 +1079,7 @@ func (c *ServerCommand) Run(args []string) int {
}
namedSDLogger := c.logger.Named("service_registration." + config.ServiceRegistration.Type)
allLoggers = append(allLoggers, namedSDLogger)
c.allLoggers = append(c.allLoggers, namedSDLogger)
// Since we haven't even begun starting Vault's core yet,
// we know that Vault is in its pre-running state.
@ -1094,7 +1133,7 @@ func (c *ServerCommand) Run(args []string) int {
var seal vault.Seal
sealLogger := c.logger.ResetNamed(fmt.Sprintf("seal.%s", sealType))
allLoggers = append(allLoggers, sealLogger)
c.allLoggers = append(c.allLoggers, sealLogger)
defaultSeal := vault.NewDefaultSeal(&vaultseal.Access{
Wrapper: aeadwrapper.NewShamirWrapper(&wrapping.WrapperOptions{
Logger: c.logger.Named("shamir"),
@ -1180,7 +1219,7 @@ func (c *ServerCommand) Run(args []string) int {
DisableSealWrap: config.DisableSealWrap,
DisablePerformanceStandby: config.DisablePerformanceStandby,
DisableIndexing: config.DisableIndexing,
AllLoggers: allLoggers,
AllLoggers: c.allLoggers,
BuiltinRegistry: builtinplugins.Registry,
DisableKeyEncodingChecks: config.DisablePrintableCheck,
MetricsHelper: metricsHelper,
@ -1237,7 +1276,7 @@ func (c *ServerCommand) Run(args []string) int {
}
namedHALogger := c.logger.Named("ha." + config.HAStorage.Type)
allLoggers = append(allLoggers, namedHALogger)
c.allLoggers = append(c.allLoggers, namedHALogger)
habackend, err := factory(config.HAStorage.Config, namedHALogger)
if err != nil {
c.UI.Error(fmt.Sprintf(