mirror of
https://github.com/hashicorp/vault.git
synced 2026-05-28 04:10:44 -04:00
vault operator diagnose stub command (#10819)
* Stub "operator diagnose" command. * Parse configuration files. * Refactor storage setup to call from diagnose. * Add the ability to run Diagnose as a prequel to server start.
This commit is contained in:
parent
4038b0bbcd
commit
7a66647ca6
3 changed files with 253 additions and 45 deletions
|
|
@ -686,6 +686,15 @@ func initCommands(ui, serverCmdUi cli.Ui, runOpts *RunOptions) {
|
|||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
// Disabled by default until functional
|
||||
if os.Getenv(OperatorDiagnoseEnableEnv) != "" {
|
||||
Commands["operator diagnose"] = func() (cli.Command, error) {
|
||||
return &OperatorDiagnoseCommand{
|
||||
BaseCommand: getBaseCommand(),
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MakeShutdownCh returns a channel that can be used for shutdown
|
||||
|
|
|
|||
160
command/operator_diagnose.go
Normal file
160
command/operator_diagnose.go
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
package command
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
log "github.com/hashicorp/go-hclog"
|
||||
"github.com/hashicorp/vault/sdk/version"
|
||||
"github.com/mitchellh/cli"
|
||||
"github.com/posener/complete"
|
||||
)
|
||||
|
||||
const OperatorDiagnoseEnableEnv = "VAULT_DIAGNOSE"
|
||||
|
||||
var _ cli.Command = (*OperatorDiagnoseCommand)(nil)
|
||||
var _ cli.CommandAutocomplete = (*OperatorDiagnoseCommand)(nil)
|
||||
|
||||
type OperatorDiagnoseCommand struct {
|
||||
*BaseCommand
|
||||
|
||||
flagDebug bool
|
||||
flagSkips []string
|
||||
flagConfigs []string
|
||||
}
|
||||
|
||||
func (c *OperatorDiagnoseCommand) Synopsis() string {
|
||||
return "Troubleshoot problems starting Vault"
|
||||
}
|
||||
|
||||
// Help returns the long-form usage text for the command: a description and
// two example invocations, followed by the generated help for the command's
// flags. Leading and trailing whitespace is trimmed from the result.
func (c *OperatorDiagnoseCommand) Help() string {
|
||||
helpText := `
|
||||
Usage: vault operator diagnose
|
||||
|
||||
This command troubleshoots Vault startup issues, such as TLS configuration or
|
||||
auto-unseal. It should be run using the same environment variables and configuration
|
||||
files as the "vault server" command, so that startup problems can be accurately
|
||||
reproduced.
|
||||
|
||||
Start diagnose with a configuration file:
|
||||
|
||||
$ vault operator diagnose -config=/etc/vault/config.hcl
|
||||
|
||||
Perform a diagnostic check while Vault is still running:
|
||||
|
||||
$ vault operator diagnose -config=/etc/vault/config.hcl -skip=listener
|
||||
|
||||
` + c.Flags().Help()
|
||||
return strings.TrimSpace(helpText)
|
||||
}
|
||||
|
||||
func (c *OperatorDiagnoseCommand) Flags() *FlagSets {
|
||||
set := NewFlagSets(c.UI)
|
||||
f := set.NewFlagSet("Command Options")
|
||||
|
||||
f.StringSliceVar(&StringSliceVar{
|
||||
Name: "config",
|
||||
Target: &c.flagConfigs,
|
||||
Completion: complete.PredictOr(
|
||||
complete.PredictFiles("*.hcl"),
|
||||
complete.PredictFiles("*.json"),
|
||||
complete.PredictDirs("*"),
|
||||
),
|
||||
Usage: "Path to a Vault configuration file or directory of configuration " +
|
||||
"files. This flag can be specified multiple times to load multiple " +
|
||||
"configurations. If the path is a directory, all files which end in " +
|
||||
".hcl or .json are loaded.",
|
||||
})
|
||||
|
||||
f.StringSliceVar(&StringSliceVar{
|
||||
Name: "skip",
|
||||
Target: &c.flagSkips,
|
||||
Usage: "Skip the health checks named as arguments. May be 'listener', 'storage', or 'autounseal'.",
|
||||
})
|
||||
|
||||
f.BoolVar(&BoolVar{
|
||||
Name: "debug",
|
||||
Target: &c.flagDebug,
|
||||
Default: false,
|
||||
Usage: "Dump all information collected by Diagnose.",
|
||||
})
|
||||
return set
|
||||
}
|
||||
|
||||
func (c *OperatorDiagnoseCommand) AutocompleteArgs() complete.Predictor {
|
||||
return complete.PredictNothing
|
||||
}
|
||||
|
||||
func (c *OperatorDiagnoseCommand) AutocompleteFlags() complete.Flags {
|
||||
return c.Flags().Completions()
|
||||
}
|
||||
|
||||
// Status tags printed in front of each diagnose phase name, plus the ANSI
// control sequence used to rewrite the previously printed line.
//
// NOTE(review): the bracketed tags have different widths as shown here;
// confirm against the upstream file whether padding spaces were lost.
const (
	// status_unknown marks a phase whose result is not yet known.
	status_unknown = "[ ] "
	// status_ok marks a successful phase (SGR 32, green).
	status_ok = "\u001b[32m[ ok ]\u001b[0m "
	// status_failed marks a failed phase (SGR 31, red).
	status_failed = "\u001b[31m[failed]\u001b[0m "
	// status_warn marks a phase that produced a warning (SGR 33, yellow).
	status_warn = "\u001b[33m[ warn ]\u001b[0m "
	// same_line is CSI F: move the cursor to the start of the previous line.
	same_line = "\u001b[F"
)
|
||||
|
||||
func (c *OperatorDiagnoseCommand) Run(args []string) int {
|
||||
f := c.Flags()
|
||||
if err := f.Parse(args); err != nil {
|
||||
c.UI.Error(err.Error())
|
||||
return 1
|
||||
}
|
||||
return c.RunWithParsedFlags()
|
||||
}
|
||||
|
||||
func (c *OperatorDiagnoseCommand) RunWithParsedFlags() int {
|
||||
if len(c.flagConfigs) == 0 {
|
||||
c.UI.Error("Must specify a configuration file using -config.")
|
||||
return 1
|
||||
}
|
||||
|
||||
c.UI.Output(version.GetVersion().FullVersionNumber(true))
|
||||
|
||||
server := &ServerCommand{
|
||||
// TODO: set up a different one?
|
||||
// In particular, a UI instance that won't output?
|
||||
BaseCommand: c.BaseCommand,
|
||||
|
||||
// TODO: refactor to a common place?
|
||||
AuditBackends: auditBackends,
|
||||
CredentialBackends: credentialBackends,
|
||||
LogicalBackends: logicalBackends,
|
||||
PhysicalBackends: physicalBackends,
|
||||
ServiceRegistrations: serviceRegistrations,
|
||||
|
||||
// TODO: other ServerCommand options?
|
||||
|
||||
logger: log.NewInterceptLogger(nil),
|
||||
allLoggers: []log.Logger{},
|
||||
}
|
||||
|
||||
phase := "Parse configuration"
|
||||
c.UI.Output(status_unknown + phase)
|
||||
server.flagConfigs = c.flagConfigs
|
||||
config, err := server.parseConfig()
|
||||
if err != nil {
|
||||
c.UI.Output(same_line + status_failed + phase)
|
||||
c.UI.Output("Error while reading configuration files:")
|
||||
c.UI.Output(err.Error())
|
||||
return 1
|
||||
}
|
||||
|
||||
// Errors in these items could stop Vault from starting but are not yet covered:
|
||||
// TODO: logging configuration
|
||||
// TODO: SetupTelemetry
|
||||
// TODO: check for storage backend
|
||||
c.UI.Output(same_line + status_ok + phase)
|
||||
|
||||
phase = "Access storage"
|
||||
c.UI.Output(status_unknown + phase)
|
||||
_, err = server.setupStorage(config)
|
||||
if err != nil {
|
||||
c.UI.Output(same_line + status_failed + phase)
|
||||
c.UI.Output(err.Error())
|
||||
return 1
|
||||
}
|
||||
c.UI.Output(same_line + status_ok + phase)
|
||||
|
||||
return 0
|
||||
}
|
||||
|
|
@ -100,6 +100,8 @@ type ServerCommand struct {
|
|||
startedCh chan (struct{}) // for tests
|
||||
reloadedCh chan (struct{}) // for tests
|
||||
|
||||
allLoggers []log.Logger
|
||||
|
||||
// new stuff
|
||||
flagConfigs []string
|
||||
flagLogLevel string
|
||||
|
|
@ -126,6 +128,7 @@ type ServerCommand struct {
|
|||
flagTestServerConfig bool
|
||||
flagDevConsul bool
|
||||
flagExitOnCoreShutdown bool
|
||||
flagDiagnose string
|
||||
}
|
||||
|
||||
func (c *ServerCommand) Synopsis() string {
|
||||
|
|
@ -210,6 +213,19 @@ func (c *ServerCommand) Flags() *FlagSets {
|
|||
"Using a recovery operation token, \"sys/raw\" API can be used to manipulate the storage.",
|
||||
})
|
||||
|
||||
// Disabled by default until functional
|
||||
if os.Getenv(OperatorDiagnoseEnableEnv) != "" {
|
||||
f.StringVar(&StringVar{
|
||||
Name: "diagnose",
|
||||
Target: &c.flagDiagnose,
|
||||
Default: notSetValue,
|
||||
Usage: "Run diagnostics before starting Vault. Specify a filename to direct output to that file.",
|
||||
})
|
||||
} else {
|
||||
// Ensure diagnose is *not* run when feature flag is off.
|
||||
c.flagDiagnose = notSetValue
|
||||
}
|
||||
|
||||
f = set.NewFlagSet("Dev Options")
|
||||
|
||||
f.BoolVar(&BoolVar{
|
||||
|
|
@ -812,6 +828,49 @@ func (q quiescenceSink) Accept(name string, level log.Level, msg string, args ..
|
|||
q.t.Reset(100 * time.Millisecond)
|
||||
}
|
||||
|
||||
func (c *ServerCommand) setupStorage(config *server.Config) (physical.Backend, error) {
|
||||
// Ensure that a backend is provided
|
||||
if config.Storage == nil {
|
||||
return nil, fmt.Errorf("A storage backend must be specified")
|
||||
}
|
||||
|
||||
// Initialize the backend
|
||||
factory, exists := c.PhysicalBackends[config.Storage.Type]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("Unknown storage type %s", config.Storage.Type)
|
||||
}
|
||||
|
||||
// Do any custom configuration needed per backend
|
||||
switch config.Storage.Type {
|
||||
case storageTypeConsul:
|
||||
if config.ServiceRegistration == nil {
|
||||
// If Consul is configured for storage and service registration is unconfigured,
|
||||
// use Consul for service registration without requiring additional configuration.
|
||||
// This maintains backward-compatibility.
|
||||
config.ServiceRegistration = &server.ServiceRegistration{
|
||||
Type: "consul",
|
||||
Config: config.Storage.Config,
|
||||
}
|
||||
}
|
||||
case storageTypeRaft:
|
||||
if envCA := os.Getenv("VAULT_CLUSTER_ADDR"); envCA != "" {
|
||||
config.ClusterAddr = envCA
|
||||
}
|
||||
if len(config.ClusterAddr) == 0 {
|
||||
return nil, fmt.Errorf("Cluster address must be set when using raft storage")
|
||||
}
|
||||
}
|
||||
|
||||
namedStorageLogger := c.logger.Named("storage." + config.Storage.Type)
|
||||
c.allLoggers = append(c.allLoggers, namedStorageLogger)
|
||||
backend, err := factory(config.Storage.Config, namedStorageLogger)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error initializing storage of type %s: %w", config.Storage.Type, err)
|
||||
}
|
||||
|
||||
return backend, nil
|
||||
}
|
||||
|
||||
func (c *ServerCommand) Run(args []string) int {
|
||||
f := c.Flags()
|
||||
|
||||
|
|
@ -843,6 +902,21 @@ func (c *ServerCommand) Run(args []string) int {
|
|||
}
|
||||
}
|
||||
|
||||
if c.flagDiagnose != notSetValue {
|
||||
if c.flagDev {
|
||||
c.UI.Error("Cannot run diagnose on Vault in dev mode.")
|
||||
return 1
|
||||
}
|
||||
// TODO: add a file output flag to Diagnose
|
||||
diagnose := &OperatorDiagnoseCommand{
|
||||
BaseCommand: c.BaseCommand,
|
||||
flagDebug: false,
|
||||
flagSkips: []string{},
|
||||
flagConfigs: c.flagConfigs,
|
||||
}
|
||||
diagnose.RunWithParsedFlags()
|
||||
}
|
||||
|
||||
// Load the configuration
|
||||
var config *server.Config
|
||||
var err error
|
||||
|
|
@ -918,7 +992,7 @@ func (c *ServerCommand) Run(args []string) int {
|
|||
// Ensure logging is flushed if initialization fails
|
||||
defer c.flushLog()
|
||||
|
||||
allLoggers := []log.Logger{c.logger}
|
||||
c.allLoggers = []log.Logger{c.logger}
|
||||
|
||||
logLevelStr, err := c.adjustLogLevel(config, logLevelWasNotSet)
|
||||
if err != nil {
|
||||
|
|
@ -931,7 +1005,7 @@ func (c *ServerCommand) Run(args []string) int {
|
|||
|
||||
// create GRPC logger
|
||||
namedGRPCLogFaker := c.logger.Named("grpclogfaker")
|
||||
allLoggers = append(allLoggers, namedGRPCLogFaker)
|
||||
c.allLoggers = append(c.allLoggers, namedGRPCLogFaker)
|
||||
grpclog.SetLogger(&grpclogFaker{
|
||||
logger: namedGRPCLogFaker,
|
||||
log: os.Getenv("VAULT_GRPC_LOGGING") != "",
|
||||
|
|
@ -941,12 +1015,6 @@ func (c *ServerCommand) Run(args []string) int {
|
|||
c.startMemProfiler()
|
||||
}
|
||||
|
||||
// Ensure that a backend is provided
|
||||
if config.Storage == nil {
|
||||
c.UI.Output("A storage backend must be specified")
|
||||
return 1
|
||||
}
|
||||
|
||||
if config.DefaultMaxRequestDuration != 0 {
|
||||
vault.DefaultMaxRequestDuration = config.DefaultMaxRequestDuration
|
||||
}
|
||||
|
|
@ -988,44 +1056,15 @@ func (c *ServerCommand) Run(args []string) int {
|
|||
}
|
||||
metricsHelper := metricsutil.NewMetricsHelper(inmemMetrics, prometheusEnabled)
|
||||
|
||||
// Initialize the backend
|
||||
factory, exists := c.PhysicalBackends[config.Storage.Type]
|
||||
if !exists {
|
||||
c.UI.Error(fmt.Sprintf("Unknown storage type %s", config.Storage.Type))
|
||||
return 1
|
||||
}
|
||||
|
||||
// Do any custom configuration needed per backend
|
||||
switch config.Storage.Type {
|
||||
case storageTypeConsul:
|
||||
if config.ServiceRegistration == nil {
|
||||
// If Consul is configured for storage and service registration is unconfigured,
|
||||
// use Consul for service registration without requiring additional configuration.
|
||||
// This maintains backward-compatibility.
|
||||
config.ServiceRegistration = &server.ServiceRegistration{
|
||||
Type: "consul",
|
||||
Config: config.Storage.Config,
|
||||
}
|
||||
}
|
||||
case storageTypeRaft:
|
||||
if envCA := os.Getenv("VAULT_CLUSTER_ADDR"); envCA != "" {
|
||||
config.ClusterAddr = envCA
|
||||
}
|
||||
if len(config.ClusterAddr) == 0 {
|
||||
c.UI.Error("Cluster address must be set when using raft storage")
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
namedStorageLogger := c.logger.Named("storage." + config.Storage.Type)
|
||||
allLoggers = append(allLoggers, namedStorageLogger)
|
||||
backend, err := factory(config.Storage.Config, namedStorageLogger)
|
||||
// Initialize the storage backend
|
||||
backend, err := c.setupStorage(config)
|
||||
if err != nil {
|
||||
c.UI.Error(fmt.Sprintf("Error initializing storage of type %s: %s", config.Storage.Type, err))
|
||||
c.UI.Error(err.Error())
|
||||
return 1
|
||||
}
|
||||
|
||||
// Prevent server startup if migration is active
|
||||
// TODO: how to incorporate this check into Diagnose?
|
||||
if c.storageMigrationActive(backend) {
|
||||
return 1
|
||||
}
|
||||
|
|
@ -1040,7 +1079,7 @@ func (c *ServerCommand) Run(args []string) int {
|
|||
}
|
||||
|
||||
namedSDLogger := c.logger.Named("service_registration." + config.ServiceRegistration.Type)
|
||||
allLoggers = append(allLoggers, namedSDLogger)
|
||||
c.allLoggers = append(c.allLoggers, namedSDLogger)
|
||||
|
||||
// Since we haven't even begun starting Vault's core yet,
|
||||
// we know that Vault is in its pre-running state.
|
||||
|
|
@ -1094,7 +1133,7 @@ func (c *ServerCommand) Run(args []string) int {
|
|||
|
||||
var seal vault.Seal
|
||||
sealLogger := c.logger.ResetNamed(fmt.Sprintf("seal.%s", sealType))
|
||||
allLoggers = append(allLoggers, sealLogger)
|
||||
c.allLoggers = append(c.allLoggers, sealLogger)
|
||||
defaultSeal := vault.NewDefaultSeal(&vaultseal.Access{
|
||||
Wrapper: aeadwrapper.NewShamirWrapper(&wrapping.WrapperOptions{
|
||||
Logger: c.logger.Named("shamir"),
|
||||
|
|
@ -1180,7 +1219,7 @@ func (c *ServerCommand) Run(args []string) int {
|
|||
DisableSealWrap: config.DisableSealWrap,
|
||||
DisablePerformanceStandby: config.DisablePerformanceStandby,
|
||||
DisableIndexing: config.DisableIndexing,
|
||||
AllLoggers: allLoggers,
|
||||
AllLoggers: c.allLoggers,
|
||||
BuiltinRegistry: builtinplugins.Registry,
|
||||
DisableKeyEncodingChecks: config.DisablePrintableCheck,
|
||||
MetricsHelper: metricsHelper,
|
||||
|
|
@ -1237,7 +1276,7 @@ func (c *ServerCommand) Run(args []string) int {
|
|||
}
|
||||
|
||||
namedHALogger := c.logger.Named("ha." + config.HAStorage.Type)
|
||||
allLoggers = append(allLoggers, namedHALogger)
|
||||
c.allLoggers = append(c.allLoggers, namedHALogger)
|
||||
habackend, err := factory(config.HAStorage.Config, namedHALogger)
|
||||
if err != nil {
|
||||
c.UI.Error(fmt.Sprintf(
|
||||
|
|
|
|||
Loading…
Reference in a new issue