diff --git a/build/dependencies.yaml b/build/dependencies.yaml
index 5d083073566..ad9894c4313 100644
--- a/build/dependencies.yaml
+++ b/build/dependencies.yaml
@@ -109,23 +109,12 @@ dependencies:
match: registry.k8s.io/e2e-test-images/agnhost:\d+\.\d+\.\d+
- name: "node-problem-detector"
- version: 1.34.0
+ version: 1.35.2
refPaths:
- path: test/e2e_node/image_list.go
match: const defaultImage
- path: test/kubemark/resources/hollow-node_template.yaml
match: registry.k8s.io/node-problem-detector/node-problem-detector
- - path: cluster/addons/node-problem-detector/npd.yaml
- match: registry.k8s.io/node-problem-detector/node-problem-detector
- - path: cluster/addons/node-problem-detector/npd.yaml
- match: app.kubernetes.io/version
- # TODO(dims): Ensure newer versions get uploaded to
- # - https://console.cloud.google.com/storage/browser/gke-release/winnode/node-problem-detector
- # - https://gcsweb.k8s.io/gcs/kubernetes-release/node-problem-detector/
- # and then the following references get fixed.
- #
- - path: cluster/gce/gci/configure.sh
- match: DEFAULT_NPD_VERSION=
#- path: cluster/gce/windows/k8s-node-setup.psm1
# match: DEFAULT_NPD_VERSION
diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-configmap-old.yaml b/cluster/addons/fluentd-gcp/fluentd-gcp-configmap-old.yaml
index bffda046ab6..611bd3ea8fc 100644
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-configmap-old.yaml
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-configmap-old.yaml
@@ -267,14 +267,6 @@ data:
tag kubelet
- <source>
- @type systemd
- filters [{ "_SYSTEMD_UNIT": "node-problem-detector.service" }]
- pos_file /var/log/gcp-journald-node-problem-detector.pos
- read_from_head true
- tag node-problem-detector
- </source>
-
# BEGIN_NODE_JOURNAL
# Whether to include node-journal or not is determined when starting the
# cluster. It is not changed when the cluster is already running.
@@ -289,7 +281,7 @@ data:
@type grep
key _SYSTEMD_UNIT
- pattern ^(docker|{{ fluentd_container_runtime_service }}|kubelet|node-problem-detector)\.service$
+ pattern ^(docker|{{ fluentd_container_runtime_service }}|kubelet)\.service$
# END_NODE_JOURNAL
diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-configmap.yaml b/cluster/addons/fluentd-gcp/fluentd-gcp-configmap.yaml
index 549fef75a54..44292f1d5c7 100644
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-configmap.yaml
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-configmap.yaml
@@ -282,14 +282,6 @@ data:
tag kubelet
- <source>
- @type systemd
- filters [{ "_SYSTEMD_UNIT": "node-problem-detector.service" }]
- pos_file /var/log/gcp-journald-node-problem-detector.pos
- read_from_head true
- tag node-problem-detector
- </source>
-
# BEGIN_NODE_JOURNAL
# Whether to include node-journal or not is determined when starting the
# cluster. It is not changed when the cluster is already running.
@@ -304,7 +296,7 @@ data:
@type grep
key _SYSTEMD_UNIT
- pattern ^(docker|{{ fluentd_container_runtime_service }}|kubelet|node-problem-detector)\.service$
+ pattern ^(docker|{{ fluentd_container_runtime_service }}|kubelet)\.service$
# END_NODE_JOURNAL
diff --git a/cluster/addons/node-problem-detector/MAINTAINERS.md b/cluster/addons/node-problem-detector/MAINTAINERS.md
deleted file mode 100644
index d2fd3b65127..00000000000
--- a/cluster/addons/node-problem-detector/MAINTAINERS.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Maintainers
-
-Random-Liu
-wangzhen127
diff --git a/cluster/addons/node-problem-detector/OWNERS b/cluster/addons/node-problem-detector/OWNERS
deleted file mode 100644
index 66cec0a3619..00000000000
--- a/cluster/addons/node-problem-detector/OWNERS
+++ /dev/null
@@ -1,8 +0,0 @@
-# See the OWNERS docs at https://go.k8s.io/owners
-
-approvers:
- - Random-Liu
- - wangzhen127
-reviewers:
- - Random-Liu
- - wangzhen127
diff --git a/cluster/addons/node-problem-detector/README.md b/cluster/addons/node-problem-detector/README.md
deleted file mode 100644
index 220ab1315d0..00000000000
--- a/cluster/addons/node-problem-detector/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Node Problem Detector
-==============
-
-Node Problem Detector is a DaemonSet running on each node, detecting node
-problems.
-
-Learn more at: https://github.com/kubernetes/node-problem-detector
diff --git a/cluster/addons/node-problem-detector/kubelet-user-standalone/npd-binding.yaml b/cluster/addons/node-problem-detector/kubelet-user-standalone/npd-binding.yaml
deleted file mode 100644
index 3d34fef427c..00000000000
--- a/cluster/addons/node-problem-detector/kubelet-user-standalone/npd-binding.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
- name: kubelet-user-npd-binding
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: system:node-problem-detector
-subjects:
-- apiGroup: rbac.authorization.k8s.io
- kind: User
- name: kubelet
diff --git a/cluster/addons/node-problem-detector/npd.yaml b/cluster/addons/node-problem-detector/npd.yaml
deleted file mode 100644
index 838106be1bf..00000000000
--- a/cluster/addons/node-problem-detector/npd.yaml
+++ /dev/null
@@ -1,96 +0,0 @@
-apiVersion: v1
-kind: ServiceAccount
-metadata:
- name: node-problem-detector
- namespace: kube-system
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
- name: npd-binding
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: system:node-problem-detector
-subjects:
-- kind: ServiceAccount
- name: node-problem-detector
- namespace: kube-system
----
-apiVersion: apps/v1
-kind: DaemonSet
-metadata:
- name: node-problem-detector
- namespace: kube-system
- labels:
- app.kubernetes.io/name: node-problem-detector
- app.kubernetes.io/version: v1.34.0
- addonmanager.kubernetes.io/mode: Reconcile
-spec:
- selector:
- matchLabels:
- app.kubernetes.io/name: node-problem-detector
- app.kubernetes.io/version: v1.34.0
- template:
- metadata:
- labels:
- app.kubernetes.io/name: node-problem-detector
- app.kubernetes.io/version: v1.34.0
- spec:
- nodeSelectors:
- kubernetes.io/os: linux
- containers:
- - name: node-problem-detector
- image: registry.k8s.io/node-problem-detector/node-problem-detector:v1.34.0
- command:
- - "/bin/sh"
- - "-c"
- - "exec /node-problem-detector --logtostderr --config.system-log-monitor=/config/kernel-monitor.json,/config/systemd-monitor.json --config.custom-plugin-monitor=/config/kernel-monitor-counter.json,/config/systemd-monitor-counter.json --config.system-stats-monitor=/config/system-stats-monitor.json >>/var/log/node-problem-detector.log 2>&1"
- securityContext:
- privileged: true
- resources:
- limits:
- cpu: "200m"
- memory: "100Mi"
- requests:
- cpu: "20m"
- memory: "20Mi"
- env:
- - name: NODE_NAME
- valueFrom:
- fieldRef:
- fieldPath: spec.nodeName
- volumeMounts:
- - name: log
- mountPath: /var/log
- - name: kmsg
- mountPath: /dev/kmsg
- readOnly: true
- - name: localtime
- mountPath: /etc/localtime
- readOnly: true
- volumes:
- - name: log
- hostPath:
- path: /var/log/
- - name: kmsg
- hostPath:
- path: /dev/kmsg
- - name: localtime
- hostPath:
- path: /etc/localtime
- type: "FileOrCreate"
- serviceAccountName: node-problem-detector
- tolerations:
- - operator: "Exists"
- effect: "NoExecute"
- - operator: "Exists"
- effect: "NoSchedule"
- - key: "CriticalAddonsOnly"
- operator: "Exists"
diff --git a/cluster/addons/node-problem-detector/standalone/npd-binding.yaml b/cluster/addons/node-problem-detector/standalone/npd-binding.yaml
deleted file mode 100644
index d7d64a63684..00000000000
--- a/cluster/addons/node-problem-detector/standalone/npd-binding.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
- name: npd-binding
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: system:node-problem-detector
-subjects:
-- apiGroup: rbac.authorization.k8s.io
- kind: User
- name: system:node-problem-detector
diff --git a/cluster/gce/config-common.sh b/cluster/gce/config-common.sh
index b1d715f3c26..c61a3f2fdc1 100644
--- a/cluster/gce/config-common.sh
+++ b/cluster/gce/config-common.sh
@@ -151,8 +151,6 @@ export WINDOWS_KUBECONFIG_FILE="${WINDOWS_K8S_DIR}\kubelet.kubeconfig"
export WINDOWS_BOOTSTRAP_KUBECONFIG_FILE="${WINDOWS_K8S_DIR}\kubelet.bootstrap-kubeconfig"
# Path for kube-proxy kubeconfig file on Windows nodes.
export WINDOWS_KUBEPROXY_KUBECONFIG_FILE="${WINDOWS_K8S_DIR}\kubeproxy.kubeconfig"
-# Path for kube-proxy kubeconfig file on Windows nodes.
-export WINDOWS_NODEPROBLEMDETECTOR_KUBECONFIG_FILE="${WINDOWS_K8S_DIR}\node-problem-detector.kubeconfig"
# Pause container image for Windows container.
export WINDOWS_INFRA_CONTAINER="registry.k8s.io/pause:3.10.2"
# Storage Path for csi-proxy. csi-proxy only needs to be installed for Windows.
diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh
index c742e9b1671..3413397f7d4 100755
--- a/cluster/gce/config-default.sh
+++ b/cluster/gce/config-default.sh
@@ -289,16 +289,6 @@ export DNS_MEMORY_LIMIT="${KUBE_DNS_MEMORY_LIMIT:-170Mi}"
# Optional: Enable DNS horizontal autoscaler
export ENABLE_DNS_HORIZONTAL_AUTOSCALER="${KUBE_ENABLE_DNS_HORIZONTAL_AUTOSCALER:-true}"
-# Optional: Install node problem detector.
-# none - Not run node problem detector.
-# daemonset - Run node problem detector as daemonset.
-# standalone - Run node problem detector as standalone system daemon.
-export ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-daemonset}"
-NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}"
-NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}"
-NODE_PROBLEM_DETECTOR_RELEASE_PATH="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}"
-NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}"
-
CNI_HASH="${CNI_HASH:-}"
CNI_TAR_PREFIX="${CNI_TAR_PREFIX:-cni-plugins-linux-amd64-}"
CNI_STORAGE_URL_BASE="${CNI_STORAGE_URL_BASE:-https://github.com/containernetworking/plugins/releases/download}"
@@ -526,11 +516,6 @@ export WINDOWS_ENABLE_PIGZ="${WINDOWS_ENABLE_PIGZ:-true}"
# Enable Windows DSR (Direct Server Return)
export WINDOWS_ENABLE_DSR="${WINDOWS_ENABLE_DSR:-false}"
-# Install Node Problem Detector (NPD) on Windows nodes.
-# NPD analyzes the host for problems that can disrupt workloads.
-export WINDOWS_ENABLE_NODE_PROBLEM_DETECTOR="${WINDOWS_ENABLE_NODE_PROBLEM_DETECTOR:-none}"
-export WINDOWS_NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS="${WINDOWS_NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}"
-
# Enable Windows Hyper-V
# sig-storage uses it to create Virtual Hard Disks in tests
export WINDOWS_ENABLE_HYPERV="${WINDOWS_ENABLE_HYPERV:-false}"
diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh
index f774f26a8fd..6ce5d6920c1 100755
--- a/cluster/gce/config-test.sh
+++ b/cluster/gce/config-test.sh
@@ -331,16 +331,6 @@ export DNS_MEMORY_LIMIT=${KUBE_DNS_MEMORY_LIMIT:-170Mi}
# Optional: Enable DNS horizontal autoscaler
export ENABLE_DNS_HORIZONTAL_AUTOSCALER=${KUBE_ENABLE_DNS_HORIZONTAL_AUTOSCALER:-true}
-# Optional: Install node problem detector.
-# none - Not run node problem detector.
-# daemonset - Run node problem detector as daemonset.
-# standalone - Run node problem detector as standalone system daemon.
-export ENABLE_NODE_PROBLEM_DETECTOR=${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-daemonset}
-NODE_PROBLEM_DETECTOR_VERSION=${NODE_PROBLEM_DETECTOR_VERSION:-}
-NODE_PROBLEM_DETECTOR_TAR_HASH=${NODE_PROBLEM_DETECTOR_TAR_HASH:-}
-NODE_PROBLEM_DETECTOR_RELEASE_PATH=${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}
-NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS=${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}
-
CNI_HASH=${CNI_HASH:-}
CNI_TAR_PREFIX=${CNI_TAR_PREFIX:-cni-plugins-linux-amd64-}
CNI_STORAGE_URL_BASE=${CNI_STORAGE_URL_BASE:-https://github.com/containernetworking/plugins/releases/download}
@@ -578,11 +568,6 @@ export WINDOWS_ENABLE_PIGZ="${WINDOWS_ENABLE_PIGZ:-true}"
# Enable Windows DSR (Direct Server Return)
export WINDOWS_ENABLE_DSR="${WINDOWS_ENABLE_DSR:-false}"
-# Install Node Problem Detector (NPD) on Windows nodes.
-# NPD analyzes the host for problems that can disrupt workloads.
-export WINDOWS_ENABLE_NODE_PROBLEM_DETECTOR="${WINDOWS_ENABLE_NODE_PROBLEM_DETECTOR:-none}"
-export WINDOWS_NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS="${WINDOWS_NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}"
-
# TLS_CIPHER_SUITES defines cipher suites allowed to be used by kube-apiserver.
# If this variable is unset or empty, kube-apiserver will allow its default set of cipher suites.
export TLS_CIPHER_SUITES=""
diff --git a/cluster/gce/gci/audit_policy_test.go b/cluster/gce/gci/audit_policy_test.go
index bd2f528cc5a..06b895d988e 100644
--- a/cluster/gce/gci/audit_policy_test.go
+++ b/cluster/gce/gci/audit_policy_test.go
@@ -73,13 +73,11 @@ func TestCreateMasterAuditPolicy(t *testing.T) {
scheduler = newUserInfo(user.KubeScheduler, user.AllAuthenticated)
apiserver = newUserInfo(user.APIServerUser, user.SystemPrivilegedGroup)
autoscaler = newUserInfo("cluster-autoscaler", user.AllAuthenticated)
- npd = newUserInfo("system:node-problem-detector", user.AllAuthenticated)
- npdSA = serviceaccount.UserInfo("kube-system", "node-problem-detector", "")
namespaceController = serviceaccount.UserInfo("kube-system", "namespace-controller", "")
endpointController = serviceaccount.UserInfo("kube-system", "endpoint-controller", "")
defaultSA = serviceaccount.UserInfo("default", "default", "")
- allUsers = []user.Info{anonymous, kubeproxy, ingress, kubelet, node, controller, scheduler, apiserver, autoscaler, npd, npdSA, namespaceController, endpointController, defaultSA}
+ allUsers = []user.Info{anonymous, kubeproxy, ingress, kubelet, node, controller, scheduler, apiserver, autoscaler, namespaceController, endpointController, defaultSA}
)
// Resources for test cases
@@ -149,15 +147,15 @@ func TestCreateMasterAuditPolicy(t *testing.T) {
at.testResources(none, node, apiserver, defaultSA, anonymous, "get", "list", "create", "patch", "update", "delete", events)
- at.testResources(request, kubelet, node, npd, npdSA, "update", "patch", nodeStatus, podStatus)
+ at.testResources(request, kubelet, node, "update", "patch", nodeStatus, podStatus)
at.testResources(request, namespaceController, "deletecollection", pods, namespaces)
- at.testResources(metadata, defaultSA, anonymous, npd, namespaceController, "get", "create", "update", secrets, configmaps, sysConfigmaps, tokenReviews)
- at.testResources(request, defaultSA, anonymous, npd, namespaceController, "get", "list", "watch", sysEndpoints, podMetrics, pods, clusterRoles, deployments)
- at.testResources(response, defaultSA, anonymous, npd, namespaceController, "create", "update", "patch", "delete", sysEndpoints, podMetrics, pods, clusterRoles, deployments)
+ at.testResources(metadata, defaultSA, anonymous, namespaceController, "get", "create", "update", secrets, configmaps, sysConfigmaps, tokenReviews)
+ at.testResources(request, defaultSA, anonymous, namespaceController, "get", "list", "watch", sysEndpoints, podMetrics, pods, clusterRoles, deployments)
+ at.testResources(response, defaultSA, anonymous, namespaceController, "create", "update", "patch", "delete", sysEndpoints, podMetrics, pods, clusterRoles, deployments)
- at.testResources(metadata, defaultSA, anonymous, npd, namespaceController, "get", "list", "watch", "create", "update", "patch", "delete", foobars, foobarbaz)
+ at.testResources(metadata, defaultSA, anonymous, namespaceController, "get", "list", "watch", "create", "update", "patch", "delete", foobars, foobarbaz)
}
type auditTester struct {
diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh
index 505ef63ad34..645b11264ec 100755
--- a/cluster/gce/gci/configure-helper.sh
+++ b/cluster/gce/gci/configure-helper.sh
@@ -845,9 +845,6 @@ function create-master-auth {
if [[ -n "${KUBE_PROXY_TOKEN:-}" ]]; then
append_or_replace_prefixed_line "${known_tokens_csv}" "${KUBE_PROXY_TOKEN}," "system:kube-proxy,uid:kube_proxy"
fi
- if [[ -n "${NODE_PROBLEM_DETECTOR_TOKEN:-}" ]]; then
- append_or_replace_prefixed_line "${known_tokens_csv}" "${NODE_PROBLEM_DETECTOR_TOKEN}," "system:node-problem-detector,uid:node-problem-detector"
- fi
if [[ -n "${GCE_GLBC_TOKEN:-}" ]]; then
append_or_replace_prefixed_line "${known_tokens_csv}" "${GCE_GLBC_TOKEN}," "system:controller:glbc,uid:system:controller:glbc"
fi
@@ -1239,7 +1236,7 @@ rules:
# node and pod status calls from nodes are high-volume and can be large, don't log responses for expected updates from nodes
- level: Request
- users: ["kubelet", "system:node-problem-detector", "system:serviceaccount:kube-system:node-problem-detector"]
+ users: ["kubelet"]
verbs: ["update","patch"]
resources:
- group: "" # core
@@ -1438,41 +1435,6 @@ ${SCHEDULER_POLICY_CONFIG}
EOF
}
-function create-node-problem-detector-kubeconfig {
- local apiserver_address="${1}"
- if [[ -z "${apiserver_address}" ]]; then
- echo "Must provide API server address to create node-problem-detector kubeconfig file!"
- exit 1
- fi
- echo "Creating node-problem-detector kubeconfig file"
- mkdir -p /var/lib/node-problem-detector
- cat <<EOF >/var/lib/node-problem-detector/kubeconfig
-apiVersion: v1
-kind: Config
-users:
-- name: node-problem-detector
- user:
- token: ${NODE_PROBLEM_DETECTOR_TOKEN}
-clusters:
-- name: local
- cluster:
- server: https://${apiserver_address}
- certificate-authority-data: ${CA_CERT}
-contexts:
-- context:
- cluster: local
- user: node-problem-detector
- name: service-account-context
-current-context: service-account-context
-EOF
-}
-
-function create-node-problem-detector-kubeconfig-from-kubelet {
- echo "Creating node-problem-detector kubeconfig from /var/lib/kubelet/kubeconfig"
- mkdir -p /var/lib/node-problem-detector
- cp /var/lib/kubelet/kubeconfig /var/lib/node-problem-detector/kubeconfig
-}
-
function create-master-etcd-auth {
if [[ -n "${ETCD_CA_CERT:-}" && -n "${ETCD_PEER_KEY:-}" && -n "${ETCD_PEER_CERT:-}" ]]; then
local -r auth_dir="/etc/srv/kubernetes"
@@ -1688,56 +1650,6 @@ EOF
systemctl start kubelet.service
}
-# This function assembles the node problem detector systemd service file and
-# starts it using systemctl.
-function start-node-problem-detector {
- echo "Start node problem detector"
- local -r npd_bin="${KUBE_HOME}/bin/node-problem-detector"
- echo "Using node problem detector binary at ${npd_bin}"
-
- local flags="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}"
- if [[ -z "${flags}" ]]; then
- local -r km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor.json"
- # TODO(random-liu): Handle this for alternative container runtime.
- local -r dm_config="${KUBE_HOME}/node-problem-detector/config/docker-monitor.json"
- local -r sm_config="${KUBE_HOME}/node-problem-detector/config/systemd-monitor.json"
- local -r ssm_config="${KUBE_HOME}/node-problem-detector/config/system-stats-monitor.json"
-
- local -r custom_km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor-counter.json"
- local -r custom_sm_config="${KUBE_HOME}/node-problem-detector/config/systemd-monitor-counter.json"
-
- flags="${NPD_TEST_LOG_LEVEL:-"--v=2"} ${NPD_TEST_ARGS:-}"
- flags+=" --logtostderr"
- flags+=" --config.system-log-monitor=${km_config},${dm_config},${sm_config}"
- flags+=" --config.system-stats-monitor=${ssm_config}"
- flags+=" --config.custom-plugin-monitor=${custom_km_config},${custom_sm_config}"
- local -r npd_port=${NODE_PROBLEM_DETECTOR_PORT:-20256}
- flags+=" --port=${npd_port}"
- if [[ -n "${EXTRA_NPD_ARGS:-}" ]]; then
- flags+=" ${EXTRA_NPD_ARGS}"
- fi
- fi
- flags+=" --apiserver-override=https://${KUBERNETES_MASTER_NAME}?inClusterConfig=false&auth=/var/lib/node-problem-detector/kubeconfig"
-
- # Write the systemd service file for node problem detector.
- cat <<EOF >/etc/systemd/system/node-problem-detector.service
-[Unit]
-Description=Kubernetes node problem detector
-Requires=network-online.target
-After=network-online.target
-
-[Service]
-Restart=always
-RestartSec=10
-ExecStart=${npd_bin} ${flags}
-
-[Install]
-WantedBy=multi-user.target
-EOF
-
- systemctl start node-problem-detector.service
-}
-
# Create the log file and set its properties.
#
# $1 is the file to create.
@@ -2966,16 +2878,6 @@ function start-kube-addons {
update-event-exporter ${event_exporter_yaml}
update-prometheus-to-sd-parameters ${event_exporter_yaml}
fi
- if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "daemonset" ]]; then
- setup-addon-manifests "addons" "node-problem-detector"
- fi
- if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then
- # Setup role binding(s) for standalone node problem detector.
- if [[ -n "${NODE_PROBLEM_DETECTOR_TOKEN:-}" ]]; then
- setup-addon-manifests "addons" "node-problem-detector/standalone"
- fi
- setup-addon-manifests "addons" "node-problem-detector/kubelet-user-standalone" "node-problem-detector"
- fi
if echo "${ADMISSION_CONTROL:-}" | grep -q "LimitRanger"; then
setup-addon-manifests "admission-controls" "limit-range" "gce"
fi
@@ -3615,16 +3517,6 @@ function main() {
log-wrap 'CreateNodePKI' create-node-pki
log-wrap 'CreateKubeletKubeconfig' create-kubelet-kubeconfig "${KUBERNETES_MASTER_NAME}"
log-wrap 'CreateKubeproxyUserKubeconfig' create-kubeproxy-user-kubeconfig
- if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then
- if [[ -n "${NODE_PROBLEM_DETECTOR_TOKEN:-}" ]]; then
- log-wrap 'CreateNodeProblemDetectorKubeconfig' create-node-problem-detector-kubeconfig "${KUBERNETES_MASTER_NAME}"
- elif [[ -f "/var/lib/kubelet/kubeconfig" ]]; then
- log-wrap 'CreateNodeProblemDetectorKubeconfigFromKubelet' create-node-problem-detector-kubeconfig-from-kubelet
- else
- echo "Either NODE_PROBLEM_DETECTOR_TOKEN or /var/lib/kubelet/kubeconfig must be set"
- exit 1
- fi
- fi
fi
log-wrap 'DetectCgroupConfig' detect-cgroup-config
@@ -3679,9 +3571,6 @@ function main() {
log-wrap 'UpdateLegacyAddonNodeLabels' update-legacy-addon-node-labels &
else
log-wrap 'StartKubeProxy' start-kube-proxy
- if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then
- log-wrap 'StartNodeProblemDetector' start-node-problem-detector
- fi
fi
log-wrap 'ResetMotd' reset-motd
diff --git a/cluster/gce/gci/configure.sh b/cluster/gce/gci/configure.sh
index b61f85cdf72..451f62e31b4 100644
--- a/cluster/gce/gci/configure.sh
+++ b/cluster/gce/gci/configure.sh
@@ -27,9 +27,7 @@ set -o pipefail
DEFAULT_CNI_VERSION='v1.9.1'
# CNI HASH for amd64 sha512
DEFAULT_CNI_HASH='3ea8a76852b7ddc62c087a34cccca2cb29822ca24214928cd172b28bf9d1486000ba3eb71a156445af31ff6a92c1dc3e01e702546c6ee016ef13fae06ccfb8fc'
-DEFAULT_NPD_VERSION='v1.34.0'
-DEFAULT_NPD_HASH_AMD64='3c55ff6ffadd77dbc3df3774d13164587103ca87c8b6914f5c71c87d8f498b78621e0c96538bb3c69f8f1b4194a6da553aa56b1b52001a7d9a67776ac24e80bd'
-DEFAULT_NPD_HASH_ARM64='ca1d34e64b80f6b2bdf86cfde95154122d6e14c707a748ea6fc414a55f391b1bb572a96b6b2c285996af0232917fa87e14e037125aa03a62247383af3e48c095'
+
DEFAULT_CRICTL_VERSION='v1.36.0'
DEFAULT_CRICTL_AMD64_SHA512='43ac5425d264547bc9d9c9e31c74624d9c2a63bf7de4e77fe79517e0c927ea77ee3951a2f662920bc771599a0dc4f2859b6225c3621c7cafff952e63c83d686d'
DEFAULT_CRICTL_ARM64_SHA512='485aa86f327c23cb0508e814e568bda793d291865c5cec3337ae5467a51898e9ab21a6bd38b73a6b219058bb34c9b4e7128e57360a2552b74a552e7ea1936f32'
@@ -293,56 +291,6 @@ function install-gci-mounter-tools {
mkdir -p "${CONTAINERIZED_MOUNTER_HOME}/rootfs/var/lib/kubelet"
}
-# Install node problem detector binary.
-function install-node-problem-detector {
- if [[ -n "${NODE_PROBLEM_DETECTOR_VERSION:-}" ]]; then
- local -r npd_version="${NODE_PROBLEM_DETECTOR_VERSION}"
- local -r npd_hash="${NODE_PROBLEM_DETECTOR_TAR_HASH}"
- else
- local -r npd_version="${DEFAULT_NPD_VERSION}"
- case "${HOST_PLATFORM}/${HOST_ARCH}" in
- linux/amd64)
- local -r npd_hash="${DEFAULT_NPD_HASH_AMD64}"
- ;;
- linux/arm64)
- local -r npd_hash="${DEFAULT_NPD_HASH_ARM64}"
- ;;
- # no other architectures are supported currently.
- # Assumption is that this script only runs on linux,
- # see cluster/gce/windows/k8s-node-setup.psm1 for windows
- # https://github.com/kubernetes/node-problem-detector/releases/
- *)
- echo "Unrecognized version and platform/arch combination:"
- echo "$DEFAULT_NPD_VERSION $HOST_PLATFORM/$HOST_ARCH"
- echo "Set NODE_PROBLEM_DETECTOR_VERSION and NODE_PROBLEM_DETECTOR_TAR_HASH to overwrite"
- exit 1
- ;;
- esac
- fi
- local -r npd_tar="node-problem-detector-${npd_version}-${HOST_PLATFORM}_${HOST_ARCH}.tar.gz"
-
- if is-preloaded "${npd_tar}" "${npd_hash}"; then
- echo "${npd_tar} is preloaded."
- return
- fi
-
- if [[ -n "${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}" ]]; then
- echo "Downloading ${npd_tar} from ${NODE_PROBLEM_DETECTOR_RELEASE_PATH}."
- local -r download_path="${NODE_PROBLEM_DETECTOR_RELEASE_PATH}/node-problem-detector/${npd_tar}"
- else
- echo "Downloading ${npd_tar} from github."
- local -r download_path="https://github.com/kubernetes/node-problem-detector/releases/download/${npd_version}/${npd_tar}"
- fi
- download-or-bust "${npd_hash}" "${download_path}"
- local -r npd_dir="${KUBE_HOME}/node-problem-detector"
- mkdir -p "${npd_dir}"
- tar xzf "${KUBE_HOME}/${npd_tar}" -C "${npd_dir}" --overwrite
- mv "${npd_dir}/bin"/* "${KUBE_BIN}"
- chmod a+x "${KUBE_BIN}/node-problem-detector"
- rmdir "${npd_dir}/bin"
- rm -f "${KUBE_HOME}/${npd_tar}"
-}
-
function install-cni-binaries {
local -r cni_version=${CNI_VERSION:-$DEFAULT_CNI_VERSION}
if [[ -n "${CNI_VERSION:-}" ]]; then
@@ -795,11 +743,6 @@ function install-kube-binary-config {
mv "${KUBE_HOME}/kubernetes/kubernetes-src.tar.gz" "${KUBE_HOME}"
fi
- if [[ "${KUBERNETES_MASTER:-}" == "false" ]] && \
- [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then
- log-wrap "InstallNodeProblemDetector" install-node-problem-detector
- fi
-
if [[ "${NETWORK_PROVIDER:-}" == "kubenet" ]] || \
[[ "${NETWORK_PROVIDER:-}" == "cni" ]]; then
log-wrap "InstallCNIBinaries" install-cni-binaries
diff --git a/cluster/gce/upgrade.sh b/cluster/gce/upgrade.sh
index 4641ed6b24d..27b19bd3a71 100755
--- a/cluster/gce/upgrade.sh
+++ b/cluster/gce/upgrade.sh
@@ -114,11 +114,6 @@ function upgrade-master() {
function upgrade-master-env() {
echo "== Upgrading master environment variables. =="
- # Generate the node problem detector token if it isn't present on the original
- # master.
- if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" && "${NODE_PROBLEM_DETECTOR_TOKEN:-}" == "" ]]; then
- NODE_PROBLEM_DETECTOR_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
- fi
}
function wait-for-master() {
@@ -193,7 +188,6 @@ function get-node-os() {
#
# Vars set:
# KUBE_PROXY_TOKEN
-# NODE_PROBLEM_DETECTOR_TOKEN
# CA_CERT_BASE64
# EXTRA_DOCKER_OPTS
# KUBELET_CERT_BASE64
@@ -232,7 +226,6 @@ function setup-base-image() {
# SANITIZED_VERSION
# INSTANCE_GROUPS
# KUBE_PROXY_TOKEN
-# NODE_PROBLEM_DETECTOR_TOKEN
# CA_CERT_BASE64
# EXTRA_DOCKER_OPTS
# KUBELET_CERT_BASE64
@@ -257,8 +250,6 @@ function prepare-node-upgrade() {
node_env=$(get-node-env)
KUBE_PROXY_TOKEN=$(get-env-val "${node_env}" "KUBE_PROXY_TOKEN")
export KUBE_PROXY_TOKEN
- NODE_PROBLEM_DETECTOR_TOKEN=$(get-env-val "${node_env}" "NODE_PROBLEM_DETECTOR_TOKEN")
- export NODE_PROBLEM_DETECTOR_TOKEN
CA_CERT_BASE64=$(get-env-val "${node_env}" "CA_CERT")
export CA_CERT_BASE64
EXTRA_DOCKER_OPTS=$(get-env-val "${node_env}" "EXTRA_DOCKER_OPTS")
@@ -286,14 +277,6 @@ function prepare-node-upgrade() {
function upgrade-node-env() {
echo "== Upgrading node environment variables. =="
- # Get the node problem detector token from master if it isn't present on
- # the original node.
- if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" && "${NODE_PROBLEM_DETECTOR_TOKEN:-}" == "" ]]; then
- detect-master
- local master_env
- master_env=$(get-master-env)
- NODE_PROBLEM_DETECTOR_TOKEN=$(get-env-val "${master_env}" "NODE_PROBLEM_DETECTOR_TOKEN")
- fi
}
# Upgrades a single node.
diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh
index 5ae35b3009d..f183436e222 100755
--- a/cluster/gce/util.sh
+++ b/cluster/gce/util.sh
@@ -1128,11 +1128,6 @@ DOCKER_REGISTRY_MIRROR_URL: $(yaml-quote "${DOCKER_REGISTRY_MIRROR_URL:-}")
ENABLE_L7_LOADBALANCING: $(yaml-quote "${ENABLE_L7_LOADBALANCING:-none}")
ENABLE_CLUSTER_LOGGING: $(yaml-quote "${ENABLE_CLUSTER_LOGGING:-false}")
ENABLE_AUTH_PROVIDER_GCP: $(yaml-quote "${ENABLE_AUTH_PROVIDER_GCP:-true}")
-ENABLE_NODE_PROBLEM_DETECTOR: $(yaml-quote "${ENABLE_NODE_PROBLEM_DETECTOR:-none}")
-NODE_PROBLEM_DETECTOR_VERSION: $(yaml-quote "${NODE_PROBLEM_DETECTOR_VERSION:-}")
-NODE_PROBLEM_DETECTOR_TAR_HASH: $(yaml-quote "${NODE_PROBLEM_DETECTOR_TAR_HASH:-}")
-NODE_PROBLEM_DETECTOR_RELEASE_PATH: $(yaml-quote "${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}")
-NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS: $(yaml-quote "${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}")
CNI_STORAGE_URL_BASE: $(yaml-quote "${CNI_STORAGE_URL_BASE:-}")
CNI_TAR_PREFIX: $(yaml-quote "${CNI_TAR_PREFIX:-}")
CNI_VERSION: $(yaml-quote "${CNI_VERSION:-}")
@@ -1151,7 +1146,6 @@ ENABLE_DNS_HORIZONTAL_AUTOSCALER: $(yaml-quote "${ENABLE_DNS_HORIZONTAL_AUTOSCAL
KUBE_PROXY_TOKEN: $(yaml-quote "${KUBE_PROXY_TOKEN:-}")
KUBE_PROXY_MODE: $(yaml-quote "${KUBE_PROXY_MODE:-iptables}")
DETECT_LOCAL_MODE: $(yaml-quote "${DETECT_LOCAL_MODE:-}")
-NODE_PROBLEM_DETECTOR_TOKEN: $(yaml-quote "${NODE_PROBLEM_DETECTOR_TOKEN:-}")
ADMISSION_CONTROL: $(yaml-quote "${ADMISSION_CONTROL:-}")
MASTER_IP_RANGE: $(yaml-quote "${MASTER_IP_RANGE}")
RUNTIME_CONFIG: $(yaml-quote "${RUNTIME_CONFIG}")
@@ -1615,13 +1609,6 @@ WINDOWS_INFRA_CONTAINER: $(yaml-quote "${WINDOWS_INFRA_CONTAINER}")
WINDOWS_ENABLE_PIGZ: $(yaml-quote "${WINDOWS_ENABLE_PIGZ}")
WINDOWS_ENABLE_HYPERV: $(yaml-quote "${WINDOWS_ENABLE_HYPERV}")
ENABLE_AUTH_PROVIDER_GCP: $(yaml-quote "${ENABLE_AUTH_PROVIDER_GCP}")
-ENABLE_NODE_PROBLEM_DETECTOR: $(yaml-quote "${WINDOWS_ENABLE_NODE_PROBLEM_DETECTOR}")
-NODE_PROBLEM_DETECTOR_VERSION: $(yaml-quote "${NODE_PROBLEM_DETECTOR_VERSION}")
-NODE_PROBLEM_DETECTOR_TAR_HASH: $(yaml-quote "${NODE_PROBLEM_DETECTOR_TAR_HASH}")
-NODE_PROBLEM_DETECTOR_RELEASE_PATH: $(yaml-quote "${NODE_PROBLEM_DETECTOR_RELEASE_PATH}")
-NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS: $(yaml-quote "${WINDOWS_NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS}")
-NODE_PROBLEM_DETECTOR_TOKEN: $(yaml-quote "${NODE_PROBLEM_DETECTOR_TOKEN:-}")
-WINDOWS_NODEPROBLEMDETECTOR_KUBECONFIG_FILE: $(yaml-quote "${WINDOWS_NODEPROBLEMDETECTOR_KUBECONFIG_FILE}")
AUTH_PROVIDER_GCP_STORAGE_PATH: $(yaml-quote "${AUTH_PROVIDER_GCP_STORAGE_PATH}")
AUTH_PROVIDER_GCP_VERSION: $(yaml-quote "${AUTH_PROVIDER_GCP_VERSION}")
AUTH_PROVIDER_GCP_HASH_WINDOWS_AMD64: $(yaml-quote "${AUTH_PROVIDER_GCP_HASH_WINDOWS_AMD64}")
@@ -2080,7 +2067,6 @@ function parse-master-env() {
local master_env
master_env=$(get-master-env)
KUBE_PROXY_TOKEN=$(get-env-val "${master_env}" "KUBE_PROXY_TOKEN")
- NODE_PROBLEM_DETECTOR_TOKEN=$(get-env-val "${master_env}" "NODE_PROBLEM_DETECTOR_TOKEN")
CA_CERT_BASE64=$(get-env-val "${master_env}" "CA_CERT")
CA_KEY_BASE64=$(get-env-val "${master_env}" "CA_KEY")
KUBEAPISERVER_CERT_BASE64=$(get-env-val "${master_env}" "KUBEAPISERVER_CERT")
@@ -2903,9 +2889,6 @@ function create-master() {
# computer) can forget it later. This should disappear with
# http://issue.k8s.io/3168
KUBE_PROXY_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
- if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then
- NODE_PROBLEM_DETECTOR_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
- fi
# Reserve the master's IP so that it can later be transferred to another VM
# without disrupting the kubelets.
diff --git a/cluster/gce/windows/configure.ps1 b/cluster/gce/windows/configure.ps1
index adb5749635f..3d52bcd3af3 100644
--- a/cluster/gce/windows/configure.ps1
+++ b/cluster/gce/windows/configure.ps1
@@ -163,14 +163,12 @@ try {
Configure-Crictl
Setup-ContainerRuntime
DownloadAndInstall-KubernetesBinaries
- DownloadAndInstall-NodeProblemDetector
DownloadAndInstall-CSIProxyBinaries
DownloadAndInstall-AuthProviderGcpBinary
Start-CSIProxy
Create-NodePki
Create-KubeletKubeconfig
Create-KubeproxyKubeconfig
- Create-NodeProblemDetectorKubeConfig
Create-AuthProviderGcpConfig
Set-PodCidr
Configure-HostNetworkingService
@@ -178,7 +176,6 @@ try {
Configure-HostDnsConf
Configure-GcePdTools
Configure-Kubelet
- Configure-NodeProblemDetector
# Even if Logging agent is already installed, the function will still [re]start the service.
if (IsLoggingEnabled $kube_env) {
diff --git a/cluster/gce/windows/k8s-node-setup.psm1 b/cluster/gce/windows/k8s-node-setup.psm1
index efcfa47b204..42d830dd4d7 100644
--- a/cluster/gce/windows/k8s-node-setup.psm1
+++ b/cluster/gce/windows/k8s-node-setup.psm1
@@ -295,8 +295,6 @@ function Set-EnvironmentVars {
"INFRA_CONTAINER" = ${kube_env}['WINDOWS_INFRA_CONTAINER']
"WINDOWS_ENABLE_PIGZ" = ${kube_env}['WINDOWS_ENABLE_PIGZ']
"WINDOWS_ENABLE_HYPERV" = ${kube_env}['WINDOWS_ENABLE_HYPERV']
- "ENABLE_NODE_PROBLEM_DETECTOR" = ${kube_env}['ENABLE_NODE_PROBLEM_DETECTOR']
- "NODEPROBLEMDETECTOR_KUBECONFIG_FILE" = ${kube_env}['WINDOWS_NODEPROBLEMDETECTOR_KUBECONFIG_FILE']
"ENABLE_AUTH_PROVIDER_GCP" = ${kube_env}['ENABLE_AUTH_PROVIDER_GCP']
"AUTH_PROVIDER_GCP_STORAGE_PATH" = ${kube_env}['AUTH_PROVIDER_GCP_STORAGE_PATH']
"AUTH_PROVIDER_GCP_VERSION" = ${kube_env}['AUTH_PROVIDER_GCP_VERSION']
@@ -1484,140 +1482,6 @@ function Install-Pigz {
}
}
-# Node Problem Detector Resources
-$NPD_SERVICE = "node-problem-detector"
-$DEFAULT_NPD_VERSION = '0.8.10-gke0.1'
-$DEFAULT_NPD_RELEASE_PATH = 'https://storage.googleapis.com/gke-release/winnode'
-$DEFAULT_NPD_HASH = '97ddfe3544da9e02a1cfb55d24f329eb29d606fca7fbbf800415d5de9dbc29a00563f8e0d1919595c8e316fd989d45b09b13c07be528841fc5fd37e21d016a2d'
-
-# Install Node Problem Detector (NPD).
-# NPD analyzes the host for problems that can disrupt workloads.
-# https://github.com/kubernetes/node-problem-detector
-function DownloadAndInstall-NodeProblemDetector {
- if ("${env:ENABLE_NODE_PROBLEM_DETECTOR}" -eq "standalone") {
- if (ShouldWrite-File "${env:NODE_DIR}\node-problem-detector.exe") {
- $npd_version = $DEFAULT_NPD_VERSION
- $npd_hash = $DEFAULT_NPD_HASH
- if (-not [string]::IsNullOrEmpty(${kube_env}['NODE_PROBLEM_DETECTOR_VERSION'])) {
- $npd_version = ${kube_env}['NODE_PROBLEM_DETECTOR_VERSION']
- $npd_hash = ${kube_env}['NODE_PROBLEM_DETECTOR_TAR_HASH']
- }
- $npd_release_path = $DEFAULT_NPD_RELEASE_PATH
- if (-not [string]::IsNullOrEmpty(${kube_env}['NODE_PROBLEM_DETECTOR_RELEASE_PATH'])) {
- $npd_release_path = ${kube_env}['NODE_PROBLEM_DETECTOR_RELEASE_PATH']
- }
-
- $npd_tar = "node-problem-detector-v${npd_version}-windows_amd64.tar.gz"
-
- Log-Output "Downloading ${npd_tar}."
-
- $npd_dir = "${env:K8S_DIR}\node-problem-detector"
- New-Item -Path $npd_dir -ItemType Directory -Force -Confirm:$false
-
- MustDownload-File `
- -URLs "${npd_release_path}/node-problem-detector/${npd_tar}" `
- -Hash $npd_hash `
- -Algorithm SHA512 `
- -OutFile "${npd_dir}\${npd_tar}"
-
- tar xzvf "${npd_dir}\${npd_tar}" -C $npd_dir
- Move-Item "${npd_dir}\bin\*" "${env:NODE_DIR}\" -Force -Confirm:$false
- Remove-Item "${npd_dir}\bin" -Force -Confirm:$false
- Remove-Item "${npd_dir}\${npd_tar}" -Force -Confirm:$false
- }
- else {
- Log-Output "Node Problem Detector already installed."
- }
- }
-}
-
-# Creates the node-problem-detector user kubeconfig file at
-# $env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE (if defined).
-#
-# Create-NodePki() must be called first.
-#
-# Required ${kube_env} keys:
-# CA_CERT
-# NODE_PROBLEM_DETECTOR_TOKEN
-function Create-NodeProblemDetectorKubeConfig {
- if ("${env:ENABLE_NODE_PROBLEM_DETECTOR}" -eq "standalone") {
- if (-not [string]::IsNullOrEmpty(${kube_env}['NODE_PROBLEM_DETECTOR_TOKEN'])) {
- Log-Output "Create-NodeProblemDetectorKubeConfig using Node Problem Detector token"
- Create-Kubeconfig -Name 'node-problem-detector' `
- -Path ${env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE} `
- -Token ${kube_env}['NODE_PROBLEM_DETECTOR_TOKEN']
- } elseif (Test-Path ${env:BOOTSTRAP_KUBECONFIG}) {
- Log-Output "Create-NodeProblemDetectorKubeConfig creating kubeconfig from kubelet kubeconfig"
- Copy-Item ${env:BOOTSTRAP_KUBECONFIG} -Destination ${env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE}
- Log-Output ("node-problem-detector bootstrap kubeconfig:`n" +
- "$(Get-Content -Raw ${env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE})")
- } else {
- Log-Output "Either NODE_PROBLEM_DETECTOR_TOKEN or ${env:BOOTSTRAP_KUBECONFIG} must be set"
- exit 1
- }
- }
-}
-
-# Configures NPD to run with the bundled monitor configs and report against the Kubernetes api server.
-function Configure-NodeProblemDetector {
- $npd_bin = "${env:NODE_DIR}\node-problem-detector.exe"
- if ("${env:ENABLE_NODE_PROBLEM_DETECTOR}" -eq "standalone" -and (Test-Path $npd_bin)) {
- $npd_svc = Get-Service -Name $NPD_SERVICE -ErrorAction SilentlyContinue
- if ($npd_svc -eq $null) {
- $npd_dir = "${env:K8S_DIR}\node-problem-detector"
- $npd_logs_dir = "${env:LOGS_DIR}\node-problem-detector"
-
- New-Item -Path $npd_logs_dir -Type Directory -Force -Confirm:$false
-
- $flags = ''
- if ([string]::IsNullOrEmpty(${kube_env}['NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS'])) {
- $system_log_monitors = @()
- $system_stats_monitors = @()
- $custom_plugin_monitors = @()
-
- # Custom Plugin Monitors
- $custom_plugin_monitors += @("${npd_dir}\config\windows-health-checker-kubelet.json")
- $custom_plugin_monitors += @("${npd_dir}\config\windows-health-checker-kubeproxy.json")
- $custom_plugin_monitors += @("${npd_dir}\config\windows-defender-monitor.json")
-
- # System Stats Monitors
- $system_stats_monitors += @("${npd_dir}\config\windows-system-stats-monitor.json")
-
- # NPD Configuration for CRI monitor
- $system_log_monitors += @("${npd_dir}\config\windows-containerd-monitor-filelog.json")
- $custom_plugin_monitors += @("${npd_dir}\config\windows-health-checker-containerd.json")
-
- $flags="--v=2 --port=20256 --log_dir=${npd_logs_dir}"
- if ($system_log_monitors.count -gt 0) {
- $flags+=" --config.system-log-monitor={0}" -f ($system_log_monitors -join ",")
- }
- if ($system_stats_monitors.count -gt 0) {
- $flags+=" --config.system-stats-monitor={0}" -f ($system_stats_monitors -join ",")
- }
- if ($custom_plugin_monitors.count -gt 0) {
- $flags+=" --config.custom-plugin-monitor={0}" -f ($custom_plugin_monitors -join ",")
- }
- }
- else {
- $flags = ${kube_env}['NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS']
- }
- $kubernetes_master_name = ${kube_env}['KUBERNETES_MASTER_NAME']
- $flags = "${flags} --apiserver-override=`"https://${kubernetes_master_name}?inClusterConfig=false&auth=${env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE}`""
-
- Log-Output "Creating service: ${NPD_SERVICE}"
- Log-Output "${npd_bin} ${flags}"
- sc.exe create $NPD_SERVICE binpath= "${npd_bin} ${flags}" displayName= "Node Problem Detector"
- sc.exe failure $NPD_SERVICE reset= 30 actions= restart/5000
- sc.exe start $NPD_SERVICE
-
- Write-VerboseServiceInfoToConsole -Service $NPD_SERVICE
- }
- else {
- Log-Output "${NPD_SERVICE} already configured."
- }
- }
-}
-
# TODO(pjh): move the logging agent code below into a separate
# module; it was put here temporarily to avoid disrupting the file layout in
# the K8s release machinery.
@@ -1872,21 +1736,6 @@ $FLUENTBIT_CONFIG = @'
Parser docker
Parser containerd
-# Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg
-# Example:
-# I0716 02:08:55.559351 3356 log_spam.go:42] Command line arguments:
-[INPUT]
- Name tail
- Alias node-problem-detector
- Tag node-problem-detector
- Mem_Buf_Limit 5MB
- Skip_Long_Lines On
- Refresh_Interval 5
- Path C:\etc\kubernetes\logs\node-problem-detector\*.log.INFO*
- DB /var/run/google-fluentbit/pos-files/node-problem-detector.db
- Multiline On
- Parser_Firstline glog
-
# Example:
# I0928 03:15:50.440223 4880 main.go:51] Starting CSI-Proxy Server ...
[INPUT]
diff --git a/cluster/log-dump/log-dump.sh b/cluster/log-dump/log-dump.sh
index b7505099f0e..f73a9409e0d 100755
--- a/cluster/log-dump/log-dump.sh
+++ b/cluster/log-dump/log-dump.sh
@@ -42,8 +42,8 @@ readonly node_ssh_supported_providers="gce gke aws"
readonly gcloud_supported_providers="gce gke"
readonly master_logfiles="kube-apiserver.log kube-apiserver-audit.log kube-scheduler.log kube-controller-manager.log cloud-controller-manager.log etcd.log etcd-events.log glbc.log cluster-autoscaler.log kube-addon-manager.log konnectivity-server.log fluentd.log kubelet.cov"
-readonly node_logfiles="kube-proxy.log containers/konnectivity-agent-*.log fluentd.log node-problem-detector.log kubelet.cov kube-network-policies.log"
-readonly node_systemd_services="node-problem-detector"
+readonly node_logfiles="kube-proxy.log containers/konnectivity-agent-*.log fluentd.log kubelet.cov kube-network-policies.log"
+readonly node_systemd_services=""
readonly hollow_node_logfiles="kubelet-hollow-node-*.log kubeproxy-hollow-node-*.log npd-hollow-node-*.log"
readonly aws_logfiles="cloud-init-output.log"
readonly gce_logfiles="startupscript.log"
diff --git a/test/e2e_node/image_list.go b/test/e2e_node/image_list.go
index 91f13a18c5f..cc469df0046 100644
--- a/test/e2e_node/image_list.go
+++ b/test/e2e_node/image_list.go
@@ -92,7 +92,7 @@ func updateImageAllowList(ctx context.Context) {
}
func getNodeProblemDetectorImage() string {
- const defaultImage string = "registry.k8s.io/node-problem-detector/node-problem-detector:v1.34.0"
+ const defaultImage string = "registry.k8s.io/node-problem-detector/node-problem-detector:v1.35.2"
image := os.Getenv("NODE_PROBLEM_DETECTOR_IMAGE")
if image == "" {
image = defaultImage
diff --git a/test/kubemark/resources/hollow-node_template.yaml b/test/kubemark/resources/hollow-node_template.yaml
index 1a51acb7a61..a6cc19c889d 100644
--- a/test/kubemark/resources/hollow-node_template.yaml
+++ b/test/kubemark/resources/hollow-node_template.yaml
@@ -99,7 +99,7 @@ spec:
cpu: {{hollow_proxy_millicpu}}m
memory: {{hollow_proxy_mem_Ki}}Ki
- name: hollow-node-problem-detector
- image: registry.k8s.io/node-problem-detector/node-problem-detector:v1.34.0
+ image: registry.k8s.io/node-problem-detector/node-problem-detector:v1.35.2
env:
- name: NODE_NAME
valueFrom: