From 60635cc69e254eeb91fc903c6e89ecbe86d352ce Mon Sep 17 00:00:00 2001 From: "Arcan Consulting - Michael J. Arcan" Date: Mon, 16 Feb 2026 13:21:16 +0100 Subject: [PATCH] Update hclouddns: 2 modified MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FIX: The health check was only checking "has IP = up", ignoring OPNsense's actual dpinger monitoring. A gateway with a stale DHCP lease would still show as up even when dpinger reported it down. Now queries OPNsense's Gateways class + dpinger_status() and matches by interface name (wan, opt1, etc.) — the same interface field already used in the plugin's gateway dropdown. The ping-based check had no source binding, so it always went through the default gateway — reporting all gateways as up even when one was down. Now queries OPNsense's own dpinger status (dpinger_status() via an inline PHP call) for accurate per-gateway health monitoring. FIX: Replace → (U+2192) with -> in ntfy Title headers. urllib encodes headers as latin-1 which cannot represent unicode characters. The message body (sent as UTF-8 data) is unaffected. --- .../scripts/HCloudDNS/gateway_health.py | 89 ++++++++++++++----- .../scripts/HCloudDNS/update_records_v2.py | 21 +++-- 2 files changed, 77 insertions(+), 33 deletions(-) diff --git a/net/hclouddns/src/opnsense/scripts/HCloudDNS/gateway_health.py b/net/hclouddns/src/opnsense/scripts/HCloudDNS/gateway_health.py index 9f0e821ba..04b983a73 100755 --- a/net/hclouddns/src/opnsense/scripts/HCloudDNS/gateway_health.py +++ b/net/hclouddns/src/opnsense/scripts/HCloudDNS/gateway_health.py @@ -174,26 +174,64 @@ def is_valid_ip(ip): return False -def quick_ping_check(target='8.8.8.8', count=1, timeout=2): +def get_opnsense_gateway_status(): + """Query OPNsense's dpinger-based gateway status and gateway-to-interface mapping. + + Returns a dict mapping OPNsense interface name (e.g. 'wan', 'opt1') to status string. 
+ OPNsense status values: 'none' = online, 'down', 'force_down', 'loss', 'delay', etc. """ - Quick ping check for gateway connectivity. - Used as a simple fallback health check. - - Args: - target: IP or hostname to ping - count: Number of pings - timeout: Timeout in seconds - - Returns: - bool: True if ping succeeded - """ - cmd = ['ping', '-c', str(count), '-W', str(timeout), target] - + iface_status = {} try: - result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout * count + 2) - return result.returncode == 0 - except (subprocess.TimeoutExpired, subprocess.SubprocessError): - return False + # Get gateway details with interface mapping + gw_details = subprocess.run( + ['php', '-r', """ +require_once 'config.inc'; +require_once 'util.inc'; +require_once 'interfaces.inc'; +require_once 'plugins.inc.d/dpinger.inc'; +$status = dpinger_status(); +$gws = (new \\OPNsense\\Routing\\Gateways())->gatewaysIndexedByName(); +$result = []; +foreach ($gws as $name => $gw) { + $s = isset($status[$name]) ? strtolower($status[$name]['status']) : 'none'; + $iface = isset($gw['interface']) ? $gw['interface'] : ''; + $proto = isset($gw['ipprotocol']) ? 
$gw['ipprotocol'] : 'inet'; + $result[] = ['name' => $name, 'interface' => $iface, 'ipprotocol' => $proto, 'status' => $s]; +} +echo json_encode($result); +"""], + capture_output=True, text=True, timeout=10 + ) + if gw_details.returncode == 0 and gw_details.stdout.strip(): + gateways = json.loads(gw_details.stdout) + for gw in gateways: + iface = gw.get('interface', '') + proto = gw.get('ipprotocol', 'inet') + status = gw.get('status', 'none') + if not iface: + continue + # Only use inet (IPv4) gateways for status matching + # (avoid overwriting with inet6 status for same interface) + if proto == 'inet': + iface_status[iface] = status + elif iface not in iface_status: + iface_status[iface] = status + except (subprocess.TimeoutExpired, subprocess.SubprocessError, json.JSONDecodeError) as e: + sys.stderr.write(f"Error querying OPNsense gateway status: {e}\n") + return iface_status + + +def is_gateway_up(interface, opnsense_status): + """Check if a gateway is up based on OPNsense's dpinger status for its interface. + + OPNsense reports status='none' for healthy gateways. + Any other value (force_down, down, loss, delay, etc.) means degraded/down. 
+ """ + status = opnsense_status.get(interface) + if status is None: + # Interface not found in OPNsense gateways — assume up + return True + return status == 'none' def resolve_interface_name(interface): @@ -266,9 +304,9 @@ def main(): except json.JSONDecodeError: pass - # Simple ping-based health check (dpinger handles real gateway monitoring) - target = gateway_config.get('healthCheckTarget', '8.8.8.8') - is_healthy = quick_ping_check(target, count=1, timeout=2) + interface = gateway_config.get('interface', '') + opnsense_status = get_opnsense_gateway_status() + is_healthy = is_gateway_up(interface, opnsense_status) result = { 'uuid': uuid, 'status': 'up' if is_healthy else 'down' @@ -300,6 +338,9 @@ def main(): tree = ET.parse('/conf/config.xml') root = tree.getroot() + # Query OPNsense's own gateway status once for all gateways + opnsense_status = get_opnsense_gateway_status() + gateways_node = root.find('.//OPNsense/HCloudDNS/gateways') if gateways_node is not None: for gw in gateways_node.findall('gateway'): @@ -311,9 +352,9 @@ def main(): if enabled != '1': continue + name = gw.findtext('name', '') interface = gw.findtext('interface', '') checkip_method = gw.findtext('checkipMethod', 'web_ipify') - health_target = gw.findtext('healthCheckTarget', '8.8.8.8') # Resolve interface and get IP phys_if = resolve_interface_name(interface) @@ -327,8 +368,8 @@ def main(): local_ip = get_interface_ip(phys_if, ipv6=False) ipv4 = get_web_ip(checkip_method, phys_if, source_ip=local_ip, ipv6=False) - # Quick health check (ping only for speed) - status = 'up' if quick_ping_check(health_target, count=1, timeout=2) else 'down' + # Use OPNsense's dpinger-based gateway status (matched by interface) + status = 'up' if is_gateway_up(interface, opnsense_status) else 'down' result['gateways'][uuid] = { 'status': status, diff --git a/net/hclouddns/src/opnsense/scripts/HCloudDNS/update_records_v2.py b/net/hclouddns/src/opnsense/scripts/HCloudDNS/update_records_v2.py index 
7816ba511..765482235 100755 --- a/net/hclouddns/src/opnsense/scripts/HCloudDNS/update_records_v2.py +++ b/net/hclouddns/src/opnsense/scripts/HCloudDNS/update_records_v2.py @@ -18,7 +18,7 @@ import syslog sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from hcloud_api import HCloudAPI -from gateway_health import get_gateway_ip, write_state_file +from gateway_health import get_gateway_ip, get_opnsense_gateway_status, is_gateway_up, write_state_file STATE_FILE = '/var/run/hclouddns_state.json' SIMULATION_FILE = '/var/run/hclouddns_simulation.json' @@ -377,8 +377,8 @@ def send_batch_notification(config, batch_results): Send a single batch notification summarizing all DNS changes. Title format: - - Failover: "HCloudDNS: Failover WAN_Primary → WAN_Backup" - - Failback: "HCloudDNS: Failback WAN_Backup → WAN_Primary" + - Failover: "HCloudDNS: Failover WAN_Primary -> WAN_Backup" + - Failback: "HCloudDNS: Failback WAN_Backup -> WAN_Primary" - DynIP: "HCloudDNS: DynIP Update on WAN_Primary" - Error: "HCloudDNS: Error" @@ -405,7 +405,7 @@ def send_batch_notification(config, batch_results): first_fo = failovers[0] from_gw = first_fo.get('from_gateway', '?') to_gw = first_fo.get('to_gateway', '?') - title = f"HCloudDNS: Failover {from_gw} → {to_gw}" + title = f"HCloudDNS: Failover {from_gw} -> {to_gw}" tags = 'warning,hclouddns' records_to_show = failovers @@ -414,7 +414,7 @@ def send_batch_notification(config, batch_results): first_fb = failbacks[0] from_gw = first_fb.get('from_gateway', '?') to_gw = first_fb.get('to_gateway', '?') - title = f"HCloudDNS: Failback {from_gw} → {to_gw}" + title = f"HCloudDNS: Failback {from_gw} -> {to_gw}" tags = 'white_check_mark,hclouddns' records_to_show = failbacks @@ -724,6 +724,7 @@ def save_runtime_state(state): def check_all_gateways(config, state): """Check health and get IPs for all gateways""" simulation = load_simulation() + opnsense_status = get_opnsense_gateway_status() for uuid, gw in config['gateways'].items(): if 
not gw['enabled']: @@ -760,10 +761,11 @@ def check_all_gateways(config, state): log(f"SIMULATION: Gateway '{gw['name']}' is DOWN (simulated)", syslog.LOG_WARNING) continue - # Determine status based on IP availability - # (dpinger handles real gateway health via syshook - this is a fallback check) + # Use OPNsense's dpinger status (matched by interface) as primary health source + interface = gw.get('interface', '') + dpinger_healthy = is_gateway_up(interface, opnsense_status) has_ip = gw_state['ipv4'] or gw_state['ipv6'] - new_status = 'up' if has_ip else 'down' + new_status = 'up' if (dpinger_healthy and has_ip) else 'down' old_status = gw_state.get('status', 'unknown') gw_state['lastCheck'] = int(time.time()) @@ -776,7 +778,8 @@ def check_all_gateways(config, state): else: gw_state['failCount'] = gw_state.get('failCount', 0) + 1 if old_status == 'up': - log(f"Gateway '{gw['name']}' is DOWN (failCount: {gw_state['failCount']})", syslog.LOG_WARNING) + reason = 'no IP' if not has_ip else 'dpinger: down' + log(f"Gateway '{gw['name']}' is DOWN ({reason}, failCount: {gw_state['failCount']})", syslog.LOG_WARNING) gw_state['status'] = new_status