diff --git a/sysutils/autorollback/Makefile b/sysutils/autorollback/Makefile new file mode 100644 index 000000000..3e8a6d82a --- /dev/null +++ b/sysutils/autorollback/Makefile @@ -0,0 +1,8 @@ +PLUGIN_NAME= autorollback +PLUGIN_VERSION= 1.0 +PLUGIN_COMMENT= Automatic configuration rollback with safe mode +PLUGIN_MAINTAINER= github.immobile762@passmail.net +PLUGIN_WWW= https://github.com/mplind/os-autorollback +PLUGIN_TIER= 2 + +.include "../../Mk/plugins.mk" diff --git a/sysutils/autorollback/pkg-descr b/sysutils/autorollback/pkg-descr new file mode 100644 index 000000000..7c5bf3124 --- /dev/null +++ b/sysutils/autorollback/pkg-descr @@ -0,0 +1,26 @@ +Automatic configuration rollback plugin for OPNsense. + +Provides a "Safe Mode" that snapshots the current configuration before +changes are made. If the administrator does not confirm the changes within +a configurable timeout, the system automatically reverts to the previous +known-good configuration. + +Features: + +* Timer-based auto-revert with configurable timeout (default 120 seconds) +* Persistent countdown banner in the web UI for confirmation +* CLI confirmation via configctl for SSH users +* Always-on connectivity watchdog with configurable health checks +* Crash-safe: survives reboots via early boot recovery +* Dashboard widget showing real-time status +* Git backup integration (if os-git-backup is installed) +* Configurable rollback method: full reboot, service reload, or targeted restart + +Inspired by Juniper JUNOS "commit confirmed" and MikroTik Safe Mode. 
/**
 * Describe the auto-rollback service to the service manager.
 *
 * Nothing is registered unless the plugin's "Enabled" setting is '1'.
 * Both "start" and "restart" map to the same configd action; "stop" maps
 * to the safe mode cancel action.
 *
 * @return array list of service definitions (empty when disabled)
 */
function autorollback_services()
{
    $model = new \OPNsense\AutoRollback\AutoRollback();

    if ((string)$model->general->Enabled != '1') {
        return [];
    }

    $startCommand = ['autorollback safemode.start'];

    return [
        [
            'description' => gettext('Auto Rollback Safe Mode'),
            'configd' => [
                'restart' => $startCommand,
                'start' => $startCommand,
                'stop' => ['autorollback safemode.cancel'],
            ],
            'name' => 'autorollback',
            // There is no PID file to probe; the plugin tracks state in files.
            'nocheck' => true,
        ],
    ];
}

/**
 * Declare the syslog facilities used by the auto-rollback scripts.
 *
 * @return array syslog configuration keyed by log target name
 */
function autorollback_syslog()
{
    $facilities = ['autorollback', 'autorollback-recovery'];

    return [
        'autorollback' => [
            'facility' => $facilities,
        ],
    ];
}
Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +""" +""" +OPNsense Auto Rollback - Config Change Hook (syshook/config) + +This script is called by OPNsense every time config.xml is saved. +It receives the backup file path as its first argument. + +Purpose: + 1. Record the config change for the connectivity watchdog + 2. Record BOTH the new backup AND the previous backup (for correct rollback target) + 3. Skip recording if a rollback restore is in progress (re-entrancy guard) + 4. Skip recording if a firmware update is in progress + +This script MUST be fast and lightweight — it runs synchronously +in the config save pipeline. 
import json
import os
import sys
import time
import glob
import re

# Paths
VOLATILE_DIR = '/var/run/autorollback'
RESTORE_LOCK = os.path.join(VOLATILE_DIR, 'restoring.lock')
LAST_CONFIG_FILE = os.path.join(VOLATILE_DIR, 'last_config_change')
FIRMWARE_LOCK = '/tmp/pkg_upgrade.progress'
CONFIG_BACKUP_DIR = '/conf/backup'

# Same regex as common.py to match only timestamped backups
BACKUP_TIMESTAMP_RE = re.compile(r'^config-\d+(\.\d+)?(_\d+)?\.xml$')


def _backup_sort_key(path):
    """Return a chronological sort key for a timestamped backup path.

    Only call this for names accepted by BACKUP_TIMESTAMP_RE. The name is
    ``config-<seconds>[.<fraction>][_<sequence>].xml``; seconds and sequence
    compare as integers, the fraction digit-by-digit as a string (which is
    how decimal fractions order). A plain string sort would put
    ``config-100.5.xml`` before ``config-100.xml`` and ``_10`` before ``_2``.
    """
    stem = os.path.basename(path)[len('config-'):-len('.xml')]
    stamp, _, sequence = stem.partition('_')
    seconds, _, fraction = stamp.partition('.')
    return (int(seconds), fraction, int(sequence or 0))


def get_previous_backup(current_backup, backup_dir=None):
    """
    Find the backup file that existed BEFORE the current one.
    This is the correct rollback target for the watchdog.

    Args:
        current_backup: path of the backup belonging to this save, or ''.
        backup_dir: directory to scan; defaults to CONFIG_BACKUP_DIR.

    Returns:
        Path of the preceding timestamped backup, or '' when there is none.
        When ``current_backup`` is not among the backups found, the
        second-to-last backup is returned (the newest one is assumed to
        belong to the save in progress).
    """
    if backup_dir is None:
        backup_dir = CONFIG_BACKUP_DIR
    try:
        candidates = [
            path for path in glob.glob(os.path.join(backup_dir, 'config-*.xml'))
            if BACKUP_TIMESTAMP_RE.match(os.path.basename(path))
        ]
        candidates.sort(key=_backup_sort_key)
    except (OSError, ValueError):
        return ''

    # Match on the file name rather than the full string so that a
    # differently spelled path to the same backup is still recognised.
    names = [os.path.basename(path) for path in candidates]
    current_name = os.path.basename(current_backup) if current_backup else ''
    if current_name in names:
        position = names.index(current_name)
        return candidates[position - 1] if position > 0 else ''
    if len(candidates) >= 2:
        return candidates[-2]
    return ''


def _restore_in_progress():
    """Return True when another process holds the restore lock.

    A lock file nobody holds is stale and is removed. A lock file that
    disappears before it can be opened means no restore is running.
    """
    import fcntl  # local import: only needed when a lock file exists

    try:
        lock_file = open(RESTORE_LOCK, 'r')
    except FileNotFoundError:
        return False
    except OSError:
        return True  # cannot inspect the lock: stay on the safe side
    with lock_file:
        try:
            fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            return True  # lock held = restore in progress
        # Got the lock = stale file, clean up
        fcntl.flock(lock_file, fcntl.LOCK_UN)
        try:
            os.unlink(RESTORE_LOCK)
        except OSError:
            pass
    return False


def _record_config_change(state):
    """Write ``state`` as JSON to LAST_CONFIG_FILE via temp file + rename.

    The rename keeps a concurrent reader from ever seeing a half-written
    file. Raises OSError when the file cannot be written.
    """
    import tempfile

    fd, tmp_path = tempfile.mkstemp(dir=VOLATILE_DIR, prefix='.last_config_change_')
    try:
        with os.fdopen(fd, 'w') as handle:
            json.dump(state, handle)
        # mkstemp() creates the file 0600; widen to the conventional 0644
        # a plain open() would normally have produced.
        os.chmod(tmp_path, 0o644)
        os.replace(tmp_path, LAST_CONFIG_FILE)
    except BaseException:
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise


def main():
    """Record this config save for the watchdog.

    Reads the backup path from ``sys.argv[1]`` (may be absent). Does nothing
    while a restore or a firmware update is in progress. Never raises for
    filesystem problems: this hook runs inside the config save pipeline.
    """
    # Get backup file path from argument
    backup_file = sys.argv[1] if len(sys.argv) > 1 else ''

    # Re-entrancy guard: skip if we're restoring a config
    if os.path.isfile(RESTORE_LOCK) and _restore_in_progress():
        return

    # Skip during firmware updates
    if os.path.isfile(FIRMWARE_LOCK):
        return

    # Find the previous backup (the one BEFORE this config change)
    previous_backup = get_previous_backup(backup_file)

    state = {
        'time': time.time(),
        'backup': backup_file,
        'previous_backup': previous_backup,
    }
    try:
        # Directory creation is inside the guard too: it must not be able
        # to break the save pipeline either.
        os.makedirs(VOLATILE_DIR, mode=0o750, exist_ok=True)
        _record_config_change(state)
    except OSError:
        pass  # Non-critical — don't break the config save pipeline


if __name__ == '__main__':
    main()
IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +""" +""" +OPNsense Auto Rollback - Early Boot Recovery (syshook/early) + +This is the TERTIARY rollback trigger — the last line of defense. +It runs during early boot, BEFORE networking starts. + +Scenario: + 1. Admin enters safe mode + 2. Makes a config change that breaks something + 3. System crashes or reboots (or admin reboots to try to fix) + 4. System starts booting with the BAD config + 5. THIS SCRIPT fires before networking starts + 6. Detects the persistent state file with an expired timer + 7. Restores the known-good config.xml BEFORE any service reads it + 8. System boots with the known-good config + +The persistent state is stored at /conf/autorollback_pending.json +(on persistent storage, NOT tmpfs). + +This script must be FAST and SELF-CONTAINED — no external dependencies +beyond Python stdlib and the config file. 
import json
import os
import shutil
import sys
import syslog
import tempfile
import time
import xml.etree.ElementTree as ET

PERSISTENT_STATE = '/conf/autorollback_pending.json'
CONFIG_XML = '/conf/config.xml'
CONFIG_CACHE = '/tmp/config.cache'
CONFIG_BACKUP_DIR = '/conf/backup'


syslog.openlog('autorollback-recovery', syslog.LOG_PID, syslog.LOG_LOCAL4)


def log(msg):
    """Log to syslog at warning level; logging problems are ignored."""
    try:
        syslog.syslog(syslog.LOG_WARNING, msg)
    except Exception:
        # Logging must never be the reason early boot recovery stops.
        pass


def validate_config(path):
    """Quick validation of a config.xml file.

    Returns True when ``path`` parses as XML, its root element is
    ``opnsense`` or ``pfsense`` and it has both a ``system`` and an
    ``interfaces`` child. Any parse or I/O failure yields False.
    """
    try:
        root = ET.parse(path).getroot()
    except Exception:
        return False
    if root.tag not in ('opnsense', 'pfsense'):
        return False
    return root.find('system') is not None and root.find('interfaces') is not None


def validate_backup_path(path):
    """Validate that backup_file is within allowed directories (defense-in-depth).

    The path is resolved with ``os.path.realpath`` and must equal, or lie
    below, the backup directory or the directory holding config.xml.
    Values that cannot be resolved (e.g. ``None``) are rejected.
    """
    allowed = (CONFIG_BACKUP_DIR, os.path.dirname(CONFIG_XML))
    try:
        real = os.path.realpath(path)
        for directory in allowed:
            real_dir = os.path.realpath(directory)
            if real == real_dir or real.startswith(real_dir + os.sep):
                return True
    except (TypeError, ValueError):
        pass
    return False


def _load_pending_state():
    """Return the persistent state as a dict, or None if unreadable/invalid."""
    try:
        with open(PERSISTENT_STATE, 'r') as handle:
            state = json.load(handle)
    except (OSError, ValueError):
        # ValueError covers both JSON and text decoding errors.
        return None
    return state if isinstance(state, dict) else None


def _discard_pending_state():
    """Remove the persistent state file, ignoring filesystem errors."""
    try:
        os.unlink(PERSISTENT_STATE)
    except OSError:
        pass


def _restore_config(backup_file):
    """Replace config.xml with ``backup_file`` via temp file + rename.

    Raises on any failure of the restore itself; the temp file is removed
    in that case and config.xml is left untouched.
    """
    # Safety copy of the current (bad) config. Best effort: failing to keep
    # the bad config must not prevent restoring the good one.
    safety = os.path.join(CONFIG_BACKUP_DIR, 'config-pre-boot-recovery.xml')
    if os.path.isfile(CONFIG_XML):
        try:
            shutil.copy2(CONFIG_XML, safety)
        except OSError as e:
            log('EARLY BOOT RECOVERY: Could not save current config to %s: %s' % (safety, e))

    # Capture original ownership
    try:
        st = os.stat(CONFIG_XML)
        orig_uid, orig_gid = st.st_uid, st.st_gid
    except OSError:
        orig_uid, orig_gid = 0, 0

    fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(CONFIG_XML),
                                    prefix='.config_recovery_')
    try:
        os.close(fd)
        shutil.copy2(backup_file, tmp_path)
        os.chmod(tmp_path, 0o640)
        try:
            os.chown(tmp_path, orig_uid, orig_gid)
        except PermissionError:
            pass
        # Flush the copy to disk before it becomes config.xml, so a crash
        # right after the rename cannot leave a truncated config behind.
        sync_fd = os.open(tmp_path, os.O_RDONLY)
        try:
            os.fsync(sync_fd)
        finally:
            os.close(sync_fd)
        os.rename(tmp_path, CONFIG_XML)
    except Exception:
        # Clean up temp file on failure
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise


def main():
    """Restore the known-good config if an expired safe mode is pending.

    Returns without action when there is no pending state, the state is
    unreadable or malformed, it is not a safe mode state, or its timer has
    not expired yet. On a failed restore the state file is kept so the next
    boot retries; otherwise it is removed.
    """
    # Check for persistent state file
    if not os.path.isfile(PERSISTENT_STATE):
        return  # No pending rollback — normal boot

    state = _load_pending_state()
    if state is None:
        return  # Corrupt state file — skip

    # Only act on safe mode states
    if state.get('mode') != 'safemode':
        return

    expiry = state.get('expiry_time', 0)
    if (isinstance(expiry, bool) or not isinstance(expiry, (int, float))
            or expiry != expiry):  # last test rejects NaN
        log('EARLY BOOT RECOVERY: Ignoring pending state with invalid expiry_time: %r' % (expiry,))
        return

    now = time.time()
    if now < expiry:
        # Timer hasn't expired — don't roll back yet
        return

    # Timer expired: the system went down during safe mode, so the
    # known-good config has to be restored now, before services start.
    backup_file = state.get('backup_file', '')
    if not backup_file or not validate_backup_path(backup_file) or not os.path.isfile(backup_file):
        log('EARLY BOOT RECOVERY: Expired safe mode found but backup missing or invalid path: %s' % backup_file)
        _discard_pending_state()  # stale state
        return

    # Validate the backup
    if not validate_config(backup_file):
        log('EARLY BOOT RECOVERY: Backup file is invalid: %s' % backup_file)
        _discard_pending_state()
        return

    # --- PERFORM EARLY BOOT ROLLBACK ---
    log('=== EARLY BOOT RECOVERY: Safe mode expired %d seconds ago. Restoring config from %s ===' % (
        int(now - expiry), backup_file))

    try:
        _restore_config(backup_file)
    except Exception as e:
        log('EARLY BOOT RECOVERY FAILED: %s — state preserved for retry on next boot' % str(e))
        return

    # The config is restored at this point. A cache that cannot be removed
    # must not keep the pending state alive, or the next boot would roll
    # back a second time.
    try:
        if os.path.isfile(CONFIG_CACHE):
            os.unlink(CONFIG_CACHE)
    except OSError as e:
        log('EARLY BOOT RECOVERY: Could not remove config cache: %s' % str(e))

    log('EARLY BOOT RECOVERY: Config restored successfully. System will boot with known-good config.')

    # Only clean up persistent state on successful recovery
    _discard_pending_state()


if __name__ == '__main__':
    main()
+ * + * @return array result + */ + public function cancelAction() + { + if ($this->request->isPost()) { + $backend = new Backend(); + $response = $backend->configdRun('autorollback safemode.cancel'); + $result = json_decode(trim($response), true); + + if ($result === null) { + return ['status' => 'error', 'message' => 'Backend returned invalid response']; + } + + return $result; + } + return ['status' => 'error', 'message' => 'POST required']; + } + + /** + * Extend the safe mode countdown timer. + * + * @return array result + */ + public function extendAction() + { + if ($this->request->isPost()) { + $backend = new Backend(); + + $seconds = $this->request->getPost('seconds', 'int', 60); + $response = $backend->configdpRun('autorollback safemode.extend', [(string)$seconds]); + $result = json_decode(trim($response), true); + + if ($result === null) { + return ['status' => 'error', 'message' => 'Backend returned invalid response']; + } + + return $result; + } + return ['status' => 'error', 'message' => 'POST required']; + } + + /** + * Get current auto-rollback status. 
+ * + * @return array status information + */ + public function statusAction() + { + $backend = new Backend(); + $response = $backend->configdRun('autorollback status'); + $result = json_decode(trim($response), true); + + if ($result === null) { + return [ + 'status' => 'error', + 'message' => 'Backend returned invalid response', + 'system_state' => 'unknown', + ]; + } + + return $result; + } +} diff --git a/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/Api/SettingsController.php b/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/Api/SettingsController.php new file mode 100644 index 000000000..2ac69b426 --- /dev/null +++ b/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/Api/SettingsController.php @@ -0,0 +1,42 @@ +view->pick('OPNsense/AutoRollback/index'); + $this->view->generalForm = $this->getForm('general'); + } +} diff --git a/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/forms/general.xml b/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/forms/general.xml new file mode 100644 index 000000000..1a7b6682e --- /dev/null +++ b/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/forms/general.xml @@ -0,0 +1,80 @@ +
+ + autorollback.general.Enabled + + checkbox + Enable the auto-rollback safe mode and connectivity watchdog features. + + + header + + + + autorollback.general.SafeModeTimeout + + text + How many seconds to wait for confirmation before automatically rolling back. Default: 120 seconds. Range: 30-3600. + + + autorollback.general.RollbackMethod + + dropdown + How to apply the restored configuration. Full reboot is most reliable (recommended). Service reload is faster but may not apply kernel tunables or interface changes. + + + header + + + + autorollback.general.WatchdogEnabled + + checkbox + Enable the always-on connectivity watchdog. Monitors system health after config changes and auto-reverts if connectivity is lost. + + + autorollback.general.WatchdogGracePeriod + + text + Seconds to wait after a config change before running health checks. Allows services time to restart. Default: 60 seconds. + + + autorollback.general.WatchdogFailThreshold + + text + Number of consecutive failed health checks before triggering a rollback. Default: 3. + + + autorollback.general.WatchdogCheckCommand + + text + Shell command to run for connectivity verification. Use %gateway% as placeholder for the default gateway IP. Default: ping -c 1 -W 3 -t 5 %gateway% + + + autorollback.general.WatchdogCheckPattern + + text + Regex pattern to match in the check command output for a successful result. Default: "1 packets received" + + + autorollback.general.WatchdogCheckCommand2 + + text + Optional second health check command. Example: host google.com (DNS resolution test). Leave empty to disable. + + + autorollback.general.WatchdogCheckPattern2 + + text + Regex pattern for the secondary check command. + + + header + + + + autorollback.general.LogRollbacks + + checkbox + Log all safe mode and rollback events to syslog. + +
diff --git a/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/ACL/ACL.xml b/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/ACL/ACL.xml new file mode 100644 index 000000000..399124178 --- /dev/null +++ b/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/ACL/ACL.xml @@ -0,0 +1,9 @@ + + + WebCfg - Auto Rollback: Settings + + ui/autorollback/* + api/autorollback/* + + + diff --git a/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/AutoRollback.php b/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/AutoRollback.php new file mode 100644 index 000000000..70bef32fc --- /dev/null +++ b/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/AutoRollback.php @@ -0,0 +1,36 @@ + + //OPNsense/autorollback + 1.0.0 + Auto Rollback configuration + + + + + 0 + Y + + + + + 120 + 30 + 3600 + Timeout must be between 30 and 3600 seconds. + + + + + reboot + + Full reboot (most reliable, recommended) + Service reload (faster, may miss kernel tunables) + + + + + + 0 + Y + + + + + 60 + 15 + 600 + Grace period must be between 15 and 600 seconds. + + + + + 3 + 1 + 10 + Fail threshold must be between 1 and 10. + + + + + ping -c 1 -W 3 -t 5 %gateway% + Y + /^.{1,512}$/ + Check command must be 1-512 characters. + + + + + 1 packets received + Y + /^.{1,256}$/ + Check pattern must be 1-256 characters. 
+ + + + + + N + /^.{0,512}$/ + + + + + + N + /^.{0,256}$/ + + + + + 1 + Y + + + + diff --git a/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/Menu/Menu.xml b/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/Menu/Menu.xml new file mode 100644 index 000000000..b66bad86c --- /dev/null +++ b/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/Menu/Menu.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/sysutils/autorollback/src/opnsense/mvc/app/views/OPNsense/AutoRollback/index.volt b/sysutils/autorollback/src/opnsense/mvc/app/views/OPNsense/AutoRollback/index.volt new file mode 100644 index 000000000..3eef46d1a --- /dev/null +++ b/sysutils/autorollback/src/opnsense/mvc/app/views/OPNsense/AutoRollback/index.volt @@ -0,0 +1,402 @@ +{# + OPNsense Auto Rollback - Settings & Safe Mode Control Page + + This page has two sections: + 1. Safe Mode control panel (top) - Start/Confirm/Cancel with live countdown + 2. Settings form (bottom) - Plugin configuration +#} + + + + + + +
+
+
+

Safe Mode

+
Make configuration changes safely with automatic rollback protection
+
+
+ Disabled +
+
+ + +
+

+ Enter safe mode to snapshot your current configuration before making changes. + If you don't confirm within the timeout, the system will automatically revert. +

+
+ +
+
+ SSH: configctl autorollback safemode.start +
+
+ + + + + + +
+ + + + +
+
+ {{ partial("layout_partials/base_form", ['fields': generalForm, 'id': 'frm_GeneralSettings']) }} + +
+
+ +
+
+
diff --git a/sysutils/autorollback/src/opnsense/scripts/autorollback/lib/__init__.py b/sysutils/autorollback/src/opnsense/scripts/autorollback/lib/__init__.py new file mode 100644 index 000000000..2472936e0 --- /dev/null +++ b/sysutils/autorollback/src/opnsense/scripts/autorollback/lib/__init__.py @@ -0,0 +1,28 @@ +""" + Copyright (c) 2026 MP Lindsey + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+""" +""" +OPNsense Auto Rollback - Shared library +""" diff --git a/sysutils/autorollback/src/opnsense/scripts/autorollback/lib/common.py b/sysutils/autorollback/src/opnsense/scripts/autorollback/lib/common.py new file mode 100644 index 000000000..487cbeddd --- /dev/null +++ b/sysutils/autorollback/src/opnsense/scripts/autorollback/lib/common.py @@ -0,0 +1,444 @@ +#!/usr/local/bin/python3 +""" + Copyright (c) 2026 MP Lindsey + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +""" +""" +OPNsense Auto Rollback - Common library +Shared constants, state management, and utility functions. 
import json
import os
import re
import sys
import time
import fcntl
import glob
import ipaddress
import shlex
import signal
import subprocess
import syslog
import secrets
import tempfile
import xml.etree.ElementTree as ET

# --- Path constants ---
VOLATILE_DIR = '/var/run/autorollback'
PERSISTENT_STATE_FILE = '/conf/autorollback_pending.json'
TIMER_PID_FILE = os.path.join(VOLATILE_DIR, 'timer.pid')
RESTORE_LOCK_FILE = os.path.join(VOLATILE_DIR, 'restoring.lock')
SESSION_TOKEN_FILE = os.path.join(VOLATILE_DIR, 'session.token')
WATCHDOG_FAIL_COUNT_FILE = os.path.join(VOLATILE_DIR, 'watchdog_failures')
WATCHDOG_LAST_CONFIG_FILE = os.path.join(VOLATILE_DIR, 'last_config_change')

CONFIG_XML = '/conf/config.xml'
CONFIG_BACKUP_DIR = '/conf/backup'
CONFIG_CACHE = '/tmp/config.cache'

# Firmware update indicators
FIRMWARE_LOCK = '/tmp/pkg_upgrade.progress'
FIRMWARE_PROCS = ['opnsense-update', 'opnsense-bootstrap', 'opnsense-patch']

# Regex for valid timestamped backup filenames
BACKUP_TIMESTAMP_RE = re.compile(r'^config-\d+(\.\d+)?(_\d+)?\.xml$')


# --- Syslog setup (open once at module load, never close) ---
syslog.openlog('autorollback', syslog.LOG_PID, syslog.LOG_LOCAL4)


def log_info(msg):
    """Send ``msg`` to syslog at INFO level."""
    syslog.syslog(syslog.LOG_INFO, msg)


def log_warning(msg):
    """Send ``msg`` to syslog at WARNING level."""
    syslog.syslog(syslog.LOG_WARNING, msg)


def log_error(msg):
    """Send ``msg`` to syslog at ERR level."""
    syslog.syslog(syslog.LOG_ERR, msg)


# --- Directory management ---
def ensure_volatile_dir():
    """Create the volatile state directory if it doesn't exist."""
    os.makedirs(VOLATILE_DIR, mode=0o750, exist_ok=True)


# --- Settings reader (single source of truth) ---
def read_model_settings(config_path=None):
    """Read all plugin settings from config.xml. Used by all scripts.

    Args:
        config_path: config file to read; defaults to CONFIG_XML.

    Returns:
        A dict that always holds every setting key. Each value falls back
        to its default on its own: a missing element, a non-integer number
        or a blank required string affects only that one setting. If the
        file cannot be parsed, all defaults are returned and a warning is
        logged.
    """
    settings = {
        'enabled': False,
        'timeout': 120,
        'rollback_method': 'reboot',
        'watchdog_enabled': False,
        'grace_period': 60,
        'fail_threshold': 3,
        'check_command': 'ping -c 1 -W 3 -t 5 %gateway%',
        'check_pattern': '1 packets received',
        'check_command_2': '',
        'check_pattern_2': '',
        'log_rollbacks': True,
    }
    path = CONFIG_XML if config_path is None else config_path
    try:
        general = ET.parse(path).getroot().find('.//OPNsense/autorollback/general')
    except Exception as e:
        log_warning('Could not read model settings: %s' % str(e))
        return settings
    if general is None:
        return settings

    def flag(tag, key):
        raw = general.findtext(tag)
        return settings[key] if raw is None else raw == '1'

    def number(tag, key):
        raw = general.findtext(tag)
        if raw is None:
            return settings[key]
        try:
            return int(raw)
        except ValueError:
            # One bad number must not throw away every other setting.
            log_warning('Invalid value %r for %s, using default %d' % (raw, tag, settings[key]))
            return settings[key]

    def text(tag, key, allow_blank=False):
        raw = general.findtext(tag)
        if raw is None or (not allow_blank and not raw.strip()):
            return settings[key]
        return raw

    settings.update({
        'enabled': flag('Enabled', 'enabled'),
        'timeout': number('SafeModeTimeout', 'timeout'),
        'rollback_method': text('RollbackMethod', 'rollback_method'),
        'watchdog_enabled': flag('WatchdogEnabled', 'watchdog_enabled'),
        'grace_period': number('WatchdogGracePeriod', 'grace_period'),
        'fail_threshold': number('WatchdogFailThreshold', 'fail_threshold'),
        'check_command': text('WatchdogCheckCommand', 'check_command'),
        'check_pattern': text('WatchdogCheckPattern', 'check_pattern'),
        # The secondary check is optional: blank means "disabled".
        'check_command_2': text('WatchdogCheckCommand2', 'check_command_2', allow_blank=True),
        'check_pattern_2': text('WatchdogCheckPattern2', 'check_pattern_2', allow_blank=True),
        'log_rollbacks': flag('LogRollbacks', 'log_rollbacks'),
    })
    return settings


# --- Persistent state management ---
def read_persistent_state():
    """Read the persistent state file. Returns dict or None.

    None is returned when the file is missing, unreadable, not valid JSON,
    or valid JSON that is not an object (callers use ``.get`` on the result).
    """
    try:
        with open(PERSISTENT_STATE_FILE, 'r') as f:
            state = json.load(f)
    except FileNotFoundError:
        return None
    except (OSError, ValueError) as e:
        # ValueError covers JSON errors as well as text decoding errors.
        log_warning('Failed to read persistent state: %s' % str(e))
        return None
    if not isinstance(state, dict):
        log_warning('Failed to read persistent state: expected a JSON object')
        return None
    return state


def write_persistent_state(state):
    """Write persistent state atomically using temp file + rename.

    Raises:
        OSError: the file could not be written (logged before re-raising).
        TypeError/ValueError: ``state`` is not JSON serializable.

    The temp file is removed on every failure path.
    """
    dir_name = os.path.dirname(PERSISTENT_STATE_FILE)
    fd_num = None
    tmp_path = None
    try:
        fd_num, tmp_path = tempfile.mkstemp(dir=dir_name, prefix='.autorollback_')
        handle = os.fdopen(fd_num, 'w')
        fd_num = None  # os.fdopen takes ownership
        with handle:
            json.dump(state, handle, indent=2)
            handle.flush()
            os.fsync(handle.fileno())
        os.rename(tmp_path, PERSISTENT_STATE_FILE)
        tmp_path = None  # Rename succeeded
    except OSError as e:
        log_error('Failed to write persistent state: %s' % str(e))
        raise
    finally:
        if fd_num is not None:
            os.close(fd_num)
        if tmp_path is not None:
            # Reached on any failure, including serialization errors.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass


def clear_persistent_state():
    """Remove the persistent state file."""
    try:
        if os.path.isfile(PERSISTENT_STATE_FILE):
            os.unlink(PERSISTENT_STATE_FILE)
    except OSError:
        pass


# --- Session token management ---
def generate_session_token():
    """Generate a cryptographically random session token for safe mode.

    The token is stored in SESSION_TOKEN_FILE (owner read/write only) and
    returned as a 64-character hex string.
    """
    token = secrets.token_hex(32)
    ensure_volatile_dir()
    fd = os.open(SESSION_TOKEN_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        # The mode passed to os.open() only applies when the file is
        # created; tighten a pre-existing file before writing the secret.
        os.fchmod(fd, 0o600)
        os.write(fd, token.encode())
    finally:
        os.close(fd)
    return token


def read_session_token():
    """Read the current session token, or None."""
    try:
        if os.path.isfile(SESSION_TOKEN_FILE):
            with open(SESSION_TOKEN_FILE, 'r') as f:
                return f.read().strip()
    except OSError:
        pass
    return None


def clear_session_token():
    """Remove the session token file."""
    try:
        if os.path.isfile(SESSION_TOKEN_FILE):
            os.unlink(SESSION_TOKEN_FILE)
    except OSError:
        pass
def is_restore_in_progress():
    """Report whether another process currently holds the restore lock.

    Returns:
        True when the lock file exists and its advisory lock is held by
        another process. False when the file is missing or unreadable, or
        when the lock could be taken (stale file, which is then removed).
    """
    try:
        lock_file = open(RESTORE_LOCK_FILE, 'r')
    except (IOError, OSError):
        # Missing or unreadable lock file: no restore is running.
        return False

    with lock_file:
        try:
            fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (BlockingIOError, OSError):
            return True  # Lock held - restore in progress
        # We got the lock, so nobody holds it and the file is stale. Remove
        # it while the lock is still ours; closing the file releases the lock.
        try:
            os.unlink(RESTORE_LOCK_FILE)
        except OSError:
            pass
        return False


def acquire_restore_lock():
    """Take the exclusive restore lock without blocking.

    Returns:
        The open, locked file object (pass it to release_restore_lock), or
        None when the lock is held elsewhere or the file cannot be opened.
    """
    ensure_volatile_dir()
    lock_file = None
    try:
        # Append mode: do not wipe the current holder's PID before we own
        # the lock (mode 'w' would truncate on open).
        lock_file = open(RESTORE_LOCK_FILE, 'a')
        fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
        lock_file.seek(0)
        lock_file.truncate()
        lock_file.write(str(os.getpid()))
        lock_file.flush()
        return lock_file
    except (BlockingIOError, IOError, OSError):
        # Do not leak the descriptor when the lock could not be obtained.
        if lock_file is not None:
            lock_file.close()
        return None


def release_restore_lock(fd):
    """Release the restore lock taken by acquire_restore_lock.

    Args:
        fd: the file object returned by acquire_restore_lock. None is
            accepted and ignored.
    """
    if fd is None:
        return
    try:
        # Remove the file while the lock is still held so that no other
        # process can lock a file we are about to delete.
        if os.path.isfile(RESTORE_LOCK_FILE):
            os.unlink(RESTORE_LOCK_FILE)
        fcntl.flock(fd, fcntl.LOCK_UN)
    except (IOError, OSError, ValueError):
        pass
    finally:
        # Always close, even if unlocking failed; closing drops the lock.
        try:
            fd.close()
        except (IOError, OSError):
            pass


# --- Timer PID management ---
def read_timer_pid():
    """Return the PID of the running background timer, or None.

    The PID file is removed when it is unreadable, holds a non-positive or
    non-numeric value, or names a process that cannot be signalled.
    """
    try:
        if os.path.isfile(TIMER_PID_FILE):
            with open(TIMER_PID_FILE, 'r') as f:
                pid = int(f.read().strip())
            # PIDs <= 0 address process groups / all processes in os.kill();
            # never hand such a value to kill_timer().
            if pid <= 0:
                raise ValueError('invalid timer PID: %d' % pid)
            # Signal 0 only checks that the process exists
            os.kill(pid, 0)
            return pid
    except (ValueError, OverflowError, ProcessLookupError, PermissionError,
            IOError, OSError):
        clean_timer_pid()
    return None


def write_timer_pid(pid):
    """Store the timer process PID in TIMER_PID_FILE."""
    ensure_volatile_dir()
    with open(TIMER_PID_FILE, 'w') as f:
        f.write(str(pid))


def clean_timer_pid():
    """Remove the timer PID file; a missing file is not an error."""
    try:
        if os.path.isfile(TIMER_PID_FILE):
            os.unlink(TIMER_PID_FILE)
    except OSError:
        pass


# --- Kill running timer ---
def kill_timer():
    """Stop the background timer process if one is running.

    Sends SIGTERM, polls for up to ten 0.1 s intervals, then sends SIGKILL
    if the process is still there. The PID file is removed in every case.
    """
    pid = read_timer_pid()
    if pid is not None:
        try:
            os.kill(pid, signal.SIGTERM)
            for _ in range(10):
                time.sleep(0.1)
                try:
                    os.kill(pid, 0)
                except ProcessLookupError:
                    break  # process is gone
            else:
                # Still alive after the grace period
                try:
                    os.kill(pid, signal.SIGKILL)
                except ProcessLookupError:
                    pass
        except (ProcessLookupError, PermissionError):
            pass
    clean_timer_pid()


# --- Safe mode state queries ---
def is_safe_mode_active():
    """Return True when persistent state says 'safemode' and either a timer
    process is alive or the stored expiry time lies in the future."""
    state = read_persistent_state()
    if state is None or state.get('mode') != 'safemode':
        return False
    if read_timer_pid() is not None:
        return True
    return time.time() < state.get('expiry_time', 0)


def get_safe_mode_info():
    """Get full safe mode status information.

    Returns:
        dict that always contains the keys active, mode, backup_file,
        backup_revision, start_time, expiry_time, remaining_seconds,
        timeout, rollback_method, timer_pid and token. Defaults are
        returned when no persistent state exists.
    """
    state = read_persistent_state()
    if state is None:
        return {
            'active': False,
            'mode': 'idle',
            'backup_file': '',
            'backup_revision': '',
            'start_time': 0,
            'expiry_time': 0,
            'remaining_seconds': 0,
            'timeout': 0,
            'rollback_method': 'reboot',
            'timer_pid': None,
            'token': None,
        }

    expiry = state.get('expiry_time', 0)
    remaining = max(0, expiry - time.time())
    # Probe the PID file once and reuse the answer
    timer_pid = read_timer_pid()

    return {
        'active': state.get('mode') == 'safemode' and (
            remaining > 0 or timer_pid is not None),
        'mode': state.get('mode', 'idle'),
        'backup_file': state.get('backup_file', ''),
        'backup_revision': state.get('backup_revision', ''),
        'start_time': state.get('start_time', 0),
        'expiry_time': expiry,
        'remaining_seconds': int(remaining),
        'timeout': state.get('timeout', 0),
        'rollback_method': state.get('rollback_method', 'reboot'),
        'timer_pid': timer_pid,
        'token': read_session_token(),
    }


# --- Firmware update detection ---
def is_firmware_update_running():
    """Return True when FIRMWARE_LOCK exists or any process named in
    FIRMWARE_PROCS is found by pgrep."""
    if os.path.isfile(FIRMWARE_LOCK):
        return True
    try:
        for proc_name in FIRMWARE_PROCS:
            result = subprocess.run(
                ['pgrep', '-x', proc_name],  # -x = exact match on process name
                capture_output=True, timeout=5
            )
            if result.returncode == 0:
                return True
    except (subprocess.TimeoutExpired, OSError):
        pass
    return False


# --- Config backup helpers ---
def _list_timestamped_backups():
    """Return the sorted paths of timestamped backups in CONFIG_BACKUP_DIR.

    Files whose basename does not match BACKUP_TIMESTAMP_RE (for example the
    safety copy config-pre-rollback.xml) are left out.
    """
    candidates = glob.glob(os.path.join(CONFIG_BACKUP_DIR, 'config-*.xml'))
    return sorted(
        path for path in candidates
        if BACKUP_TIMESTAMP_RE.match(os.path.basename(path))
    )


def get_latest_backup():
    """Get the path of the most recent timestamped config backup, or None."""
    backups = _list_timestamped_backups()
    return backups[-1] if backups else None


def get_previous_backup():
    """Get the second-most-recent timestamped backup (the one BEFORE the
    latest), or None when fewer than two exist."""
    backups = _list_timestamped_backups()
    return backups[-2] if len(backups) >= 2 else None


def get_backup_revision(backup_path):
    """Extract the revision part of a backup filename.

    Returns:
        The text between 'config-' and '.xml' in the basename, or None when
        the basename does not have that shape.
    """
    basename = os.path.basename(backup_path)
    if basename.startswith('config-') and basename.endswith('.xml'):
        return basename[len('config-'):-len('.xml')]
    return None


# --- Gateway detection ---
def get_default_gateway():
    """Get the default gateway IP from `route -n get default`.

    Returns:
        The gateway as a string that parsed as an IP address, or None when
        the command fails, prints no 'gateway:' line, or the value is not a
        valid address.
    """
    try:
        result = subprocess.run(
            ['route', '-n', 'get', 'default'],
            capture_output=True, text=True, timeout=5
        )
        for line in result.stdout.splitlines():
            line = line.strip()
            if line.startswith('gateway:'):
                gw = line.split(':', 1)[1].strip()
                # Validate it's a real IP address (prevents injection)
                ipaddress.ip_address(gw)
                return gw
    except (subprocess.TimeoutExpired, OSError, ValueError, IndexError):
        pass
    return None


# --- Configd helper ---
def configctl(cmd, timeout=60):
    """Run a configctl command.

    Args:
        cmd: the configctl arguments as one string; split with shlex.
        timeout: seconds to wait for the command.

    Returns:
        (ok, output): ok is True when the exit status is 0; output is the
        stripped stdout, or an error description when the configd socket is
        missing or the command could not be run.
    """
    try:
        if os.path.exists('/var/run/configd.socket'):
            result = subprocess.run(
                ['configctl'] + shlex.split(cmd),
                capture_output=True, text=True, timeout=timeout
            )
            return result.returncode == 0, result.stdout.strip()
        log_warning('configd socket not available, skipping configctl: %s' % cmd)
        return False, 'configd unavailable'
    except (subprocess.TimeoutExpired, OSError) as e:
        log_warning('configctl failed for "%s": %s' % (cmd, str(e)))
        return False, str(e)
"""
OPNsense Auto Rollback - Rollback Executor

This script performs the actual configuration rollback. It is called by:
  1. timer_daemon.py (on timer expiry)
  2. safemode.py cancel (manual cancel)
  3. watchdog.py (on connectivity failure)
  4. 10-autorollback-recovery (early boot recovery)

Safety features:
  - Acquires exclusive restore lock (prevents re-entrancy)
  - Validates backup file path (must be within /conf/)
  - Validates backup file content before restore
  - Creates safety backup before overwriting config
  - Atomic restore via temp file + fsync + rename
  - Preserves original config.xml ownership
  - Removes config cache to force fresh read
  - Supports two rollback methods: full reboot or service reload
  - Falls back to direct script execution if configd is unavailable
  - Logs everything to syslog

Usage: rollback.py <backup_file> <rollback_method>
    rollback_method: "reboot" or "reload"
"""

import json
import os
import shutil
import subprocess
import sys
import tempfile
import time
import xml.etree.ElementTree as ET

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from lib.common import (
    log_info, log_warning, log_error,
    acquire_restore_lock, release_restore_lock,
    is_firmware_update_running,
    CONFIG_XML, CONFIG_CACHE, CONFIG_BACKUP_DIR
)

# Allowed directories for backup files (path traversal defense)
ALLOWED_BACKUP_DIRS = (
    os.path.realpath(CONFIG_BACKUP_DIR),
    os.path.realpath('/conf'),
)

CONFIGD_SOCKET = '/var/run/configd.socket'


def validate_backup_path(path):
    """
    Validate that a backup file path is within allowed directories.

    Symlinks are resolved first, so a link pointing outside
    ALLOWED_BACKUP_DIRS is rejected. Returns True or False.
    """
    real_path = os.path.realpath(path)
    return any(
        real_path == allowed_dir or real_path.startswith(allowed_dir + os.sep)
        for allowed_dir in ALLOWED_BACKUP_DIRS
    )


def validate_config_xml(path):
    """Validate that a file is a parseable OPNsense config.xml.

    Returns:
        (valid, message): valid is False when the XML does not parse, the
        root tag is neither 'opnsense' nor 'pfsense', or the <system> or
        <interfaces> child is missing.
    """
    try:
        root = ET.parse(path).getroot()
        # Basic sanity: must have an <opnsense> or legacy <pfsense> root
        if root.tag not in ('opnsense', 'pfsense'):
            return False, 'Root element is "%s", expected "opnsense"' % root.tag
        # Name the missing section so the two failures can be told apart
        for section in ('system', 'interfaces'):
            if root.find(section) is None:
                return False, 'Missing <%s> section' % section
        return True, 'Valid'
    except ET.ParseError as e:
        return False, 'XML parse error: %s' % str(e)
    except Exception as e:
        return False, 'Validation error: %s' % str(e)


def _get_file_ownership(path):
    """Get the uid/gid of an existing file. Returns (uid, gid) or None."""
    try:
        st = os.stat(path)
        return st.st_uid, st.st_gid
    except OSError:
        return None


def _default_ownership():
    """Return the (uid, gid) of root:wheel, or (0, 0) if lookup fails."""
    try:
        import pwd
        import grp
        return pwd.getpwnam('root').pw_uid, grp.getgrnam('wheel').gr_gid
    except (KeyError, ImportError):
        return 0, 0


def restore_config(backup_path):
    """
    Restore a config.xml backup file.

    Strategy:
      1. Validate the backup path and content
      2. Create a safety backup of the CURRENT config (in case rollback
         makes things worse)
      3. Copy the backup to a temp file next to config.xml, fsync it, set
         mode 0640 and the original ownership
      4. Rename the temp file over config.xml (atomic)
      5. Remove config cache

    Returns:
        (success, message)
    """
    # Validate path is within allowed directories
    if not validate_backup_path(backup_path):
        msg = 'Backup path outside allowed directories: %s' % backup_path
        log_error(msg)
        return False, msg

    # Validate backup content
    valid, msg = validate_config_xml(backup_path)
    if not valid:
        log_error('Backup validation failed for %s: %s' % (backup_path, msg))
        return False, msg

    # Capture existing ownership before we overwrite
    ownership = _get_file_ownership(CONFIG_XML)

    # Safety backup of current config (last resort recovery)
    safety_backup = os.path.join(CONFIG_BACKUP_DIR, 'config-pre-rollback.xml')
    try:
        if os.path.isfile(CONFIG_XML):
            shutil.copy2(CONFIG_XML, safety_backup)
            log_info('Safety backup created: %s' % safety_backup)
    except Exception as e:
        log_warning('Could not create safety backup: %s' % str(e))
        # Continue anyway - the rollback is more important

    # Restore the config atomically via temp file + rename
    tmp_fd = None
    tmp_path = None
    try:
        tmp_fd, tmp_path = tempfile.mkstemp(
            dir=os.path.dirname(CONFIG_XML), prefix='.config_rollback_')

        with os.fdopen(tmp_fd, 'wb') as dst:
            tmp_fd = None  # os.fdopen takes ownership
            with open(backup_path, 'rb') as src:
                shutil.copyfileobj(src, dst)
            # Make sure the data is on disk before it replaces config.xml;
            # otherwise a crash right after the rename could leave an empty
            # or truncated configuration.
            dst.flush()
            os.fsync(dst.fileno())
        shutil.copystat(backup_path, tmp_path)

        # Set permissions - OPNsense expects 0640
        os.chmod(tmp_path, 0o640)

        # Preserve original ownership if we captured it, otherwise root:wheel
        uid, gid = ownership if ownership else _default_ownership()
        try:
            os.chown(tmp_path, uid, gid)
        except PermissionError:
            pass  # Best effort

        os.rename(tmp_path, CONFIG_XML)
        tmp_path = None  # Rename succeeded, don't clean up
    except Exception as e:
        log_error('Failed to restore config: %s' % str(e))
        # Clean up failed temp file. config.xml itself is untouched on every
        # path that reaches here (the rename is the only step that modifies
        # it), so it must not be overwritten with the safety copy.
        if tmp_path and os.path.isfile(tmp_path):
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
        return False, 'Failed to restore: %s' % str(e)
    finally:
        if tmp_fd is not None:
            os.close(tmp_fd)

    log_info('Configuration restored from: %s' % backup_path)

    # Remove config cache so PHP reads fresh config
    try:
        if os.path.isfile(CONFIG_CACHE):
            os.unlink(CONFIG_CACHE)
            log_info('Config cache removed')
    except OSError:
        pass

    return True, 'Configuration restored successfully'


def apply_reboot():
    """Apply configuration by rebooting the system.

    Tries, in order: `configctl system reboot` (only when the configd socket
    exists and only counted as success on exit status 0), the rc.reboot
    script, and `shutdown -r now`.

    Returns:
        True when one of the methods was started, False when all failed.
    """
    log_info('ROLLBACK: Initiating full system reboot')

    if os.path.exists(CONFIGD_SOCKET):
        try:
            result = subprocess.run(
                ['configctl', 'system', 'reboot'],
                capture_output=True, timeout=10
            )
            if result.returncode == 0:
                return True
            log_warning('configctl system reboot exited with status %d' %
                        result.returncode)
        except Exception as e:
            log_error('Reboot command failed: %s' % str(e))

    # Direct reboot, then last resort
    for cmd in (['/usr/local/etc/rc.reboot'], ['shutdown', '-r', 'now']):
        try:
            subprocess.Popen(
                cmd,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                start_new_session=True
            )
            return True
        except Exception as e:
            log_error('Reboot via %s failed: %s' % (cmd[0], str(e)))

    log_error('All reboot methods failed')
    return False


def apply_reload():
    """Apply configuration by reloading all services (no reboot).

    Starts rc.reload_all detached and does not wait for it. If it cannot be
    started, falls back to individual configctl actions.

    Returns:
        True when rc.reload_all was started or at least one fallback action
        exited with status 0; False otherwise, so the caller can escalate.
    """
    log_info('ROLLBACK: Initiating service reload via rc.reload_all')
    try:
        proc = subprocess.Popen(
            ['/usr/local/etc/rc.reload_all'],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            start_new_session=True
        )
        # Don't wait for it - it can take a while and we don't want to block
        log_info('rc.reload_all started (pid=%d)' % proc.pid)
        return True
    except Exception as e:
        log_error('rc.reload_all failed: %s' % str(e))

    # Fallback: try individual service restarts
    log_info('Attempting individual service restarts as fallback')
    if not os.path.exists(CONFIGD_SOCKET):
        log_error('Fallback service restarts also failed: configd unavailable')
        return False

    any_succeeded = False
    for cmd in ('filter reload', 'interface reconfigure',
                'dns reload', 'dhcpd restart'):
        try:
            result = subprocess.run(
                ['configctl'] + cmd.split(),
                capture_output=True, timeout=30
            )
            any_succeeded = any_succeeded or result.returncode == 0
        except Exception as e:
            log_warning('configctl %s failed: %s' % (cmd, str(e)))

    if not any_succeeded:
        log_error('Fallback service restarts also failed: no action succeeded')
    return any_succeeded


def main():
    """Command-line entry point; prints one JSON status object to stdout.

    Exits with status 1 on bad arguments, a blocked or locked rollback, a
    failed restore, or when the restored config could not be applied at all.
    """
    if len(sys.argv) < 3:
        print(json.dumps({
            'status': 'error',
            'message': 'Usage: rollback.py <backup_file> <rollback_method>'
        }))
        sys.exit(1)

    backup_file = sys.argv[1]
    rollback_method = sys.argv[2]

    # Validate inputs
    if not os.path.isfile(backup_file):
        msg = 'Backup file does not exist: %s' % backup_file
        log_error(msg)
        print(json.dumps({'status': 'error', 'message': msg}))
        sys.exit(1)

    if not validate_backup_path(backup_file):
        msg = 'Backup file outside allowed directories: %s' % backup_file
        log_error(msg)
        print(json.dumps({'status': 'error', 'message': msg}))
        sys.exit(1)

    if rollback_method not in ('reboot', 'reload'):
        rollback_method = 'reboot'  # Default to safest option
        log_warning('Unknown rollback method, defaulting to reboot')

    # Prevent rollback during firmware updates
    if is_firmware_update_running():
        msg = 'Rollback blocked: firmware update in progress'
        log_warning(msg)
        print(json.dumps({'status': 'blocked', 'message': msg}))
        sys.exit(1)

    # Acquire exclusive lock
    lock_fd = acquire_restore_lock()
    if lock_fd is None:
        msg = 'Another rollback is already in progress'
        log_warning(msg)
        print(json.dumps({'status': 'locked', 'message': msg}))
        sys.exit(1)

    try:
        # Step 1: Restore config.xml
        log_info('=== ROLLBACK STARTING === backup=%s method=%s' % (
            backup_file, rollback_method))

        success, msg = restore_config(backup_file)
        if not success:
            print(json.dumps({'status': 'error', 'message': msg}))
            sys.exit(1)

        # Step 2: Apply the restored config
        if rollback_method == 'reboot':
            apply_success = apply_reboot()
        else:
            apply_success = apply_reload()

        if apply_success:
            log_info('=== ROLLBACK COMPLETE === method=%s' % rollback_method)
            print(json.dumps({
                'status': 'ok',
                'message': 'Rollback completed (method: %s)' % rollback_method,
                'backup_restored': backup_file,
                'method': rollback_method,
            }))
            return

        log_error('=== ROLLBACK APPLY FAILED === method=%s' % rollback_method)
        # If reload failed, try reboot as last resort
        rebooting = False
        if rollback_method == 'reload':
            log_info('Reload failed, falling back to reboot')
            rebooting = apply_reboot()

        if rebooting:
            print(json.dumps({
                'status': 'partial',
                'message': 'Config restored but service apply failed. Rebooting.',
                'backup_restored': backup_file,
            }))
        else:
            # Do not claim a reboot that never started
            print(json.dumps({
                'status': 'error',
                'message': 'Config restored but it could not be applied '
                           '(reboot could not be initiated).',
                'backup_restored': backup_file,
            }))
            sys.exit(1)

    finally:
        # Runs for sys.exit() as well, so the lock is always released
        release_restore_lock(lock_fd)


if __name__ == '__main__':
    main()
"""
OPNsense Auto Rollback - Safe Mode Controller

Usage:
    safemode.py start [timeout_seconds]
    safemode.py confirm
    safemode.py cancel
    safemode.py extend [additional_seconds]

Start:   Snapshots current config, launches background timer.
Confirm: Accepts changes, kills timer, clears state.
Cancel:  Manually triggers rollback immediately.
Extend:  Adds time to the countdown.
"""

import json
import os
import sys
import subprocess
import time

# Add parent directory to path for lib imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from lib.common import (
    ensure_volatile_dir, log_info, log_warning, log_error,
    read_model_settings,
    read_persistent_state, write_persistent_state, clear_persistent_state,
    generate_session_token, clear_session_token,
    is_safe_mode_active, get_safe_mode_info,
    is_firmware_update_running, is_restore_in_progress,
    get_latest_backup, get_backup_revision,
    write_timer_pid, kill_timer, read_timer_pid,
    VOLATILE_DIR, CONFIG_XML, CONFIG_BACKUP_DIR
)


def force_config_save():
    """
    Force OPNsense to save the current config, creating a backup.

    Runs `configctl firmware configure` and then looks up the newest
    timestamped backup.
    Returns the backup path or None.
    """
    try:
        # Use configctl to trigger a config save; the output is not needed
        subprocess.run(
            ['configctl', 'firmware', 'configure'],
            capture_output=True, text=True, timeout=30
        )

        # Now find the most recent backup
        backup = get_latest_backup()
        if backup:
            log_info('Config backup created: %s' % backup)
            return backup
        log_error('No backup found after config save')
        return None
    except Exception as e:
        log_error('Failed to force config save: %s' % str(e))
        return None


def _launch_timer_daemon(timeout, rollback_method):
    """Launch the background timer daemon process.

    Returns:
        (pid, None) on success or (None, error_message) on failure. The PID
        of the launched process is written to the timer PID file.
    """
    timer_script = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'timer_daemon.py')
    try:
        proc = subprocess.Popen(
            [sys.executable, timer_script, str(int(timeout)), rollback_method],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            start_new_session=True  # Detach from parent
        )
        # NOTE(review): timer_daemon.py defines daemonize(), which double-forks
        # and writes its own PID; if it is called, this write can race with
        # it and record the short-lived launcher PID - confirm.
        write_timer_pid(proc.pid)
        return proc.pid, None
    except Exception as e:
        return None, str(e)


def _rollback_failure_detail(proc_result):
    """Return the most useful error text from a finished rollback.py run.

    rollback.py reports errors as a JSON object on stdout, so prefer its
    'message' field and fall back to raw stderr/stdout.
    """
    try:
        payload = json.loads(proc_result.stdout)
        if isinstance(payload, dict) and payload.get('message'):
            return payload['message']
    except (TypeError, ValueError):
        pass
    return (proc_result.stderr or proc_result.stdout or '').strip()


def start_safe_mode(timeout_override=None):
    """Enter safe mode. Snapshot config and start countdown timer.

    Args:
        timeout_override: countdown in seconds; None uses the configured
            timeout. The value is clamped to 30..3600.

    Prints one JSON status object to stdout.
    """
    result = {'status': 'error', 'message': ''}

    # Pre-flight checks
    settings = read_model_settings()
    if not settings['enabled']:
        result['message'] = 'Auto-rollback plugin is disabled. Enable it in System > Auto Rollback.'
        print(json.dumps(result))
        return

    if is_firmware_update_running():
        result['message'] = 'Cannot enter safe mode during a firmware update.'
        print(json.dumps(result))
        return

    if is_restore_in_progress():
        result['message'] = 'A restore operation is already in progress.'
        print(json.dumps(result))
        return

    if is_safe_mode_active():
        info = get_safe_mode_info()
        result['message'] = 'Safe mode is already active (%d seconds remaining).' % info['remaining_seconds']
        result['status'] = 'already_active'
        result.update(info)
        print(json.dumps(result))
        return

    # Determine timeout - "is not None" so an explicit 0 is clamped, not ignored
    if timeout_override is not None:
        timeout = timeout_override
    else:
        timeout = settings['timeout']
    timeout = max(30, min(3600, int(timeout)))

    # Step 1: Get the current config as our "known good" backup
    backup = get_latest_backup()
    if not backup:
        # Force a save to create one
        backup = force_config_save()
    if not backup:
        result['message'] = 'Failed to create configuration backup.'
        print(json.dumps(result))
        return

    backup_revision = get_backup_revision(backup)
    now = time.time()
    expiry = now + timeout

    # Step 2: Generate session token for the confirmation UI
    token = generate_session_token()

    # Step 3: Write persistent state (survives reboot for early-boot recovery)
    state = {
        'mode': 'safemode',
        'backup_file': backup,
        'backup_revision': backup_revision,
        'start_time': now,
        'expiry_time': expiry,
        'timeout': timeout,
        'rollback_method': settings['rollback_method'],
    }
    try:
        write_persistent_state(state)
    except (IOError, OSError) as e:
        # Without persistent state there is nothing to roll back to; do not
        # leave a token behind for a safe mode that never started.
        clear_session_token()
        result['message'] = 'Failed to write safe mode state: %s' % str(e)
        print(json.dumps(result))
        return

    # Step 4: Launch background timer process
    pid, err = _launch_timer_daemon(timeout, settings['rollback_method'])
    if pid is None:
        log_error('Failed to start timer daemon: %s' % err)
        clear_persistent_state()
        clear_session_token()
        result['message'] = 'Failed to start countdown timer: %s' % err
        print(json.dumps(result))
        return

    log_info('Safe mode started: timeout=%ds, backup=%s, timer_pid=%d' % (
        timeout, backup, pid))

    # Step 5: Trigger git backup if available
    # NOTE(review): this is the same configd action force_config_save() uses;
    # confirm it is the intended git-backup trigger.
    try:
        subprocess.run(
            ['configctl', 'firmware', 'configure'],
            capture_output=True, timeout=10
        )
    except Exception:
        pass  # Non-critical

    result = {
        'status': 'ok',
        'message': 'Safe mode activated. You have %d seconds to confirm changes.' % timeout,
        'timeout': timeout,
        'remaining_seconds': timeout,
        'expiry_time': expiry,
        'backup_file': backup,
        'backup_revision': backup_revision,
        'token': token,
        'rollback_method': settings['rollback_method'],
    }
    print(json.dumps(result))


def confirm_safe_mode():
    """Confirm changes and exit safe mode gracefully.

    Kills the timer, clears persistent state and session token, and prints
    one JSON status object to stdout.
    """
    result = {'status': 'error', 'message': ''}

    if not is_safe_mode_active():
        result['message'] = 'Safe mode is not active.'
        result['status'] = 'not_active'
        print(json.dumps(result))
        return

    # Kill the background timer
    kill_timer()

    # Clear all state (read it first so the backup can still be logged)
    state = read_persistent_state()
    clear_persistent_state()
    clear_session_token()

    log_info('Safe mode confirmed. Changes accepted. Previous backup: %s' % (
        state.get('backup_file', 'unknown') if state else 'unknown'))

    result = {
        'status': 'ok',
        'message': 'Changes confirmed. Safe mode deactivated.',
    }
    print(json.dumps(result))


def cancel_safe_mode():
    """Cancel changes and rollback immediately.

    Kills the timer, clears state, runs rollback.py with the stored backup
    and method, and prints one JSON status object to stdout.
    """
    result = {'status': 'error', 'message': ''}

    state = read_persistent_state()
    if state is None or state.get('mode') != 'safemode':
        result['message'] = 'Safe mode is not active.'
        result['status'] = 'not_active'
        print(json.dumps(result))
        return

    # Kill the background timer first
    kill_timer()

    backup_file = state.get('backup_file', '')
    rollback_method = state.get('rollback_method', 'reboot')

    if not backup_file or not os.path.isfile(backup_file):
        clear_persistent_state()
        clear_session_token()
        result['message'] = 'Backup file not found: %s' % backup_file
        print(json.dumps(result))
        return

    log_info('Safe mode cancelled. Rolling back to: %s (method: %s)' % (
        backup_file, rollback_method))

    # Clear state before rollback (important: prevents re-entrancy)
    clear_persistent_state()
    clear_session_token()

    # Execute rollback
    rollback_script = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'rollback.py')
    try:
        proc_result = subprocess.run(
            [sys.executable, rollback_script, backup_file, rollback_method],
            capture_output=True, text=True, timeout=300
        )
        if proc_result.returncode == 0:
            result = {
                'status': 'ok',
                'message': 'Rollback initiated (method: %s). System is reverting.' % rollback_method,
                'rollback_method': rollback_method,
            }
        else:
            detail = _rollback_failure_detail(proc_result)
            log_error('Rollback script failed: %s' % detail)
            result['message'] = 'Rollback script failed: %s' % detail
    except Exception as e:
        result['message'] = 'Rollback execution failed: %s' % str(e)

    print(json.dumps(result))


def extend_safe_mode(additional_seconds=None):
    """Extend the safe mode countdown timer.

    Args:
        additional_seconds: seconds to add; None means 60. Clamped to
            10..3600.

    Prints one JSON status object to stdout. The status is 'error' when the
    new expiry could not be stored or the timer could not be restarted.
    """
    result = {'status': 'error', 'message': ''}

    state = read_persistent_state()
    if state is None or state.get('mode') != 'safemode':
        result['message'] = 'Safe mode is not active.'
        result['status'] = 'not_active'
        print(json.dumps(result))
        return

    if additional_seconds is None:
        additional_seconds = 60  # Default extension

    additional_seconds = max(10, min(3600, int(additional_seconds)))

    # Update expiry in persistent state
    new_expiry = state.get('expiry_time', time.time()) + additional_seconds
    state['expiry_time'] = new_expiry
    try:
        write_persistent_state(state)
    except (IOError, OSError) as e:
        # The running timer is left untouched so the old deadline still holds
        result['message'] = 'Failed to store extended expiry: %s' % str(e)
        print(json.dumps(result))
        return

    # Kill old timer and start a new one with remaining time
    kill_timer()
    remaining = int(new_expiry - time.time())
    if remaining > 0:
        rollback_method = state.get('rollback_method', 'reboot')
        pid, err = _launch_timer_daemon(remaining, rollback_method)
        if pid is None:
            # No countdown is running now; reporting success here would hide
            # that the automatic rollback will not fire.
            log_error('Failed to restart timer: %s' % err)
            result['message'] = 'Failed to restart countdown timer: %s' % err
            result['expiry_time'] = new_expiry
            print(json.dumps(result))
            return
    else:
        remaining = 0

    log_info('Safe mode extended by %d seconds. New remaining: %d seconds.' % (
        additional_seconds, remaining))

    result = {
        'status': 'ok',
        'message': 'Timer extended by %d seconds. %d seconds remaining.' % (
            additional_seconds, remaining),
        'remaining_seconds': remaining,
        'expiry_time': new_expiry,
    }
    print(json.dumps(result))


def _parse_int_arg(value, label):
    """Convert a command-line argument to int or exit with a JSON error."""
    try:
        return int(value)
    except ValueError:
        print(json.dumps({
            'status': 'error',
            'message': 'Invalid %s value: %s' % (label, value)}))
        sys.exit(1)


def main():
    """Dispatch the action named in sys.argv[1]."""
    ensure_volatile_dir()

    if len(sys.argv) < 2:
        print(json.dumps({'status': 'error', 'message': 'Usage: safemode.py start|confirm|cancel|extend [args]'}))
        sys.exit(1)

    action = sys.argv[1].lower()
    optional_arg = sys.argv[2] if len(sys.argv) > 2 else None

    if action == 'start':
        timeout = None
        if optional_arg is not None:
            timeout = _parse_int_arg(optional_arg, 'timeout')
        start_safe_mode(timeout)
    elif action == 'confirm':
        confirm_safe_mode()
    elif action == 'cancel':
        cancel_safe_mode()
    elif action == 'extend':
        extra = None
        if optional_arg is not None:
            extra = _parse_int_arg(optional_arg, 'seconds')
        extend_safe_mode(extra)
    else:
        print(json.dumps({'status': 'error', 'message': 'Unknown action: %s' % action}))
        sys.exit(1)


if __name__ == '__main__':
    main()
"""
OPNsense Auto Rollback - Status Reporter

Returns the current state of the auto-rollback system as JSON.
Used by the dashboard widget, API, and CLI.

Usage: status.py (no arguments)
"""

import json
import os
import sys
import time

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from lib.common import (
    ensure_volatile_dir,
    read_model_settings,
    read_persistent_state, read_session_token,
    read_timer_pid, is_restore_in_progress,
    WATCHDOG_FAIL_COUNT_FILE, WATCHDOG_LAST_CONFIG_FILE,
)


def get_watchdog_status():
    """Get the watchdog subsystem status.

    Returns:
        dict with fail_count, last_config_change and last_config_backup.
        Each value keeps its default (0, 0, '') when the corresponding file
        is missing, unreadable or does not hold the expected content.
    """
    fail_count = 0
    last_config_time = 0
    last_config_backup = ''

    try:
        with open(WATCHDOG_FAIL_COUNT_FILE, 'r') as f:
            fail_count = int(f.read().strip())
    except (ValueError, IOError, OSError):
        pass

    try:
        with open(WATCHDOG_LAST_CONFIG_FILE, 'r') as f:
            data = json.load(f)
        # Valid JSON that is not an object (e.g. a list) must not abort the
        # whole status report with an AttributeError.
        if isinstance(data, dict):
            last_config_time = data.get('time', 0)
            last_config_backup = data.get('backup', '')
    except (ValueError, IOError, OSError):
        # json.JSONDecodeError is a ValueError
        pass

    return {
        'fail_count': fail_count,
        'last_config_change': last_config_time,
        'last_config_backup': last_config_backup,
    }


def main():
    """Collect settings, safe mode state and watchdog state, and print them
    as one indented JSON object on stdout."""
    ensure_volatile_dir()

    now = time.time()
    settings = read_model_settings()
    state = read_persistent_state()
    watchdog = get_watchdog_status()

    # Determine safe mode status
    safe_mode_active = False
    safe_mode_remaining = 0
    safe_mode_info = {}

    if state and state.get('mode') == 'safemode':
        expiry = state.get('expiry_time', 0)
        remaining = max(0, expiry - now)
        timer_pid = read_timer_pid()
        # Active while time remains or a timer process is still alive
        safe_mode_active = remaining > 0 or timer_pid is not None
        safe_mode_remaining = int(remaining)

        safe_mode_info = {
            'backup_file': state.get('backup_file', ''),
            'backup_revision': state.get('backup_revision', ''),
            'start_time': state.get('start_time', 0),
            'expiry_time': expiry,
            'remaining_seconds': safe_mode_remaining,
            'timeout': state.get('timeout', 0),
            'rollback_method': state.get('rollback_method', 'reboot'),
            'timer_pid': timer_pid,
        }

    # Determine overall system state; a running restore takes precedence
    if is_restore_in_progress():
        system_state = 'restoring'
    elif safe_mode_active:
        system_state = 'safe_mode'
    elif settings['enabled']:
        system_state = 'armed'
    else:
        system_state = 'disabled'

    result = {
        'status': 'ok',
        'timestamp': now,
        'system_state': system_state,
        'settings': settings,
        'safe_mode': {
            'active': safe_mode_active,
            'remaining_seconds': safe_mode_remaining,
            **safe_mode_info,
        },
        'watchdog': {
            'enabled': settings['watchdog_enabled'],
            **watchdog,
        },
        'token': read_session_token(),
    }

    print(json.dumps(result, indent=2))


if __name__ == '__main__':
    try:
        main()
    except Exception as e:
        # Callers parse stdout as JSON, so report failures in the same shape
        print(json.dumps({'status': 'error', 'message': str(e)}))
IN NO EVENT SHALL THE
+    AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+"""
+"""
+OPNsense Auto Rollback - Timer Daemon
+
+This is a background process that counts down and triggers rollback
+if not killed before expiry. It is the PRIMARY rollback trigger.
+
+Design:
+  - Launched by safemode.py start
+  - Double-forks to fully detach from configd parent process
+  - Sleeps in 1-second intervals (allows responsive cancellation via SIGTERM)
+  - On expiry: reads the backup path from persistent state and executes rollback
+  - On SIGTERM: exits cleanly (safe mode was confirmed or cancelled)
+  - PID is stored in /var/run/autorollback/timer.pid
+
+Usage: timer_daemon.py <timeout_seconds> <rollback_method>
+"""
+
+import os
+import sys
+import signal
+import time
+import subprocess
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from lib.common import (
+    log_info, log_warning, log_error,
+    read_persistent_state, clear_persistent_state, clear_session_token,
+    clean_timer_pid, write_timer_pid, VOLATILE_DIR
+)
+
+# Global flag for clean shutdown
+_shutdown = False
+
+
+def handle_sigterm(signum, frame):
+    """Handle SIGTERM/SIGINT by flagging shutdown (safe mode confirmed/cancelled)."""
+    global _shutdown
+    _shutdown = True
+
+
+def daemonize():
+    """
+    Double-fork to fully detach from the parent process (configd).
+
+    This ensures the timer daemon survives even if configd restarts,
+    and that configd doesn't block waiting for our exit.
+    """
+    # First fork — exit parent (returns control to configd)
+    pid = os.fork()
+    if pid > 0:
+        # Parent: exit immediately so configd doesn't block
+        os._exit(0)
+
+    # First child: create new session
+    os.setsid()
+
+    # Second fork — prevent reacquiring a controlling terminal
+    pid = os.fork()
+    if pid > 0:
+        # First child exits
+        os._exit(0)
+
+    # Second child: the actual daemon process
+    # Redirect standard file descriptors to /dev/null
+    devnull = os.open(os.devnull, os.O_RDWR)
+    try:
+        os.dup2(devnull, 0)  # stdin
+        os.dup2(devnull, 1)  # stdout
+        os.dup2(devnull, 2)  # stderr
+    finally:
+        if devnull > 2:
+            os.close(devnull)
+
+    # Update PID file with our actual daemon PID
+    write_timer_pid(os.getpid())
+
+
+def run_timer(timeout, rollback_method):
+    """Wait `timeout` seconds unless signalled, then run rollback.py on the backup named in persistent state."""
+    global _shutdown
+
+    # Register signal handlers
+    signal.signal(signal.SIGTERM, handle_sigterm)
+    signal.signal(signal.SIGINT, handle_sigterm)
+
+    log_info('Timer daemon started: timeout=%ds, method=%s, pid=%d' % (
+        timeout, rollback_method, os.getpid()))
+
+    # Count down against a monotonic deadline. The flag is re-tested after the loop so
+    # a SIGTERM that lands during the final sleep cancels instead of rolling back.
+    deadline = time.monotonic() + timeout
+    while not _shutdown and time.monotonic() < deadline:
+        time.sleep(min(1.0, max(0.0, deadline - time.monotonic())))
+    if _shutdown:
+        log_info('Timer daemon received shutdown signal. Exiting cleanly.')
+        clean_timer_pid()
+        sys.exit(0)
+
+    # Timer expired! Time to rollback.
+    log_warning('SAFE MODE TIMER EXPIRED after %d seconds. Initiating rollback.' % timeout)
+
+    # Read the backup file from persistent state
+    state = read_persistent_state()
+    if state is None:
+        log_error('Timer expired but no persistent state found. Someone else handled it.')
+        clean_timer_pid()
+        sys.exit(0)
+
+    backup_file = state.get('backup_file', '')
+    if not backup_file or not os.path.isfile(backup_file):
+        log_error('Timer expired but backup file missing: %s' % backup_file)
+        clear_persistent_state()
+        clear_session_token()
+        clean_timer_pid()
+        sys.exit(1)
+
+    # Clear state BEFORE rollback to prevent re-entrancy
+    clear_persistent_state()
+    clear_session_token()
+    clean_timer_pid()
+
+    # Execute rollback
+    rollback_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'rollback.py')
+    try:
+        log_info('Executing rollback: backup=%s, method=%s' % (backup_file, rollback_method))
+        result = subprocess.run(
+            [sys.executable, rollback_script, backup_file, rollback_method],
+            stdin=subprocess.DEVNULL,
+            capture_output=True, text=True, timeout=300
+        )
+        if result.returncode != 0:
+            log_error('Rollback script failed: %s' % result.stderr)
+            sys.exit(1)
+        else:
+            log_info('Rollback script completed successfully.')
+    except subprocess.TimeoutExpired:
+        log_error('Rollback script timed out after 300 seconds.')
+        sys.exit(1)
+    except Exception as e:
+        log_error('Rollback execution failed: %s' % str(e))
+        sys.exit(1)
+
+
+def main():
+    """Validate <timeout_seconds> and <rollback_method> from argv, daemonize, then run the countdown."""
+    if len(sys.argv) < 3:
+        print('Usage: timer_daemon.py <timeout_seconds> <rollback_method>', file=sys.stderr)
+        sys.exit(1)
+
+    try:
+        timeout = int(sys.argv[1])
+    except ValueError:
+        print('Invalid timeout value: %s' % sys.argv[1], file=sys.stderr)
+        sys.exit(1)
+
+    if timeout <= 0:
+        print('Timeout must be positive, got: %d' % timeout, file=sys.stderr)
+        sys.exit(1)
+
+    rollback_method = sys.argv[2]
+    if rollback_method not in ('reboot', 'reload'):
+        rollback_method = 'reboot'
+
+    # Double-fork to fully detach from configd
+    daemonize()
+
+    # Now running as a proper daemon
+    run_timer(timeout, rollback_method)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/sysutils/autorollback/src/opnsense/scripts/autorollback/watchdog.py b/sysutils/autorollback/src/opnsense/scripts/autorollback/watchdog.py
new
file mode 100755
index 000000000..2ec888404
--- /dev/null
+++ b/sysutils/autorollback/src/opnsense/scripts/autorollback/watchdog.py
@@ -0,0 +1,360 @@
+#!/usr/local/bin/python3
+"""
+    Copyright (c) 2026 MP Lindsey
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+    AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+    AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+"""
+"""
+OPNsense Auto Rollback - Connectivity Watchdog
+
+Called by cron every minute. This is Layer 2 of the safety system:
+  Layer 1: Timer daemon (primary, second-precise)
+  Layer 2: This watchdog (secondary, minute-precise)
+  Layer 3: Early boot recovery (tertiary, crash recovery)
+
+This script has TWO functions:
+
+1. CRON SAFETY NET for Safe Mode:
+   If the timer daemon died but safe mode state is still pending and expired,
+   trigger rollback. This catches the case where the timer process crashed.
+
+2. CONNECTIVITY WATCHDOG (always-on):
+   After a config change, wait out the grace period, then run health checks
+   for the following 5 minutes. If they fail N consecutive times, roll back
+   to the configuration that preceded the change.
+
+Usage: watchdog.py (no arguments, called by cron)
+"""
+
+import json
+import os
+import re
+import shlex
+import subprocess
+import sys
+import time
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from lib.common import (
+    log_info, log_warning, log_error,
+    read_model_settings, ensure_volatile_dir,
+    read_persistent_state, clear_persistent_state, clear_session_token,
+    is_restore_in_progress, is_firmware_update_running,
+    get_default_gateway, get_previous_backup,
+    read_timer_pid, kill_timer, clean_timer_pid,
+    write_timer_pid,
+    VOLATILE_DIR, WATCHDOG_FAIL_COUNT_FILE, WATCHDOG_LAST_CONFIG_FILE,
+    CONFIG_XML
+)
+
+
+def get_fail_count():
+    """Read the consecutive failure count (0 if the file is missing or unreadable)."""
+    try:
+        if os.path.isfile(WATCHDOG_FAIL_COUNT_FILE):
+            with open(WATCHDOG_FAIL_COUNT_FILE, 'r') as f:
+                return int(f.read().strip())
+    except (ValueError, IOError):
+        pass
+    return 0
+
+
+def set_fail_count(count):
+    """Write the consecutive failure count; a failed write is logged because it stalls the threshold."""
+    try:
+        with open(WATCHDOG_FAIL_COUNT_FILE, 'w') as f:
+            f.write(str(count))
+    except IOError as e:
+        log_warning('Watchdog: cannot persist failure count: %s' % str(e))
+
+
+def clear_fail_count():
+    """Reset the failure counter by removing its file (best effort)."""
+    try:
+        if os.path.isfile(WATCHDOG_FAIL_COUNT_FILE):
+            os.unlink(WATCHDOG_FAIL_COUNT_FILE)
+    except OSError:
+        pass
+
+
+def get_last_config_change():
+    """Read the last config change record (time, new backup, previous backup); (0, '', '') if unusable."""
+    try:
+        if os.path.isfile(WATCHDOG_LAST_CONFIG_FILE):
+            with open(WATCHDOG_LAST_CONFIG_FILE, 'r') as f:
+                data = json.load(f)
+            return (
+                data.get('time', 0),
+                data.get('backup', ''),
+                data.get('previous_backup', ''),
+            )
+    except (json.JSONDecodeError, AttributeError, IOError):  # AttributeError: JSON that is not an object
+        pass
+    return 0, '', ''
+
+
+def run_health_check(command, pattern, gateway=None):
+    """
+    Run a health check command and match its output against a pattern.
+    Returns (passed, output).
+
+    Security: gateway is already validated by get_default_gateway() via
+    ipaddress.ip_address(). We still use shlex.quote() for defense-in-depth
+    since the command runs with shell=True.
+    """
+    if not command:
+        return True, 'No command configured'
+
+    # Substitute %gateway% placeholder with safely quoted value
+    if '%gateway%' in command:
+        if gateway:
+            command = command.replace('%gateway%', shlex.quote(gateway))
+        else:
+            # No gateway available, skip this check
+            return True, 'No gateway available, skipping check'
+
+    try:
+        result = subprocess.run(
+            command, shell=True,
+            capture_output=True, text=True, timeout=15
+        )
+        output = result.stdout + result.stderr
+
+        if pattern:
+            try:
+                if re.search(pattern, output):
+                    return True, output.strip()[:200]
+                else:
+                    return False, 'Pattern "%s" not found in output' % pattern
+            except re.error as e:
+                log_warning('Watchdog: invalid regex pattern "%s": %s — treating as pass' % (pattern, e))
+                return True, 'Invalid pattern (skipped)'
+        else:
+            # No pattern — just check exit code
+            return result.returncode == 0, output.strip()[:200]
+
+    except subprocess.TimeoutExpired:
+        return False, 'Command timed out after 15 seconds'
+    except Exception as e:
+        return False, 'Command error: %s' % str(e)
+
+
+def check_safe_mode_expired():
+    """
+    CRON SAFETY NET: Check if safe mode timer expired but daemon died.
+    Returns True when the expired state was handled here, False otherwise.
+    """
+    state = read_persistent_state()
+    if state is None or state.get('mode') != 'safemode':
+        return False
+
+    expiry = state.get('expiry_time', 0)
+    now = time.time()
+
+    if now < expiry:
+        # Not expired yet — check if timer daemon is still alive
+        if read_timer_pid() is None:
+            remaining = max(1, int(expiry - now))  # timer_daemon.py rejects a timeout of 0
+            log_warning('Safe mode timer daemon died! %d seconds remaining. Restarting timer.' % remaining)
+            # Restart the timer daemon
+            rollback_method = state.get('rollback_method', 'reboot')
+            timer_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'timer_daemon.py')
+            try:
+                subprocess.Popen(
+                    [sys.executable, timer_script, str(remaining), rollback_method],
+                    stdin=subprocess.DEVNULL,
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                    start_new_session=True
+                )
+                # Don't write PID here — the daemon writes its own after double-fork.
+                # The Popen PID is the pre-fork process which exits immediately.
+                log_info('Timer daemon restarted with %d seconds remaining' % remaining)
+            except Exception as e:
+                log_error('Failed to restart timer daemon: %s' % str(e))
+        return False
+
+    # Timer expired with state still pending. NOTE(review): daemon liveness is not re-checked here.
+    log_warning('CRON SAFETY NET: Safe mode expired %d seconds ago. Timer daemon missing. Triggering rollback.' % (
+        int(now - expiry)))
+
+    backup_file = state.get('backup_file', '')
+    rollback_method = state.get('rollback_method', 'reboot')
+
+    if not backup_file or not os.path.isfile(backup_file):
+        log_error('Cannot rollback: backup file missing: %s' % backup_file)
+        clear_persistent_state()
+        clear_session_token()
+        clean_timer_pid()
+        return True
+
+    # Clear state before rollback
+    clear_persistent_state()
+    clear_session_token()
+    clean_timer_pid()
+
+    # Execute rollback
+    rollback_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'rollback.py')
+    try:
+        result = subprocess.run(
+            [sys.executable, rollback_script, backup_file, rollback_method],
+            stdin=subprocess.DEVNULL,
+            capture_output=True, text=True, timeout=300
+        )
+        if result.returncode != 0:
+            log_error('Cron safety net rollback script failed: %s' % result.stderr)
+    except Exception as e:
+        log_error('Cron safety net rollback failed: %s' % str(e))
+
+    return True
+
+
+def run_watchdog(settings):
+    """
+    CONNECTIVITY WATCHDOG: Run health checks after config changes; roll back at the failure threshold.
+    """
+    last_change_time, _last_backup, previous_backup = get_last_config_change()
+
+    if last_change_time == 0:
+        # No recent config change recorded — nothing to watch
+        clear_fail_count()
+        return
+
+    now = time.time()
+    age = now - last_change_time
+    grace = settings['grace_period']
+
+    # Checks only run while grace <= age <= grace + 300 seconds after a config change
+    if age > grace + 300:
+        # More than grace+5min since last change — stop watching
+        clear_fail_count()
+        return
+
+    # Still within grace period — skip checks until grace period elapses
+    if age < grace:
+        clear_fail_count()  # Reset stale count from previous config change
+        return
+
+    # Run health checks
+    gateway = get_default_gateway()
+
+    check1_ok, check1_msg = run_health_check(
+        settings['check_command'], settings['check_pattern'], gateway)
+
+    check2_ok = True
+    check2_msg = ''
+    if settings.get('check_command_2'):
+        check2_ok, check2_msg = run_health_check(
+            settings['check_command_2'], settings.get('check_pattern_2', ''), gateway)
+
+    all_ok = check1_ok and check2_ok
+
+    if all_ok:
+        fails = get_fail_count()
+        if fails > 0:
+            log_info('Watchdog: health check recovered after %d failures' % fails)
+        clear_fail_count()
+        return
+
+    # Check failed
+    fails = get_fail_count() + 1
+    set_fail_count(fails)
+
+    log_warning('Watchdog: health check failed (%d/%d). Check1: %s. Check2: %s' % (
+        fails, settings['fail_threshold'],
+        check1_msg if not check1_ok else 'OK',
+        check2_msg if not check2_ok else 'OK'))
+
+    if fails >= settings['fail_threshold']:
+        log_warning('WATCHDOG: Failure threshold reached (%d/%d). Triggering rollback!' % (
+            fails, settings['fail_threshold']))
+
+        # Find the correct backup to restore — the one BEFORE the config change
+        # that broke connectivity (previous_backup), NOT the new one.
+        backup_file = None
+        if previous_backup and os.path.isfile(previous_backup):
+            backup_file = previous_backup
+            log_info('Watchdog: rolling back to pre-change backup: %s' % backup_file)
+        else:
+            # Fallback: try to find the second-most-recent backup
+            backup_file = get_previous_backup()
+            if backup_file:
+                log_info('Watchdog: rolling back to previous backup: %s' % backup_file)
+            else:
+                log_error('Watchdog: No suitable backup file available for rollback')
+                clear_fail_count()
+                return
+
+        rollback_method = settings['rollback_method']
+        clear_fail_count()
+
+        # Execute rollback
+        rollback_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'rollback.py')
+        try:
+            result = subprocess.run(
+                [sys.executable, rollback_script, backup_file, rollback_method],
+                stdin=subprocess.DEVNULL,
+                capture_output=True, text=True, timeout=300
+            )
+            if result.returncode != 0:
+                log_error('Watchdog rollback script failed: %s' % result.stderr)
+        except Exception as e:
+            log_error('Watchdog rollback failed: %s' % str(e))
+
+
+def main():
+    """Run one cron pass (restore/firmware guards, safe-mode safety net, watchdog) and print a JSON summary."""
+    result = {'status': 'ok', 'checks': []}
+
+    # Skip if restore is in progress (re-entrancy guard)
+    if is_restore_in_progress():
+        result['message'] = 'Restore in progress, skipping watchdog'
+        print(json.dumps(result))
+        return
+
+    # Skip during firmware updates
+    if is_firmware_update_running():
+        result['message'] = 'Firmware update in progress, skipping watchdog'
+        print(json.dumps(result))
+        return
+
+    # Check 1: Safe mode cron safety net
+    if check_safe_mode_expired():
+        result['message'] = 'Safe mode expired — rollback triggered by cron safety net'
+        print(json.dumps(result))
+        return
+
+    # Check 2: Connectivity watchdog
+    settings = read_model_settings()
+    if settings['enabled'] and settings['watchdog_enabled']:
+        run_watchdog(settings)
+        result['message'] = 'Watchdog check completed'
+    else:
+        result['message'] = 'Watchdog disabled'
+
+    print(json.dumps(result))
+
+
+if __name__ == '__main__':
+    ensure_volatile_dir()
+    main()
diff --git a/sysutils/autorollback/src/opnsense/service/conf/actions.d/actions_autorollback.conf
b/sysutils/autorollback/src/opnsense/service/conf/actions.d/actions_autorollback.conf new file mode 100644 index 000000000..8550cad96 --- /dev/null +++ b/sysutils/autorollback/src/opnsense/service/conf/actions.d/actions_autorollback.conf @@ -0,0 +1,48 @@ +[safemode.start] +command:/usr/local/opnsense/scripts/autorollback/safemode.py start +parameters:%s +type:script_output +message:Starting auto-rollback safe mode +description:Start safe mode with configuration snapshot + +[safemode.confirm] +command:/usr/local/opnsense/scripts/autorollback/safemode.py confirm +parameters: +type:script_output +message:Confirming safe mode changes +description:Confirm configuration changes and exit safe mode + +[safemode.cancel] +command:/usr/local/opnsense/scripts/autorollback/safemode.py cancel +parameters: +type:script_output +message:Cancelling safe mode - reverting changes +description:Cancel safe mode and revert to previous configuration + +[safemode.extend] +command:/usr/local/opnsense/scripts/autorollback/safemode.py extend +parameters:%s +type:script_output +message:Extending safe mode timer +description:Extend the safe mode countdown timer + +[rollback.execute] +command:/usr/local/opnsense/scripts/autorollback/rollback.py +parameters:%s +type:script_output +message:Executing configuration rollback +description:Roll back to a previous configuration + +[watchdog.check] +command:/usr/local/opnsense/scripts/autorollback/watchdog.py +parameters: +type:script_output +message:Running watchdog health check +description:Connectivity watchdog health check + +[status] +command:/usr/local/opnsense/scripts/autorollback/status.py +parameters: +type:script_output +message:Getting auto-rollback status +description:Report current auto-rollback state diff --git a/sysutils/autorollback/src/opnsense/service/templates/OPNsense/AutoRollback/+TARGETS b/sysutils/autorollback/src/opnsense/service/templates/OPNsense/AutoRollback/+TARGETS new file mode 100644 index 000000000..e69de29bb diff --git 
a/sysutils/autorollback/src/opnsense/www/js/autorollback_banner.js b/sysutils/autorollback/src/opnsense/www/js/autorollback_banner.js new file mode 100644 index 000000000..14d32c933 --- /dev/null +++ b/sysutils/autorollback/src/opnsense/www/js/autorollback_banner.js @@ -0,0 +1,305 @@ +/* + * Copyright (C) 2026 MP Lindsey + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * OPNsense Auto Rollback - Persistent Global Banner + * + * This script injects a countdown banner at the top of EVERY page when + * safe mode is active. It polls the status API and shows/hides the banner + * dynamically. Includes confirm/revert buttons for immediate action + * without navigating to the plugin settings page. 
+ * + * This file should be included in the base layout template or via a + * system hook that adds JavaScript to every page. + * + * Design: Non-intrusive but unmissable. Fixed position below the navbar, + * full-width, with a pulsing amber background during safe mode. + */ +(function() { + 'use strict'; + + // Don't double-initialize + if (window._autorollbackBannerInit) return; + window._autorollbackBannerInit = true; + + const POLL_INTERVAL_IDLE = 10000; // 10s when not in safe mode + const POLL_INTERVAL_ACTIVE = 1000; // 1s during safe mode + const BANNER_ID = 'autorollback-global-banner'; + + let pollTimer = null; + let currentPollInterval = POLL_INTERVAL_IDLE; + let bannerElement = null; + + function createBanner() { + if (document.getElementById(BANNER_ID)) return; + + const banner = document.createElement('div'); + banner.id = BANNER_ID; + banner.innerHTML = ` + +
+ + Safe Mode Active + -- + + + +
+
+ `; + + document.body.appendChild(banner); + bannerElement = banner; + + // Button event listeners + document.getElementById('arb-confirm').addEventListener('click', function() { + this.disabled = true; + this.textContent = 'Confirming...'; + apiPost('confirm', function() { + pollStatus(); + }); + }); + + document.getElementById('arb-revert').addEventListener('click', function() { + if (confirm('Revert to the previous configuration?\nThe system may reboot.')) { + this.disabled = true; + this.textContent = 'Reverting...'; + apiPost('cancel', function() { + pollStatus(); + }); + } + }); + + document.getElementById('arb-extend').addEventListener('click', function() { + apiPost('extend', function() { + pollStatus(); + }, {seconds: 60}); + }); + } + + function showBanner(remaining, total) { + if (!bannerElement) createBanner(); + + bannerElement.classList.add('visible'); + + // Danger mode when under 20% time remaining + let pct = total > 0 ? remaining / total : 0; + if (pct <= 0.2) { + bannerElement.classList.add('danger'); + } else { + bannerElement.classList.remove('danger'); + } + + // Update countdown + let mins = Math.floor(remaining / 60); + let secs = remaining % 60; + let display = mins > 0 + ? 
mins + 'm ' + String(secs).padStart(2, '0') + 's' + : secs + 's'; + document.getElementById('arb-countdown').textContent = display; + + // Progress bar + document.getElementById('arb-progress').style.width = (pct * 100) + '%'; + + // Re-enable buttons + let confirmBtn = document.getElementById('arb-confirm'); + let revertBtn = document.getElementById('arb-revert'); + confirmBtn.disabled = false; + confirmBtn.innerHTML = '✓ CONFIRM'; + revertBtn.disabled = false; + revertBtn.innerHTML = '↺ REVERT'; + + // Push body content down to avoid overlap + document.body.style.paddingTop = bannerElement.offsetHeight + 'px'; + } + + function hideBanner() { + if (bannerElement) { + bannerElement.classList.remove('visible'); + document.body.style.paddingTop = ''; + } + } + + function apiPost(action, callback, data) { + let xhr = new XMLHttpRequest(); + xhr.open('POST', '/api/autorollback/service/' + action, true); + xhr.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded'); + + // Include CSRF token if available (OPNsense uses jQuery for this) + let csrfToken = ''; + if (typeof $ !== 'undefined' && $.ajaxSettings && $.ajaxSettings.headers) { + csrfToken = $.ajaxSettings.headers['X-CSRFToken'] || ''; + } + if (csrfToken) { + xhr.setRequestHeader('X-CSRFToken', csrfToken); + } + + xhr.onload = function() { + if (callback) callback(); + }; + + let body = ''; + if (data) { + body = Object.keys(data).map(function(k) { + return encodeURIComponent(k) + '=' + encodeURIComponent(data[k]); + }).join('&'); + } + xhr.send(body); + } + + function pollStatus() { + let xhr = new XMLHttpRequest(); + xhr.open('GET', '/api/autorollback/service/status', true); + xhr.onload = function() { + try { + let data = JSON.parse(xhr.responseText); + let state = data.system_state || 'disabled'; + let safeMode = data.safe_mode || {}; + + if (state === 'safe_mode' && safeMode.remaining_seconds > 0) { + showBanner(safeMode.remaining_seconds, safeMode.timeout || 120); + 
setPolling(POLL_INTERVAL_ACTIVE); + } else { + hideBanner(); + setPolling(POLL_INTERVAL_IDLE); + } + } catch (e) { + // Silently ignore parse errors — API might be temporarily unavailable + } + }; + xhr.onerror = function() { + // API unreachable — could be mid-rollback, keep polling + }; + xhr.send(); + } + + function setPolling(interval) { + if (interval === currentPollInterval && pollTimer) return; + currentPollInterval = interval; + if (pollTimer) clearInterval(pollTimer); + pollTimer = setInterval(pollStatus, interval); + } + + // Initialize + if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', function() { + pollStatus(); + setPolling(POLL_INTERVAL_IDLE); + }); + } else { + pollStatus(); + setPolling(POLL_INTERVAL_IDLE); + } +})(); diff --git a/sysutils/autorollback/src/opnsense/www/js/widgets/AutoRollback.js b/sysutils/autorollback/src/opnsense/www/js/widgets/AutoRollback.js new file mode 100644 index 000000000..2dddf459c --- /dev/null +++ b/sysutils/autorollback/src/opnsense/www/js/widgets/AutoRollback.js @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2026 MP Lindsey + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * OPNsense Auto Rollback - Dashboard Widget + * + * Shows real-time safe mode status with countdown, one-click + * start/confirm/cancel controls directly from the dashboard. + */ +export default class AutoRollback extends BaseWidget { + constructor() { + super(); + this.tickTimeout = 2; + } + + getMarkup() { + return $(` +
+ + +
+ + + Loading + + +
+ + + + + +
+ + + + +
+ + + +
+ `); + } + + async onMarkupRendered() { + const self = this; + + $('#arw-btn-start').on('click', async function() { + $(this).prop('disabled', true); + try { + await self.ajaxCall('/api/autorollback/service/start', {}, 'POST'); + self.tickTimeout = 1; + } catch(e) { /* ignore */ } + await self.onWidgetTick(); + }); + + $('#arw-btn-confirm').on('click', async function() { + $(this).prop('disabled', true); + try { + await self.ajaxCall('/api/autorollback/service/confirm', {}, 'POST'); + self.tickTimeout = 2; + } catch(e) { /* ignore */ } + await self.onWidgetTick(); + }); + + $('#arw-btn-revert').on('click', async function() { + if (confirm('Revert to previous configuration? The system may reboot.')) { + $(this).prop('disabled', true); + try { + await self.ajaxCall('/api/autorollback/service/cancel', {}, 'POST'); + } catch(e) { /* ignore */ } + await self.onWidgetTick(); + } + }); + + $('#arw-btn-extend').on('click', async function() { + try { + await self.ajaxCall('/api/autorollback/service/extend', {seconds: 60}, 'POST'); + } catch(e) { /* ignore */ } + await self.onWidgetTick(); + }); + } + + async onWidgetTick() { + try { + const data = await this.ajaxCall('/api/autorollback/service/status'); + if (!data || data.status === 'error') { + this._renderError(); + return; + } + this._renderStatus(data); + } catch(e) { + this._renderError(); + } + } + + _renderStatus(data) { + const state = data.system_state || 'disabled'; + const safeMode = data.safe_mode || {}; + const watchdog = data.watchdog || {}; + + const badge = $('#arw-badge'); + const dot = $('#arw-dot'); + const badgeText = $('#arw-badge-text'); + + badge.css({'background': '#e9ecef', 'color': '#495057'}); + dot.css({'background': '#6c757d', 'animation': 'none'}); + + if (state === 'safe_mode') { + badge.css({'background': '#fff3cd', 'color': '#856404'}); + dot.css({'background': '#f0ad4e', 'animation': 'arw-blink 1s infinite'}); + badgeText.text('Safe Mode'); + this.tickTimeout = 1; + } else if (state === 
'restoring') { + badge.css({'background': '#f8d7da', 'color': '#721c24'}); + dot.css({'background': '#d9534f', 'animation': 'arw-blink 0.5s infinite'}); + badgeText.text('Restoring'); + } else if (state === 'armed') { + badge.css({'background': '#d4edda', 'color': '#155724'}); + dot.css({'background': '#28a745'}); + badgeText.text('Armed'); + this.tickTimeout = 5; + } else { + badgeText.text('Disabled'); + this.tickTimeout = 10; + } + + const method = data.settings?.rollback_method || ''; + $('#arw-method').text(method === 'reboot' ? 'reboot' : method === 'reload' ? 'reload' : ''); + + if (state === 'safe_mode' && safeMode.remaining_seconds > 0) { + const remaining = Math.round(safeMode.remaining_seconds); + const total = safeMode.timeout || 120; + const pct = total > 0 ? (remaining / total) * 100 : 0; + + let mins = Math.floor(remaining / 60); + let secs = remaining % 60; + let display = mins > 0 + ? `${mins}m ${String(secs).padStart(2,'0')}s` + : `${secs}s`; + $('#arw-countdown').html(display); + + let barColor = pct > 50 ? '#5cb85c' : (pct > 20 ? 
'#f0ad4e' : '#d9534f'); + $('#arw-bar').css({'width': pct + '%', 'background': barColor}); + + $('#arw-countdown-section').show(); + } else { + $('#arw-countdown-section').hide(); + } + + $('#arw-btn-start').toggle(state === 'armed').prop('disabled', false); + $('#arw-btn-confirm').toggle(state === 'safe_mode').prop('disabled', false); + $('#arw-btn-revert').toggle(state === 'safe_mode').prop('disabled', false); + $('#arw-btn-extend').toggle(state === 'safe_mode').prop('disabled', false); + + if (watchdog.enabled) { + let wdText = 'Watchdog: monitoring'; + if (watchdog.fail_count > 0) { + wdText = `Watchdog: ${watchdog.fail_count} failure(s)`; + } + $('#arw-watchdog-text').text(wdText); + $('#arw-watchdog').show(); + } else { + $('#arw-watchdog').hide(); + } + } + + _renderError() { + $('#arw-badge').css({'background': '#f8d7da', 'color': '#721c24'}); + $('#arw-badge-text').text('Error'); + $('#arw-countdown-section').hide(); + $('#arw-btn-start, #arw-btn-confirm, #arw-btn-revert, #arw-btn-extend').hide(); + } +} diff --git a/sysutils/autorollback/src/opnsense/www/js/widgets/Metadata/AutoRollback.xml b/sysutils/autorollback/src/opnsense/www/js/widgets/Metadata/AutoRollback.xml new file mode 100644 index 000000000..b2518271c --- /dev/null +++ b/sysutils/autorollback/src/opnsense/www/js/widgets/Metadata/AutoRollback.xml @@ -0,0 +1,11 @@ + + + AutoRollback.js + + /api/autorollback/service/status + + + Auto Rollback + + +