File: //usr/share/imunify360-webshield/webshield-watchdog
#!/opt/imunify360/venv/bin/python3
"""
The watchdog script that checks the webshield and restarts it if an error is found
"""
import json
import logging
import logging.handlers
import os
import requests
import subprocess
import sys
import time
import uuid
import yaml
from configparser import ConfigParser
from pathlib import Path
import sentry_sdk
from sentry_sdk import configure_scope
# Make the logging module silently ignore exceptions raised while a log
# record is being handled (e.g. syslog is unreachable) instead of printing
# a traceback to stderr: a logging problem must not disturb the watchdog.
logging.raiseExceptions = False
class Watchdog:
    """
    Health checker for the webshield services and the wafd daemon.

    On construction it reads the merged config (is webshield enabled?),
    asks the init system whether the monitored services are running and
    sets up syslog/Sentry reporting. `run()` then restarts or stops the
    webshield depending on that state, and `check_wafd()` restarts wafd
    when it does not answer a probe request.
    """
    # Port of the webshield self-check endpoint
    port = 52224
    # Base HTTP timeout in seconds; multiplied by the attempt number
    request_timeout = 4
    # Timeout in seconds for service/rpm/dpkg/systemctl subprocess calls
    subprocess_timeout = 30
    # YAML config holding the WEBSHIELD.enable switch
    config_path = '/etc/sysconfig/imunify360/imunify360-merged.config'
    # User-Agent header sent with the self-check request
    user_agent = 'Webshield-watchdog-agent'
    # File containing the Sentry DSN
    sentry_dsn_path = '/usr/share/imunify360-webshield/sentry'
    # Package whose version is reported to Sentry as the release
    package_name = 'imunify360-webshield-bundle'
    # JSON file whose 'id' field is reported to Sentry as the user id
    license_path = '/var/imunify360/license.json'
    # File storing the Unix timestamp of the last handled failure
    flag_path = '/var/imunify360/webshield_broken'
    # When this file exists only `services_ws_only` are monitored
    integration_path = '/etc/sysconfig/imunify360/integration.conf'
    services_full = ('imunify360-webshield', 'imunify360-webshield-ssl-cache')
    services_ws_only = ('imunify360-webshield',)
    # File with the operation mode; 'nginx' and 'apache' mean non-standalone
    mode_flag_path = '/usr/share/imunify360-webshield/modularity_mode'
    # Socket path and binary used to probe wafd
    wafd_sock_path = "/var/run/imunify360/libiplists-daemon.sock"
    wafd_check_binary = "i360_wafd_check"

    def __init__(self):
        """
        Collect the state the watchdog decisions are based on.

        Propagates any exception raised by `_get_config_status()` when the
        merged config cannot be read or has an unexpected structure.
        """
        self.services = (self.services_ws_only if
            os.path.exists(self.integration_path) else self.services_full)
        self.is_enabled = self._get_config_status()
        self.is_running = self._get_current_status()
        self.sentry_dsn = self._get_dsn()
        self.log_level = logging.INFO
        self.logger = self._setup_logging()

    def _setup_logging(self):
        """
        Configure the syslog logger, initialise Sentry and return the logger.
        """
        logger = logging.getLogger('imunify360-webshield-watchdog')
        logger.setLevel(self.log_level)
        # getLogger() returns the same object on every call, so attach the
        # handler only once to avoid duplicated messages when more than one
        # Watchdog is created in the same process.
        if not logger.handlers:
            handler = logging.handlers.SysLogHandler('/dev/log')
            handler.setFormatter(logging.Formatter('%(name)s: %(message)s'))
            logger.addHandler(handler)
        self._init_sentry()
        return logger

    @classmethod
    def _get_server_id(cls):
        """
        Return the 'id' value from the license file, or 'none' when the file
        is unreadable, is not valid JSON or is not a JSON object.
        """
        try:
            with open(cls.license_path) as f:
                data = json.load(f)
        except Exception:
            return 'none'
        if not isinstance(data, dict):
            # e.g. a JSON list or string: there is no 'id' key to look up
            return 'none'
        return data.get('id', 'none')

    @classmethod
    def _get_dsn(cls):
        """
        Return the stripped content of the Sentry DSN file,
        or None if it cannot be read.
        """
        try:
            with open(cls.sentry_dsn_path) as f:
                return f.read().strip()
        except Exception:
            return None

    def _init_sentry(self):
        """
        Initialise the Sentry SDK with the DSN, the package version as the
        release and the server id as the user id.
        """
        sentry_sdk.init(dsn=self.sentry_dsn, release=self._imunify360_version())
        with configure_scope() as scope:
            scope.user = {'id': self._get_server_id()}

    @classmethod
    def _get_config_status(cls):
        """
        Return the value of WEBSHIELD.enable from the merged config
        (False when the section or the key is absent).

        Errors while reading or interpreting the file are deliberately not
        swallowed: reporting "disabled" for an unreadable config would make
        `run()` stop a running service.
        """
        with open(cls.config_path) as f:
            parsed_config = yaml.safe_load(f)
        if 'WEBSHIELD' not in parsed_config:
            return False
        return parsed_config["WEBSHIELD"].get('enable', False)

    def _service_is_active(self, service):
        """
        Return True if `service <name> status` exits with code 0.

        A timeout or a failure to start the command counts as "not active".
        """
        try:
            proc = subprocess.run(['service', service, 'status'],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=self.subprocess_timeout)
        except (OSError, subprocess.TimeoutExpired):
            return False
        return proc.returncode == 0

    def _get_current_status(self, attempts=3, wait=5):
        """
        Return True if every monitored service reports an OK status.

        The check is repeated up to `attempts` times with `wait` seconds
        between the attempts. In non-standalone mode False is returned
        without querying the services.
        """
        if not self.is_standalone():
            return False
        for attempt in range(attempts):
            if attempt:
                # Pause only between attempts, not after the last one
                time.sleep(wait)
            if all(self._service_is_active(s) for s in self.services):
                return True
        return False

    def _make_http_request(self, i):
        """
        Send one self-check request; `i` is the attempt number that scales
        the timeout. Return True if any HTTP response was received.
        """
        url = "http://0.0.0.0:{}/selfcheck?uuid={}".format(
            self.port, uuid.uuid4())
        curr_timeout = self.request_timeout * i
        try:
            requests.get(
                url,
                headers={'User-Agent': self.user_agent},
                allow_redirects=False,
                timeout=curr_timeout)
        except Exception:
            # Any failure (refused connection, timeout, ...) means "down"
            return False
        return True

    def _check_http_request(self):
        """
        Try the self-check request up to three times with a growing timeout
        and a 2 second pause between the attempts.
        """
        for attempt in range(1, 4):
            if attempt > 1:
                time.sleep(2)
            if self._make_http_request(attempt):
                return True
        return False

    def _call_service(self, action='restart'):
        """
        Run `service <first monitored service> <action>`.

        Return True on exit code 0, False on a non-zero exit code, a timeout
        or a failure to start the command.
        """
        service = self.services[0]
        try:
            proc = subprocess.run(
                ['service', service, action],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=self.subprocess_timeout)
        except (OSError, subprocess.TimeoutExpired):
            return False
        return proc.returncode == 0

    @classmethod
    def _collect_output(cls, cmd):
        """
        Run `cmd` and return its stdout as text, or '' if the command cannot
        be started, times out or exits with a non-zero code.
        """
        try:
            cp = subprocess.run(
                cmd,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                timeout=cls.subprocess_timeout)
        except (OSError, subprocess.TimeoutExpired):
            return ''
        if cp.returncode != 0:
            return ''
        # Undecodable bytes are replaced rather than raising
        return cp.stdout.decode(errors='replace')

    @classmethod
    def _get_rpm_version(cls):
        """
        Return '<version>-<release>' of the package as reported by rpm,
        or '' if the query fails.
        """
        cmd = ['rpm', '-q', '--queryformat=%{VERSION}-%{RELEASE}',
               cls.package_name]
        return cls._collect_output(cmd)

    @classmethod
    def _get_dpkg_version(cls):
        """
        Return the value of the 'Version:' line of `dpkg --status` output
        for the package, or None if it is not available.
        """
        cmd = ['dpkg', '--status', cls.package_name]
        out = cls._collect_output(cmd)
        for line in out.splitlines():
            if line.startswith("Version:"):
                fields = line.split()
                if len(fields) > 1:
                    return fields[1]
        return None

    @classmethod
    def _imunify360_version(cls):
        """
        Return the package version from rpm, falling back to dpkg.
        """
        return cls._get_rpm_version() or cls._get_dpkg_version()

    @classmethod
    def _get_flag_timestamp(cls):
        """
        Return the integer timestamp stored in the flag file,
        or None if the file is missing or its content is not an integer.
        """
        try:
            with open(cls.flag_path) as o:
                return int(o.read().strip())
        except Exception:
            return None

    @classmethod
    def _put_flag_timestamp(cls):
        """
        Store the current Unix time in the flag file (best effort).
        """
        tms = int(time.time())
        try:
            with open(cls.flag_path, 'w') as w:
                w.write("{}".format(tms))
        except Exception:
            # Best effort: failing to write the flag must not stop the run
            pass

    @classmethod
    def _set_flag(cls):
        """
        Create or refresh the flag file if it is absent or at least 24 hours
        old. Return True if the flag was (re)written, False otherwise.
        """
        tms = cls._get_flag_timestamp()
        if not tms or time.time() - tms >= 86400:    # 24h
            cls._put_flag_timestamp()
            return True
        return False

    @classmethod
    def _remove_flag_if_exists(cls):
        """
        Delete the flag file. Return True if it was deleted, False if it did
        not exist or could not be removed.
        """
        try:
            os.unlink(cls.flag_path)
        except OSError:
            # Covers both "no such file" and "cannot remove"
            return False
        return True

    def run(self):
        """
        Bring the webshield service in line with its configured state.

        * enabled and running: probe it over HTTP; restart it if the probe
          fails and the flag was (re)written, drop the flag when it works;
        * enabled but not running: restart it if the flag was (re)written;
        * disabled but running: stop it;
        * disabled and not running: just log that everything is fine.
        """
        service = self.services[0]
        if self.is_enabled and self.is_running:
            if self._check_http_request():
                if self._remove_flag_if_exists():    # File has been deleted
                    self.logger.info('%s is resumed.', service)
            elif self._set_flag():    # File has been created or updated
                self.logger.error('%s is inaccessible', service)
                self._call_service('restart')
            return
        if self.is_enabled:
            if self._set_flag():
                self.logger.error('%s is not running. Restart.', service)
                self._call_service('restart')
            return
        if self.is_running:
            self.logger.warning(
                    '%s is running while being disabled. Stopping...',
                    service)
            self._call_service('stop')
            return
        self.logger.info('%s is disabled. OK', service)

    def check_wafd(self):
        """
        The wafd is expected to be running by all means
        because not only the webshield is dependent on it.
        We call small wafd utility to check wafd is responsive.
        Otherwise we'll try to restart wafd.
        """
        check_ip = "93.89.215.4"
        cmd = [self.wafd_check_binary, "-path", self.wafd_sock_path, check_ip]
        try:
            p = subprocess.run(cmd, check=True, timeout=2, capture_output=True)
        except Exception:
            # On any exception we just fall through to restart wafd
            pass
        else:
            # Never let undecodable output raise outside of the try block
            out = p.stdout.decode("utf-8", errors="replace")
            if "Response" in out and check_ip in out and "status: 0" in out:
                # We got a sensible response, so wafd is running and
                # responsive. Nothing to do, return
                return
        # If we got here it means that wafd is not responsive.
        # Trying to restart it; the timeout keeps the watchdog from hanging
        # on a stuck systemctl call.
        cmd = ["systemctl", "restart", "imunify360-wafd"]
        try:
            subprocess.run(cmd, check=True, timeout=self.subprocess_timeout)
        except Exception as e:
            self.logger.error("Failed to restart wafd: %s", e)

    @classmethod
    def is_standalone(cls):
        """
        Return False if the mode file contains 'nginx' or 'apache',
        True otherwise (including when the file cannot be read).
        """
        try:
            with open(cls.mode_flag_path) as f:
                mode = f.read().strip()
        except Exception:
            # A file read error we treat as standalone mode
            return True
        return mode not in ('nginx', 'apache')

    @classmethod
    def is_apache2nginx_mode(cls):
        """
        Return True if the apache2nginx state file is readable and its
        content is anything but 'off'.
        """
        try:
            return Path('/var/lib/apache2nginx/state').read_text().strip() != 'off'
        except Exception:
            return False

    @staticmethod
    def is_nginx_server() -> bool:
        """
        Return True if integration.conf sets [web_server] server_type
        to 'nginx'.
        """
        try:
            cfg = ConfigParser()
            cfg.read('/etc/sysconfig/imunify360/integration.conf')
            return cfg.get('web_server', 'server_type', fallback='') == 'nginx'
        except Exception:
            pass
        return False

    @staticmethod
    def is_cloudways() -> bool:
        """
        Return True if the FQDN ends with a Cloudways domain or the output
        of `/usr/local/sbin/apm info` mentions 'Cloudways'.

        Any failure (missing binary, non-zero exit, timeout) yields False.
        """
        timeout = Watchdog.subprocess_timeout
        try:
            hostname = subprocess.check_output(
                ['hostname', '-f'], text=True, timeout=timeout).strip()
            if hostname.endswith(('.cloudwaysapps.com', '.cloudwaysstagingapps.com')):
                return True
            output = subprocess.check_output(
                ['/usr/local/sbin/apm', 'info'], text=True, timeout=timeout)
            return 'Cloudways' in output
        except Exception:
            pass
        return False

    @classmethod
    def is_force_use_coraza(cls) -> bool:
        """
        Return True when the server is an nginx one hosted on Cloudways.
        """
        # This code is a ported copy-paste from Go agent.
        return cls.is_nginx_server() and cls.is_cloudways()
if __name__ == '__main__':
    watchdog = Watchdog()
    # wafd is checked unconditionally, before deciding whether the
    # webshield itself has to be looked at.
    watchdog.check_wafd()
    # The webshield check is skipped in non-standalone mode, when Coraza is
    # forced and in apache2nginx mode. The checks are evaluated in this
    # order and stop at the first one that applies.
    skip_webshield_check = (
        not Watchdog.is_standalone()
        or Watchdog.is_force_use_coraza()
        or Watchdog.is_apache2nginx_mode()
    )
    if skip_webshield_check:
        sys.exit()
    watchdog.run()