#!/bin/sh
#
# cfengine3
#
#
# Created by Nakarin Phooripoom on 22/6/2011.
# Copyright 2021 Northern.tech AS.
#
### BEGIN INIT INFO
# Provides:          cfengine3
# Required-Start:    $local_fs $remote_fs $network $time
# Required-Stop:     $local_fs $remote_fs $network
# Default-Start:     2 3 4 5
# Default-Stop:      0 1 6
# Short-Description: GNU configuration engine
# Description:       Tool for configuring and maintaining network machines
### END INIT INFO
#
# chkconfig: 2345 60 40
# description: Starts the cfengine daemons for remote and periodic \
#    execution of cfengine and for environment monitoring.
# set -e

#
# Detect and replace non-POSIX shell
#
# Replace the current shell process with the given command line, provided the
# program named by $1 can be found by "type".
# Arguments: $1 - program to look up; "$@" - full command line to exec.
# Returns:   only when $1 was not found, with the failing status of "type".
try_exec() {
    # Bail out early (propagating the lookup status) when there is nothing to
    # exec; otherwise this call does not return.
    type "$1" > /dev/null 2>&1 || return
    exec "$@"
}

# Probe whether the running shell supports "local".
# Returns 0 (shell is "broken") when "local foo=1" did not leave foo set to 1,
# and 1 when the shell behaves as expected.
broken_posix_shell()
{
    unset foo
    local foo=1
    if [ "$foo" = "1" ]; then
        # "local" worked, the shell is usable.
        return 1
    fi
    return 0
}

# If the current interpreter lacks the features this script needs, re-run the
# script under /usr/xpg4/bin/sh. try_exec only returns when that interpreter
# is not available, in which case we give up with exit code 42.
if broken_posix_shell >/dev/null 2>&1; then
    try_exec /usr/xpg4/bin/sh "$0" "$@"
    # Diagnostics belong on stderr, like the other error messages in this file.
    echo "No compatible shell script interpreter found." >&2
    echo "Please find a POSIX shell for your system." >&2
    exit 42
fi

##### start: Defined variables and conditions #####

# Installation prefix; the CFTEST_* environment variables override the default
# locations of the prefix and of the individual binaries.
PREFIX=${CFTEST_PREFIX:-/var/cfengine}
# NOTE(review): "default" looks like a placeholder that a build step may
# replace with a custom inputs directory -- confirm. When left untouched the
# inputs directory lives under $PREFIX.
INPUTDIR=default
if [ "$INPUTDIR" = "default" ]; then
    INPUTDIR="$PREFIX/inputs"
fi

CFEXECD=${CFTEST_CFEXECD:-$PREFIX/bin/cf-execd}
CFSERVD=${CFTEST_CFSERVD:-$PREFIX/bin/cf-serverd}
CFMOND=${CFTEST_CFMOND:-$PREFIX/bin/cf-monitord}
CFAGENT=${CFTEST_CFAGENT:-$PREFIX/bin/cf-agent}

# printf template; %s is replaced by the daemon's base name.
PIDFILE_TEMPLATE=$PREFIX/%s.pid

# Maximum number of stop rounds before giving up.
STOP_ATTEMPTS=6
# From which point we start using SIGKILL.
KILL_THRESHOLD=4
# Set to 1 to make the stop functions use SIGKILL right away.
FORCE_KILL=0

CFENTERPRISE_INITD=$PREFIX/bin/cfengine3-nova-hub-init-d.sh

PATH=/sbin:/bin:/usr/sbin:/usr/bin:$PREFIX/bin

# ensure that we use RPATH/loader entries from our binaries and not from the environment
unset LIBPATH		# AIX < 5.3
unset LD_LIBRARY_PATH

# Has the package been 'removed' but not purged?
# Quoted so that an overridden path containing whitespace is tested as one
# file name instead of being word-split.
test -f "$CFEXECD" || exit 0

# Distribution flags; every one stays 0 unless its markers are found below.
SUSE=0
REDHAT=0
DEBIAN=0

# Both RPM-based families are only recognised when /var/lock/subsys exists.
if [ -d /var/lock/subsys ]; then
    # SuSE
    if [ -f /usr/bin/zypper ]; then
        SUSE=1
    fi

    # RedHat4/RedHat5/CentOS/Fedora
    if [ -f /usr/bin/yum ] || [ -f /usr/sbin/up2date ]; then
        REDHAT=1
    fi
fi

# Debian/Ubuntu: prefer /run/lock for lock files when it is available.
if [ -f /etc/debian_version ] && [ -f /usr/bin/apt-get ]; then
    DEBIAN=1
    LOCKDIR=/var/lock
    if [ -d /run/lock ]; then
        LOCKDIR=/run/lock
    fi
fi

# Everything else falls back to /var/lock unless LOCKDIR is already set.
: "${LOCKDIR:=/var/lock}"

# Pick the per-distribution defaults file. On Debian-like systems the LSB
# init function library is loaded as well when it is installed.
case "$DEBIAN" in
    1)
        DEFAULT=/etc/default/cfengine3
        INIT_FUNCTIONS=/lib/lsb/init-functions
        if [ -e "$INIT_FUNCTIONS" ]; then
            . "$INIT_FUNCTIONS"
        fi
        ;;
    *)
        DEFAULT=/etc/sysconfig/cfengine3
        ;;
esac

# Without a defaults file all three core daemons are enabled; with one, the
# file itself decides which RUN_CF_* switches are set.
if ! [ -f "$DEFAULT" ]; then
    RUN_CF_EXECD=1
    RUN_CF_SERVERD=1
    RUN_CF_MONITORD=1
else
    . "$DEFAULT"
fi

# SUSE: PROC=1 when startproc and the rc.status helpers are both available.
if [ ! -x /sbin/startproc ] || [ ! -f /etc/rc.status ]; then
    PROC=0
else
    . /etc/rc.status
    rc_reset
    PROC=1
fi

# Redhat/Centos/Fedora: FUNC=1 when the init function library was sourced.
if [ ! -f /usr/bin/yum ] || [ ! -f /etc/rc.d/init.d/functions ]; then
    FUNC=0
else
    . /etc/rc.d/init.d/functions
    FUNC=1
fi

# Debian/Ubuntu: SSD=1 when start-stop-daemon is usable.
SSD=0
if [ -x /sbin/start-stop-daemon ]; then
    SSD=1
fi

# Sentinel values: cf_get_matching_pids replaces them with real values the
# first time it needs them.
INSIDE_CONTAINER=-1
CURRENT_PS_UID=__none__

# There is a bug in SUSE which means that ps output will be truncated even
# when piped to grep, if the terminal size is small. However using --cols
# will override it. NOTE: On Suse 12 and 15, `ps` mentions `--cols` only in
# `ps --help output`, hence the second probe.
PS_OPTIONS=
if ps --help 2>/dev/null | egrep -e '--cols\b' > /dev/null; then
    PS_OPTIONS="--cols 200"
elif ps --help output 2>/dev/null | egrep -e '--cols\b' > /dev/null; then
    PS_OPTIONS="--cols 200"
fi

# Kernel name, used to select platform-specific process lookup.
OS=$(uname)

##### end: Defined variables and conditions #####

##### start: Defined functions #####

# Takes a process string to match and returns a list of PIDs.
# Argument can optionally be preceded by "--ignore-user" to disable matching of
# the calling user's UID.
#
# Arguments: [--ignore-user] PATTERN
#            PATTERN is interpolated into an egrep expression on most systems
#            and passed to "pgrep -f" on SunOS.
# Globals:   OS, PS_OPTIONS (read); CURRENT_PS_UID, INSIDE_CONTAINER (read, and
#            computed here while they still hold their sentinel values).
# Outputs:   the matching PIDs on stdout, one per line.
cf_get_matching_pids()
{
    # "ps" is an extremely incompatible tool, with differing options and output
    # format almost everywhere. However, some things we can rely on: "ps -ef"
    # seems to be supported everywhere, and we can usually rely on UID being the
    # first field, PID and PPID the second and third, and command line the last
    # field. In between there is a mess, and we stay away from it. We use this
    # to match against the UID of the calling user.
    #
    # We also do some much more non-portable operations to try to avoid
    # processes inside containers.

    # Look up our own line in "ps -ef" (second field equal to $$) and remember
    # its first field, i.e. the user as "ps" prints it.
    if [ "$CURRENT_PS_UID" = "__none__" ]; then
        CURRENT_PS_UID="$(ps $PS_OPTIONS -ef |egrep "^ *[^ ]+ +$$ "|awk -F' ' '{print $1}')"
    fi

    if [ $INSIDE_CONTAINER = -1 ]; then
        # Are we inside a container?
        # The sed expression strips the well-known systemd slice/scope/service
        # path components; if the "devices" cgroup path still has something
        # after the leading "/", we treat ourselves as being in a container.
        if [ -f /proc/self/cgroup ] && sed 's_/\(-\|system\|user\).slice__;s_/user-[0-9]*.slice__;s_/session-[0-9]*.scope__;s_/cfengine3.service__' /proc/self/cgroup | egrep '[0-9]+:devices:/[^:]' > /dev/null; then
            INSIDE_CONTAINER=1
        else
            INSIDE_CONTAINER=0
        fi
    fi

    local ignore_user=0
    if [ "$1" = "--ignore-user" ]; then
        ignore_user=1
        shift
    fi

    case "$OS" in
        SunOS)
            # pgrep does the matching here; -U restricts it to our user unless
            # --ignore-user was given.
            local user_flag=-U
            local user_string="$CURRENT_PS_UID"
            if [ $ignore_user = 1 ]; then
                user_flag=
                user_string=
            fi

            # When zonename is available, only look at processes of our own zone.
            if [ -x /usr/bin/zonename ];then
                /usr/bin/pgrep -f -z "$(/usr/bin/zonename)" $user_flag "$user_string" "$1"
            else
                /usr/bin/pgrep -f $user_flag "$user_string" "$1"
            fi
            ;;
        *)
            local user_string="$CURRENT_PS_UID"
            if [ $ignore_user = 1 ]; then
                # Match any user.
                user_string="[^ ]+"
            fi

            # Candidate PIDs: lines of "ps -ef" (minus anything mentioning
            # grep) whose first field matches the user and which contain the
            # pattern; the PID is the second field.
            local pids="$(ps $PS_OPTIONS -ef|grep -v grep|egrep "^ *$user_string .*$1"|awk -F' ' '{print $2}')"

            # Only report processes that are on the same side of the container
            # boundary as we are.
            for pid in $pids; do
                # If the last character of the cgroup is not the root directory,
                # then this is a container process.
                if [ -f /proc/$pid/cgroup ] && sed 's_/\(-\|system\|user\).slice__;s_/user-[0-9]*.slice__;s_/session-[0-9]*.scope__;s_/cfengine3.service__;s_/cf-execd.service__;s_/cf-serverd.service__;s_/cf-monitord.service__' /proc/$pid/cgroup | egrep '[0-9]+:devices:/[^:]' > /dev/null; then
                    if [ $INSIDE_CONTAINER = 1 ]; then
                        echo $pid
                    fi
                else
                    if [ $INSIDE_CONTAINER = 0 ]; then
                        echo $pid
                    fi
                fi
            done
            ;;
    esac
}

# Print the PID stored in a PID file.
# Arguments: $1 - path of the PID file.
# Outputs:   the PID (without surrounding blanks) on stdout; nothing when the
#            file does not exist; a diagnostic on stderr when any line of the
#            file is not a number optionally padded with spaces.
cf_pidfile_pid()
{
    local pidfile="$1"
    if egrep -v '^ *[0-9]+ *$' "$pidfile" > /dev/null 2>&1; then
        echo "Malformed PID file: $pidfile." 1>&2
    elif [ -f "$pidfile" ]; then
        # The check above tolerates padding spaces, but callers compare the
        # value against bare PIDs, so strip the padding before reporting it.
        tr -d ' ' < "$pidfile"
    fi
}

# Print the PID(s) of a daemon.
# Arguments: $1 - path (or name) of the daemon binary.
# Outputs:   the PID from the PID file when it is among the matching
#            processes; otherwise all matching PIDs. A PID file whose PID is
#            not among the matching processes is deleted as a leftover.
cf_get_daemon_pid()
{
    local name="$(basename "$1")"
    local pid_path="$(printf "$PIDFILE_TEMPLATE" "$name")"
    local recorded
    local running

    recorded="$(cf_pidfile_pid "$pid_path")"
    running="$(cf_get_matching_pids "$name")"

    # No usable PID file: the process list is all we have.
    if [ -z "$recorded" ]; then
        echo "$running"
        return
    fi

    # The recorded PID belongs to a live matching process: trust it.
    if echo "$running" | grep "^$recorded\$" >/dev/null; then
        echo "$recorded"
        return
    fi

    # Stale PID file: remove it and fall back to the process list.
    rm -f "$pid_path"
    echo "$running"
}

# Start one daemon using the platform's preferred launcher.
# Arguments: $1 - path of the daemon binary; remaining arguments are passed
#            on to the daemon.
# Globals:   REDHAT, SUSE, DEBIAN, FUNC, PROC, SSD, LOCKDIR, PIDFILE_TEMPLATE
#            (read).
# Outputs:   a "Starting <name>..." progress line, plus whatever the launcher
#            and the daemon print.
cf_start_daemon()
{
    local daemon="$1"
    shift
    local base_daemon="$(basename "$daemon")"

    # printf instead of "echo -n": the latter is not portable across the
    # /bin/sh implementations this script may run under.
    printf 'Starting %s...' "$base_daemon"

    # Redhat/Centos/Fedora
    if [ "$REDHAT" = "1" ] && [ "$FUNC" = "1" ]; then
        # "daemon" here is the launcher function from the sourced init
        # function library, not the local variable of the same name.
        daemon "$daemon" "$@"
        cf_touch "/var/lock/subsys/$base_daemon"
        cf_touch "/var/lock/subsys/cfengine3"
    elif [ "$REDHAT" = "1" ] && [ "$FUNC" = "0" ]; then
        "$daemon" "$@"
        cf_touch "/var/lock/subsys/$base_daemon"
        cf_touch "/var/lock/subsys/cfengine3"

    # SUSE
    elif [ "$SUSE" = "1" ] && [ "$PROC" = "1" ]; then
        /sbin/startproc "$daemon" "$@"
        rc_status -v
        cf_touch "/var/run/$base_daemon.pid"
    elif [ "$SUSE" = "1" ] && [ "$PROC" = "0" ]; then
        "$daemon" "$@"
        cf_touch "/var/run/$base_daemon.pid"

    # Debian/Ubuntu
    elif [ "$DEBIAN" = "1" ] && [ "$SSD" = "1" ]; then
        /sbin/start-stop-daemon --start --pidfile "$(printf "$PIDFILE_TEMPLATE" "$base_daemon")" --exec "$daemon" -- "$@"
        cf_touch "$LOCKDIR/$base_daemon"
    elif [ "$DEBIAN" = "1" ] && [ "$SSD" = "0" ]; then
        "$daemon" "$@"
        cf_touch "$LOCKDIR/$base_daemon"

    # All others
    else
        "$daemon" "$@"
    fi

    # Terminate the progress line.
    echo
}

# Stop one daemon, escalating from SIGTERM to SIGKILL.
# Arguments: $1 - path of the daemon binary.
# Globals:   STOP_ATTEMPTS, KILL_THRESHOLD, FORCE_KILL, PIDFILE_TEMPLATE,
#            REDHAT, SUSE, DEBIAN, FUNC, PROC, SSD, LOCKDIR (read).
# Returns:   0 if any daemon was attempted killed, 1 otherwise.
cf_stop_daemon()
{
    local daemon="$1"
    local base_daemon="$(basename "$daemon")"
    local pidfile="$(printf "$PIDFILE_TEMPLATE" "$base_daemon")"
    local pids
    local pid
    local ret=1

    # "$daemon" is quoted everywhere so that a path containing whitespace is
    # passed on as a single argument.
    pids="$(cf_get_daemon_pid "$daemon")"

    if [ -z "$pids" ]; then
        return 1
    fi

    echo "Shutting down $base_daemon..."

    local iter=1
    local signal=TERM
    while [ $iter -le $STOP_ATTEMPTS ] && [ -n "$pids" ]; do
        ret=0

        # Once escalated, the signal stays KILL for the remaining attempts.
        if [ $iter -ge $KILL_THRESHOLD ] || [ $FORCE_KILL = 1 ]; then
            signal=KILL
        fi

        if [ $iter -ge 2 ]; then
            # Give the daemon some time to do its cleanup and actually
            # terminate. Then check its state again.
            sleep 2
            pids="$(cf_get_daemon_pid "$daemon")"
        fi

        if [ "$signal" != "KILL" ] && [ -f "$pidfile" ] && [ "$DEBIAN" = "1" ] && [ "$SSD" = "1" ]; then
            # Debian/Ubuntu with start-stop-daemon.
            /sbin/start-stop-daemon -o --stop --pidfile "$pidfile" --retry 5 --name "$base_daemon" >/dev/null 2>&1
        else
            # All others, or if there is no pidfile, or if using SIGKILL.
            for pid in $pids; do
                kill -$signal "$pid" >/dev/null 2>&1
                if [ "$signal" = "KILL" ]; then
                    # If using SIGKILL, the daemons won't be able to do this themselves.
                    rm -f "$pidfile"
                fi
            done
        fi

        pids="$(cf_get_daemon_pid "$daemon")"
        iter=$(($iter+1))
    done

    # Remove the platform-specific lock/marker files that cf_start_daemon
    # created for this daemon.

    # Redhat/Centos/Fedora
    if [ "$REDHAT" = "1" ] && [ "$FUNC" = "1" ]; then
        if [ -f "/var/lock/subsys/$base_daemon" ]; then
            cf_rm -f "/var/lock/subsys/$base_daemon"
            cf_rm -f "/var/lock/subsys/cfengine3"
        fi
    elif [ "$REDHAT" = "1" ] && [ "$FUNC" = "0" ]; then
        if [ -f "/var/lock/subsys/$base_daemon" ]; then
            cf_rm -f "/var/lock/subsys/$base_daemon"
            cf_rm -f "/var/lock/subsys/cfengine3"
        fi

    # SUSE
    elif [ "$SUSE" = "1" ] && [ "$PROC" = "1" ]; then
        rc_status -v
        if [ -f "/var/run/$base_daemon.pid" ]; then
            cf_rm "/var/run/$base_daemon.pid"
        fi
    elif [ "$SUSE" = "1" ] && [ "$PROC" = "0" ]; then
        if [ -f "/var/run/$base_daemon.pid" ]; then
            cf_rm "/var/run/$base_daemon.pid"
        fi

    # Debian/Ubuntu
    elif [ "$DEBIAN" = "1" ] && [ "$SSD" = "1" ]; then
        if [ -f "$LOCKDIR/$base_daemon" ]; then
            cf_rm "$LOCKDIR/$base_daemon"
        fi
    elif [ "$DEBIAN" = "1" ] && [ "$SSD" = "0" ]; then
        if [ -f "$LOCKDIR/$base_daemon" ]; then
            cf_rm "$LOCKDIR/$base_daemon"
        fi
    fi

    return $ret
}

# Kill every running agent process ($CFAGENT), escalating from SIGTERM to
# SIGKILL.
# Globals: CFAGENT, STOP_ATTEMPTS, KILL_THRESHOLD, FORCE_KILL (read).
# Returns: 0 if agents were found, 1 if not.
cf_stop_agents()
{
    local not_found=1
    local ret=1
    local signal=TERM
    local iter=1
    local pid

    while [ $iter -le $STOP_ATTEMPTS ]; do
        if [ $iter -ge $KILL_THRESHOLD ] || [ $FORCE_KILL = 1 ]; then
            signal=KILL
        fi

        # NOTE(review): no pause happens before the second attempt, only from
        # the third one on -- confirm this is intended.
        if [ $iter -gt 2 ]; then
            # If we failed at first, give things time to settle.
            sleep 2
        fi

        not_found=1
        for pid in $(cf_get_matching_pids "$CFAGENT"); do
            not_found=0
            ret=0
            # An agent may exit between the lookup and the kill; that race is
            # harmless, so keep its error message off the console.
            kill -$signal "$pid" >/dev/null 2>&1
        done

        # Nothing left to kill: we are done.
        if [ $not_found = 1 ]; then
            break
        fi

        iter=$(($iter+1))
    done

    return $ret
}

# Report whether a daemon is running and warn about inconsistent state.
# Arguments: $1 - path of the daemon binary.
# Globals:   PIDFILE_TEMPLATE, REDHAT, SUSE, FUNC, PROC (read).
# Outputs:   status line and optional warnings on stdout.
# Returns:   3 when no matching process exists; otherwise the status of the
#            last reporting command.
cf_status_daemon()
{
    local daemon="$1"
    local base_daemon="$(basename "$daemon")"
    local pidfile="$(printf "$PIDFILE_TEMPLATE" "$base_daemon")"
    local daemon_status="$(cf_pidfile_pid "$pidfile")"
    local pids="$(cf_get_matching_pids "$base_daemon")"

    # Compare whole lines, as cf_get_daemon_pid does: an unanchored match
    # would accept a stale PID such as 12 because it is a substring of a
    # running PID such as 123, and an empty value would match anything.
    if [ -f "$pidfile" ] && ! echo "$pids" | grep "^$daemon_status\$" > /dev/null; then
        echo "Warning: PID file '$pidfile' does not contain the right PID ('$pids' != '$daemon_status'). Should restart CFEngine."
    fi

    # httpd and cf-hub are expected to have multiple pids, skip detection of
    # multiple pids to avoid false alarms

    if [ "${base_daemon}" != "httpd" ] && [ "${base_daemon}" != "cf-hub" ]; then
        # Lack of quotes around $pids is important to eliminate newlines:
        # several PIDs then end up separated by spaces, which the pattern
        # below detects as a non-digit character.
        if [ -n "$(echo $pids | grep '[^0-9]')" ]; then
            echo "Warning: Multiple $base_daemon processes present. Should restart CFEngine."
        fi
    fi

    if [ -n "$pids" ]; then
        if [ "$REDHAT" = "1" ] && [ "$FUNC" = "1" ]; then
            status "$daemon"
        elif [ "$SUSE" = "1" ] && [ "$PROC" = "1" ]; then
            printf 'Checking for %s ' "$base_daemon"
            checkproc "$daemon"
            rc_status -v
        else
            echo "$base_daemon is running..."
        fi
    else
        echo "$base_daemon is not running"
        return 3
    fi
}

# Start each core daemon whose RUN_CF_* switch is exactly "1".
# Globals: RUN_CF_EXECD, RUN_CF_SERVERD, RUN_CF_MONITORD, CFEXECD, CFSERVD,
#          CFMOND (read).
cf_start_core_daemons()
{
    # cf-execd
    case "$RUN_CF_EXECD" in
        1) cf_start_daemon "$CFEXECD" ;;
    esac

    # cf-serverd
    case "$RUN_CF_SERVERD" in
        1) cf_start_daemon "$CFSERVD" ;;
    esac

    # cf-monitord
    case "$RUN_CF_MONITORD" in
        1) cf_start_daemon "$CFMOND" ;;
    esac
}

# Stop all agents and enabled core daemons, repeating up to STOP_ATTEMPTS
# rounds until a full round finds nothing left to stop.
# Globals: STOP_ATTEMPTS, KILL_THRESHOLD, RUN_CF_*, CFEXECD, CFSERVD, CFMOND
#          (read); FORCE_KILL (set to 1 once KILL_THRESHOLD is reached).
cf_stop_core_daemons()
{
    # Killing a daemon can race with an agent that is just being spawned and
    # that in turn brings the daemon back. Hence every round kills agents,
    # then daemons, then agents again, and a round only counts as final when
    # none of those steps found a process.

    local attempt=1
    local found_something

    while [ $attempt -le $STOP_ATTEMPTS ]; do
        found_something=0

        # Every round but the first is preceded by a short pause.
        if [ $attempt -gt 1 ]; then
            sleep 2
        fi

        # The stop helpers escalate to SIGKILL on their own after a while,
        # but if each kill merely results in a new daemon or agent we have to
        # force it from here.
        if [ $attempt -ge $KILL_THRESHOLD ]; then
            FORCE_KILL=1
        fi

        # Agents first: they may restart the daemons behind our back.
        if cf_stop_agents; then
            found_something=1
        fi

        # cf-execd
        if [ "$RUN_CF_EXECD" = "1" ] && cf_stop_daemon "$CFEXECD"; then
            found_something=1
        fi

        # cf-serverd
        if [ "$RUN_CF_SERVERD" = "1" ] && cf_stop_daemon "$CFSERVD"; then
            found_something=1
        fi

        # cf-monitord
        if [ "$RUN_CF_MONITORD" = "1" ] && cf_stop_daemon "$CFMOND"; then
            found_something=1
        fi

        # Agents again, in case some were spawned while the daemons were
        # being killed.
        if cf_stop_agents; then
            found_something=1
        fi

        if [ $found_something = 0 ]; then
            break
        fi

        attempt=$(($attempt+1))
    done
}

# Print the status of each enabled core daemon.
# Globals: RUN_CF_EXECD, RUN_CF_SERVERD, RUN_CF_MONITORD, CFEXECD, CFSERVD,
#          CFMOND (read).
# Returns: 3 if cf_status_daemon returned 3 for at least one enabled daemon,
#          0 otherwise.
cf_status_core_daemons()
{
    local overall=0

    # cf-execd
    case "$RUN_CF_EXECD" in
        1)
            cf_status_daemon "$CFEXECD"
            case "$?" in
                3) overall=3 ;;
            esac
            ;;
    esac

    # cf-serverd
    case "$RUN_CF_SERVERD" in
        1)
            cf_status_daemon "$CFSERVD"
            case "$?" in
                3) overall=3 ;;
            esac
            ;;
    esac

    # cf-monitord
    case "$RUN_CF_MONITORD" in
        1)
            cf_status_daemon "$CFMOND"
            case "$?" in
                3) overall=3 ;;
            esac
            ;;
    esac

    return $overall
}

# "touch" wrapper that does nothing (successfully) when CFTEST_PREFIX is set.
# Arguments: passed on to touch unchanged.
cf_touch()
{
    # Not during testing.
    [ -n "$CFTEST_PREFIX" ] && return 0
    touch "$@"
}

# "rm" wrapper that does nothing (successfully) when CFTEST_PREFIX is set.
# Arguments: passed on to rm unchanged.
cf_rm()
{
    # Not during testing.
    [ -n "$CFTEST_PREFIX" ] && return 0
    rm "$@"
}

##### end: Defined functions #####

##### Source enterprise script, if present #####

# Load the enterprise helper script when it is installed; it is expected to
# provide enterprise_start, enterprise_stop and enterprise_status.
if [ -f "$CFENTERPRISE_INITD" ]; then
    ENTERPRISE_SERVER=1
    CALLED_FROM_INITSCRIPT="1" . "$CFENTERPRISE_INITD"
else
    ENTERPRISE_SERVER=0
    # No-op stand-ins. These are functions rather than aliases because
    # aliases are not expanded by every non-interactive shell (e.g. bash
    # outside POSIX mode), which would turn the calls below into
    # "command not found" errors.
    enterprise_start() { true; }
    enterprise_stop() { true; }
    enterprise_status() { true; }
fi

### start scripting here ###

# Dispatch on the requested init action ($1).
case "$1" in
    start)
        # Quoted: INPUTDIR derives from a prefix that can be overridden and
        # may contain whitespace.
        if ! [ -f "$INPUTDIR/promises.cf" ]; then
            # Prevent the enterprise scripts from launching any non-web
            # related stuff when we are not bootstrapped.
            export CF_ONLY_WEB_SERVICES=1
        fi

        # Start Enterprise services.
        enterprise_start

        if [ "$CF_ONLY_WEB_SERVICES" = "1" ]; then

            if [ "$ENTERPRISE_SERVER" = "1" ]; then
                # Enterprise servers automatically run "start" as part of package installation,
                # at which point they don't have policy in inputs/, because they are not bootstrapped.
                # This is not an error, like for community or enterprise clients (case below).
                exit 0
            fi

            # On hosts we do not start any process when there is no
            # policy, so indicate this as an error.
            echo "No policy found in $INPUTDIR/promises.cf, not starting core daemons" >&2
            exit 1
        fi

        cf_start_core_daemons

        exit 0
        ;;
    stop)
        if [ "$CF_ONLY_WEB_SERVICES" != "1" ]; then
            cf_stop_core_daemons
        fi

        # Stop Enterprise services.
        enterprise_stop

        exit 0
        ;;
    status)
        if [ "$CF_ONLY_WEB_SERVICES" = "1" ]; then
            exit 0
        fi

        # Status Enterprise services.
        enterprise_status

        cf_status_core_daemons

        # Propagate the status of the core daemons (3 when one is not running).
        exit $?
        ;;
    restart|reload|force-reload)
        # Re-invoke this script; "$0" is quoted so that a script path with
        # whitespace still resolves to a single command word.
        "$0" stop
        "$0" start
        ;;
    *)
        echo "Usage: $0 {start|stop|status|restart|reload|force-reload}" >&2
        exit 1
        ;;
esac
