# shellcheck shell=bash
# only allow this script to be sourced, by cf-support.sh, and not run directly
# as it relies on functions defined in cf-support.sh

# Here we use a bash quirk which allows return to be called outside of a function in a sourced file,
# and a subshell so that the return does not actually leave the sourcing script.
# If this file is executed directly, calling return results in an error and a
# non-zero status, so the check below prints a hint and exits.
# `return` only succeeds inside a function or a sourced file; probing it in a
# subshell tells us which case we are in without leaving the sourcing script.
(return 0 2>/dev/null) || {
  echo "run /var/cfengine/bin/cf-support instead of this file"
  exit 1
}

# Make sure we really are on a hub before collecting anything.
# This script is only meant to be shipped in hub packages, but verify anyway:
# the httpd directory under /var/cfengine is used as the marker.
[[ -d /var/cfengine/httpd ]] || {
  echo "It seems that this host is not a CFEngine Enterprise Hub. Exiting."
  exit 1
}

# Log the "summary of access promises" section printed by a verbose foreground
# cf-serverd run, but only when a cf-serverd process shows up in the process list.
#
# The grep pattern is quoted on purpose: unquoted, the shell would glob-expand
# cf-serve[rd] if a file named cf-served or cf-serverd exists in the current
# directory, and grep would then match its own entry in the ps output, making
# this check always succeed. The bracket expression is what keeps grep from
# matching itself. Matching ps lines are still printed to stdout as before.
if ps -efl | grep 'cf-serve[rd]'; then
  log_cmd "cf-serverd -Fv | awk '/=== BEGIN summary of access promises ===/,/=== END summary of access promises ===/'"
else
  echo "Could not get summary of access promises. cf-serverd must be running and it was not."
fi

# Collect all the PostgreSQL log files (current and rotated)
for f in /var/log/postgresql.log*; do
    # If nothing matches, bash leaves the pattern unexpanded; do not hand
    # the literal pattern to gzip_add.
    [ -e "$f" ] || continue
    gzip_add "$f"
done

# Copy the httpd logs directory into the collection tree, mirroring its path
# under $tmpdir.
# tmpdir comes from cf-support, which sources this file, hence the directive.
# shellcheck disable=SC2154
mkdir -p "${tmpdir}${WORKDIR}/httpd"
"${BINDIR}/rsync" -az "${WORKDIR}/httpd/logs" "${tmpdir}${WORKDIR}/httpd/"

# Add the two PostgreSQL configuration files from the data directory
for pg_conf_name in postgresql.conf pg_hba.conf; do
  file_add "${WORKDIR}/state/pg/data/${pg_conf_name}"
done

# Export the settings of the running PostgreSQL instance (pg_settings) as CSV
"${BINDIR}/psql" cfdb -c "\copy (SELECT * from pg_settings) To '$tmpdir/pg_settings.csv' With CSV DELIMITER ',' HEADER"

# Hostkeys in one DB but not in another
#
# Compares the host keys found in the lastseen LMDB with the host keys in the
# PostgreSQL __hosts table. Writes the difference in each direction to a file
# in $tmpdir and appends a summary with the counts to system-info.txt.
LASTSEEN_LMDB_FILE="/var/cfengine/state/cf_lastseen.lmdb"
BINDIR="/var/cfengine/bin"

# Temp files holding the sorted, de-duplicated key list of each source
lmdb_combined_hostkeys_temp=$(mktemp)
postgres_hostkeys_temp=$(mktemp)

# Get hostkeys from lmdb. The database is dumped once, so every record type is
# read from the same snapshot. Splitting each line on double quotes:
#   lines containing "SHA=    -> host key is field 4
#   lines containing "kSHA=   -> host key is field 2 without the leading k
#   lines containing "qiSHA=  -> host key is field 2 without the leading qi
#   lines containing "qoSHA=  -> host key is field 2 without the leading qo
cf-check dump "$LASTSEEN_LMDB_FILE" |
  awk -F'"' '
    /"SHA=/   { print $4 }
    /"kSHA=/  { key = $2; sub(/^k/, "", key);  print key }
    /"qiSHA=/ { key = $2; sub(/^qi/, "", key); print key }
    /"qoSHA=/ { key = $2; sub(/^qo/, "", key); print key }
  ' |
  sort -u > "$lmdb_combined_hostkeys_temp"

# Get hostkeys from PostgreSQL; strip the leading space psql -t puts in front
# of each value and drop empty lines before sorting.
"$BINDIR/psql" cfdb -t -c "SELECT hostkey FROM __hosts;" |
  sed -e 's/^ //' -e '/^$/d' |
  sort -u > "$postgres_hostkeys_temp"

# comm needs both inputs sorted: -23 keeps lines only in the first file,
# -13 keeps lines only in the second file.
comm -23 "$lmdb_combined_hostkeys_temp" "$postgres_hostkeys_temp" > "$tmpdir/hostkeys-in-lmdb-not-pg.txt"
comm -13 "$lmdb_combined_hostkeys_temp" "$postgres_hostkeys_temp" > "$tmpdir/hostkeys-in-pg-not-lmdb.txt"

# Clean up temporary files
rm "$lmdb_combined_hostkeys_temp" "$postgres_hostkeys_temp"

# Append the summary to system-info.txt
{
  echo "** Host keys in one DB but not another"
  echo "- $(wc -l < "$tmpdir/hostkeys-in-lmdb-not-pg.txt") hostkeys in lastseen LMDB not in PostgreSQL __hosts [[./hostkeys-in-lmdb-not-pg.txt]]"
  echo "- $(wc -l < "$tmpdir/hostkeys-in-pg-not-lmdb.txt") hostkeys in PostgreSQL __hosts not in lastseen LMDB [[./hostkeys-in-pg-not-lmdb.txt]]"
} >> "$tmpdir/system-info.txt"

# Show 20 largest tables in cfdb database
# see https://www.postgresql.org/docs/current/catalog-pg-class.html
# and https://www.postgresql.org/docs/current/functions-admin.html (pg_total_relation_size function)
# Don't include TOAST tables as their size is included in pg_total_relation_size for each table
# and is not interesting to us directly: https://www.postgresql.org/docs/current/storage-toast.html.
# Don't include indexes (relkind <> 'i') as their size is included in pg_total_relation_size as well.
# Relations in the pg_catalog and information_schema schemas are excluded too.
# The result is written to cfdb-biggest-tables.log in $tmpdir.
"$BINDIR/psql" cfdb -c "SELECT nspname || '.' || relname AS relation,
    pg_size_pretty(pg_total_relation_size(C.oid)) AS total_size
  FROM pg_class C
  LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
  WHERE nspname NOT IN ('pg_catalog', 'information_schema')
  AND C.relkind <> 'i'
  AND nspname !~ '^pg_toast'
  ORDER BY pg_total_relation_size(C.oid) DESC
  Limit 20;" > "$tmpdir"/cfdb-biggest-tables.log
# Summary queries against cfdb, each handed to log_cmd as a complete psql
# command line: host counts (total, deleted, reporting in the last two weeks
# and the last month), aggregated diagnostics, __status counts per
# host/status/type, the number of pg_stat_activity rows, and the longest
# running idle-in-transaction/active transaction.
cf_support_hub_queries=(
  "SELECT count(*) as host_count from __hosts;"
  "SELECT count(*) as host_count_deleted from __hosts WHERE deleted IS NOT NULL;"
  "SELECT count(*) as host_count_reporting_last_two_weeks from __hosts WHERE lastreporttimestamp >= NOW() - INTERVAL '2 weeks';"
  "SELECT count(*) as host_count_reporting_last_month from __hosts WHERE lastreporttimestamp >= NOW() - INTERVAL '1 month';"
  "select name, details, units, avg(value), count(value) from diagnostics group by name, details, units;"
  "select host, status, type, count(*) from __status group by host, status, type order by host, status;"
  "select count(*) from pg_stat_activity;"
  "select max(now() - xact_start) from pg_stat_activity where state in ('idle in transaction', 'active');"
)
for cf_support_hub_query in "${cf_support_hub_queries[@]}"; do
  log_cmd "$BINDIR/psql cfdb -c \"$cf_support_hub_query\""
done
# This file is sourced, so do not leave the helper variables behind
unset cf_support_hub_queries cf_support_hub_query
# Snapshot of every row in pg_stat_activity, prefixed with the current
# timestamp, written to psql-current-queries.log in $tmpdir.
"$BINDIR/psql" cfdb -c "SELECT now(),* from pg_stat_activity;" > "$tmpdir"/psql-current-queries.log
# Age (time since query_start) of every session that is not idle, oldest
# first. Queries whose text mentions pg_stat_activity are filtered out, which
# keeps this collection query itself out of the result.
"$BINDIR/psql" cfdb -c "SELECT
    age(clock_timestamp(), query_start),
    usename,
    datname,
    query
FROM pg_stat_activity
WHERE
    state != 'idle'
AND query NOT ILIKE '%pg_stat_activity%'
ORDER BY age desc;" > "$tmpdir"/psql-age-of-nonidle-queries.log

# This can help you see queries that are not currently progressing because of other parts of the system (e.g., lock contention)
# Selects sessions that are not idle and have a non-empty wait_event, together
# with the wait event type; written to psql-waiting-not-idle.log in $tmpdir.
"$BINDIR/psql" cfdb -c "SELECT
    usename,
    datname,
    query,
    wait_event_type,
    wait_event
FROM pg_stat_activity
WHERE
    state != 'idle'
AND wait_event != ''" > "$tmpdir"/psql-waiting-not-idle.log

# Per-database statistics: all columns of pg_stat_database, in psql expanded
# output (-x), written to psql-db-statistics.log in $tmpdir.
"$BINDIR/psql" cfdb -x -c "SELECT * FROM pg_stat_database" > "$tmpdir"/psql-db-statistics.log

# Table statistics; The numbers in these columns can help you evaluate how your
# indexes are performing and whether they're being effectively used by the
# queries you're running. If you find that your tables have many sequential
# scans, you would probably benefit from creating additional indexes that can be
# used by your most common queries.
# NOTE(review): the output file name is spelled "statisctics"; left unchanged
# here in case anything that reads the collection expects that name.
"$BINDIR/psql" cfdb -x -c "SELECT * FROM pg_stat_all_tables" > "$tmpdir"/psql-table-statisctics.log

# This can be valuable information that can help you evaluate whether you would
# benefit from adding RAM to your database cluster so that your most common
# queries can be effectively cached.
# Computes 100 * blks_hit / (blks_hit + blks_read) per database; databases
# where blks_hit + blks_read is 0 are skipped, which also avoids dividing by
# zero. Written to psql-database-cache-hit-ratio.log in $tmpdir.
"$BINDIR/psql" cfdb -c "SELECT
    datname,
    100 * blks_hit / (blks_hit + blks_read) as cache_hit_ratio
FROM
    pg_stat_database
WHERE
    (blks_hit + blks_read) > 0;" > "$tmpdir"/psql-database-cache-hit-ratio.log


# Get row count of each table https://stackoverflow.com/a/28709658
# A helper function cf_support_rowcount_all is created, called once for the
# default schema ('public') and dropped again, all in one psql -c string.
# The function loops over the ordinary tables (relkind = 'r') of the schema and
# runs count(*) on each. The \$\$ escapes keep the shell from expanding $$
# inside the double-quoted string. Output goes to
# psql-current-rows-per-table.log in $tmpdir.
# NOTE(review): per the psql documentation, releases before PostgreSQL 15
# print only the result of the last command of a multi-command -c string, in
# which case the log would hold just the DROP FUNCTION result — confirm
# against the psql version shipped with the hub.
"$BINDIR/psql" cfdb -c "CREATE FUNCTION cf_support_rowcount_all(schema_name text default 'public')
  RETURNS table(table_name text, cnt bigint) as
\$\$
declare
 table_name text;
begin
  for table_name in SELECT c.relname FROM pg_class c
    JOIN pg_namespace s ON (c.relnamespace=s.oid)
    WHERE c.relkind = 'r' AND s.nspname=schema_name
  LOOP
    RETURN QUERY EXECUTE format('select cast(%L as text),count(*) from %I.%I',
       table_name, schema_name, table_name);
  END LOOP;
end
\$\$ language plpgsql;
select cf_support_rowcount_all();
DROP FUNCTION cf_support_rowcount_all;" > "$tmpdir"/psql-current-rows-per-table.log

# Optionally include the masterfiles directory in the collection.
# In interactive mode the user is asked; otherwise (or on an empty answer) the
# answer defaults to yes.
# non_interactive comes from cf-support, which sources this file, hence the directive.
# shellcheck disable=SC2154
if [ "$non_interactive" -eq 0 ]; then
  read -r -p "Include masterfiles in support submission [Y/n]: " response
fi
# Fall back to "y" when no answer is available
: "${response:=y}"
case "$response" in
  [yY] | [yY][eE][sS])
    mkdir -p "${tmpdir}${WORKDIR}"
    "${BINDIR}/rsync" -az "${WORKDIR}/masterfiles" "${tmpdir}${WORKDIR}"
    echo "Added directory $WORKDIR/masterfiles"
    ;;
  *)
    echo "Not including masterfiles in support submission"
    ;;
esac
