#!/bin/bash
#
# Collect aggregate statistics about MySQL threads
#

function usage {
	local MESSAGE=$1
	local NL=$'\n'
	cat <<-EOD
		${MESSAGE:+${NL}${MESSAGE}${NL}}
		SYNOPSIS

		        $(basename $0)  <Mode> [<Cmd> [<State> [<Query> [<DB> [<User> [<Name> [<Type> [<Time>]]]]]]]]

		ARGUMENTS

		        <Mode> := SUM | MIN | MAX | AVG | COUNT
			              How to aggregate the result SUM/MIN/MAX/AVG apply to PROCESSLIST_TIME, while COUNT simply counts
		                  total matching threads

		        <Cmd> := <Regex> [Default: .*]
	                     A regex to match against the Command (performance_schema.threads.PROCESSLIST_COMMAND) column

		        <State> := <Regex> [Default: .*]
	                     A regex to match against the State (performance_schema.threads.PROCESSLIST_STATE) column


		        <Query> := <Regex> [Default: .*]
	                     A regex to match against the Info (performance_schema.threads.PROCESSLIST_INFO) column
	                     This colum typically returns the currently executung SQL for FORGROUND threads

		        <DB> := <Regex> [Default: .*]
	                     A regex to match against the DB (performance_schema.threads.PROCESSLIST_DB) column
	                     This column is the *native* db the thread is executing from (eg use xxxxxx) not necessarily
	                     the DB the the query is operating against

		        <User> := <Regex> [Default: .*]
	                     A regex to match against the User (performance_schema.threads.PROCESSLIST_USER) column
	                     This column hold the authenticated user for the thread for FOREGROUND threads

		        <Name> := <Regex> [Default: .*]
	                     A regex to match against the Host (performance_schema.threads.NAME) column
	                     This column holds the name corresponding to the thread's purpose, the default filter
	                     yields only threads which would be reported by SHOW PROCESSLIST

				<Type> := <Regex> [Default: FOREGROUND]
	                     A regex to match against the Type (performance_schema.threads.TYPE) column
	                     This column breaks the threads out into major groups, typically FOREGROUND and BACKGROUND
	                     Only FOREGROUND threads are reported by SHOW PROCESSLIST

				<Time> := <op><int> [Default: >0]
	                     A regex to match against the Time (performance_schema.threads.PROCESSLIST_TIME) column
	                     Note that NULL will be treated as '0' (eg all BACKGROUND threads) and only FOREGROUND
	                     threads typically report a time

                 <Op>  := < | <= | >= | > | =

                 <Regex> := [!]<MySQL-Compatible-REGEXP>

	EOD

	exit 1

}


# (Try to) copy arguments into named variables, applying defaults for unset values
# Zabbix only supports $1..$9
MODE=${1}
PROCESSLIST_COMMAND=${2}
PROCESSLIST_STATE=${3}
PROCESSLIST_INFO=${4}
PROCESSLIST_DB=${5}
# TODO: Decompose user@host forms?
PROCESSLIST_USER=${6}
NAME=${7}
TYPE=${8:-^FOREGROUND$}
PROCESSLIST_TIME=${9}

# Case insensitive string matches
shopt -s nocasematch

# SQL Criteria
WHERE=()

if [ -z "${MODE}" ]; then
	usage "ERROR: You must specify at least a MODE argument, valid options are SUM, MIN, MAX, AVG, COUNT"
elif [[ ! "${MODE}" =~ ^SUM|MIN|MAX|AVG|COUNT$ ]]; then
	usage "ERROR: Invalid mode '${MODE}', valid options are SUM, MIN, MAX, AVG, COUNT"
fi

# Validate and clean the REGEXPs
for x in PROCESSLIST_COMMAND PROCESSLIST_STATE PROCESSLIST_INFO PROCESSLIST_DB PROCESSLIST_USER PROCESSLIST_HOST NAME TYPE ROLE; do
	# Check validity of all regex columns
	VAL="${!x}"

	# Skip any empty values
	[ -n "$VAL" ] || continue

	# Handle negation (leading !)
	OP='REGEXP'
	if [ "${VAL:0:1}" == '!' ]; then
		OP='NOT REGEXP'
		VAL="${VAL:1}"
	fi

	# Check that the regexp can't escape from the mysql string
	if [[ "$VAL" =~ [\\\\\'] ]]; then
		usage "ERROR: Regex '$VAL' for column $x contains ' or \\ which is not permitted"
	fi

	# Ensure the REGEXP is anchored
	[ "${VAL:0:1}"  == '^' ] || VAL="^${VAL}"
	[ "${VAL:(-1)}" == '$' ] || VAL="${VAL}\$"

	# Shove the full expression into the WHERE criteria array
	WHERE+=( "$x $OP '${VAL}'" )
done

# And the TIME constraint
if [ -n "${PROCESSLIST_TIME}" ]; then
	# Set default cmp op if missing
	[[ "${PROCESSLIST_TIME}" =~ ^[0-9]+$ ]] && PROCESSLIST_TIME=">=${PROCESSLIST_TIME}"
	if [[ ! "${PROCESSLIST_TIME}" =~ ^[\>\<]?=?[0-9]+$ ]]; then
		usage "ERROR: Invalid TIME criteria '$TIME', must be of the form (>|>=|<|<=|=){integer}"
	fi
	OP=${PROCESSLIST_TIME%%[0-9]*}
	VAL=${PROCESSLIST_TIME##*[!0-9]}
	WHERE+=( "PROCESSLIST_TIME $OP $VAL" )
fi

CRITERIA=
[ ${#WHERE[@]} -gt 0 ] && printf -v CRITERIA ' AND %s ' "${WHERE[@]}"

if [ "$MODE" == "COUNT" ]; then
	COLUMN="${MODE}(*) as Result"
else
	COLUMN="IFNULL(${MODE}(PROCESSLIST_TIME), 0) AS Result"
fi

# Note that we exclude our own thread (by CONNECTION_ID()) from the results!
RESULT=$(mysql -NB -e "SELECT ${COLUMN} FROM performance_schema.threads WHERE IFNULL(PROCESSLIST_ID,-1) <> CONNECTION_ID() ${CRITERIA}")
[ -n "$RESULT" ] || RESULT=ZBX_NOTSUPPORTED
echo $RESULT





