#!/usr/bin/python3
# pylint: disable=consider-using-with,line-too-long,broad-exception-caught,invalid-name,missing-class-docstring,missing-module-docstring
#
# A simple(ish) script to incrementally count the number of PHP errors written to the
# system journal in an efficient way by saving a journal cursor between runs
#
# php-error-count [(delta|rate|total)]
#



import sys
import os
import re
import json

try:
    from systemd import journal
except Exception:
    print('ZBX_NOTSUPPORTED')
    sys.exit('Failed to import journal module')

from datetime import datetime, timedelta

# File in which the journal cursor and the running event count are kept between runs
STATEFILE = '/var/lib/zabbix-agent/php-error-count.cursor'

# Look-back window used when the saved cursor cannot be found in the journal
MAXAGE = timedelta(hours=12)

# Messages matching this pattern are the normal idle startup/shutdown notices
# from php-fpm; they are never counted (ie we don't care about them)
EXCLUDES = re.compile(
    r'^NOTICE: ('
    r'Terminating \.\.\.'
    r'|exiting, bye-bye!'
    r'|systemd monitor interval set to \d+.*'
    r'|ready to handle connections'
    r'|fpm is running, pid \d+'
    r'|using inherited socket fd=\d+, .*'
    r')$'
)

# Journal match expressions selecting the entries that are examined
FILTERS = (
    '_EXE=/bin/php',
    '_EXE=/sbin/php-fpm',
    'SYSLOG_IDENTIFIER=php',
    '_COMM=php',
    '_COMM=php-fpm',
)

# pylint: disable=too-few-public-methods
class Mode:
    """Reporting modes; the attribute names double as the command line keywords."""

    # Count since last run
    DELTA = 1
    # Frequency (rate/sec) since last run
    RATE = 2
    # Total count since first run
    TOTAL = 3

# Select the reporting mode from the first command line argument (matched
# case-insensitively); a missing or unrecognised argument means DELTA
_requested_mode = sys.argv[1].upper() if len(sys.argv) > 1 else 'DELTA'
MODE = getattr(Mode, _requested_mode, Mode.DELTA)


#############################################################################################################

# Load the state saved by the last run.  The state is a dict with two keys:
#   'cursor' - journal cursor of the last entry seen (None when unknown)
#   'count'  - running total of counted events
state = None
try:
    # Use a context manager so the file handle is always closed
    with open(STATEFILE, mode='r', encoding='utf-8') as statefile:
        state = statefile.read().strip()
except (OSError, ValueError):
    # Missing, unreadable or undecodable state file (eg first run) - start afresh
    state = None
if state:
    try:
        # New format - json encoded
        state = json.loads(state)
    except ValueError:
        # Old format - bare cursor string
        state = {'cursor': state, 'count': 0}
if not isinstance(state, dict):
    # Empty (or non-dict json) state - initialise it
    state = {'cursor': None, 'count': 0}

# Sanitise the individual fields so a damaged state file can't break the
# cursor seek or the count arithmetic later on
if not isinstance(state.get('cursor'), str):
    state['cursor'] = None
saved_count = state.get('count')
if isinstance(saved_count, bool) or not isinstance(saved_count, (int, float)):
    state['count'] = 0



# Open the journal
j = journal.Reader()

# First of all find the *last* entry in the journal so we can save its cursor to resume in the next run
# we need to do this first to avoid race conditions
# (this is an empty dict when the journal has no entries at all)
j.seek_tail()
last = j.get_previous()

# Try to position the reader on the entry named by the cursor from the
# statefile, iteration will then carry on from the entry after it
first = None
if state['cursor']:
    try:
        j.seek_cursor(state['cursor'])
        first = j.get_next(1)
    except Exception:
        # Stale or malformed cursor - fall back to the MAXAGE window below
        first = None

# If we failed to find our cursor just backup by MAXAGE instead.
# get_next() hands back an *empty dict* (not None) when there is no entry,
# so test for emptiness and use .get() rather than risking a KeyError
if not first or first.get('__CURSOR') != state['cursor']:
    since = datetime.now() - MAXAGE
    j.seek_realtime(since)
    first = j.get_next(1)
    # Seek again so that iteration restarts at (and includes) 'first'.  Stepping
    # back with get_previous() is expected to skip 'first' when it is the very
    # first entry in the journal, because there is nothing to step back onto
    j.seek_realtime(since)


# Narrow the reader down to the entries we care about: every match
# expression is followed by a disjunction so the filters are alternatives
for match_expr in FILTERS:
    j.add_match(match_expr)
    j.add_disjunction()


# Finally iterate over any matches, checking for exclusions by message regex
count = 0
for event in j:
    # MESSAGE can be absent, or can be something other than str (eg bytes),
    # so normalise it rather than crashing on the lookup or the regex match
    message = event.get('MESSAGE', '')
    if isinstance(message, bytes):
        message = message.decode('utf-8', errors='replace')
    elif not isinstance(message, str):
        message = str(message)
    if not EXCLUDES.match(message):
        # This event is 'counted' (anything not explicitly excluded is)
        count += 1
    if not last or (
        event.get('_BOOT_ID') == last.get('_BOOT_ID')
        and event['__MONOTONIC_TIMESTAMP'] > last['__MONOTONIC_TIMESTAMP']
    ):
        # We've overrun the end of the journal (as recorded when we started reading it,
        # 'last' being empty if the journal had no entries at that point)
        # so update our final event to point to here - we can't just use the last event
        # read in this loop blindly because we're reading with filters
        last = event

# Calculate our output.  The default is the same marker that is printed when
# the journal module can't be imported; every known mode overrides it below
output = 'ZBX_NOTSUPPORTED'
if MODE == Mode.TOTAL:
    # Everything counted in previous runs plus what we found this time
    output = state['count'] + count
elif MODE == Mode.RATE:
    # Note that this won't be the "true" timedelta since there may not be
    # regular events in the journal, ie it's aligned to the *journal entries*
    # and not the time between invocations, it should be adequate though
    try:
        period = last['__REALTIME_TIMESTAMP'] - first['__REALTIME_TIMESTAMP']
        elapsed = period.total_seconds()
    except (KeyError, TypeError):
        # No usable first/last entry (eg empty journal) - no measurable period
        elapsed = 0.0
    # Force a minimum accounting period of 1 second for sanity (this also
    # rules out division by zero and negative periods)
    sec = max(1.0, elapsed)
    output = f"{count / sec:0.6f}"
elif MODE == Mode.DELTA:
    # Just what we found in this run
    output = count

# Now update our state file.  Keep the previous cursor if the journal gave us
# no entry to resume from (eg it was completely empty)
if last:
    state['cursor'] = last.get('__CURSOR', state['cursor'])
state['count'] += count

try:
    # Need to use os.open() here because the builtin open() has no way
    # to set the permissions of a newly created file
    fd = os.open(STATEFILE, os.O_RDWR|os.O_TRUNC|os.O_CREAT, 0o600)
    # The context manager closes the file even if json.dump() fails
    with os.fdopen(fd, 'w', encoding='utf-8') as fo:
        json.dump(state, fo, skipkeys=True, indent=4)
except (OSError, TypeError, ValueError) as e:
    # Report on stderr only: stdout must carry nothing but the item value
    sys.stderr.write(f"Failed to write final cursor value '{state['cursor']}' to state file '{STATEFILE}': {e}\n")

# And now why we came....
print(output)

