# I’m managing an infrastructure with a number of databases who
# (for codified reasons that I cannot influence) suffer from this situation often.
# So, I’ve written a cron script to manage the situation.
# Does anyone see any foreseeable issues with this logic (see below)?

# —————————————– SCRIPT —————————————–
#!/bin/bash
## Tool to unstick MySQL Replicators.
## Set to run from cron once a minute.

# */1 * * * * /usr/local/bin/whipSlave.mysql.sh > /dev/null 2>&1

# Last updated: MM/DD/YYYY

COMMANDS=”mysql grep awk logger”
export PATH=’/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin’
for i in $COMMANDS
do
X=`echo $i | tr ‘[a-z]’ ‘[A-Z]’`
export $X=`type -p $i`
done

# Define variables
USERNAME=dbuser
PASSWORD=password

# Define Functions
## Obtain MwSQL slave server status
function SLAVE()
{
STATUS=`$MYSQL -u $USERNAME -p$PASSWORD -e \
“SHOW SLAVE STATUS \G” |
$GREP Seconds_Behind_Master |
$AWK ‘{print $2}’`
}

## Skip errors
function UNSTICK()
{
$MYSQL -u $USERNAME -p$PASSWORD -e \
“STOP SLAVE; SET GLOBAL SQL_SLAVE_SKIP_COUNTER = 1; START SLAVE;”
sleep 5
# Check everything again
CHECK
}

## Decide what to do…
function CHECK()
{
# Obtain status
SLAVE
if [ $STATUS = NULL ]
then
# I think the replicator is broken
echo “MySQL Slave database is not replicating. Fixing…” | $LOGGER
UNSTICK
else
# Everything should be fine
echo “MySQL Slave is $STATUS seconds behind its Master.” | $LOGGER
fi
}

## Are we running?
function ALIVE()
{
UP=`$MYSQL -u $USERNAME -p$PASSWORD -e \
“SHOW SLAVE STATUS \G” |
$GREP Slave_IO_Running |
$AWK ‘{print $2}’`

if [ $UP = Yes ]
then
# Let’s check if everything is good, then…
CHECK
else
# Uh oh…let’s not do anything.
echo “MySQL Slave IO is not running!” | $LOGGER
exit 1
fi
}

# How is everything?
ALIVE

#EoF
exit 0