Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

This guide assumes that you have SSH access to the Squirro servers and know the password to access MySQL.

  1. Identify the current master

  2. For each broken slave, stop the Squirro cluster service:

    $ sudo monit stop
    Code Block
    sudo systemctl stop sqclusterd

  3. Ensure you have a running system with one cluster node.

  4. At the master:

    Code Block
    mysql> RESET MASTER;
    
    Query OK, 0 rows affected (0.14 sec)
     
    
    
    mysql> FLUSH TABLES WITH READ LOCK;
    
    Query OK, 0 rows affected (0.00 sec)
     
    
    
    mysql> SHOW MASTER STATUS;
    
    +------------------+----------+--------------+------------------+
    | File             | Position | Binlog_Do_DB | Binlog_Ignore_DB |
    +------------------+----------+--------------+------------------+
    | mysql-bin.000001 |    12268 |              |                  |
    +------------------+----------+--------------+------------------+
    
    1 row in set (0.00 sec)
     
    mysql> exit
    
    $ mkdir /var/lib/squirro/cluster/mysql/<date>
    
    $ mysqldump --host=127.0.0.1 --port=3306 --user=cluster --password=$(PASSWORD) --all-databases --master-data --single-transaction --result-file /var/lib/squirro/cluster/mysql/<date>/dump.db
     
    
    
    $ mysql -u root -p
    
    mysql> UNLOCK TABLES;
     
    
    
    $ cd /var/lib/squirro/cluster/mysql/<date>/
    
    $ gzip dump.db
    
    $ scp dump.db.gz $(SSH_USER)@$(IP_SLAVE1):/tmp/
    
    $ scp dump.db.gz $(SSH_USER)@$(IP_SLAVE2):/tmp/
  5. On each slave:

    Code Block
    $ mkdir /var/lib/squirro/cluster/mysql/restore-<date>
    
    $ mv /tmp/dump.db.gz /var/lib/squirro/cluster/mysql/restore-<date>
    
    $ cd /var/lib/squirro/cluster/mysql/restore-<date>
    
    $ gunzip dump.db.gz
     
    
    
    mysql> stop slave;
     
    
    
    $ mysql --host=127.0.0.1 --port=3306 --user=cluster --password=$(PASSWORD) -e "source dump.db;"
    
    $ mysql -u root -p
    
    mysql> RESET SLAVE;
    
    mysql> change master to master_user='repl';
    
    mysql> show slave status\G;
        
        --> if master_host does not point to the current master, set it, e.g.: mysql> change master to master_host="$(IP_MASTER)";
     
    
    mysql> CHANGE MASTER TO MASTER_LOG_FILE='mysql-bin.000001', MASTER_LOG_POS=12268;
                
                --> ensure that the log file and position match the values from SHOW MASTER STATUS above
    
    mysql> START SLAVE;
    
    mysql> show slave status\G;
          
          --> ensure that Slave_IO_Running: Yes and Slave_SQL_Running: Yes
     
    $ monit start sqclusterd
    $ monit
    
    
    $ sudo systemctl start sqclusterd
    $ monit summary