Tuesday, March 7, 2023

MYSQL:::: Primary node in missing state and other two nodes as in Quorum as one new node as primary.Steps to FIX.

 Scenario: Node 3 is missing and looks in R\O mode from R\W mode and fixed this error after one day.


Solution:


When i try to join or remove instance node 3 or add instance node 3 to check,but it saying mysql gone away error. When i restart mysql services in the issue node 3. then the cluster status showing all good. 


================================================================================

Steps checked:


MySQL  DBCLUSTERNODEIP_3:3306 ssl  JS > var cluster =dba.getCluster();

WARNING: Cluster has no quorum and cannot process write transactions: Group has no quorum

 MySQL  DBCLUSTERNODEIP_3:3306 ssl  JS > cluster.status();

{

    "clusterName": "uatcluster",

    "defaultReplicaSet": {

        "name": "default",

        "primary": "DBCLUSTERNODEIP_3:3306",

        "ssl": "REQUIRED",

        "status": "NO_QUORUM",

        "statusText": "Cluster has no quorum as visible from 'DBCLUSTERNODEIP_3:3306' and cannot process write transactions. 2 members are not active",

        "topology": {

            "DBCLUSTERNODEIP_1:3306": {

                "address": "DBCLUSTERNODEIP_1:3306",

                "mode": "R/O",

                "readReplicas": {},

                "role": "HA",

                "status": "UNREACHABLE",

                "version": "8.0.20"

            },

            "DBCLUSTERNODEIP_2:3306": {

                "address": "DBCLUSTERNODEIP_2:3306",

                "mode": "R/O",

                "readReplicas": {},

                "role": "HA",

                "status": "UNREACHABLE",

                "version": "8.0.20"

            },

            "DBCLUSTERNODEIP_3:3306": {

                "address": "DBCLUSTERNODEIP_3:3306",

                "mode": "R/O",

                "readReplicas": {},

                "replicationLag": null,

                "role": "HA",

                "status": "ONLINE",

                "version": "8.0.20"

            }

        },

        "topologyMode": "Single-Primary"

    },

    "groupInformationSourceMember": "DBCLUSTERNODEIP_3:3306"

}

 MySQL  DBCLUSTERNODEIP_3:3306 ssl  JS >




mysql> SELECT @@GLOBAL.GTID_EXECUTED \G

*************************** 1. row ***************************

@@GLOBAL.GTID_EXECUTED: 175f7950-4ac8-11ed-893f-00505684085f:1-2956230:3303697,

56be8ffc-c8b3-11ea-93d3-00505684f7cd:1-37847672,

d1552329-9fff-11e9-aa0b-00505694881c:1-77

1 row in set (0.00 sec)



 MySQL  DBCLUSTERNODEIP_2:3306 ssl  JS > var cluster=dba.getCluster()

 MySQL  DBCLUSTERNODEIP_2:3306 ssl  JS > cluster.status();

{

    "clusterName": "uatcluster",

    "defaultReplicaSet": {

        "name": "default",

        "primary": "DBCLUSTERNODEIP_2:3306",

        "ssl": "REQUIRED",

        "status": "OK_NO_TOLERANCE",

        "statusText": "Cluster is NOT tolerant to any failures. 1 member is not active",

        "topology": {

            "DBCLUSTERNODEIP_1:3306": {

                "address": "DBCLUSTERNODEIP_1:3306",

                "mode": "R/O",

                "readReplicas": {},

                "replicationLag": null,

                "role": "HA",

                "status": "ONLINE",

                "version": "8.0.20"

            },

            "DBCLUSTERNODEIP_2:3306": {

                "address": "DBCLUSTERNODEIP_2:3306",

                "mode": "R/W",

                "readReplicas": {},

                "replicationLag": null,

                "role": "HA",

                "status": "ONLINE",

                "version": "8.0.20"

            },

            "DBCLUSTERNODEIP_3:3306": {

                "address": "DBCLUSTERNODEIP_3:3306",

                "mode": "R/W",

                "readReplicas": {},

                "role": "HA",

                "status": "(MISSING)"

            }

        },

        "topologyMode": "Single-Primary"

    },

    "groupInformationSourceMember": "DBCLUSTERNODEIP_2:3306"

}

 MySQL  DBCLUSTERNODEIP_2:3306 ssl  JS >

@nODE2 primary

mysql> SELECT @@GLOBAL.GTID_EXECUTED \G

ERROR 2006 (HY000): MySQL server has gone away

No connection. Trying to reconnect...

Connection id:    46843795

Current database: appdatabase


*************************** 1. row ***************************

@@GLOBAL.GTID_EXECUTED: 175f7950-4ac8-11ed-893f-00505684085f:1-2956230:3303697-3340358,

56be8ffc-c8b3-11ea-93d3-00505684f7cd:1-37847672,

d1552329-9fff-11e9-aa0b-00505694881c:1-77

1 row in set (0.00 sec)



@node1:

mysql> select @@GLOBAL.GTID_EXECUTED\G;

ERROR 2006 (HY000): MySQL server has gone away

No connection. Trying to reconnect...

Connection id:    33090026

Current database: *** NONE ***


*************************** 1. row ***************************

@@GLOBAL.GTID_EXECUTED: 175f7950-4ac8-11ed-893f-00505684085f:1-2956230:3303697-3340376,

56be8ffc-c8b3-11ea-93d3-00505684f7cd:1-37847672,

d1552329-9fff-11e9-aa0b-00505694881c:1-77

1 row in set (0.00 sec)


ERROR:

No query specified


@NODE2:

mysql> SELECT * FROM performance_schema.replication_group_members;

+---------------------------+--------------------------------------+--------------+-------------+--------------+-------------+----------------+

| CHANNEL_NAME              | MEMBER_ID                            | MEMBER_HOST  | MEMBER_PORT | MEMBER_STATE | MEMBER_ROLE | MEMBER_VERSION |

+---------------------------+--------------------------------------+--------------+-------------+--------------+-------------+----------------+

| group_replication_applier | 503c2c17-c348-11ea-9ad7-00505684085f | DBCLUSTERNODEIP_2 |        3306 | ONLINE       | PRIMARY     | 8.0.20         |

| group_replication_applier | d1552329-9fff-11e9-aa0b-00505694881c | DBCLUSTERNODEIP_1 |        3306 | ONLINE       | SECONDARY   | 8.0.20         |

+---------------------------+--------------------------------------+--------------+-------------+--------------+-------------+----------------+

2 rows in set (0.00 sec)


Error message at issue node 3:


2023-03-07T00:00:05.382906-16:00 0 [Warning] [MY-011735] [Repl] Plugin group_replication reported: '[GCS] local_server: error reading from the signalling connection?'

2023-03-07T00:00:05.400393-16:00 0 [Note] [MY-011735] [Repl] Plugin group_replication reported: '[GCS] Failure reading from fd=4160 n=18446744073709551615'

2023-03-07T00:00:05.435293-16:00 0 [Note] [MY-011735] [Repl] Plugin group_replication reported: '[GCS] Failure reading from fd=3768 n=18446744073709551615'

2023-03-07T00:00:05.503907-16:00 318334 [ERROR] [MY-011735] [Repl] Plugin group_replication reported: '[GCS] Error pushing message into group communication engine.'

2023-03-07T00:00:05.504716-16:00 318334 [Note] [MY-011725] [Repl] Plugin group_replication reported: 'Error while sending stats message'

2023-03-07T00:00:06.504072-16:00 318334 [ERROR] [MY-011735] [Repl] Plugin group_replication reported: '[GCS] Error pushing message into group communication engine.'

2023-03-07T00:00:06.506464-16:00 318334 [Note] [MY-011725] [Repl] Plugin group_replication reported: 'Error while sending stats message'


Error at new primary node 2:


2023-03-06T18:24:03.079983+08:00 31181620 [Warning] [MY-010956] [Server] Invalid replication timestamps: original commit timestamp is more recent than the immediate commit timestamp. This may be an issue if delayed replication is active. Make sure that servers have their clocks set to the correct time. No further message will be emitted until after timestamps become valid again.

2023-03-06T18:24:13.363163+08:00 31181620 [Warning] [MY-010957] [Server] The replication timestamps have returned to normal values.

2023-03-07T00:00:09.969558-16:00 0 [Note] [MY-011735] [Repl] Plugin group_replication reported: '[GCS] Re-using server node 0 host DBCLUSTERNODEIP_1'

2023-03-07T00:00:09.971275-16:00 0 [Note] [MY-011735] [Repl] Plugin group_replication reported: '[GCS] Re-using server node 1 host DBCLUSTERNODEIP_2'

2023-03-07T00:00:09.973312-16:00 0 [Note] [MY-011735] [Repl] Plugin group_replication reported: '[GCS] Installed site start={8506e6d0 7621487 0} boot_key={8506e6d0 7621476 1} event_horizon=10 node 1'

2023-03-07T00:00:11.631615-16:00 0 [Note] [MY-011735] [Repl] Plugin group_replication reported: '[GCS] Group is able to support up to communication protocol version 8.0.16'

2023-03-07T00:00:11.634347-16:00 0 [Warning] [MY-011499] [Repl] Plugin group_replication reported: 'Members removed from the group: DBCLUSTERNODEIP_3:3306'

2023-03-07T00:00:11.636807-16:00 0 [Note] [MY-011500] [Repl] Plugin group_replication reported: 'Primary server with address DBCLUSTERNODEIP_3:3306 left the group. Electing new Primary.'

2023-03-07T00:00:11.672475-16:00 0 [Note] [MY-013519] [Repl] Plugin group_replication reported: 'Elected primary member gtid_executed: 175f7950-4ac8-11ed-893f-00505684085f:1-2956230:3303697,

56be8ffc-c8b3-11ea-93d3-00505684f7cd:1-37847672,

d1552329-9fff-11e9-aa0b-00505694881c:1-77'

2023-03-07T00:00:11.676694-16:00 0 [Note] [MY-013519] [Repl] Plugin group_replication reported: 'Elected primary member applier channel received_transaction_set: 175f7950-4ac8-11ed-893f-00505684085f:1-2956230:3303697,

56be8ffc-c8b3-11ea-93d3-00505684f7cd:1-37847672,

d1552329-9fff-11e9-aa0b-00505694881c:1-77'

2023-03-07T00:00:12.674089-16:00 0 [Note] [MY-011507] [Repl] Plugin group_replication reported: 'A new primary with address DBCLUSTERNODEIP_2:3306 was elected. The new primary will execute all previous group transactions before allowing writes.'

2023-03-07T00:00:12.681058-16:00 0 [Note] [MY-011503] [Repl] Plugin group_replication reported: 'Group membership changed to DBCLUSTERNODEIP_2:3306, DBCLUSTERNODEIP_1:3306 on view 16735758337453400:8.'

2023-03-07T00:00:12.705600-16:00 46153146 [Note] [MY-011510] [Repl] Plugin group_replication reported: 'This server is working as primary member.'

2023-03-07T00:00:12.705616-16:00 31181617 [Note] [MY-011485] [Repl] Plugin group_replication reported: 'Primary had applied all relay logs, disabled conflict detection.'

2023-03-07T00:00:20.627864-16:00 0 [Note] [MY-011735] [Repl] Plugin group_replication reported: '[GCS] Failure reading from fd=-1 n=18446744073709551615'


=======================================================


Effort tried but no luck:

Cluster.removeInstance: MySQL server has gone away (RuntimeError)

 MySQL  DBCLUSTERNODEIP_2:3306 ssl  JS > cluster.removeInstance("clusteradmin@DBCLUSTERNODEIP_3:3306");

Cluster.removeInstance: MySQL server has gone away (RuntimeError)

 MySQL  DBCLUSTERNODEIP_2:3306 ssl  JS > dba.rebootClusterFromCompleteOutage();

Restoring the default cluster from complete outage...


Dba.rebootClusterFromCompleteOutage: The MySQL instance 'DBCLUSTERNODEIP_2:3306' belongs to an InnoDB Cluster and is reachable. (RuntimeError)

 MySQL  DBCLUSTERNODEIP_2:3306 ssl  JS > cluster.addInstance("DBCLUSTERNODEIP_3:3306");

Cluster.addInstance: MySQL server has gone away (RuntimeError)

 MySQL  DBCLUSTERNODEIP_2:3306 ssl  JS > cluster.removeInstance("clusteradmin@DBCLUSTERNODEIP_3:3306", {force: true})

Cluster.removeInstance: MySQL server has gone away (RuntimeError)

 MySQL  DBCLUSTERNODEIP_2:3306 ssl  JS > var cluster=dba.getCluster()

=================================================================

Once issue node 3 mysql service restarted. looks all working fine as like below.


 MySQL  DBCLUSTERNODEIP_2:3306 ssl  JS > cluster.status();

{

    "clusterName": "uatcluster",

    "defaultReplicaSet": {

        "name": "default",

        "primary": "DBCLUSTERNODEIP_2:3306",

        "ssl": "REQUIRED",

        "status": "OK",

        "statusText": "Cluster is ONLINE and can tolerate up to ONE failure.",

        "topology": {

            "DBCLUSTERNODEIP_1:3306": {

                "address": "DBCLUSTERNODEIP_1:3306",

                "mode": "R/O",

                "readReplicas": {},

                "replicationLag": null,

                "role": "HA",

                "status": "ONLINE",

                "version": "8.0.20"

            },

            "DBCLUSTERNODEIP_2:3306": {

                "address": "DBCLUSTERNODEIP_2:3306",

                "mode": "R/W",

                "readReplicas": {},

                "replicationLag": null,

                "role": "HA",

                "status": "ONLINE",

                "version": "8.0.20"

            },

            "DBCLUSTERNODEIP_3:3306": {

                "address": "DBCLUSTERNODEIP_3:3306",

                "mode": "R/O",

                "readReplicas": {},

                "replicationLag": null,

                "role": "HA",

                "status": "ONLINE",

                "version": "8.0.20"

            }

        },

        "topologyMode": "Single-Primary"

    },

    "groupInformationSourceMember": "DBCLUSTERNODEIP_2:3306"

}


MYSQL:::Seamless Data Archiving: Exporting, Importing, and Pruning MySQL Tables

  Seamless Data Archiving: Exporting, Importing, and Pruning MySQL Tables Introduction: In the dynamic landscape of database management...