Skip to content

Commit 745449d

Browse files
committed
BUG#37613510: Ever growing GR Transactions Rows Validating after secondary joins the group
The Group Replication start operation checks whether there are partial transactions on the `group_replication_applier` channel from a previous group participation. If partial transactions are found, the `group_replication_applier` channel is stopped after applying all complete transactions, its relay logs are purged, and then the channel is restarted. After this step, distributed recovery kicks in and applies the missing data from a group member. The Group Replication pipeline operation that stops the `group_replication_applier` channel was incorrectly also stopping the periodic task of the certifier module, so some periodic internal operations were no longer taking place. One of the tasks that stopped running was the periodic send of the committed transactions; its omission prevented the certification info garbage collection, which in turn caused the continuous increase of the column COUNT_TRANSACTIONS_ROWS_VALIDATING of the table performance_schema.replication_group_member_stats. To solve the above issue, the pipeline operation that stops the `group_replication_applier` channel no longer interferes with the certifier module. Change-Id: I1a4c2f7a5b6d0ca65caf43eeae38103a17b2d5ec
1 parent afadc5c commit 745449d

File tree

6 files changed

+30
-32
lines changed

6 files changed

+30
-32
lines changed

mysql-test/suite/group_replication/r/gr_partial_trx_in_applier_relay_log.result

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ INSERT INTO t1 values (12);
3939
SET GLOBAL DEBUG="-d,stop_applier_channel_after_reading_write_rows_log_event";
4040
include/stop_group_replication.inc
4141
include/start_group_replication.inc
42+
include/assert.inc [Certifier broadcast thread must be running]
4243
########################################################################
4344
# 5. On M1: Insert another tuple and do a diff tables with other nodes.
4445
# (just to check that everything is working fine).

mysql-test/suite/group_replication/t/gr_partial_trx_in_applier_relay_log.test

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@ SET GLOBAL DEBUG="-d,stop_applier_channel_after_reading_write_rows_log_event";
9292
--let $wait_timeout=120
9393
--source include/start_group_replication.inc
9494

95+
--let $assert_text= Certifier broadcast thread must be running
96+
--let $assert_cond= [SELECT COUNT(*) FROM performance_schema.threads WHERE name = "thread/group_rpl/THD_certifier_broadcast"] = 1
97+
--source include/assert.inc
98+
9599
--echo ########################################################################
96100
--echo # 5. On M1: Insert another tuple and do a diff tables with other nodes.
97101
--echo # (just to check that everything is working fine).

plugin/group_replication/include/certifier.h

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -149,12 +149,8 @@ class Certifier_broadcast_thread {
149149

150150
/**
151151
Terminate broadcast thread.
152-
153-
@return the operation status
154-
@retval 0 OK
155-
@retval !=0 Error
156152
*/
157-
int terminate();
153+
void terminate();
158154

159155
/**
160156
Broadcast thread worker method.
@@ -241,15 +237,6 @@ class Certifier : public Certifier_interface {
241237
*/
242238
int initialize(ulonglong gtid_assignment_block_size);
243239

244-
/**
245-
Terminate certifier.
246-
247-
@return the operation status
248-
@retval 0 OK
249-
@retval !=0 Error
250-
*/
251-
int terminate();
252-
253240
/**
254241
Handle view changes on certifier.
255242
*/

plugin/group_replication/src/certifier.cc

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,11 @@ int Certifier_broadcast_thread::initialize() {
8484
if ((mysql_thread_create(key_GR_THD_cert_broadcast, &broadcast_pthd,
8585
get_connection_attrib(), launch_broadcast_thread,
8686
(void *)this))) {
87-
mysql_mutex_unlock(&broadcast_run_lock); /* purecov: inspected */
88-
return 1; /* purecov: inspected */
87+
/* purecov: begin inspected */
88+
mysql_mutex_unlock(&broadcast_run_lock);
89+
LogPluginErr(ERROR_LEVEL, ER_GRP_RPL_CERT_BROADCAST_THREAD_CREATE_FAILED);
90+
return 1;
91+
/* purecov: end */
8992
}
9093
broadcast_thd_state.set_created();
9194

@@ -98,13 +101,13 @@ int Certifier_broadcast_thread::initialize() {
98101
return 0;
99102
}
100103

101-
int Certifier_broadcast_thread::terminate() {
104+
void Certifier_broadcast_thread::terminate() {
102105
DBUG_TRACE;
103106

104107
mysql_mutex_lock(&broadcast_run_lock);
105108
if (broadcast_thd_state.is_thread_dead()) {
106109
mysql_mutex_unlock(&broadcast_run_lock);
107-
return 0;
110+
return;
108111
}
109112

110113
aborted = true;
@@ -122,8 +125,6 @@ int Certifier_broadcast_thread::terminate() {
122125
mysql_cond_wait(&broadcast_run_cond, &broadcast_run_lock);
123126
}
124127
mysql_mutex_unlock(&broadcast_run_lock);
125-
126-
return 0;
127128
}
128129

129130
void Certifier_broadcast_thread::dispatcher() {
@@ -143,6 +144,8 @@ void Certifier_broadcast_thread::dispatcher() {
143144
mysql_cond_broadcast(&broadcast_run_cond);
144145
mysql_mutex_unlock(&broadcast_run_lock);
145146

147+
LogPluginErr(SYSTEM_LEVEL, ER_GRP_RPL_CERT_BROADCAST_THREAD_STARTED);
148+
146149
while (!aborted) {
147150
// Broadcast Transaction identifiers every 30 seconds
148151
if (broadcast_counter % 30 == 0) {
@@ -196,6 +199,8 @@ void Certifier_broadcast_thread::dispatcher() {
196199
mysql_cond_broadcast(&broadcast_run_cond);
197200
mysql_mutex_unlock(&broadcast_run_lock);
198201

202+
LogPluginErr(SYSTEM_LEVEL, ER_GRP_RPL_CERT_BROADCAST_THREAD_STOPPED);
203+
199204
my_thread_exit(nullptr);
200205
}
201206

@@ -306,6 +311,10 @@ Certifier::Certifier()
306311
Certifier::~Certifier() {
307312
mysql_mutex_lock(&LOCK_certification_info);
308313
initialized = false;
314+
315+
broadcast_thread->terminate();
316+
delete broadcast_thread;
317+
309318
clear_certification_info();
310319
delete certification_info_sid_map;
311320

@@ -316,7 +325,6 @@ Certifier::~Certifier() {
316325
delete group_gtid_extracted;
317326
delete group_gtid_sid_map;
318327
mysql_mutex_unlock(&LOCK_certification_info);
319-
delete broadcast_thread;
320328

321329
mysql_mutex_lock(&LOCK_members);
322330
clear_members();
@@ -640,15 +648,6 @@ int Certifier::initialize(ulonglong gtid_assignment_block_size) {
640648
return error;
641649
}
642650

643-
int Certifier::terminate() {
644-
DBUG_TRACE;
645-
int error = 0;
646-
647-
if (is_initialized()) error = broadcast_thread->terminate();
648-
649-
return error;
650-
}
651-
652651
void Certifier::update_parallel_applier_indexes(
653652
bool update_parallel_applier_last_committed_global,
654653
bool increment_parallel_applier_sequence_number) {

plugin/group_replication/src/handlers/certification_handler.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,6 @@ int Certification_handler::handle_action(Pipeline_action *action) {
9696
Handler_THD_setup_action *thd_conf_action =
9797
(Handler_THD_setup_action *)action;
9898
applier_module_thd = thd_conf_action->get_THD_object();
99-
} else if (action_type == HANDLER_STOP_ACTION) {
100-
error = cert_module->terminate();
10199
}
102100

103101
if (error) return error;

share/messages_to_error_log.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12316,6 +12316,15 @@ ER_CHECK_TABLE_FUNCTIONS
1231612316
ER_CHECK_TABLE_FUNCTIONS_DETAIL
1231712317
eng "%s"
1231812318

12319+
ER_GRP_RPL_CERT_BROADCAST_THREAD_CREATE_FAILED
12320+
eng "Failed to create the Group Replication certifier broadcast thread (THD_certifier_broadcast)."
12321+
12322+
ER_GRP_RPL_CERT_BROADCAST_THREAD_STARTED
12323+
eng "The Group Replication certifier broadcast thread (THD_certifier_broadcast) started."
12324+
12325+
ER_GRP_RPL_CERT_BROADCAST_THREAD_STOPPED
12326+
eng "The Group Replication certifier broadcast thread (THD_certifier_broadcast) stopped."
12327+
1231912328
# DO NOT add server-to-client messages here;
1232012329
# they go in messages_to_clients.txt
1232112330
# in the same directory as this file.

0 commit comments

Comments
 (0)