Skip to content

Commit 4a4aab1

Browse files
committed
Bug#25475132 GROUP REPLICATION: MYSQL CLIENT CONNECTIONS HANG DURING GROUP REPLICATION START
Bug#26037344 THE DELAYED PLUGIN INITIALIZATION THREAD IS LEFT RUNNING WHEN IT IS NOT NEEDED
Clients complained that Group Replication made connections hang on server start if the plugin was also starting, i.e., joining the group. To solve this, we reworked the delayed initialization mechanism used on server start so that it only blocks connections until the server is in read mode, instead of waiting for the member to join the group as before. This rework also fixes the issue of the thread responsible for this task being initialized and left running even when it is not needed.
1 parent 3e408ac commit 4a4aab1

11 files changed

+374
-245
lines changed

mysql-test/include/rpl_reconnect.inc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ if ($rpl_server_number == 1)
6262

6363
if ($rpl_group_replication)
6464
{
65+
--let $rpl_connection_name= server1
66+
--source include/rpl_connection.inc
67+
--enable_reconnect
68+
6569
--let $rpl_connection_name= server_1
6670
--source include/rpl_connection.inc
6771
--enable_reconnect
@@ -83,6 +87,10 @@ if ($rpl_server_number == 2)
8387

8488
if ($rpl_group_replication)
8589
{
90+
--let $rpl_connection_name= server2
91+
--source include/rpl_connection.inc
92+
--enable_reconnect
93+
8694
--let $rpl_connection_name= server_2
8795
--source include/rpl_connection.inc
8896
--enable_reconnect
@@ -131,6 +139,10 @@ if ($rpl_server_number == 1)
131139

132140
if ($rpl_group_replication)
133141
{
142+
--let $rpl_connection_name= server1
143+
--source include/rpl_connection.inc
144+
--enable_reconnect
145+
134146
--let $rpl_connection_name= server_1
135147
--source include/rpl_connection.inc
136148
--source include/wait_until_connected_again.inc
@@ -155,6 +167,10 @@ if ($rpl_server_number == 2)
155167

156168
if ($rpl_group_replication)
157169
{
170+
--let $rpl_connection_name= server2
171+
--source include/rpl_connection.inc
172+
--enable_reconnect
173+
158174
--let $rpl_connection_name= server_2
159175
--source include/rpl_connection.inc
160176
--source include/wait_until_connected_again.inc

rapid/plugin/group_replication/include/delayed_plugin_initialization.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,17 @@ class Delayed_initialization_thread
5656
/**
5757
Wait for the initialization thread to do its job.
5858
*/
59-
void wait_for_initialization();
59+
void wait_for_thread_end();
60+
61+
/**
62+
Signal that the read mode is set on the server.
63+
*/
64+
void signal_read_mode_ready();
65+
66+
/**
67+
Wait for the read mode to be set by the thread process.
68+
*/
69+
void wait_for_read_mode();
6070

6171
private:
6272

@@ -68,6 +78,9 @@ class Delayed_initialization_thread
6878
/* Is the server ready*/
6979
bool is_server_ready;
7080

81+
/* Is the read mode already set*/
82+
bool is_super_read_only_set;
83+
7184
/* Thread related structures */
7285

7386
my_thread_handle delayed_init_pthd;

rapid/plugin/group_replication/include/plugin.h

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -82,26 +82,13 @@ extern Blocked_transaction_handler* blocked_transaction_handler;
8282
//Plugin global methods
8383
bool server_engine_initialized();
8484
void *get_plugin_pointer();
85-
int configure_and_start_applier_module();
86-
int configure_group_member_manager(char *hostname, char *uuid,
87-
uint port, unsigned int server_version);
88-
int configure_compatibility_manager();
89-
int terminate_applier_module();
90-
int initialize_recovery_module();
91-
void initialize_group_partition_handler();
92-
int terminate_recovery_module();
93-
int configure_group_communication(st_server_ssl_variables *ssl_variables);
94-
int start_group_communication();
95-
void declare_plugin_running();
85+
mysql_mutex_t* get_plugin_running_lock();
86+
int initialize_plugin_and_join(enum_plugin_con_isolation sql_api_isolation,
87+
Delayed_initialization_thread *delayed_init_thd);
9688
void register_server_reset_master();
97-
int leave_group();
98-
int terminate_plugin_modules();
9989
bool get_allow_local_lower_version_join();
10090
bool get_allow_local_disjoint_gtids_join();
10191
ulong get_transaction_size_limit();
102-
void initialize_asynchronous_channels_observer();
103-
void terminate_asynchronous_channels_observer();
104-
bool check_async_channel_running_on_secondary();
10592
bool is_plugin_waiting_to_set_server_read_mode();
10693

10794
//Plugin public methods

rapid/plugin/group_replication/src/delayed_plugin_initialization.cc

Lines changed: 35 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ static void *launch_handler_thread(void* arg)
2929
}
3030

3131
Delayed_initialization_thread::Delayed_initialization_thread()
32-
: thread_running(false), is_server_ready(false)
32+
: thread_running(false), is_server_ready(false), is_super_read_only_set(false)
3333
{
3434
mysql_mutex_init(key_GR_LOCK_delayed_init_run, &run_lock, MY_MUTEX_INIT_FAST);
3535
mysql_mutex_init(key_GR_LOCK_delayed_init_server_ready,
@@ -61,14 +61,14 @@ void Delayed_initialization_thread::signal_thread_ready()
6161
DBUG_VOID_RETURN;
6262
}
6363

64-
void Delayed_initialization_thread::wait_for_initialization()
64+
void Delayed_initialization_thread::wait_for_thread_end()
6565
{
66-
DBUG_ENTER("Delayed_initialization_thread::wait_for_initialization");
66+
DBUG_ENTER("Delayed_initialization_thread::wait_for_thread_end");
6767

6868
mysql_mutex_lock(&run_lock);
6969
while (thread_running)
7070
{
71-
DBUG_PRINT("sleep",("Waiting for the Delayed initialization thread to end"));
71+
DBUG_PRINT("sleep",("Waiting for the Delayed initialization thread to finish"));
7272
mysql_cond_wait(&run_cond, &run_lock);
7373
}
7474
mysql_mutex_unlock(&run_lock);
@@ -79,6 +79,33 @@ void Delayed_initialization_thread::wait_for_initialization()
7979
DBUG_VOID_RETURN;
8080
}
8181

82+
void Delayed_initialization_thread::signal_read_mode_ready()
83+
{
84+
DBUG_ENTER("Delayed_initialization_thread::signal_read_mode_ready");
85+
86+
mysql_mutex_lock(&run_lock);
87+
is_super_read_only_set= true;
88+
mysql_cond_broadcast(&run_cond);
89+
mysql_mutex_unlock(&run_lock);
90+
91+
DBUG_VOID_RETURN;
92+
}
93+
94+
void Delayed_initialization_thread::wait_for_read_mode()
95+
{
96+
DBUG_ENTER("Delayed_initialization_thread::wait_for_read_mode");
97+
98+
mysql_mutex_lock(&run_lock);
99+
while (!is_super_read_only_set)
100+
{
101+
DBUG_PRINT("sleep",("Waiting for the Delayed initialization thread to set super_read_only"));
102+
mysql_cond_wait(&run_cond, &run_lock);
103+
}
104+
mysql_mutex_unlock(&run_lock);
105+
106+
DBUG_VOID_RETURN;
107+
}
108+
82109
int Delayed_initialization_thread::launch_initialization_thread()
83110
{
84111
DBUG_ENTER("Delayed_initialization_thread::launch_initialization_thread");
@@ -128,158 +155,12 @@ int Delayed_initialization_thread::initialization_thread_handler()
128155
}
129156
mysql_mutex_unlock(&server_ready_lock);
130157

131-
//delayed initialization code starts here
132-
133-
int error= 0;
134-
Sql_service_command_interface *sql_command_interface= NULL;
135-
136-
//Just terminate it
137-
if (!wait_on_engine_initialization ||
138-
get_plugin_pointer() == NULL)
139-
{
140-
goto end;
141-
}
142-
143-
/*
144-
The plugin was initialized on server start
145-
so only now we can start the applier
146-
*/
147-
if (wait_on_engine_initialization)
148-
{
149-
DBUG_ASSERT(server_engine_initialized());
150-
wait_on_engine_initialization= false;
151-
152-
//Avoid unnecessary operations
153-
bool enabled_super_read_only= false;
154-
bool read_only_mode= false, super_read_only_mode=false;
155-
156-
char *hostname, *uuid;
157-
uint port;
158-
unsigned int server_version;
159-
st_server_ssl_variables server_ssl_variables=
160-
{false,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL};
161-
162-
get_server_parameters(&hostname, &port, &uuid, &server_version,
163-
&server_ssl_variables);
164-
165-
sql_command_interface= new Sql_service_command_interface();
166-
if (sql_command_interface->
167-
establish_session_connection(PSESSION_INIT_THREAD,
168-
get_plugin_pointer()) ||
169-
sql_command_interface->set_interface_user(GROUPREPL_USER))
170-
{
171-
/* purecov: begin inspected */
172-
log_message(MY_ERROR_LEVEL,
173-
"It was not possible to establish a connection to "
174-
"server SQL service");
175-
error= 1;
176-
goto err;
177-
/* purecov: end */
178-
}
179-
180-
/*
181-
At this point in the code, set the super_read_only mode here on the
182-
server to protect recovery and version module of the Group Replication.
183-
184-
Save the current read mode state to restore it in case Group Replication
185-
fail to start.
186-
*/
187-
188-
get_read_mode_state(sql_command_interface, &read_only_mode,
189-
&super_read_only_mode);
190-
191-
if (enable_super_read_only_mode(sql_command_interface))
192-
{
193-
error =1; /* purecov: inspected */
194-
log_message(MY_ERROR_LEVEL,
195-
"Could not enable the server read only mode and guarantee a "
196-
"safe recovery execution"); /* purecov: inspected */
197-
goto err; /* purecov: inspected */
198-
}
199-
200-
enabled_super_read_only= true;
201-
202-
if ((error= configure_group_communication(&server_ssl_variables)))
203-
goto err; /* purecov: inspected */
204-
205-
if ((error= configure_group_member_manager(hostname, uuid, port,
206-
server_version)))
207-
goto err; /* purecov: inspected */
208-
209-
if (check_async_channel_running_on_secondary())
210-
{
211-
error= 1;
212-
log_message(MY_ERROR_LEVEL, "Can't start group replication on secondary"
213-
" member with single primary-mode while"
214-
" asynchronous replication channels are"
215-
" running.");
216-
goto err; /* purecov: inspected */
217-
}
218-
219-
configure_compatibility_manager();
220-
221-
// need to be initialized before applier, is called on kill_pending_transactions
222-
blocked_transaction_handler= new Blocked_transaction_handler();
223-
224-
if ((error= initialize_recovery_module()))
225-
goto err; /* purecov: inspected */
226-
227-
if (configure_and_start_applier_module())
228-
{
229-
error= GROUP_REPLICATION_REPLICATION_APPLIER_INIT_ERROR;
230-
goto err;
231-
}
232-
233-
initialize_asynchronous_channels_observer();
234-
initialize_group_partition_handler();
235-
236-
if ((error= start_group_communication()))
237-
{
238-
//terminate the before created pipeline
239-
log_message(MY_ERROR_LEVEL,
240-
"Error on group communication initialization methods, "
241-
"killing the Group Replication applier"); /* purecov: inspected */
242-
applier_module->terminate_applier_thread(); /* purecov: inspected */
243-
goto err; /* purecov: inspected */
244-
}
245-
246-
if (view_change_notifier->wait_for_view_modification())
247-
{
248-
/* purecov: begin inspected */
249-
if (!view_change_notifier->is_cancelled())
250-
{
251-
//Only log a error when a view modification was not canceled.
252-
log_message(MY_ERROR_LEVEL,
253-
"Timeout on wait for view after joining group");
254-
}
255-
error= view_change_notifier->get_error();
256-
goto err;
257-
/* purecov: end */
258-
}
259-
declare_plugin_running(); //All is OK
260-
261-
err:
262-
if (error)
263-
{
264-
leave_group();
265-
terminate_plugin_modules();
266-
if (!server_shutdown_status && server_engine_initialized()
267-
&& enabled_super_read_only)
268-
{
269-
set_read_mode_state(sql_command_interface, read_only_mode,
270-
super_read_only_mode);
271-
}
272-
if (certification_latch != NULL)
273-
{
274-
delete certification_latch; /* purecov: inspected */
275-
certification_latch= NULL; /* purecov: inspected */
276-
}
277-
}
278-
}
158+
DBUG_ASSERT(server_engine_initialized());
279159

280-
end:
160+
//Protect this delayed start against other start/stop requests
161+
Mutex_autolock auto_lock_mutex(get_plugin_running_lock());
281162

282-
delete sql_command_interface;
163+
int error= initialize_plugin_and_join(PSESSION_INIT_THREAD, this);
283164

284165
mysql_mutex_lock(&run_lock);
285166
thread_running= false;

rapid/plugin/group_replication/src/observer_server_state.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,7 @@ int group_replication_before_handle_connection(Server_state_param *param)
2626
if (wait_on_engine_initialization)
2727
{
2828
delayed_initialization_thread->signal_thread_ready();
29-
delayed_initialization_thread->wait_for_initialization();
30-
delete delayed_initialization_thread;
31-
delayed_initialization_thread= NULL;
29+
delayed_initialization_thread->wait_for_read_mode();
3230
}
3331
return 0;
3432
}

0 commit comments

Comments
 (0)