Hi Andrei!
Patch looks good and is OK to push. However, please consider my comments
below.
Just my few cents,
Mats Kindahl
Andrei Elkin wrote:
> Below is the list of changes that have just been committed into a local
> 5.1 repository of aelkin. When aelkin does a push these changes
> will be propagated to the main repository and, within 24 hours after the
> push, to the public repository.
> For information on how to access the public repository
> see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
>
> ChangeSet@stripped, 2008-02-18 21:46:18+02:00, aelkin@stripped +3 -0
> Bug #31316 Report server id clashes in SHOW SLAVE STATUS
> "Server_IO_State" field
>
> Critical error messages from get_master_version_and_clock() were written
> only to the slave errorlog while Show slave status did not display any
> incident happened.
>
> Although the artifact was reported for a particular --replicate-same-server-id
> related issue the fix refines all critical error reporting with
> deploying rli->report().
>
> The test for the bug covers only --replicate-same-server-id error reporting.
>
>
> mysql-test/suite/rpl/r/rpl_server_id1.result@stripped, 2008-02-18 21:46:15+02:00,
> aelkin@stripped +5 -10
> new results reflecting changes
>
> mysql-test/suite/rpl/t/rpl_server_id1.test@stripped, 2008-02-18 21:46:15+02:00,
> aelkin@stripped +8 -9
> Preserving the idea of the test unnecessary queries and the sleep are
> eliminated.
> In the end the slave must stop with the error displayable via $$$.
>
> sql/slave.cc@stripped, 2008-02-18 21:46:16+02:00, aelkin@stripped +36
> -11
> improving get_master_version_and_clock() code to report a critical incident
> via rli->report() that takes care of bothe the error log and
> the slave's status info placeholders.
>
> A critical error that force the IO slave thread to terminate is handled
> immediately (goto err).
>
> diff -Nrup a/mysql-test/suite/rpl/r/rpl_server_id1.result
> b/mysql-test/suite/rpl/r/rpl_server_id1.result
> --- a/mysql-test/suite/rpl/r/rpl_server_id1.result 2007-06-27 15:28:29 +03:00
> +++ b/mysql-test/suite/rpl/r/rpl_server_id1.result 2008-02-18 21:46:15 +02:00
> @@ -4,10 +4,11 @@ reset master;
> reset slave;
> drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
> start slave;
> -create table t1 (n int);
> reset master;
> stop slave;
> change master to master_port=SLAVE_PORT;
> +start slave;
> +must be having the replicate-same-server-id IO thread error
> show slave status;
> Slave_IO_State
> Master_Host 127.0.0.1
> @@ -20,7 +21,7 @@ Relay_Log_File slave-relay-bin.000001
> Relay_Log_Pos 4
> Relay_Master_Log_File
> Slave_IO_Running No
> -Slave_SQL_Running No
> +Slave_SQL_Running Yes
>
Please blank out the SQL running field since it is irrelevant for the
test. It does not really matter if it is Yes or No for the test to succeed.
> Replicate_Do_DB
> Replicate_Ignore_DB
> Replicate_Do_Table
> @@ -43,13 +44,7 @@ Master_SSL_Cipher
> Master_SSL_Key
> Seconds_Behind_Master NULL
> Master_SSL_Verify_Server_Cert No
> -Last_IO_Errno #
> -Last_IO_Error #
> +Last_IO_Errno 1593
> +Last_IO_Error Fatal error: The slave I/O thread stops because master and slave have
> equal MySQL server ids; these ids must be different for replication to work (or the
> --replicate-same-server-id option must be used on slave but this does not always make
> sense; please check the manual before using it).
> Last_SQL_Errno 0
> Last_SQL_Error
>
Please blank out the SQL error fields since they are not relevant for
this test, this also includes the aliases "Last_Error" and "Last_Errno"
above.
> -start slave;
> -insert into t1 values (1);
> -show status like "slave_running";
> -Variable_name Value
> -Slave_running OFF
> -drop table t1;
> diff -Nrup a/mysql-test/suite/rpl/t/rpl_server_id1.test
> b/mysql-test/suite/rpl/t/rpl_server_id1.test
> --- a/mysql-test/suite/rpl/t/rpl_server_id1.test 2007-06-27 15:28:29 +03:00
> +++ b/mysql-test/suite/rpl/t/rpl_server_id1.test 2008-02-18 21:46:15 +02:00
> @@ -6,21 +6,20 @@
>
> source include/master-slave.inc;
> connection slave;
> -create table t1 (n int);
> reset master;
> +
> # replicate ourselves
> stop slave;
> --replace_result $SLAVE_MYPORT SLAVE_PORT
> eval change master to master_port=$SLAVE_MYPORT;
> +start slave;
> +
> +--echo must be having the replicate-same-server-id IO thread error
>
Please add something to the beginning of the line so that it is clear
that this is a comment printed by the test script and not a SQL command
nor a result from a query. Anything goes, for example ">>", "****", or "#" .
> +
> +source include/wait_for_slave_io_to_stop.inc;
> +
> --replace_result $SLAVE_MYPORT SLAVE_PORT
> ---replace_column 16 # 18 # 35 # 36 #
> +--replace_column 16 # 18 #
> query_vertical show slave status;
> -start slave;
> -insert into t1 values (1);
> -# can't MASTER_POS_WAIT(), it does not work in this weird setup
> -# (when slave is its own master without --replicate-same-server-id)
> -sleep 2; # enough time for the event to be replicated (it should not)
> -show status like "slave_running";
> -drop table t1;
>
> # End of 4.1 tests
> diff -Nrup a/sql/slave.cc b/sql/slave.cc
> --- a/sql/slave.cc 2008-02-07 09:48:20 +02:00
> +++ b/sql/slave.cc 2008-02-18 21:46:16 +02:00
> @@ -747,7 +747,11 @@ int init_intvar_from_file(int* var, IO_C
>
> static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi)
> {
> + char err_buff[MAX_SLAVE_ERRMSG];
> const char* errmsg= 0;
> + int err_code= 0;
> + MYSQL_RES *master_res= 0;
> + MYSQL_ROW master_row;
> DBUG_ENTER("get_master_version_and_clock");
>
> /*
> @@ -758,7 +762,11 @@ static int get_master_version_and_clock(
> mi->rli.relay_log.description_event_for_queue= 0;
>
> if (!my_isdigit(&my_charset_bin,*mysql->server_version))
> + {
> errmsg = "Master reported unrecognized MySQL version";
> + err_code= ER_SLAVE_FATAL_ERROR;
> + sprintf(err_buff, ER(err_code), errmsg);
> + }
> else
> {
> /*
> @@ -770,6 +778,8 @@ static int get_master_version_and_clock(
> case '1':
> case '2':
> errmsg = "Master reported unrecognized MySQL version";
> + err_code= ER_SLAVE_FATAL_ERROR;
> + sprintf(err_buff, ER(err_code), errmsg);
> break;
> case '3':
> mi->rli.relay_log.description_event_for_queue= new
> @@ -802,26 +812,21 @@ static int get_master_version_and_clock(
> */
>
> if (errmsg)
> - {
> - sql_print_error(errmsg);
> - DBUG_RETURN(1);
> - }
> + goto err;
>
> /* as we are here, we tried to allocate the event */
> if (!mi->rli.relay_log.description_event_for_queue)
> {
> - mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE,
> - ER(ER_SLAVE_CREATE_EVENT_FAILURE),
> - "default Format_description_log_event");
> - DBUG_RETURN(1);
> + errmsg= "default Format_description_log_event";
> + err_code= ER_SLAVE_CREATE_EVENT_FAILURE;
> + sprintf(err_buff, ER(err_code), errmsg);
> + goto err;
> }
>
> /*
> Compare the master and slave's clock. Do not die if master's clock is
> unavailable (very old master not supporting UNIX_TIMESTAMP()?).
> */
> - MYSQL_RES *master_res= 0;
> - MYSQL_ROW master_row;
>
> if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()"))
> &&
> (master_res= mysql_store_result(mysql)) &&
> @@ -858,11 +863,17 @@ static int get_master_version_and_clock(
> if ((master_row= mysql_fetch_row(master_res)) &&
> (::server_id == strtoul(master_row[1], 0, 10)) &&
> !mi->rli.replicate_same_server_id)
> + {
> errmsg= "The slave I/O thread stops because master and slave have equal \
> MySQL server ids; these ids must be different for replication to work (or \
> the --replicate-same-server-id option must be used on slave but this does \
> not always make sense; please check the manual before using it).";
> + err_code= ER_SLAVE_FATAL_ERROR;
> + sprintf(err_buff, ER(err_code), errmsg);
> + }
> mysql_free_result(master_res);
> + if (errmsg)
> + goto err;
> }
>
> /*
> @@ -893,10 +904,16 @@ not always make sense; please check the
> {
> if ((master_row= mysql_fetch_row(master_res)) &&
> strcmp(master_row[0], global_system_variables.collation_server->name))
> + {
> errmsg= "The slave I/O thread stops because master and slave have \
> different values for the COLLATION_SERVER global variable. The values must \
> be equal for replication to work";
> + err_code= ER_SLAVE_FATAL_ERROR;
> + sprintf(err_buff, ER(err_code), errmsg);
> + }
> mysql_free_result(master_res);
> + if (errmsg)
> + goto err;
> }
>
> /*
> @@ -921,16 +938,24 @@ be equal for replication to work";
> if ((master_row= mysql_fetch_row(master_res)) &&
> strcmp(master_row[0],
> global_system_variables.time_zone->get_name()->ptr()))
> + {
> errmsg= "The slave I/O thread stops because master and slave have \
> different values for the TIME_ZONE global variable. The values must \
> be equal for replication to work";
> + err_code= ER_SLAVE_FATAL_ERROR;
> + sprintf(err_buff, ER(err_code), errmsg);
> + }
> mysql_free_result(master_res);
> +
> + if (errmsg)
> + goto err;
> }
>
> err:
> if (errmsg)
> {
> - sql_print_error(errmsg);
> + DBUG_ASSERT(err_code != 0);
> + mi->report(ERROR_LEVEL, err_code, err_buff);
> DBUG_RETURN(1);
> }
>
>
>
--
Mats Kindahl
Lead Software Developer
Replication Team
MySQL AB, www.mysql.com