Mats, hej.
thanks for your review. I agree with your notes.
> Hi Andrei!
>
> Patch looks good and is OK to push. However, please consider my
> comments below.
>
> Just my few cents,
> Mats Kindahl
>
> Andrei Elkin wrote:
>> Below is the list of changes that have just been committed into a local
>> 5.1 repository of aelkin. When aelkin does a push these changes
>> will be propagated to the main repository and, within 24 hours after the
>> push, to the public repository.
>> For information on how to access the public repository
>> see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
>>
>> ChangeSet@stripped, 2008-02-18 21:46:18+02:00, aelkin@stripped +3 -0
>> Bug #31316 Report server id clashes in SHOW SLAVE STATUS
>> "Server_IO_State" field
>> Critical error messages from get_master_version_and_clock() were
>> written
>> only to the slave errorlog while Show slave status did not display any
>> incident happened.
>> Although the artifact was reported for a particular
>> --replicate-same-server-id
>> related issue the fix refines all critical error reporting with
>> deploying rli->report().
>> The test for the bug covers only --replicate-same-server-id
>> error reporting.
>>
>>
>> mysql-test/suite/rpl/r/rpl_server_id1.result@stripped, 2008-02-18 21:46:15+02:00,
> aelkin@stripped +5 -10
>> new results reflecting changes
>>
>> mysql-test/suite/rpl/t/rpl_server_id1.test@stripped, 2008-02-18 21:46:15+02:00,
> aelkin@stripped +8 -9
>> Preserving the idea of the test unnecessary queries and the sleep are
>> eliminated.
>> In the end the slave must stop with the error displayable via $$$.
>>
>> sql/slave.cc@stripped, 2008-02-18 21:46:16+02:00, aelkin@stripped +36
> -11
>> improving get_master_version_and_clock() code to report a critical incident
>> via rli->report() that takes care of bothe the error log and
>> the slave's status info placeholders.
>> A critical error that force the IO slave thread to terminate
>> is handled
>> immediately (goto err).
>>
>> diff -Nrup a/mysql-test/suite/rpl/r/rpl_server_id1.result
> b/mysql-test/suite/rpl/r/rpl_server_id1.result
>> --- a/mysql-test/suite/rpl/r/rpl_server_id1.result 2007-06-27 15:28:29 +03:00
>> +++ b/mysql-test/suite/rpl/r/rpl_server_id1.result 2008-02-18 21:46:15 +02:00
>> @@ -4,10 +4,11 @@ reset master;
>> reset slave;
>> drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
>> start slave;
>> -create table t1 (n int);
>> reset master;
>> stop slave;
>> change master to master_port=SLAVE_PORT;
>> +start slave;
>> +must be having the replicate-same-server-id IO thread error
>> show slave status;
>> Slave_IO_State
>> Master_Host 127.0.0.1
>> @@ -20,7 +21,7 @@ Relay_Log_File slave-relay-bin.000001
>> Relay_Log_Pos 4
>> Relay_Master_Log_File
>> Slave_IO_Running No
>> -Slave_SQL_Running No
>> +Slave_SQL_Running Yes
>>
>
> Please blank out the SQL running field since it is irrelevant for the
> test. It does not really matter if it is Yes or No for the test to
> succeed.
>
ok
>> Replicate_Do_DB
>> Replicate_Ignore_DB
>> Replicate_Do_Table
>> @@ -43,13 +44,7 @@ Master_SSL_Cipher
>> Master_SSL_Key
>> Seconds_Behind_Master NULL
>> Master_SSL_Verify_Server_Cert No
>> -Last_IO_Errno #
>> -Last_IO_Error #
>> +Last_IO_Errno 1593
>> +Last_IO_Error Fatal error: The slave I/O thread stops because master and slave
> have equal MySQL server ids; these ids must be different for replication to work (or the
> --replicate-same-server-id option must be used on slave but this does not always make
> sense; please check the manual before using it).
>> Last_SQL_Errno 0
>> Last_SQL_Error
>>
>
> Please blank out the SQL error fields since they are not relevant for
> this test, this also includes the aliases "Last_Error" and
> "Last_Errno" above.
ok
>
>> -start slave;
>> -insert into t1 values (1);
>> -show status like "slave_running";
>> -Variable_name Value
>> -Slave_running OFF
>> -drop table t1;
>> diff -Nrup a/mysql-test/suite/rpl/t/rpl_server_id1.test
> b/mysql-test/suite/rpl/t/rpl_server_id1.test
>> --- a/mysql-test/suite/rpl/t/rpl_server_id1.test 2007-06-27 15:28:29 +03:00
>> +++ b/mysql-test/suite/rpl/t/rpl_server_id1.test 2008-02-18 21:46:15 +02:00
>> @@ -6,21 +6,20 @@
>> source include/master-slave.inc;
>> connection slave;
>> -create table t1 (n int);
>> reset master;
>> +
>> # replicate ourselves
>> stop slave;
>> --replace_result $SLAVE_MYPORT SLAVE_PORT
>> eval change master to master_port=$SLAVE_MYPORT;
>> +start slave;
>> +
>> +--echo must be having the replicate-same-server-id IO thread error
>>
>
> Please add something to the beginning of the line so that it is clear
> that this is a comment printed by the test script and not a SQL
> command nor a result from a query. Anything goes, for example ">>",
> "****", or "#" .
>
done, sorry you had to repeat it to me.
>> +
>> +source include/wait_for_slave_io_to_stop.inc;
>> +
>> --replace_result $SLAVE_MYPORT SLAVE_PORT
>> ---replace_column 16 # 18 # 35 # 36 #
>> +--replace_column 16 # 18 #
>> query_vertical show slave status;
>> -start slave;
>> -insert into t1 values (1);
>> -# can't MASTER_POS_WAIT(), it does not work in this weird setup
>> -# (when slave is its own master without --replicate-same-server-id)
>> -sleep 2; # enough time for the event to be replicated (it should not)
>> -show status like "slave_running";
>> -drop table t1;
>> # End of 4.1 tests
>> diff -Nrup a/sql/slave.cc b/sql/slave.cc
>> --- a/sql/slave.cc 2008-02-07 09:48:20 +02:00
>> +++ b/sql/slave.cc 2008-02-18 21:46:16 +02:00
>> @@ -747,7 +747,11 @@ int init_intvar_from_file(int* var, IO_C
>> static int get_master_version_and_clock(MYSQL* mysql, Master_info*
>> mi)
>> {
>> + char err_buff[MAX_SLAVE_ERRMSG];
>> const char* errmsg= 0;
>> + int err_code= 0;
>> + MYSQL_RES *master_res= 0;
>> + MYSQL_ROW master_row;
>> DBUG_ENTER("get_master_version_and_clock");
>> /*
>> @@ -758,7 +762,11 @@ static int get_master_version_and_clock(
>> mi->rli.relay_log.description_event_for_queue= 0;
>> if (!my_isdigit(&my_charset_bin,*mysql->server_version))
>> + {
>> errmsg = "Master reported unrecognized MySQL version";
>> + err_code= ER_SLAVE_FATAL_ERROR;
>> + sprintf(err_buff, ER(err_code), errmsg);
>> + }
>> else
>> {
>> /*
>> @@ -770,6 +778,8 @@ static int get_master_version_and_clock(
>> case '1':
>> case '2':
>> errmsg = "Master reported unrecognized MySQL version";
>> + err_code= ER_SLAVE_FATAL_ERROR;
>> + sprintf(err_buff, ER(err_code), errmsg);
>> break;
>> case '3':
>> mi->rli.relay_log.description_event_for_queue= new
>> @@ -802,26 +812,21 @@ static int get_master_version_and_clock(
>> */
>> if (errmsg)
>> - {
>> - sql_print_error(errmsg);
>> - DBUG_RETURN(1);
>> - }
>> + goto err;
>> /* as we are here, we tried to allocate the event */
>> if (!mi->rli.relay_log.description_event_for_queue)
>> {
>> - mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE,
>> - ER(ER_SLAVE_CREATE_EVENT_FAILURE),
>> - "default Format_description_log_event");
>> - DBUG_RETURN(1);
>> + errmsg= "default Format_description_log_event";
>> + err_code= ER_SLAVE_CREATE_EVENT_FAILURE;
>> + sprintf(err_buff, ER(err_code), errmsg);
>> + goto err;
>> }
>> /*
>> Compare the master and slave's clock. Do not die if master's clock is
>> unavailable (very old master not supporting UNIX_TIMESTAMP()?).
>> */
>> - MYSQL_RES *master_res= 0;
>> - MYSQL_ROW master_row;
>> if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT
>> UNIX_TIMESTAMP()")) &&
>> (master_res= mysql_store_result(mysql)) &&
>> @@ -858,11 +863,17 @@ static int get_master_version_and_clock(
>> if ((master_row= mysql_fetch_row(master_res)) &&
>> (::server_id == strtoul(master_row[1], 0, 10)) &&
>> !mi->rli.replicate_same_server_id)
>> + {
>> errmsg= "The slave I/O thread stops because master and slave have equal \
>> MySQL server ids; these ids must be different for replication to work (or \
>> the --replicate-same-server-id option must be used on slave but this does \
>> not always make sense; please check the manual before using it).";
>> + err_code= ER_SLAVE_FATAL_ERROR;
>> + sprintf(err_buff, ER(err_code), errmsg);
>> + }
>> mysql_free_result(master_res);
>> + if (errmsg)
>> + goto err;
>> }
>> /*
>> @@ -893,10 +904,16 @@ not always make sense; please check the {
>> if ((master_row= mysql_fetch_row(master_res)) &&
>> strcmp(master_row[0],
> global_system_variables.collation_server->name))
>> + {
>> errmsg= "The slave I/O thread stops because master and slave have \
>> different values for the COLLATION_SERVER global variable. The values must \
>> be equal for replication to work";
>> + err_code= ER_SLAVE_FATAL_ERROR;
>> + sprintf(err_buff, ER(err_code), errmsg);
>> + }
>> mysql_free_result(master_res);
>> + if (errmsg)
>> + goto err;
>> }
>> /*
>> @@ -921,16 +938,24 @@ be equal for replication to work";
>> if ((master_row= mysql_fetch_row(master_res)) &&
>> strcmp(master_row[0],
>> global_system_variables.time_zone->get_name()->ptr()))
>> + {
>> errmsg= "The slave I/O thread stops because master and slave have \
>> different values for the TIME_ZONE global variable. The values must \
>> be equal for replication to work";
>> + err_code= ER_SLAVE_FATAL_ERROR;
>> + sprintf(err_buff, ER(err_code), errmsg);
>> + }
>> mysql_free_result(master_res);
>> +
>> + if (errmsg)
>> + goto err;
>> }
>> err:
>> if (errmsg)
>> {
>> - sql_print_error(errmsg);
>> + DBUG_ASSERT(err_code != 0);
>> + mi->report(ERROR_LEVEL, err_code, err_buff);
>> DBUG_RETURN(1);
>> }
>>
>>
>>
>
regards,
Andrei