Hi Luís,
Thanks for this work. It's good that we suppress both errors returned to
client and errors written to the error log. I think the architecture is
fine and it was a good choice to return the three-state value from
compatible_with(). This makes the code clear and everything is explicit.
I see no problems, I'll approve this patch. I have four small
suggestions for improvements though, please fix at least the first one
before you push:
(1) Please move the declaration of ignored_error_code to log_event.h
since it's defined in log_event.cc
(2) Three minor cosmetic fixes, see comments inline. Fix if you like.
/Sven
On 06/02/2011 12:54 PM, Luis Soares wrote:
> #At file:///home/lsoares/Workspace/bzr/work/bugfixing/12428494/mysql-5.1/ based on
> revid:alexander.nozdrin@stripped
>
> 3682 Luis Soares 2011-06-02
> BUG#12428494: IN ROW-BASED REPLICATION SLAVE-SKIP-ERRORS=ALL DOES
> NOT SKIP ALL ERRORS
>
> If setting slave-skip-errors=all and using RBR, then slave
> will not skip errors returned while:
>
> 1. opening tables that do not exist on the slave;
> 2. checking that table definitions on the slave match
> the ones on the master.
>
> On SBR, however, the slave will properly skip the errors
> as instructed by the slave-skip-errors option setting.
>
> We fix this by checking if the error returned on
> simple_open_n_lock_tables and on m_tabledef.compatible_with
> should be ignored or not before actually reporting/returning
> the error.
> @ sql/log_event.cc
> Skips the event if there were errors, but these were meant
> to be ignored.
> @ sql/rpl_rli.h
> Exported prototype for ignored_error_code.
> @ sql/rpl_utility.cc
> Made table_def::compatible_with check if the error got was meant
> to be ignored. If so, do not report the error (but still return
> a value that the caller can check if there were no errors, errors
> or if all errors were ignored.
> @ sql/rpl_utility.h
> Added enumeration as the return value for compatible_with. It can
> now return three different values:
> 1. COMPATIBLE (no errors)
> 2. INCOMPATIBLE (at least one incompatibility was found and not
> ignored)
> 3. INCOMPATIBLE_IGNORED (all incompatibility errors found were
> ignored, thus not reported)
>
> added:
> mysql-test/suite/rpl/r/rpl_skip_error_all.result
> mysql-test/suite/rpl/t/rpl_skip_error_all-slave.opt
> mysql-test/suite/rpl/t/rpl_skip_error_all.test
> modified:
> sql/log_event.cc
> sql/rpl_rli.h
> sql/rpl_utility.cc
> sql/rpl_utility.h
> === added file 'mysql-test/suite/rpl/r/rpl_skip_error_all.result'
> --- a/mysql-test/suite/rpl/r/rpl_skip_error_all.result 1970-01-01 00:00:00 +0000
> +++ b/mysql-test/suite/rpl/r/rpl_skip_error_all.result 2011-06-02 10:54:21 +0000
> @@ -0,0 +1,15 @@
> +include/master-slave.inc
> +[connection master]
> +include/rpl_reset.inc
> +SET SQL_LOG_BIN=0;
> +CREATE TABLE t1 (a CHAR, b CHAR);
> +SET SQL_LOG_BIN=1;
> +CREATE TABLE t1 (a DATE, b DATE);
> +INSERT INTO t1 VALUES ('a', 'a'), ('b', 'b');
> +DROP TABLE t1;
> +include/rpl_reset.inc
> +CREATE TABLE t1 (id INT);
> +DROP TABLE t1;
> +INSERT INTO t1 VALUES(1);
> +DROP TABLE t1;
> +include/rpl_end.inc
>
> === added file 'mysql-test/suite/rpl/t/rpl_skip_error_all-slave.opt'
> --- a/mysql-test/suite/rpl/t/rpl_skip_error_all-slave.opt 1970-01-01 00:00:00 +0000
> +++ b/mysql-test/suite/rpl/t/rpl_skip_error_all-slave.opt 2011-06-02 10:54:21 +0000
> @@ -0,0 +1 @@
> +--slave-skip-errors=all
>
> === added file 'mysql-test/suite/rpl/t/rpl_skip_error_all.test'
> --- a/mysql-test/suite/rpl/t/rpl_skip_error_all.test 1970-01-01 00:00:00 +0000
> +++ b/mysql-test/suite/rpl/t/rpl_skip_error_all.test 2011-06-02 10:54:21 +0000
> @@ -0,0 +1,44 @@
> +--source include/master-slave.inc
> +
> +#
> +# BUG#12428494: IN ROW-BASED REPLICATION SLAVE-SKIP-ERRORS=ALL DOES NOT SKIP ALL
> ERRORS
> +#
> +
> +# Test 1
> +#
> +# Test that different table definitions error
> +# will be skipped when slave-skip-errors=all
> +--source include/rpl_reset.inc
> +
> +SET SQL_LOG_BIN=0;
> +CREATE TABLE t1 (a CHAR, b CHAR);
> +SET SQL_LOG_BIN=1;
> +
> +--connection slave
> +CREATE TABLE t1 (a DATE, b DATE);
> +
> +--connection master
> +INSERT INTO t1 VALUES ('a', 'a'), ('b', 'b');
> +--sync_slave_with_master
> +--connection master
> +DROP TABLE t1;
> +--sync_slave_with_master
> +
> +# Test 2
> +#
> +# Test that when there is no table at the slave and if
> +# slave-skil-errors=all the slave will skip it
> +--source include/rpl_reset.inc
> +
> +--connection master
> +
> +CREATE TABLE t1 (id INT);
> +--sync_slave_with_master
> +DROP TABLE t1;
> +
> +--connection master
> +INSERT INTO t1 VALUES(1);
> +DROP TABLE t1;
> +--sync_slave_with_master
> +
> +--source include/rpl_end.inc
>
> === modified file 'sql/log_event.cc'
> --- a/sql/log_event.cc 2011-03-24 10:52:40 +0000
> +++ b/sql/log_event.cc 2011-06-02 10:54:21 +0000
> @@ -7362,6 +7362,7 @@ int Rows_log_event::do_apply_event(Relay
> {
> DBUG_ENTER("Rows_log_event::do_apply_event(Relay_log_info*)");
> int error= 0;
> + TABLE *table= NULL;
> /*
> If m_table_id == ~0UL, then we have a dummy event that does not
> contain any data. In that case, we just remove all tables in the
> @@ -7446,13 +7447,18 @@ int Rows_log_event::do_apply_event(Relay
> if (simple_open_n_lock_tables(thd, rli->tables_to_lock))
> {
> uint actual_error= thd->main_da.sql_errno();
> - if (thd->is_slave_error || thd->is_fatal_error)
> + if (ignored_error_code(actual_error))
> + {
> + clear_all_errors(thd, const_cast<Relay_log_info*>(rli));
> + error= 0;
> + actual_error= 0;
> + goto end;
> + }
> + else if (thd->is_slave_error || thd->is_fatal_error)
> {
> /*
> Error reporting borrowed from Query_log_event with many excessive
> simplifications.
> - We should not honour --slave-skip-errors at this point as we are
> - having severe errors which should not be skiped.
> */
> rli->report(ERROR_LEVEL, actual_error,
> "Error '%s' on opening tables",
> @@ -7476,16 +7482,26 @@ int Rows_log_event::do_apply_event(Relay
> RPL_TABLE_LIST *ptr= rli->tables_to_lock;
> for ( ; ptr ; ptr= static_cast<RPL_TABLE_LIST*>(ptr->next_global))
> {
> - if (ptr->m_tabledef.compatible_with(rli, ptr->table))
> + table_def::enum_retval_compatible_with compatible;
> + if ((compatible= ptr->m_tabledef.compatible_with(rli, ptr->table)) !=
> + table_def::COMPATIBLE)
This is cosmetic, but I would suggest moving the initialization to the
declaration (create and initialize at the same time, put only the
condition in the if).
> {
> - /*
> - We should not honour --slave-skip-errors at this point as we are
> - having severe errors which should not be skiped.
> - */
> mysql_unlock_tables(thd, thd->lock);
> thd->lock= 0;
> thd->is_slave_error= 1;
> const_cast<Relay_log_info*>(rli)->clear_tables_to_lock();
> +
> + /*
> + skip the error if there were errors that were skipped
> + while checking compatibility.
> + */
> + if (compatible == table_def::INCOMPATIBLE_IGNORED)
> + {
> + clear_all_errors(thd, const_cast<Relay_log_info*>(rli));
> + error= 0;
> + goto end;
> + }
> +
> DBUG_RETURN(ERR_BAD_TABLE_DEF);
> }
> }
> @@ -7514,9 +7530,7 @@ int Rows_log_event::do_apply_event(Relay
> #endif
> }
>
> - TABLE*
> - table=
> - m_table=
> const_cast<Relay_log_info*>(rli)->m_table_map.get_table(m_table_id);
> + table= m_table=
> const_cast<Relay_log_info*>(rli)->m_table_map.get_table(m_table_id);
>
> DBUG_PRINT("debug", ("m_table: 0x%lx, m_table_id: %lu", (ulong) m_table,
> m_table_id));
>
> @@ -7666,6 +7680,8 @@ int Rows_log_event::do_apply_event(Relay
> }
> } // if (table)
>
> +end:
> +
> /*
> We need to delay this clear until here bacause unpack_current_row() uses
> master-side table definitions stored in rli.
>
> === modified file 'sql/rpl_rli.h'
> --- a/sql/rpl_rli.h 2010-09-24 09:44:53 +0000
> +++ b/sql/rpl_rli.h 2011-06-02 10:54:21 +0000
> @@ -431,5 +431,7 @@ private:
> // Defined in rpl_rli.cc
> int init_relay_log_info(Relay_log_info* rli, const char* info_fname);
>
> +// defined in log_event.cc
> +int ignored_error_code(int error);
Please move the declaration to log_event.h if it's defined in log_event.cc
>
> #endif /* RPL_RLI_H */
>
> === modified file 'sql/rpl_utility.cc'
> --- a/sql/rpl_utility.cc 2010-03-17 14:28:49 +0000
> +++ b/sql/rpl_utility.cc 2011-06-02 10:54:21 +0000
> @@ -173,7 +173,7 @@ uint32 table_def::calc_field_size(uint c
> Is the definition compatible with a table?
>
> */
> -int
> +table_def::enum_retval_compatible_with
> table_def::compatible_with(Relay_log_info const *rli_arg, TABLE *table)
> const
> {
> @@ -181,7 +181,7 @@ table_def::compatible_with(Relay_log_inf
> We only check the initial columns for the tables.
> */
> uint const cols_to_check= min(table->s->fields, size());
> - int error= 0;
> + enum_retval_compatible_with retval= COMPATIBLE;
> Relay_log_info const *rli= const_cast<Relay_log_info*>(rli_arg);
>
> TABLE_SHARE const *const tsh= table->s;
> @@ -193,34 +193,62 @@ table_def::compatible_with(Relay_log_inf
> {
> DBUG_ASSERT(col< size()&& col< tsh->fields);
> DBUG_ASSERT(tsh->db.str&& tsh->table_name.str);
> - error= 1;
> - char buf[256];
> - my_snprintf(buf, sizeof(buf), "Column %d type mismatch - "
> - "received type %d, %s.%s has type %d",
> - col, type(col), tsh->db.str, tsh->table_name.str,
> - field->type());
> - rli->report(ERROR_LEVEL, ER_BINLOG_ROW_WRONG_TABLE_DEF,
> - ER(ER_BINLOG_ROW_WRONG_TABLE_DEF), buf);
> +
> + /*
> + Avoid reporting the error (and dumping the error message
> + into the server error log) if the error is to be ignored.
> + */
> + if (ignored_error_code(ER_BINLOG_ROW_WRONG_TABLE_DEF))
> + {
> + if (retval == COMPATIBLE)
> + retval= INCOMPATIBLE_IGNORED;
> + }
> + else
> + {
> + retval= INCOMPATIBLE;
> +
> + char buf[256];
> + my_snprintf(buf, sizeof(buf), "Column %d type mismatch - "
> + "received type %d, %s.%s has type %d",
> + col, type(col), tsh->db.str, tsh->table_name.str,
> + field->type());
> + rli->report(ERROR_LEVEL, ER_BINLOG_ROW_WRONG_TABLE_DEF,
> + ER(ER_BINLOG_ROW_WRONG_TABLE_DEF), buf);
> + }
> }
> /*
> Check the slave's field size against that of the master.
> */
> - if (!error&&
> - !field->compatible_field_size(field_metadata(col), rli_arg, m_flags))
> + if ((retval == COMPATIBLE)&&
> + !field->compatible_field_size(field_metadata(col), rli_arg, m_flags))
The last line above is indented one space too much.
> {
> - error= 1;
> - char buf[256];
> - my_snprintf(buf, sizeof(buf), "Column %d size mismatch - "
> - "master has size %d, %s.%s on slave has size %d."
> - " Master's column size should be<= the slave's "
> - "column size.", col,
> - field->pack_length_from_metadata(m_field_metadata[col]),
> - tsh->db.str, tsh->table_name.str,
> - field->row_pack_length());
> - rli->report(ERROR_LEVEL, ER_BINLOG_ROW_WRONG_TABLE_DEF,
> - ER(ER_BINLOG_ROW_WRONG_TABLE_DEF), buf);
> +
> + /*
> + Avoid reporting the error (and dumping the error message
> + into the server error log) if the error is to be ignored.
> + */
> + if (ignored_error_code(ER_BINLOG_ROW_WRONG_TABLE_DEF))
> + {
> + if (retval == COMPATIBLE)
> + retval= INCOMPATIBLE_IGNORED;
> + }
> + else
> + {
> + retval= INCOMPATIBLE;
> +
> + char buf[256];
> + my_snprintf(buf, sizeof(buf), "Column %d size mismatch - "
> + "master has size %d, %s.%s on slave has size %d."
> + " Master's column size should be<= the slave's "
> + "column size.", col,
> + field->pack_length_from_metadata(m_field_metadata[col]),
> + tsh->db.str, tsh->table_name.str,
> + field->row_pack_length());
> + rli->report(ERROR_LEVEL, ER_BINLOG_ROW_WRONG_TABLE_DEF,
> + ER(ER_BINLOG_ROW_WRONG_TABLE_DEF), buf);
> + }
> }
> }
>
> - return error;
> + return retval;
> }
>
> === modified file 'sql/rpl_utility.h'
> --- a/sql/rpl_utility.h 2010-03-17 18:15:41 +0000
> +++ b/sql/rpl_utility.h 2011-06-02 10:54:21 +0000
> @@ -228,11 +228,24 @@ public:
> @param rli Pointer to relay log info
> @param table Pointer to table to compare with.
>
> - @retval 1 if the table definition is not compatible with @c table
> - @retval 0 if the table definition is compatible with @c table
> + @retval INCOMPATIBLE
> + if the table definition is not compatible with @c table
> + @retval INCOMPATIBLE_IGNORED
> + if the table definition is not compatible with @c table but error
> + was ignored
> + @retval COMPATIBLE
> + if the table definition is compatible with @c table
> */
> #ifndef MYSQL_CLIENT
> - int compatible_with(Relay_log_info const *rli, TABLE *table) const;
> + enum enum_retval_compatible_with {
> + /* tables are compatible */
> + COMPATIBLE= 0,
> + /* an incompatibility error was found and reported */
> + INCOMPATIBLE= 1,
> + /* an incompatibility error was found but ignored as per slave_skip_error option
> */
> + INCOMPATIBLE_IGNORED= 2
Numerical values don't matter for this enumeration type: I would suggest
to remove the initializers.
> + };
> + enum_retval_compatible_with compatible_with(Relay_log_info const *rli, TABLE
> *table) const;
> #endif
>
> private:
>
>
>
>
>