Thanks for the patch, Chuck.
Maybe not the right time to raise this question, but why do we need a
separate timeout for the ddl blocker? Why not use the existing
table_lock_wait_timeout for this? Seems a bit cumbersome to have to
set several different timeout variables in order to control when a
query times out.
I have not been able to understand the mechanism used for variables,
but since you are following the same pattern as for existing
variables, I guess it is OK. However, it puzzles me a bit that I am
not able to find any code that instantiate the new
sys_var_backup_wait_timeout class nor implements the declared
sys_backup_wait_timeout function. In addition, the following note in
set_var.cc concerns me:
@note
Be careful with var->save_result: sys_var::check() only updates
ulonglong_value; so other members of the union are garbage then; to use
them you must first assign a value to them (in specific ::check() for
example).
I see that sys_var_backup_wait_timeout::update accesses
save_result.ulong_value. Is the above note relevant in that context?
Another concern is that the ddl_blocker seems very brittle with
respect to errors. It seems to me that in case of errors in DDL
operations, it may happen that only one of the start_DDL()/end_DDL()
functions are called. This is a more general problem than related to
this patch, but I think this patch adds new risks in this area (see
comments below).
More comments in-line.
Chuck Bell wrote:
...
> === added file 'mysql-test/t/backup_timeout.test'
> --- a/mysql-test/t/backup_timeout.test 1970-01-01 00:00:00 +0000
> +++ b/mysql-test/t/backup_timeout.test 2008-08-08 14:07:00 +0000
> @@ -0,0 +1,210 @@
> +#
> +# This test is for the DDL blocker timeout feature.
> +# TODO : Add a native driver to the test when one becomes available
> +#
> +# Notes
> +# You must use a dedicated connection for getting and releasing locks.
> +# Do not issue a get_lock() or release_lock() in the same connection
> +# (thread) as code that calls BACKUP_BREAKPOINT(). Using the same connection
> +# to get/release locks and run code that issues BACKUP_BREAKPOINTs will result
> +# in an assertion using DEBUG_ASSERT(thd->ull == NULL) from debug_sync_point()
> +# in item_func.cc.
> +#
I think it would be better if the above note was written with the
context of the operations performed in this test script. People
reading and writing test scripts do not necessarily know how
get_lock(), release_lock(), BACKUP_BREAKPOINT(), and
debug_sync_point() maps to operations in this script.
> +
> +--source include/have_innodb.inc
> +--source include/have_debug.inc
> +--source include/not_embedded.inc
Don't you neeed have_debug_sync.inc too?
> +
> +SET DEBUG_SYNC= 'reset';
> +
> +#
> +# Remove backup files (if they exist)
> +#
> +
> +--error 0,1
> +--remove_file $MYSQLTEST_VARDIR/master-data/bup_ddl_blocker_orig.bak;
Why delete this file? It is not used in this test.
> +--error 0,1
> +--remove_file $MYSQLTEST_VARDIR/master-data/bup_ddl_blocker.bak;
This file is deleted before each backup. Why delete it here too?
> +
> +#
> +# Connections used in this test
> +#
> +# con1 used to create data, load data, and run the backup
> +# con2-con5 used for DDL statements: 2 before backup and 2 during backup
> +# con6 used for setting and releasing breakpoints
> +#
It is con5 that is used for setting and releasing breakpoints in this
test.
> +
> +connect (con1,localhost,root,,);
> +connect (con2,localhost,root,,);
> +connect (con3,localhost,root,,);
> +connect (con4,localhost,root,,);
> +connect (con5,localhost,root,,);
> +connect (con6,localhost,root,,);
> +
> +connection con1;
> +
> +# Create data for this test and tailor it to the test.
> +--source include/backup_ddl_create_data.inc
> +
> +--disable_warnings
> +DROP TABLE IF EXISTS bup_ddl_blocker.t1, bup_ddl_blocker.t2,
> + bup_ddl_blocker.t3, bup_ddl_blocker.t4;
> +--enable_warnings
What is the point in first running a script to create some tables and
then drop them right afterwards?
> +
> +# Create a table and load it with data.
> +--echo con1: Creating tables
> +CREATE TABLE bup_ddl_blocker.t1 (col_a CHAR(40)) ENGINE=INNODB;
> +
> +--echo con1: Loading data
> +INSERT INTO bup_ddl_blocker.t1 VALUES ("01 Some data to test");
> +INSERT INTO bup_ddl_blocker.t1 VALUES ("02 Some data to test");
> +INSERT INTO bup_ddl_blocker.t1 VALUES ("03 Some data to test");
> +
> +#
> +# Part A - test timeout for one session
> +#
> +--echo Part A
> +
> +--error 0,1
> +--remove_file $MYSQLTEST_VARDIR/master-data/bup_ddl_blocker.bak
> +
> +connection con1;
> +
> +--echo con1: Activate synchronization points for backup.
> +SET DEBUG_SYNC= 'after_block_ddl SIGNAL bup_blocked WAIT_FOR timeout_done';
> +
> +--echo con1: Get a backup going and stop after the DDL blocker is fired.
> +send BACKUP DATABASE bup_ddl_blocker TO "bup_ddl_blocker.bak";
> +
> +connection con2;
> +
> +SET DEBUG_SYNC= 'now WAIT_FOR bup_blocked';
> +
> +--echo Set ddl timeout to 1 second
> +SET backup_wait_timeout = 1;
> +SHOW VARIABLES LIKE 'backup_wait%';
> +
> +--echo con2: Try a ddl operation and watch it expire
> +--error ER_DDL_TIMEOUT
> +CREATE TABLE bup_ddl_blocker.t2 (col_a CHAR(40)) ENGINE=MEMORY;
Would it be possible to check that the timing is right? That is, that
it times out after approx. 1 sec. and not 10?
> +
> +connection con5;
> +--echo release the lock.
> +--echo con5: Resume all.
> +SET DEBUG_SYNC= 'now SIGNAL timeout_done';
> +
> +# Reconnect to con1 and let backup finish.
> +
> +connection con1;
> +--replace_column 1 #
> +reap;
> +
> +#
> +# Part B - test timeout for a session with a timeout,
> +# and a session with no timeout (backup_wait_timeout = 0)
> +#
> +--echo Part B
> +
> +--error 0,1
> +--remove_file $MYSQLTEST_VARDIR/master-data/bup_ddl_blocker.bak
> +
> +connection con1;
> +
> +--echo con1: Activate synchronization points for backup.
> +SET DEBUG_SYNC= 'after_block_ddl SIGNAL bup_blocked WAIT_FOR timeout_done';
> +
> +--echo con1: Get a backup going and stop after the DDL blocker is fired.
> +send BACKUP DATABASE bup_ddl_blocker TO "bup_ddl_blocker.bak";
> +
> +connection con2;
> +
> +SET DEBUG_SYNC= 'now WAIT_FOR bup_blocked';
> +
> +--echo Set ddl timeout to 0 seconds
> +SET backup_wait_timeout = 0;
> +SHOW VARIABLES LIKE 'backup_wait%';
> +
> +--echo con2: Try a ddl operation and it should expire
> +--error ER_DDL_TIMEOUT
> +CREATE TABLE bup_ddl_blocker.t3 (col_a CHAR(40)) ENGINE=MEMORY;
> +
> +connection con3;
> +
> +SET backup_wait_timeout = 100;
> +SHOW VARIABLES LIKE 'backup_wait%';
> +
> +--echo con3: Try a ddl operation and it should not expire
> +send CREATE TABLE bup_ddl_blocker.t3 (col_a CHAR(40)) ENGINE=MEMORY;
> +
> +connection con5;
> +--echo release the lock.
> +--echo con5: Resume all.
> +SET DEBUG_SYNC= 'now SIGNAL timeout_done';
> +
> +# Reconnect to con1 and let backup finish.
> +
> +connection con1;
> +--replace_column 1 #
> +reap;
> +
> +connection con3;
> +reap;
> +
> +USE bup_ddl_blocker;
> +SHOW FULL TABLES;
> +
> +#
> +# Part C - test default behavior: timeout if backup_wait_timeout = 0
What do you mean by default behavior? Default for backup_wait_timeout
is 50.
> +#
> +--echo Part C
> +
> +--error 0,1
> +--remove_file $MYSQLTEST_VARDIR/master-data/bup_ddl_blocker.bak
> +
> +connection con1;
> +
> +# Set the breakpoint for DDL in progress.
> +--echo con5: Activate synchronization points for backup.
> +SET DEBUG_SYNC= 'after_block_ddl SIGNAL bup_blocked WAIT_FOR timeout_done';
> +
> +--echo con1: Get a backup going and stop after the DDL blocker is fired.
> +send BACKUP DATABASE bup_ddl_blocker TO "bup_ddl_blocker.bak";
> +
> +connection con2;
> +
> +SET DEBUG_SYNC= 'now WAIT_FOR bup_blocked';
> +
> +--echo Set ddl timeout to 0 seconds
> +SET backup_wait_timeout = 0;
> +SHOW VARIABLES LIKE 'backup_wait%';
> +
> +--echo con2: Try a ddl operation and it should expire
> +--error ER_DDL_TIMEOUT
> +CREATE TABLE bup_ddl_blocker.t4 (col_a CHAR(40)) ENGINE=MEMORY;
> +
> +connection con3;
> +
> +SET backup_wait_timeout = 0;
> +SHOW VARIABLES LIKE 'backup_wait%';
> +
> +--echo con3: Try a ddl operation and it should expire
> +--error ER_DDL_TIMEOUT
> +CREATE TABLE bup_ddl_blocker.t4 (col_a CHAR(40)) ENGINE=MEMORY;
> +
> +connection con5;
> +--echo release the lock.
> +--echo con5: Resume all.
> +SET DEBUG_SYNC= 'now SIGNAL timeout_done';
> +
> +# Reconnect to con1 and let backup finish.
> +
> +connection con1;
> +--replace_column 1 #
> +reap;
> +
> +USE bup_ddl_blocker;
> +SHOW FULL TABLES;
> +
> +DROP DATABASE bup_ddl_blocker;
> +
> +--remove_file $MYSQLTEST_VARDIR/master-data/bup_ddl_blocker.bak
>
> === modified file 'sql/ddl_blocker.cc'
> --- a/sql/ddl_blocker.cc 2008-06-25 13:39:04 +0000
> +++ b/sql/ddl_blocker.cc 2008-08-08 14:07:00 +0000
> @@ -101,29 +101,49 @@ void DDL_blocker_class::end_DDL()
> /**
> check_DDL_blocker
>
> - Check to see if we are blocked from continuing. If so,
> - wait until the blocked process signals the condition.
> + Check to see if we are blocked from continuing. If so, wait until block is
> + removed or the timeout specified by backup_wait_timeout variable occurs.
> +
> + @param[in] thd The THD object from the caller.
>
> - @param thd The THD object from the caller.
> - @returns TRUE
> + @note: This operation blocks DDL operations until end_ddl() is called.
> +
> + @returns TRUE if not blocked
> + @returns FALSE if ddl is blocked and timeout occurs
> */
> my_bool DDL_blocker_class::check_DDL_blocker(THD *thd)
> {
> + int ret = 0;
> + struct timespec ddl_timeout;
> DBUG_ENTER("check_DDL_blocker()");
> DEBUG_SYNC(thd, "before_check_ddl_blocked");
>
> + set_timespec(ddl_timeout, thd->backup_wait_timeout);
> +
> /*
> Check the ddl blocker condition. Rest until ddl blocker is released.
> */
> pthread_mutex_lock(&THR_LOCK_DDL_is_blocked);
> thd->enter_cond(&COND_DDL_blocker, &THR_LOCK_DDL_is_blocked,
> "DDL blocker: DDL is blocked");
> - while (DDL_blocked && !thd->DDL_exception)
> - pthread_cond_wait(&COND_DDL_blocker, &THR_LOCK_DDL_is_blocked);
> - start_DDL();
> - thd->exit_cond("DDL blocker: Ok to run DDL");
> - DEBUG_SYNC(thd, "after_start_ddl");
> - DBUG_RETURN(TRUE);
> + while (DDL_blocked && !thd->DDL_exception && (ret == 0))
> + {
> + if (thd->backup_wait_timeout == 0)
> + ret = -1;
> + else
> + ret= pthread_cond_timedwait(&COND_DDL_blocker,
> &THR_LOCK_DDL_is_blocked,
> + &ddl_timeout);
> + }
> + thd->exit_cond("DDL blocker: DDL is not blocked");
> + if (ret == 0)
> + {
> + start_DDL();
> + DEBUG_SYNC(thd, "after_start_ddl");
> + }
> + else
> + my_error(ER_DDL_TIMEOUT, MYF(0), thd->query);
> +
This assumes that all errors in pthread_cond_timedwait is timeouts.
At least in theory, pthread_cond_timedwait may return other errors
than ETIMEDOUT, but I guess it is not a big problem that any such
errors are reported as a timeout.
> + DBUG_RETURN(ret == 0);
> }
>
> /**
>
> === modified file 'sql/set_var.cc'
> --- a/sql/set_var.cc 2008-07-09 07:12:43 +0000
> +++ b/sql/set_var.cc 2008-08-08 14:07:00 +0000
> @@ -227,6 +227,7 @@ static sys_var_long_ptr sys_concurrent_i
> static sys_var_long_ptr sys_connect_timeout(&vars, "connect_timeout",
> &connect_timeout);
> static sys_var_const_str sys_datadir(&vars, "datadir",
> mysql_real_data_home);
> +static sys_var_backup_wait_timeout sys_backup_wait_timeout(&vars,
> "backup_wait_timeout");
> #ifndef DBUG_OFF
> static sys_var_thd_dbug sys_dbug(&vars, "debug");
> #endif
> @@ -2688,6 +2689,24 @@ uchar *sys_var_last_insert_id::value_ptr
> bool sys_var_insert_id::update(THD *thd, set_var *var)
> {
> thd->force_one_auto_inc_interval(var->save_result.ulonglong_value);
> + return 0;
> +}
> +
> +
> +uchar *sys_var_backup_wait_timeout::value_ptr(THD *thd, enum_var_type type,
> + LEX_STRING *base)
> +{
> + thd->sys_var_tmp.ulong_value= thd->backup_wait_timeout;
> + return (uchar*) &thd->sys_var_tmp.ulonglong_value;
> +}
> +
> +
> +bool sys_var_backup_wait_timeout::update(THD *thd, set_var *var)
> +{
> + if (var->save_result.ulong_value > (LONG_MAX/1000))
> + thd->backup_wait_timeout= LONG_MAX/1000;
> + else
> + thd->backup_wait_timeout= var->save_result.ulong_value;
> return 0;
> }
>
>
> === modified file 'sql/set_var.h'
> --- a/sql/set_var.h 2008-07-09 07:12:43 +0000
> +++ b/sql/set_var.h 2008-08-08 14:07:00 +0000
> @@ -655,6 +655,19 @@ public:
> };
>
>
> +class sys_var_backup_wait_timeout :public sys_var
> +{
> +public:
> + sys_var_backup_wait_timeout(sys_var_chain *chain, const char *name_arg)
> + :sys_var(name_arg)
> + { chain_sys_var(chain); }
> + bool update(THD *thd, set_var *var);
> + bool check_type(enum_var_type type) { return type == OPT_GLOBAL; }
> + SHOW_TYPE show_type() { return SHOW_LONG; }
> + uchar *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base);
> +};
> +
> +
> class sys_var_insert_id :public sys_var
> {
> public:
>
> === modified file 'sql/share/errmsg.txt'
> --- a/sql/share/errmsg.txt 2008-07-09 07:12:43 +0000
> +++ b/sql/share/errmsg.txt 2008-08-08 14:07:00 +0000
> @@ -6372,3 +6372,7 @@ ER_BACKUP_OBTAIN_NAME_LOCK_FAILED
> eng "Restore failed to obtain the name locks on the tables."
> ER_BACKUP_RELEASE_NAME_LOCK_FAILED
> eng "Restore failed to release the name locks on the tables."
> +
> +ER_DDL_TIMEOUT
> + eng "The backup wait timeout has expired for query '%-64s'."
> +
>
> === modified file 'sql/sql_class.cc'
> --- a/sql/sql_class.cc 2008-07-14 22:06:19 +0000
> +++ b/sql/sql_class.cc 2008-08-08 14:07:00 +0000
> @@ -536,6 +536,7 @@ THD::THD()
> when the DDL blocker is engaged.
> */
> DDL_exception(FALSE),
> + backup_wait_timeout(50),
> #if defined(ENABLED_DEBUG_SYNC)
> debug_sync_control(0),
> #endif /* defined(ENABLED_DEBUG_SYNC) */
>
> === modified file 'sql/sql_class.h'
> --- a/sql/sql_class.h 2008-07-14 22:06:19 +0000
> +++ b/sql/sql_class.h 2008-08-08 14:07:00 +0000
> @@ -1933,6 +1933,7 @@ public:
> when the DDL blocker is engaged.
> */
> my_bool DDL_exception; // Allow some DDL if there is an exception
> + ulong backup_wait_timeout;
>
> Locked_tables_list locked_tables_list;
>
>
> === modified file 'sql/sql_parse.cc'
> --- a/sql/sql_parse.cc 2008-07-14 14:56:49 +0000
> +++ b/sql/sql_parse.cc 2008-08-08 14:07:00 +0000
> @@ -2326,7 +2326,11 @@ mysql_execute_command(THD *thd)
> TABLE in the same way. That way we avoid that a new table is
> created during a gobal read lock.
> */
> - DDL_blocker->check_DDL_blocker(thd);
> + if (!DDL_blocker->check_DDL_blocker(thd))
> + {
> + res= 1;
> + goto end_with_restore_list;
> + }
The got leads to code where end_DDL is called. However, if DDL op
timed out, start_DDL has not been called. This can lead to accounting
problems wrt DDL_blocks counter.
> if (!thd->locked_tables_mode &&
> !(need_start_waiting= !wait_if_global_read_lock(thd, 0, 1)))
> {
> @@ -2467,7 +2471,8 @@ end_with_restore_list:
> table without having to do a full rebuild.
> */
> {
> - DDL_blocker->check_DDL_blocker(thd);
> + if (!DDL_blocker->check_DDL_blocker(thd))
> + goto error;
This error handling is OK for the call to check_DDL_blocker, but if
later errors causes goto error, end_DDL will not be called even if
check_DDL_blocker called start_DDL. (This is not introduced by this
patch. Maybe another bug report should be created for this problem.)
> /* Prepare stack copies to be re-execution safe */
> HA_CREATE_INFO create_info;
> Alter_info alter_info(lex->alter_info, thd->mem_root);
> @@ -2537,7 +2542,8 @@ end_with_restore_list:
>
> case SQLCOM_ALTER_TABLE:
> {
> - DDL_blocker->check_DDL_blocker(thd);
> + if (!DDL_blocker->check_DDL_blocker(thd))
> + goto error;
> ulong priv=0;
> ulong priv_needed= ALTER_ACL;
>
> @@ -2652,7 +2658,8 @@ end_with_restore_list:
> goto error;
> }
>
> - DDL_blocker->check_DDL_blocker(thd);
> + if (!DDL_blocker->check_DDL_blocker(thd))
> + goto error;
> if (end_active_trans(thd) || mysql_rename_tables(thd, first_table, 0))
> {
> DDL_blocker->end_DDL();
> @@ -2752,7 +2759,8 @@ end_with_restore_list:
> if (check_table_access(thd, SELECT_ACL | INSERT_ACL, all_tables, FALSE, FALSE,
> UINT_MAX))
> goto error; /* purecov: inspected */
> thd->enable_slow_log= opt_log_slow_admin_statements;
> - DDL_blocker->check_DDL_blocker(thd);
> + if (!DDL_blocker->check_DDL_blocker(thd))
> + goto error;
> res= mysql_repair_table(thd, first_table, &lex->check_opt);
> DDL_blocker->end_DDL();
> /* ! we write after unlocking the table */
> @@ -2804,7 +2812,8 @@ end_with_restore_list:
> if (check_table_access(thd, SELECT_ACL | INSERT_ACL, all_tables, FALSE, FALSE,
> UINT_MAX))
> goto error; /* purecov: inspected */
> thd->enable_slow_log= opt_log_slow_admin_statements;
> - DDL_blocker->check_DDL_blocker(thd);
> + if (!DDL_blocker->check_DDL_blocker(thd))
> + goto error;
> res= (specialflag & (SPECIAL_SAFE_MODE | SPECIAL_NO_NEW_FUNC)) ?
> mysql_recreate_table(thd, first_table) :
> mysql_optimize_table(thd, first_table, &lex->check_opt);
> @@ -3050,7 +3059,8 @@ end_with_restore_list:
> goto error;
> }
>
> - DDL_blocker->check_DDL_blocker(thd);
> + if (!DDL_blocker->check_DDL_blocker(thd))
> + goto error;
> res= mysql_truncate(thd, first_table, 0);
> DDL_blocker->end_DDL();
>
> @@ -3155,7 +3165,8 @@ end_with_restore_list:
> /* So that DROP TEMPORARY TABLE gets to binlog at commit/rollback */
> thd->options|= OPTION_KEEP_LOG;
> }
> - DDL_blocker->check_DDL_blocker(thd);
> + if (!DDL_blocker->check_DDL_blocker(thd))
> + goto error;
> /* DDL and binlog write order protected by LOCK_open */
> res= mysql_rm_table(thd, first_table, lex->drop_if_exists,
> lex->drop_temporary);
> @@ -3409,7 +3420,8 @@ end_with_restore_list:
> if (check_access(thd,CREATE_ACL,lex->name.str, 0, 1, 0,
> is_schema_db(lex->name.str)))
> break;
> - DDL_blocker->check_DDL_blocker(thd);
> + if (!DDL_blocker->check_DDL_blocker(thd))
> + goto error;
> res= mysql_create_db(thd,(lower_case_table_names == 2 ? alias :
> lex->name.str), &create_info, 0);
> DDL_blocker->end_DDL();
> @@ -3452,7 +3464,8 @@ end_with_restore_list:
> ER(ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0));
> goto error;
> }
> - DDL_blocker->check_DDL_blocker(thd);
> + if (!DDL_blocker->check_DDL_blocker(thd))
> + goto error;
> res= mysql_rm_db(thd, lex->name.str, lex->drop_if_exists, 0);
> DDL_blocker->end_DDL();
> break;
> @@ -3495,7 +3508,8 @@ end_with_restore_list:
> goto error;
> }
>
> - DDL_blocker->check_DDL_blocker(thd);
> + if (!DDL_blocker->check_DDL_blocker(thd))
> + goto error;
> res= mysql_upgrade_db(thd, db);
> DDL_blocker->end_DDL();
> if (!res)
> @@ -3535,7 +3549,8 @@ end_with_restore_list:
> ER(ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0));
> goto error;
> }
> - DDL_blocker->check_DDL_blocker(thd);
> + if (!DDL_blocker->check_DDL_blocker(thd))
> + goto error;
> res= mysql_alter_db(thd, db->str, &create_info);
> DDL_blocker->end_DDL();
> break;
>
>
--
Øystein