Below is the list of changes that have just been committed into a local
6.0 repository of tsmith. When tsmith does a push, these changes
will be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository,
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2008-05-15 22:56:38+02:00, tsmith@stripped +27 -0
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files.
These changes happened in the mysql-6.0-build tree at these changesets:
ChangeSet@stripped, 2008-03-31 11:37:03+02:00, df@stripped +30 -0
ChangeSet@stripped, 2008-03-31 10:10:48+02:00, df@stripped +6 -0
ChangeSet@stripped, 2008-03-31 10:06:25+02:00, df@stripped +8 -0
Also, fix rpl_row_mysqlbinlog.test: update include filenames to have rpl_ prefix.
mysql-test/suite/binlog/r/binlog_innodb.result@stripped, 2008-05-15 22:39:30+02:00, tsmith@stripped +1 -0
Undo previous result set change - it was made to accommodate some weird 5.1 changes
that found their way into 6.0. Those changes are now backed out.
mysql-test/suite/rpl/t/rpl_row_mysqlbinlog.test@stripped, 2008-05-15 22:51:43+02:00, tsmith@stripped +6 -6
Update include filenames to have rpl_ prefix
storage/innobase/btr/btr0cur.c@stripped, 2008-05-15 22:39:30+02:00, tsmith@stripped +1 -1
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/dict/dict0dict.c@stripped, 2008-05-15 22:39:30+02:00, tsmith@stripped +3 -3
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/handler/ha_innodb.cc@stripped, 2008-05-15 22:39:31+02:00, tsmith@stripped +224 -152
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/handler/ha_innodb.h@stripped, 2008-05-15 22:39:31+02:00, tsmith@stripped +38 -2
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/include/dict0dict.h@stripped, 2008-05-15 22:39:32+02:00, tsmith@stripped +3 -3
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/include/dict0mem.h@stripped, 2008-05-15 22:39:32+02:00, tsmith@stripped +1 -1
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/include/os0sync.h@stripped, 2008-05-15 22:39:32+02:00, tsmith@stripped +6 -32
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/include/read0read.h@stripped, 2008-05-15 22:39:33+02:00, tsmith@stripped +4 -0
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/include/row0mysql.h@stripped, 2008-05-15 22:39:33+02:00, tsmith@stripped +10 -11
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/include/row0sel.h@stripped, 2008-05-15 22:39:33+02:00, tsmith@stripped +1 -1
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/include/sync0arr.h@stripped, 2008-05-15 22:39:34+02:00, tsmith@stripped +13 -8
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/include/sync0rw.h@stripped, 2008-05-15 22:39:34+02:00, tsmith@stripped +0 -12
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/include/sync0rw.ic@stripped, 2008-05-15 22:39:34+02:00, tsmith@stripped +2 -10
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/include/sync0sync.h@stripped, 2008-05-15 22:39:35+02:00, tsmith@stripped +0 -1
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/include/sync0sync.ic@stripped, 2008-05-15 22:39:35+02:00, tsmith@stripped +1 -1
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/include/univ.i@stripped, 2008-05-15 22:39:35+02:00, tsmith@stripped +0 -3
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/os/os0sync.c@stripped, 2008-05-15 22:39:35+02:00, tsmith@stripped +11 -100
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/read/read0read.c@stripped, 2008-05-15 22:39:35+02:00, tsmith@stripped +5 -0
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/row/row0mysql.c@stripped, 2008-05-15 22:39:35+02:00, tsmith@stripped +16 -119
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/row/row0sel.c@stripped, 2008-05-15 22:39:36+02:00, tsmith@stripped +73 -20
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/srv/srv0srv.c@stripped, 2008-05-15 22:39:36+02:00, tsmith@stripped +8 -8
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/sync/sync0arr.c@stripped, 2008-05-15 22:39:37+02:00, tsmith@stripped +235 -157
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/sync/sync0rw.c@stripped, 2008-05-15 22:39:37+02:00, tsmith@stripped +1 -19
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/sync/sync0sync.c@stripped, 2008-05-15 22:39:37+02:00, tsmith@stripped +9 -42
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
storage/innobase/trx/trx0trx.c@stripped, 2008-05-15 22:39:38+02:00, tsmith@stripped +0 -1
Revert bad merge changes (weird file copy mess from 5.1 -> 6.0) in several InnoDB files
diff -Nrup a/mysql-test/suite/binlog/r/binlog_innodb.result b/mysql-test/suite/binlog/r/binlog_innodb.result
--- a/mysql-test/suite/binlog/r/binlog_innodb.result 2008-05-15 01:24:08 +02:00
+++ b/mysql-test/suite/binlog/r/binlog_innodb.result 2008-05-15 22:39:30 +02:00
@@ -110,6 +110,7 @@ master-bin.000001 # Table_map # # table_
master-bin.000001 # Update_rows # # table_id: #
master-bin.000001 # Update_rows # # table_id: #
master-bin.000001 # Update_rows # # table_id: #
+master-bin.000001 # Update_rows # # table_id: #
master-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Query # # use `test`; BEGIN
diff -Nrup a/mysql-test/suite/rpl/t/rpl_row_mysqlbinlog.test b/mysql-test/suite/rpl/t/rpl_row_mysqlbinlog.test
--- a/mysql-test/suite/rpl/t/rpl_row_mysqlbinlog.test 2008-04-01 17:13:54 +02:00
+++ b/mysql-test/suite/rpl/t/rpl_row_mysqlbinlog.test 2008-05-15 22:51:43 +02:00
@@ -117,14 +117,14 @@ sync_slave_with_master;
--disable_warnings
stop slave;
--enable_warnings
---require r/slave-stopped.result
+--require r/rpl_slave-stopped.result
show status like 'Slave_running';
connection master;
reset master;
connection slave;
reset slave;
start slave;
---require r/slave-running.result
+--require r/rpl_slave-running.result
show status like 'Slave_running';
connection master;
@@ -206,14 +206,14 @@ sync_slave_with_master;
--disable_warnings
stop slave;
--enable_warnings
---require r/slave-stopped.result
+--require r/rpl_slave-stopped.result
show status like 'Slave_running';
connection master;
reset master;
connection slave;
reset slave;
start slave;
---require r/slave-running.result
+--require r/rpl_slave-running.result
show status like 'Slave_running';
connection master;
@@ -284,14 +284,14 @@ sync_slave_with_master;
--disable_warnings
stop slave;
--enable_warnings
---require r/slave-stopped.result
+--require r/rpl_slave-stopped.result
show status like 'Slave_running';
connection master;
reset master;
connection slave;
reset slave;
start slave;
---require r/slave-running.result
+--require r/rpl_slave-running.result
show status like 'Slave_running';
connection master;
diff -Nrup a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
--- a/storage/innobase/btr/btr0cur.c 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/btr/btr0cur.c 2008-05-15 22:39:30 +02:00
@@ -52,7 +52,7 @@ can be released by page reorganize, then
#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32)
-/* When estimating number of different key values in an index, sample
+/* When estimating number of different kay values in an index sample
this many index pages */
#define BTR_KEY_VAL_ESTIMATE_N_PAGES 8
diff -Nrup a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
--- a/storage/innobase/dict/dict0dict.c 2008-04-03 15:46:21 +02:00
+++ b/storage/innobase/dict/dict0dict.c 2008-05-15 22:39:30 +02:00
@@ -429,7 +429,7 @@ void
dict_table_autoinc_initialize(
/*==========================*/
dict_table_t* table, /* in: table */
- ib_ulonglong value) /* in: next value to assign to a row */
+ ib_longlong value) /* in: next value to assign to a row */
{
ut_ad(mutex_own(&table->autoinc_mutex));
@@ -441,7 +441,7 @@ dict_table_autoinc_initialize(
Reads the next autoinc value (== autoinc counter value), 0 if not yet
initialized. */
-ib_ulonglong
+ib_longlong
dict_table_autoinc_read(
/*====================*/
/* out: value for a new row, or 0 */
@@ -470,7 +470,7 @@ dict_table_autoinc_update(
/*======================*/
dict_table_t* table, /* in: table */
- ib_ulonglong value) /* in: value which was assigned to a row */
+ ib_longlong value) /* in: value which was assigned to a row */
{
if (table->autoinc_inited && value > table->autoinc) {
diff -Nrup a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
--- a/storage/innobase/handler/ha_innodb.cc 2008-05-14 15:41:17 +02:00
+++ b/storage/innobase/handler/ha_innodb.cc 2008-05-15 22:39:31 +02:00
@@ -62,6 +62,12 @@ static bool innodb_inited = 0;
*/
static handlerton *innodb_hton_ptr;
+C_MODE_START
+static my_bool index_cond_func_innodb(void *arg);
+C_MODE_END
+
+
+
#define INSIDE_HA_INNOBASE_CC
/* Include necessary InnoDB headers */
@@ -129,7 +135,7 @@ static my_bool innobase_locks_unsafe_for
static my_bool innobase_rollback_on_timeout = FALSE;
static my_bool innobase_create_status_file = FALSE;
static my_bool innobase_stats_on_metadata = TRUE;
-static my_bool innobase_adaptive_hash_index = TRUE;
+static my_bool innobase_use_adaptive_hash_indexes = TRUE;
static char* internal_innobase_data_file_path = NULL;
@@ -600,9 +606,7 @@ convert_error_code_to_mysql(
tell it also to MySQL so that MySQL knows to empty the
cached binlog for this transaction */
- if (thd) {
- thd_mark_transaction_to_rollback(thd, TRUE);
- }
+ thd_mark_transaction_to_rollback(thd, TRUE);
return(HA_ERR_LOCK_DEADLOCK);
} else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {
@@ -611,10 +615,8 @@ convert_error_code_to_mysql(
latest SQL statement in a lock wait timeout. Previously, we
rolled back the whole transaction. */
- if (thd) {
- thd_mark_transaction_to_rollback(
- thd, (bool)row_rollback_on_timeout);
- }
+ thd_mark_transaction_to_rollback(thd,
+ (bool)row_rollback_on_timeout);
return(HA_ERR_LOCK_WAIT_TIMEOUT);
@@ -666,9 +668,7 @@ convert_error_code_to_mysql(
tell it also to MySQL so that MySQL knows to empty the
cached binlog for this transaction */
- if (thd) {
- thd_mark_transaction_to_rollback(thd, TRUE);
- }
+ thd_mark_transaction_to_rollback(thd, TRUE);
return(HA_ERR_LOCK_TABLE_FULL);
} else if (error == DB_TOO_MANY_CONCURRENT_TRXS) {
@@ -969,10 +969,18 @@ ha_innobase::ha_innobase(handlerton *hto
HA_PRIMARY_KEY_IN_READ_INDEX |
HA_BINLOG_ROW_CAPABLE |
HA_CAN_GEOMETRY | HA_PARTIAL_COLUMN_READ |
- HA_TABLE_SCAN_ON_INDEX),
+ HA_TABLE_SCAN_ON_INDEX | HA_NEED_READ_RANGE_BUFFER |
+ HA_MRR_CANT_SORT),
+ primary_key(0), /* needs initialization because index_flags() may be called
+ before this is set to the real value. It's ok to have any
+ value here because it doesn't matter if we return the
+ HA_DO_INDEX_COND_PUSHDOWN bit from those "early" calls */
start_of_scan(0),
num_write_row(0)
-{}
+{
+// ds_mrr.init(this, table, (DsMrr_impl::range_check_toggle_func_t)
+// &ha_innobase::toggle_range_check);
+}
/*************************************************************************
Updates the user_thd field in a handle and also allocates a new InnoDB
@@ -1627,7 +1635,7 @@ innobase_init(
srv_stats_on_metadata = (ibool) innobase_stats_on_metadata;
srv_use_adaptive_hash_indexes =
- (ibool) innobase_adaptive_hash_index;
+ (ibool) innobase_use_adaptive_hash_indexes;
srv_print_verbose_log = mysqld_embedded ? 0 : 1;
@@ -2275,8 +2283,6 @@ ha_innobase::open(
dict_table_t* ib_table;
char norm_name[1000];
THD* thd;
- ulint retries = 0;
- char* is_part = NULL;
DBUG_ENTER("ha_innobase::open");
@@ -2310,29 +2316,11 @@ ha_innobase::open(
DBUG_RETURN(1);
}
- /* We look for pattern #P# to see if the table is partitioned
- MySQL table. The retry logic for partitioned tables is a
- workaround for http://bugs.mysql.com/bug.php?id=33349. Look
- at support issue https://support.mysql.com/view.php?id=21080
- for more details. */
- is_part = strstr(norm_name, "#P#");
-retry:
/* Get pointer to a table object in InnoDB dictionary cache */
- ib_table = dict_table_get(norm_name, TRUE);
-
- if (NULL == ib_table) {
- if (is_part && retries < 10) {
- ++retries;
- os_thread_sleep(100000);
- goto retry;
- }
- if (is_part) {
- sql_print_error("Failed to open table %s after "
- "%lu attemtps.\n", norm_name,
- retries);
- }
+ ib_table = dict_table_get(norm_name, TRUE);
+ if (NULL == ib_table) {
sql_print_error("Cannot find or open table %s from\n"
"the internal data dictionary of InnoDB "
"though the .frm file for the\n"
@@ -2375,6 +2363,7 @@ retry:
prebuilt = row_create_prebuilt(ib_table);
prebuilt->mysql_row_len = table->s->reclength;
+ prebuilt->idx_cond_func= NULL;
/* Looks like MySQL-3.23 sometimes has primary key number != 0 */
@@ -3048,6 +3037,7 @@ build_template(
only if templ_type is
ROW_MYSQL_REC_FIELDS */
TABLE* table, /* in: MySQL table */
+ ha_innobase* file, /* in: ha_innobase handler */
uint templ_type) /* in: ROW_MYSQL_WHOLE_ROW or
ROW_MYSQL_REC_FIELDS */
{
@@ -3062,7 +3052,9 @@ build_template(
ulint i;
/* byte offset of the end of last requested column */
ulint mysql_prefix_len = 0;
-
+ ibool do_idx_cond_push= FALSE;
+ ibool need_second_pass= FALSE;
+
if (prebuilt->select_lock_type == LOCK_X) {
/* We always retrieve the whole clustered index record if we
use exclusive row level locks, for example, if the read is
@@ -3132,8 +3124,23 @@ build_template(
prebuilt->templ_contains_blob = FALSE;
- /* Note that in InnoDB, i is the column number. MySQL calls columns
- 'fields'. */
+
+ /*
+ Setup index condition pushdown (note: we don't need to check if
+ this is a scan on primary key as that is checked in idx_cond_push)
+ */
+ if (file->active_index == file->pushed_idx_cond_keyno &&
+ file->active_index != MAX_KEY)
+ do_idx_cond_push= need_second_pass= TRUE;
+
+ /*
+ Ok, now build an array of mysql_row_templ_struct structures.
+ If index condition pushdown is used, the array is split into two
+ parts: first go index fields, then go table fields.
+
+ Note that in InnoDB, i is the column number. MySQL calls columns
+ 'fields'.
+ */
for (i = 0; i < n_fields; i++) {
templ = prebuilt->mysql_template + n_requested_fields;
field = table->field[i];
@@ -3143,6 +3150,8 @@ build_template(
and which we can skip. */
register const ibool index_contains_field =
dict_index_contains_col_or_prefix(index, i);
+ register const ibool index_covers_field =
+ field->part_of_key.is_set(file->active_index);
if (!index_contains_field && prebuilt->read_just_key) {
/* If this is a 'key read', we do not need
@@ -3175,8 +3184,12 @@ build_template(
/* This field is not needed in the query, skip it */
goto skip_field;
- }
include_field:
+ if (do_idx_cond_push &&
+ (need_second_pass && !index_covers_field ||
+ !need_second_pass && index_covers_field))
+ goto skip_field;
+ }
n_requested_fields++;
templ->col_no = i;
@@ -3230,18 +3243,35 @@ include_field:
prebuilt->templ_contains_blob = TRUE;
}
skip_field:
- ;
+ if (need_second_pass && (i+1 == n_fields))
+ {
+ prebuilt->n_index_fields= n_requested_fields;
+ need_second_pass= FALSE;
+ i= (~(ulint)0); /* to start from 0 */
+ }
}
prebuilt->n_template = n_requested_fields;
prebuilt->mysql_prefix_len = mysql_prefix_len;
+ if (do_idx_cond_push)
+ {
+ prebuilt->idx_cond_func= index_cond_func_innodb;
+ prebuilt->idx_cond_func_arg= file;
+ }
+ else
+ {
+ prebuilt->idx_cond_func= NULL;
+ prebuilt->n_index_fields= n_requested_fields;
+ }
+ // file->in_range_read= FALSE;
+
if (index != clust_index && prebuilt->need_to_access_clustered) {
/* Change rec_field_no's to correspond to the clustered index
record */
- for (i = 0; i < n_requested_fields; i++) {
+ for (i = do_idx_cond_push? prebuilt->n_index_fields : 0;
+ i < n_requested_fields; i++) {
templ = prebuilt->mysql_template + i;
-
templ->rec_field_no = dict_col_get_clust_pos_noninline(
&index->table->cols[templ->col_no],
clust_index);
@@ -3484,7 +3514,8 @@ no_commit:
/* Build the template used in converting quickly between
the two database formats */
- build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
+ build_template(prebuilt, NULL, table,
+ this, ROW_MYSQL_WHOLE_ROW);
}
innodb_srv_conc_enter_innodb(prebuilt->trx);
@@ -3545,19 +3576,7 @@ no_commit:
if (auto_inc > prebuilt->last_value) {
set_max_autoinc:
ut_a(prebuilt->table->autoinc_increment > 0);
-
- ulonglong have;
- ulonglong need;
-
- /* Check for overflow conditions. */
- need = prebuilt->table->autoinc_increment;
- have = ~0x0ULL - auto_inc;
-
- if (have < need) {
- need = have;
- }
-
- auto_inc += need;
+ auto_inc += prebuilt->table->autoinc_increment;
err = innobase_set_max_autoinc(auto_inc);
@@ -3807,16 +3826,6 @@ ha_innobase::update_row(
error = convert_error_code_to_mysql(error, user_thd);
- if (error == 0 /* success */
- && uvect->n_fields == 0 /* no columns were updated */) {
-
- /* This is the same as success, but instructs
- MySQL that the row is not really updated and it
- should not increase the count of updated rows.
- This is fix for http://bugs.mysql.com/29157 */
- error = HA_ERR_RECORD_IS_THE_SAME;
- }
-
/* Tell InnoDB server that there might be work for
utility threads: */
@@ -3983,6 +3992,8 @@ ha_innobase::index_end(void)
int error = 0;
DBUG_ENTER("index_end");
active_index=MAX_KEY;
+ in_range_check_pushed_down= FALSE;
+ ds_mrr.dsmrr_close();
DBUG_RETURN(error);
}
@@ -4133,7 +4144,7 @@ ha_innobase::index_read(
necessarily prebuilt->index, but can also be the clustered index */
if (prebuilt->sql_stat_start) {
- build_template(prebuilt, user_thd, table,
+ build_template(prebuilt, user_thd, table, this,
ROW_MYSQL_REC_FIELDS);
}
@@ -4295,7 +4306,7 @@ ha_innobase::change_active_index(
the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
copying. Starting from MySQL-4.1 we use a more efficient flag here. */
- build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
+ build_template(prebuilt, user_thd, table, this, ROW_MYSQL_REC_FIELDS);
DBUG_RETURN(0);
}
@@ -4560,7 +4571,6 @@ ha_innobase::rnd_pos(
length of data in pos has to be ref_length */
{
int error;
- uint keynr = active_index;
DBUG_ENTER("rnd_pos");
DBUG_DUMP("key", pos, ref_length);
@@ -4568,22 +4578,6 @@ ha_innobase::rnd_pos(
ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
- if (prebuilt->clust_index_was_generated) {
- /* No primary key was defined for the table and we
- generated the clustered index from the row id: the
- row reference is the row id, not any key value
- that MySQL knows of */
-
- error = change_active_index(MAX_KEY);
- } else {
- error = change_active_index(primary_key);
- }
-
- if (error) {
- DBUG_PRINT("error", ("Got error: %d", error));
- DBUG_RETURN(error);
- }
-
/* Note that we assume the length of the row reference is fixed
for the table, and it is == ref_length */
@@ -4593,8 +4587,6 @@ ha_innobase::rnd_pos(
DBUG_PRINT("error", ("Got error: %d", error));
}
- change_active_index(keynr);
-
DBUG_RETURN(error);
}
@@ -4616,6 +4608,10 @@ ha_innobase::position(
ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
+ /*if (ds_mrr.call_position_for != this) {
+ ((ha_innobase*)ds_mrr.call_position_for)->position(record);
+ return;
+ }*/
if (prebuilt->clust_index_was_generated) {
/* No primary key was defined for the table and we
generated the clustered index from row id: the
@@ -4657,12 +4653,6 @@ innodb_check_for_record_too_big_error(
}
}
-/* limit innodb monitor access to users with PROCESS privilege.
-See http://bugs.mysql.com/32710 for expl. why we choose PROCESS. */
-#define IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name, thd) \
- (row_is_magic_monitor_table(table_name) \
- && check_global_access(thd, PROCESS_ACL))
-
/*********************************************************************
Creates a table definition to an InnoDB database. */
static
@@ -4699,12 +4689,6 @@ create_table_def(
DBUG_ENTER("create_table_def");
DBUG_PRINT("enter", ("table_name: %s", table_name));
- ut_a(trx->mysql_thd != NULL);
- if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name,
- (THD*) trx->mysql_thd)) {
- DBUG_RETURN(HA_ERR_GENERIC);
- }
-
n_cols = form->s->fields;
/* We pass 0 as the space id, and determine at a lower level the space
@@ -5117,15 +5101,8 @@ ha_innobase::create(
DBUG_ASSERT(innobase_table != 0);
- /* Note: We can't call update_thd() as prebuilt will not be
- setup at this stage and so we use thd. */
-
- /* We need to copy the AUTOINC value from the old table if
- this is an ALTER TABLE. */
-
- if (((create_info->used_fields & HA_CREATE_USED_AUTO)
- || thd_sql_command(thd) == SQLCOM_ALTER_TABLE)
- && create_info->auto_increment_value != 0) {
+ if ((create_info->used_fields & HA_CREATE_USED_AUTO) &&
+ (create_info->auto_increment_value != 0)) {
/* Query was ALTER TABLE...AUTO_INCREMENT = x; or
CREATE TABLE ...AUTO_INCREMENT = x; Find out a table
@@ -5252,14 +5229,6 @@ ha_innobase::delete_table(
DBUG_ENTER("ha_innobase::delete_table");
- /* Strangely, MySQL passes the table name without the '.frm'
- extension, in contrast to ::create */
- normalize_table_name(norm_name, name);
-
- if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) {
- DBUG_RETURN(HA_ERR_GENERIC);
- }
-
/* Get the transaction associated with the current thd, or create one
if not yet created */
@@ -5293,6 +5262,11 @@ ha_innobase::delete_table(
assert(name_len < 1000);
+ /* Strangely, MySQL passes the table name without the '.frm'
+ extension, in contrast to ::create */
+
+ normalize_table_name(norm_name, name);
+
/* Drop the table in InnoDB */
error = row_drop_table_for_mysql(norm_name, trx,
@@ -5790,9 +5764,7 @@ ha_innobase::info(
stats.index_file_length = ((ulonglong)
ib_table->stat_sum_of_other_index_sizes)
* UNIV_PAGE_SIZE;
- stats.delete_length =
- fsp_get_available_space_in_free_extents(
- ib_table->space);
+ stats.delete_length = 0;
stats.check_time = 0;
if (stats.records == 0) {
@@ -5875,7 +5847,7 @@ ha_innobase::info(
}
if (flag & HA_STATUS_AUTO && table->found_next_number_field) {
- ulonglong auto_inc;
+ longlong auto_inc;
int ret;
/* The following function call can the first time fail in
@@ -5960,7 +5932,7 @@ ha_innobase::check(
/* Build the template; we will use a dummy template
in index scans done in checking */
- build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
+ build_template(prebuilt, NULL, table, this, ROW_MYSQL_WHOLE_ROW);
}
ret = row_check_table_for_mysql(prebuilt);
@@ -6323,6 +6295,12 @@ ha_innobase::extra(
break;
case HA_EXTRA_RESET_STATE:
reset_template(prebuilt);
+
+ /* Reset index condition pushdown state */
+ pushed_idx_cond= FALSE;
+ pushed_idx_cond_keyno= MAX_KEY;
+ //in_range_read= FALSE;
+ prebuilt->idx_cond_func= NULL;
break;
case HA_EXTRA_NO_KEYREAD:
prebuilt->read_just_key = 0;
@@ -6366,6 +6344,11 @@ int ha_innobase::reset()
row_mysql_prebuilt_free_blob_heap(prebuilt);
}
reset_template(prebuilt);
+ /* Reset index condition pushdown state */
+ pushed_idx_cond_keyno= MAX_KEY;
+ pushed_idx_cond= NULL;
+ ds_mrr.dsmrr_close();
+ prebuilt->idx_cond_func= NULL;
return 0;
}
@@ -7228,9 +7211,9 @@ ha_innobase::innobase_read_and_init_auto
/*=========================================*/
/* out: 0 or generic MySQL
error code */
- ulonglong* value) /* out: the autoinc value */
+ longlong* value) /* out: the autoinc value */
{
- ulonglong auto_inc;
+ longlong auto_inc;
ibool stmt_start;
int mysql_error = 0;
dict_table_t* innodb_table = prebuilt->table;
@@ -7281,9 +7264,7 @@ ha_innobase::innobase_read_and_init_auto
index, autoinc_col_name, &auto_inc);
if (error == DB_SUCCESS) {
- if (auto_inc < ~0x0ULL) {
- ++auto_inc;
- }
+ ++auto_inc;
dict_table_autoinc_initialize(innodb_table, auto_inc);
} else {
ut_print_timestamp(stderr);
@@ -7322,7 +7303,6 @@ On return if there is no error then the
ulong
ha_innobase::innobase_get_auto_increment(
-/*=====================================*/
ulonglong* value) /* out: autoinc value */
{
ulong error;
@@ -7336,14 +7316,14 @@ ha_innobase::innobase_get_auto_increment
error = innobase_autoinc_lock();
if (error == DB_SUCCESS) {
- ulonglong autoinc;
+ ib_longlong autoinc;
/* Determine the first value of the interval */
autoinc = dict_table_autoinc_read(prebuilt->table);
/* We need to initialize the AUTO-INC value, for
that we release all locks.*/
- if (autoinc == 0) {
+ if (autoinc <= 0) {
trx_t* trx;
trx = prebuilt->trx;
@@ -7362,11 +7342,14 @@ ha_innobase::innobase_get_auto_increment
mysql_error = innobase_read_and_init_auto_inc(
&autoinc);
- if (mysql_error) {
+ if (!mysql_error) {
+ /* Should have read the proper value */
+ ut_a(autoinc > 0);
+ } else {
error = DB_ERROR;
}
} else {
- *value = autoinc;
+ *value = (ulonglong) autoinc;
}
/* A deadlock error during normal processing is OK
and can be ignored. */
@@ -7451,19 +7434,10 @@ ha_innobase::get_auto_increment(
/* With old style AUTOINC locking we only update the table's
AUTOINC counter after attempting to insert the row. */
if (innobase_autoinc_lock_mode != AUTOINC_OLD_STYLE_LOCKING) {
- ulonglong have;
- ulonglong need;
-
- /* Check for overflow conditions. */
- need = *nb_reserved_values * increment;
- have = ~0x0ULL - *first_value;
-
- if (have < need) {
- need = have;
- }
/* Compute the last value in the interval */
- prebuilt->last_value = *first_value + need;
+ prebuilt->last_value = *first_value +
+ (*nb_reserved_values * increment);
ut_a(prebuilt->last_value >= *first_value);
@@ -8052,10 +8026,9 @@ static MYSQL_SYSVAR_BOOL(stats_on_metada
"Enable statistics gathering for metadata commands such as SHOW TABLE STATUS (on by default)",
NULL, NULL, TRUE);
-static MYSQL_SYSVAR_BOOL(adaptive_hash_index, innobase_adaptive_hash_index,
+static MYSQL_SYSVAR_BOOL(use_adaptive_hash_indexes, innobase_use_adaptive_hash_indexes,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Enable InnoDB adaptive hash index (enabled by default). "
- "Disable with --skip-innodb-adaptive-hash-index.",
+ "Enable the InnoDB adaptive hash indexes (enabled by default)",
NULL, NULL, TRUE);
static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size,
@@ -8145,11 +8118,10 @@ static MYSQL_SYSVAR_STR(data_file_path,
static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "The AUTOINC lock modes supported by InnoDB: "
- "0 => Old style AUTOINC locking (for backward"
- " compatibility) "
- "1 => New style AUTOINC locking "
- "2 => No AUTOINC locking (unsafe for SBR)",
+ "The AUTOINC lock modes supported by InnoDB:\n"
+ " 0 => Old style AUTOINC locking (for backward compatibility)\n"
+ " 1 => New style AUTOINC locking\n"
+ " 2 => No AUTOINC locking (unsafe for SBR)",
NULL, NULL,
AUTOINC_NEW_STYLE_LOCKING, /* Default setting */
AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */
@@ -8187,7 +8159,7 @@ static struct st_mysql_sys_var* innobase
MYSQL_SYSVAR(open_files),
MYSQL_SYSVAR(rollback_on_timeout),
MYSQL_SYSVAR(stats_on_metadata),
- MYSQL_SYSVAR(adaptive_hash_index),
+ MYSQL_SYSVAR(use_adaptive_hash_indexes),
MYSQL_SYSVAR(status_file),
MYSQL_SYSVAR(support_xa),
MYSQL_SYSVAR(sync_spin_loops),
@@ -8214,3 +8186,103 @@ mysql_declare_plugin(innobase)
NULL /* reserved */
}
mysql_declare_plugin_end;
+
+/****************************************************************************
+ * DS-MRR implementation
+ ***************************************************************************/
+
+/**
+ * Multi Range Read interface, DS-MRR calls
+ */
+
+int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode, HANDLER_BUFFER *buf)
+{
+ return ds_mrr.dsmrr_init(this, &table->key_info[active_index],
+ seq, seq_init_param, n_ranges, mode, buf);
+}
+
+int ha_innobase::multi_range_read_next(char **range_info)
+{
+ return ds_mrr.dsmrr_next(this, range_info);
+}
+
+ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags,
+ COST_VECT *cost)
+{
+ /* See comments in ha_myisam::multi_range_read_info_const */
+ ds_mrr.init(this, table);
+ return ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, bufsz,
+ flags, cost);
+}
+
+int ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint *bufsz, uint *flags, COST_VECT *cost)
+{
+ ds_mrr.init(this, table);
+ return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
+}
+
+
+
+/**
+ * Index Condition Pushdown interface implementation
+ */
+
+C_MODE_START
+
+/* Index condition check function to be called from within Innobase */
+
+static my_bool index_cond_func_innodb(void *arg)
+{
+ ha_innobase *h= (ha_innobase*)arg;
+ if (h->end_range) //was: h->in_range_read
+ {
+ if (h->compare_key2(h->end_range) > 0)
+ return 2; /* caller should return HA_ERR_END_OF_FILE already */
+ }
+ return (my_bool)h->pushed_idx_cond->val_int();
+}
+
+C_MODE_END
+
+
+Item *ha_innobase::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
+{
+ if (keyno_arg != primary_key)
+ {
+ pushed_idx_cond_keyno= keyno_arg;
+ pushed_idx_cond= idx_cond_arg;
+ in_range_check_pushed_down= TRUE;
+ return NULL; /* Table handler will check the entire condition */
+ }
+ return idx_cond_arg; /* Table handler will not make any checks */
+}
+
+
+int ha_innobase::read_range_first(const key_range *start_key,
+ const key_range *end_key,
+ bool eq_range_arg,
+ bool sorted /* ignored */)
+{
+ int res;
+ //if (!eq_range_arg)
+ //in_range_read= TRUE;
+ res= handler::read_range_first(start_key, end_key, eq_range_arg, sorted);
+ //if (res)
+ // in_range_read= FALSE;
+ return res;
+}
+
+
+int ha_innobase::read_range_next()
+{
+ int res= handler::read_range_next();
+ //if (res)
+ // in_range_read= FALSE;
+ return res;
+}
+
diff -Nrup a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
--- a/storage/innobase/handler/ha_innodb.h 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/handler/ha_innodb.h 2008-05-15 22:39:31 +02:00
@@ -72,7 +72,7 @@ class ha_innobase: public handler
int update_thd(THD* thd);
int change_active_index(uint keynr);
int general_fetch(uchar* buf, uint direction, uint match_mode);
- int innobase_read_and_init_auto_inc(ulonglong* ret);
+ int innobase_read_and_init_auto_inc(longlong* ret);
ulong innobase_autoinc_lock();
ulong innobase_set_max_autoinc(ulonglong auto_inc);
ulong innobase_reset_autoinc(ulonglong auto_inc);
@@ -99,7 +99,8 @@ class ha_innobase: public handler
HA_READ_PREV |
HA_READ_ORDER |
HA_READ_RANGE |
- HA_KEYREAD_ONLY);
+ HA_KEYREAD_ONLY |
+ ((idx == primary_key)? 0 : HA_DO_INDEX_COND_PUSHDOWN));
}
uint max_supported_keys() const { return MAX_KEY; }
/* An InnoDB page must store >= 2 keys;
@@ -150,6 +151,22 @@ class ha_innobase: public handler
int discard_or_import_tablespace(my_bool discard);
int extra(enum ha_extra_function operation);
int reset();
+ int lock_table(THD *thd, int lock_type, int lock_timeout)
+ {
+ /*
+ Preliminarily call the pre-existing internal method for
+ transactional locking and ignore non-transactional locks.
+ */
+ if (!lock_timeout)
+ {
+ /* Preliminarily show both possible errors for NOWAIT. */
+ if (lock_type == F_WRLCK)
+ return HA_ERR_UNSUPPORTED;
+ else
+ return HA_ERR_LOCK_WAIT_TIMEOUT;
+ }
+ return transactional_table_lock(thd, lock_type);
+ }
int external_lock(THD *thd, int lock_type);
int transactional_table_lock(THD *thd, int lock_type);
int start_stmt(THD *thd, thr_lock_type lock_type);
@@ -196,6 +213,25 @@ class ha_innobase: public handler
int cmp_ref(const uchar *ref1, const uchar *ref2);
bool check_if_incompatible_data(HA_CREATE_INFO *info,
uint table_changes);
+public:
+ /**
+ * Multi Range Read interface
+ */
+ int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode, HANDLER_BUFFER *buf);
+ int multi_range_read_next(char **range_info);
+ ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+ int multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint *bufsz, uint *flags, COST_VECT *cost);
+ DsMrr_impl ds_mrr;
+
+ int read_range_first(const key_range *start_key, const key_range *end_key,
+ bool eq_range_arg, bool sorted);
+ int read_range_next();
+ Item *idx_cond_push(uint keyno, Item* idx_cond);
};
/* Some accessor functions which the InnoDB plugin needs, but which
diff -Nrup a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
--- a/storage/innobase/include/dict0dict.h 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/include/dict0dict.h 2008-05-15 22:39:32 +02:00
@@ -185,12 +185,12 @@ void
dict_table_autoinc_initialize(
/*==========================*/
dict_table_t* table, /* in: table */
- ib_ulonglong value); /* in: next value to assign to a row */
+ ib_longlong value); /* in: next value to assign to a row */
/************************************************************************
Reads the next autoinc value (== autoinc counter value), 0 if not yet
initialized. */
-ib_ulonglong
+ib_longlong
dict_table_autoinc_read(
/*====================*/
/* out: value for a new row, or 0 */
@@ -204,7 +204,7 @@ dict_table_autoinc_update(
/*======================*/
dict_table_t* table, /* in: table */
- ib_ulonglong value); /* in: value which was assigned to a row */
+ ib_longlong value); /* in: value which was assigned to a row */
/************************************************************************
Release the autoinc lock.*/
diff -Nrup a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
--- a/storage/innobase/include/dict0mem.h 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/include/dict0mem.h 2008-05-15 22:39:32 +02:00
@@ -409,7 +409,7 @@ struct dict_table_struct{
/* TRUE if the autoinc counter has been
inited; MySQL gets the init value by executing
SELECT MAX(auto inc column) */
- ib_ulonglong autoinc;/* autoinc counter value to give to the
+ ib_longlong autoinc;/* autoinc counter value to give to the
next inserted row */
ib_longlong autoinc_increment;
diff -Nrup a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h
--- a/storage/innobase/include/os0sync.h 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/include/os0sync.h 2008-05-15 22:39:32 +02:00
@@ -112,13 +112,9 @@ os_event_set(
os_event_t event); /* in: event to set */
/**************************************************************
Resets an event semaphore to the nonsignaled state. Waiting threads will
-stop to wait for the event.
-The return value should be passed to os_even_wait_low() if it is desired
-that this thread should not wait in case of an intervening call to
-os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low(). */
+stop to wait for the event. */
-ib_longlong
+void
os_event_reset(
/*===========*/
os_event_t event); /* in: event to reset */
@@ -129,38 +125,16 @@ void
os_event_free(
/*==========*/
os_event_t event); /* in: event to free */
-
/**************************************************************
Waits for an event object until it is in the signaled state. If
srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
waiting thread when the event becomes signaled (or immediately if the
-event is already in the signaled state).
-
-Typically, if the event has been signalled after the os_event_reset()
-we'll return immediately because event->is_set == TRUE.
-There are, however, situations (e.g.: sync_array code) where we may
-lose this information. For example:
-
-thread A calls os_event_reset()
-thread B calls os_event_set() [event->is_set == TRUE]
-thread C calls os_event_reset() [event->is_set == FALSE]
-thread A calls os_event_wait() [infinite wait!]
-thread C calls os_event_wait() [infinite wait!]
-
-Where such a scenario is possible, to avoid infinite wait, the
-value returned by os_event_reset() should be passed in as
-reset_sig_count. */
-
-#define os_event_wait(event) os_event_wait_low((event), 0)
+event is already in the signaled state). */
void
-os_event_wait_low(
-/*==============*/
- os_event_t event, /* in: event to wait */
- ib_longlong reset_sig_count);/* in: zero or the value
- returned by previous call of
- os_event_reset(). */
-
+os_event_wait(
+/*==========*/
+ os_event_t event); /* in: event to wait */
/**************************************************************
Waits for an event object until it is in the signaled state or
a timeout is exceeded. In Unix the timeout is always infinite. */
diff -Nrup a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
--- a/storage/innobase/include/read0read.h 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/include/read0read.h 2008-05-15 22:39:33 +02:00
@@ -111,6 +111,10 @@ struct read_view_struct{
dulint undo_no; /* (0, 0) or if type is VIEW_HIGH_GRANULARITY
transaction undo_no when this high-granularity
consistent read view was created */
+ ibool can_be_too_old; /* TRUE if the system has had to purge old
+ versions which this read view should be able
+ to access: the read view can bump into the
+ DB_MISSING_HISTORY error */
dulint low_limit_no; /* The view does not need to see the undo
logs for transactions whose transaction number
is strictly smaller (<) than this value: they
diff -Nrup a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
--- a/storage/innobase/include/row0mysql.h 2008-04-03 15:46:21 +02:00
+++ b/storage/innobase/include/row0mysql.h 2008-05-15 22:39:33 +02:00
@@ -319,7 +319,7 @@ row_mysql_unfreeze_data_dictionary(
/*===============================*/
trx_t* trx); /* in: transaction */
/*************************************************************************
-Creates a table for MySQL. If the name of the table ends in
+Creates a table for MySQL. If the name of the table ends in
one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
"innodb_table_monitor", then this will also start the printing of monitor
output by the master thread. If the table name ends in "innodb_mem_validate",
@@ -464,16 +464,6 @@ row_check_table_for_mysql(
row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
handle */
-/*************************************************************************
-Determines if a table is a magic monitor table. */
-
-ibool
-row_is_magic_monitor_table(
-/*=======================*/
- /* out: TRUE if monitor table */
- const char* table_name); /* in: name of the table, in the
- form database/table_name */
-
/* A struct describing a place for an individual column in the MySQL
row format which is presented to the table handler in ha_innobase.
This template struct is used to speed up row transformations between
@@ -524,6 +514,8 @@ struct mysql_row_templ_struct {
#define ROW_PREBUILT_ALLOCATED 78540783
#define ROW_PREBUILT_FREED 26423527
+typedef my_bool (*index_cond_func_t)(void *param);
+
/* A struct for (sometimes lazily) prebuilt structures in an Innobase table
handle used within MySQL; these are used to save CPU time. */
@@ -684,6 +676,13 @@ struct row_prebuilt_struct {
mem_heap_t* old_vers_heap; /* memory heap where a previous
version is built in consistent read */
ulonglong last_value; /* last value of AUTO-INC interval */
+
+ index_cond_func_t idx_cond_func;/* Index Condition Pushdown function,
+ or NULL if there is none set */
+ void* idx_cond_func_arg;/* ICP function argument */
+ ulint n_index_fields; /* Number of fields at the start of
+ mysql_template. Valid only when using
+ ICP. */
ulint magic_n2; /* this should be the same as
magic_n */
};
diff -Nrup a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
--- a/storage/innobase/include/row0sel.h 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/include/row0sel.h 2008-05-15 22:39:33 +02:00
@@ -181,7 +181,7 @@ row_search_max_autoinc(
error code */
dict_index_t* index, /* in: index to search */
const char* col_name, /* in: autoinc column name */
- ib_ulonglong* value); /* out: AUTOINC value read */
+ ib_longlong* value); /* out: AUTOINC value read */
/* A structure for caching column values for prefetched rows */
struct sel_buf_struct{
diff -Nrup a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h
--- a/storage/innobase/include/sync0arr.h 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/include/sync0arr.h 2008-05-15 22:39:34 +02:00
@@ -66,21 +66,26 @@ sync_array_wait_event(
sync_array_t* arr, /* in: wait array */
ulint index); /* in: index of the reserved cell */
/**********************************************************************
-Frees the cell. NOTE! sync_array_wait_event frees the cell
-automatically! */
+Frees the cell safely by reserving the sync array mutex and decrementing
+n_reserved if necessary. Should only be called from mutex_spin_wait. */
void
-sync_array_free_cell(
-/*=================*/
+sync_array_free_cell_protected(
+/*===========================*/
sync_array_t* arr, /* in: wait array */
ulint index); /* in: index of the cell in array */
/**************************************************************************
-Note that one of the wait objects was signalled. */
+Looks for the cells in the wait array which refer
+to the wait object specified,
+and sets their corresponding events to the signaled state. In this
+way releases the threads waiting for the object to contend for the object.
+It is possible that no such cell is found, in which case does nothing. */
void
-sync_array_object_signalled(
-/*========================*/
- sync_array_t* arr); /* in: wait array */
+sync_array_signal_object(
+/*=====================*/
+ sync_array_t* arr, /* in: wait array */
+ void* object);/* in: wait object */
/**************************************************************************
If the wakeup algorithm does not work perfectly at semaphore relases,
this function will do the waking (see the comment in mutex_exit). This
diff -Nrup a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
--- a/storage/innobase/include/sync0rw.h 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/include/sync0rw.h 2008-05-15 22:39:34 +02:00
@@ -421,18 +421,6 @@ blocked by readers, a writer may queue f
field. Then no new readers are allowed in. */
struct rw_lock_struct {
- os_event_t event; /* Used by sync0arr.c for thread queueing */
-
-#ifdef __WIN__
- os_event_t wait_ex_event; /* This windows specific event is
- used by the thread which has set the
- lock state to RW_LOCK_WAIT_EX. The
- rw_lock design guarantees that this
- thread will be the next one to proceed
- once the current the event gets
- signalled. See LEMMA 2 in sync0sync.c */
-#endif
-
ulint reader_count; /* Number of readers who have locked this
lock in the shared mode */
ulint writer; /* This field is set to RW_LOCK_EX if there
diff -Nrup a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
--- a/storage/innobase/include/sync0rw.ic 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/include/sync0rw.ic 2008-05-15 22:39:34 +02:00
@@ -381,11 +381,7 @@ rw_lock_s_unlock_func(
mutex_exit(mutex);
if (UNIV_UNLIKELY(sg)) {
-#ifdef __WIN__
- os_event_set(lock->wait_ex_event);
-#endif
- os_event_set(lock->event);
- sync_array_object_signalled(sync_primary_wait_array);
+ sync_array_signal_object(sync_primary_wait_array, lock);
}
ut_ad(rw_lock_validate(lock));
@@ -465,11 +461,7 @@ rw_lock_x_unlock_func(
mutex_exit(&(lock->mutex));
if (UNIV_UNLIKELY(sg)) {
-#ifdef __WIN__
- os_event_set(lock->wait_ex_event);
-#endif
- os_event_set(lock->event);
- sync_array_object_signalled(sync_primary_wait_array);
+ sync_array_signal_object(sync_primary_wait_array, lock);
}
ut_ad(rw_lock_validate(lock));
diff -Nrup a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
--- a/storage/innobase/include/sync0sync.h 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/include/sync0sync.h 2008-05-15 22:39:35 +02:00
@@ -470,7 +470,6 @@ Do not use its fields directly! The stru
implementation of a mutual exclusion semaphore. */
struct mutex_struct {
- os_event_t event; /* Used by sync0arr.c for the wait queue */
ulint lock_word; /* This ulint is the target of the atomic
test-and-set instruction in Win32 */
#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
diff -Nrup a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic
--- a/storage/innobase/include/sync0sync.ic 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/include/sync0sync.ic 2008-05-15 22:39:35 +02:00
@@ -211,7 +211,7 @@ mutex_exit(
perform the read first, which could leave a waiting
thread hanging indefinitely.
- Our current solution call every second
+ Our current solution is to call, every 10 seconds,
sync_arr_wake_threads_if_sema_free()
to wake up possible hanging threads if
they are missed in mutex_signal_object. */
diff -Nrup a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
--- a/storage/innobase/include/univ.i 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/include/univ.i 2008-05-15 22:39:35 +02:00
@@ -212,11 +212,8 @@ typedef long int lint;
#ifdef __WIN__
typedef __int64 ib_longlong;
-typedef unsigned __int64 ib_ulonglong;
#else
-/* Note: longlong and ulonglong come from MySQL headers. */
typedef longlong ib_longlong;
-typedef ulonglong ib_ulonglong;
#endif
typedef unsigned long long int ullint;
diff -Nrup a/storage/innobase/os/os0sync.c b/storage/innobase/os/os0sync.c
--- a/storage/innobase/os/os0sync.c 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/os/os0sync.c 2008-05-15 22:39:35 +02:00
@@ -21,7 +21,6 @@ Created 9/6/1995 Heikki Tuuri
/* Type definition for an operating system mutex struct */
struct os_mutex_struct{
- os_event_t event; /* Used by sync0arr.c for queing threads */
void* handle; /* OS handle to mutex */
ulint count; /* we use this counter to check
that the same thread does not
@@ -36,7 +35,6 @@ struct os_mutex_struct{
/* Mutex protecting counts and the lists of OS mutexes and events */
os_mutex_t os_sync_mutex;
ibool os_sync_mutex_inited = FALSE;
-ibool os_sync_free_called = FALSE;
/* This is incremented by 1 in os_thread_create and decremented by 1 in
os_thread_exit */
@@ -52,10 +50,6 @@ ulint os_event_count = 0;
ulint os_mutex_count = 0;
ulint os_fast_mutex_count = 0;
-/* Because a mutex is embedded inside an event and there is an
-event embedded inside a mutex, on free, this generates a recursive call.
-This version of the free event function doesn't acquire the global lock */
-static void os_event_free_internal(os_event_t event);
/*************************************************************
Initializes global event and OS 'slow' mutex lists. */
@@ -82,7 +76,6 @@ os_sync_free(void)
os_event_t event;
os_mutex_t mutex;
- os_sync_free_called = TRUE;
event = UT_LIST_GET_FIRST(os_event_list);
while (event) {
@@ -106,7 +99,6 @@ os_sync_free(void)
mutex = UT_LIST_GET_FIRST(os_mutex_list);
}
- os_sync_free_called = FALSE;
}
/*************************************************************
@@ -152,31 +144,17 @@ os_event_create(
ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
#endif
event->is_set = FALSE;
-
- /* We return this value in os_event_reset(), which can then be
- be used to pass to the os_event_wait_low(). The value of zero
- is reserved in os_event_wait_low() for the case when the
- caller does not want to pass any signal_count value. To
- distinguish between the two cases we initialize signal_count
- to 1 here. */
- event->signal_count = 1;
+ event->signal_count = 0;
#endif /* __WIN__ */
- /* The os_sync_mutex can be NULL because during startup an event
- can be created [ because it's embedded in the mutex/rwlock ] before
- this module has been initialized */
- if (os_sync_mutex != NULL) {
- os_mutex_enter(os_sync_mutex);
- }
-
/* Put to the list of events */
+ os_mutex_enter(os_sync_mutex);
+
UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
os_event_count++;
- if (os_sync_mutex != NULL) {
- os_mutex_exit(os_sync_mutex);
- }
+ os_mutex_exit(os_sync_mutex);
return(event);
}
@@ -253,20 +231,13 @@ os_event_set(
/**************************************************************
Resets an event semaphore to the nonsignaled state. Waiting threads will
-stop to wait for the event.
-The return value should be passed to os_even_wait_low() if it is desired
-that this thread should not wait in case of an intervening call to
-os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low(). */
+stop to wait for the event. */
-ib_longlong
+void
os_event_reset(
/*===========*/
- /* out: current signal_count. */
os_event_t event) /* in: event to reset */
{
- ib_longlong ret = 0;
-
#ifdef __WIN__
ut_a(event);
@@ -281,40 +252,9 @@ os_event_reset(
} else {
event->is_set = FALSE;
}
- ret = event->signal_count;
os_fast_mutex_unlock(&(event->os_mutex));
#endif
- return(ret);
-}
-
-/**************************************************************
-Frees an event object, without acquiring the global lock. */
-static
-void
-os_event_free_internal(
-/*===================*/
- os_event_t event) /* in: event to free */
-{
-#ifdef __WIN__
- ut_a(event);
-
- ut_a(CloseHandle(event->handle));
-#else
- ut_a(event);
-
- /* This is to avoid freeing the mutex twice */
- os_fast_mutex_free(&(event->os_mutex));
-
- ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
-#endif
- /* Remove from the list of events */
-
- UT_LIST_REMOVE(os_event_list, os_event_list, event);
-
- os_event_count--;
-
- ut_free(event);
}
/**************************************************************
@@ -353,38 +293,18 @@ os_event_free(
Waits for an event object until it is in the signaled state. If
srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
waiting thread when the event becomes signaled (or immediately if the
-event is already in the signaled state).
-
-Typically, if the event has been signalled after the os_event_reset()
-we'll return immediately because event->is_set == TRUE.
-There are, however, situations (e.g.: sync_array code) where we may
-lose this information. For example:
-
-thread A calls os_event_reset()
-thread B calls os_event_set() [event->is_set == TRUE]
-thread C calls os_event_reset() [event->is_set == FALSE]
-thread A calls os_event_wait() [infinite wait!]
-thread C calls os_event_wait() [infinite wait!]
-
-Where such a scenario is possible, to avoid infinite wait, the
-value returned by os_event_reset() should be passed in as
-reset_sig_count. */
+event is already in the signaled state). */
void
-os_event_wait_low(
-/*==============*/
- os_event_t event, /* in: event to wait */
- ib_longlong reset_sig_count)/* in: zero or the value
- returned by previous call of
- os_event_reset(). */
+os_event_wait(
+/*==========*/
+ os_event_t event) /* in: event to wait */
{
#ifdef __WIN__
DWORD err;
ut_a(event);
- UT_NOT_USED(reset_sig_count);
-
/* Specify an infinite time limit for waiting */
err = WaitForSingleObject(event->handle, INFINITE);
@@ -398,11 +318,7 @@ os_event_wait_low(
os_fast_mutex_lock(&(event->os_mutex));
- if (reset_sig_count) {
- old_signal_count = reset_sig_count;
- } else {
- old_signal_count = event->signal_count;
- }
+ old_signal_count = event->signal_count;
for (;;) {
if (event->is_set == TRUE
@@ -542,7 +458,6 @@ os_mutex_create(
mutex_str->handle = mutex;
mutex_str->count = 0;
- mutex_str->event = os_event_create(NULL);
if (os_sync_mutex_inited) {
/* When creating os_sync_mutex itself we cannot reserve it */
@@ -618,10 +533,6 @@ os_mutex_free(
os_mutex_t mutex) /* in: mutex to free */
{
ut_a(mutex);
-
- if (!os_sync_free_called) {
- os_event_free_internal(mutex->event);
- }
if (os_sync_mutex_inited) {
os_mutex_enter(os_sync_mutex);
diff -Nrup a/storage/innobase/read/read0read.c b/storage/innobase/read/read0read.c
--- a/storage/innobase/read/read0read.c 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/read/read0read.c 2008-05-15 22:39:35 +02:00
@@ -212,6 +212,7 @@ read_view_oldest_copy_or_open_new(
view_copy->low_limit_no = old_view->low_limit_no;
view_copy->low_limit_id = old_view->low_limit_id;
+ view_copy->can_be_too_old = FALSE;
if (n > 0) {
/* The last active transaction has the smallest id: */
@@ -257,6 +258,8 @@ read_view_open_now(
view->low_limit_no = trx_sys->max_trx_id;
view->low_limit_id = view->low_limit_no;
+ view->can_be_too_old = FALSE;
+
n = 0;
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
@@ -428,6 +431,8 @@ read_cursor_view_create_for_mysql(
view->low_limit_no = trx_sys->max_trx_id;
view->low_limit_id = view->low_limit_no;
+
+ view->can_be_too_old = FALSE;
n = 0;
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
diff -Nrup a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
--- a/storage/innobase/row/row0mysql.c 2008-05-14 11:24:12 +02:00
+++ b/storage/innobase/row/row0mysql.c 2008-05-15 22:39:35 +02:00
@@ -57,12 +57,6 @@ static const char S_innodb_tablespace_mo
static const char S_innodb_table_monitor[] = "innodb_table_monitor";
static const char S_innodb_mem_validate[] = "innodb_mem_validate";
-/* Evaluates to true if str1 equals str2_onstack, used for comparing
-the above strings. */
-#define STR_EQ(str1, str1_len, str2_onstack) \
- ((str1_len) == sizeof(str2_onstack) \
- && memcmp(str1, str2_onstack, sizeof(str2_onstack)) == 0)
-
/***********************************************************************
Determine if the given name is a name reserved for MySQL system tables. */
static
@@ -1734,7 +1728,7 @@ row_mysql_unlock_data_dictionary(
}
/*************************************************************************
-Creates a table for MySQL. If the name of the table ends in
+Creates a table for MySQL. If the name of the table ends in
one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
"innodb_table_monitor", then this will also start the printing of monitor
output by the master thread. If the table name ends in "innodb_mem_validate",
@@ -1815,7 +1809,9 @@ row_create_table_for_mysql(
table_name++;
table_name_len = strlen(table_name) + 1;
- if (STR_EQ(table_name, table_name_len, S_innodb_monitor)) {
+ if (table_name_len == sizeof S_innodb_monitor
+ && !memcmp(table_name, S_innodb_monitor,
+ sizeof S_innodb_monitor)) {
/* Table equals "innodb_monitor":
start monitor prints */
@@ -1826,24 +1822,28 @@ row_create_table_for_mysql(
of InnoDB monitor prints */
os_event_set(srv_lock_timeout_thread_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_lock_monitor)) {
+ } else if (table_name_len == sizeof S_innodb_lock_monitor
+ && !memcmp(table_name, S_innodb_lock_monitor,
+ sizeof S_innodb_lock_monitor)) {
srv_print_innodb_monitor = TRUE;
srv_print_innodb_lock_monitor = TRUE;
os_event_set(srv_lock_timeout_thread_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_tablespace_monitor)) {
+ } else if (table_name_len == sizeof S_innodb_tablespace_monitor
+ && !memcmp(table_name, S_innodb_tablespace_monitor,
+ sizeof S_innodb_tablespace_monitor)) {
srv_print_innodb_tablespace_monitor = TRUE;
os_event_set(srv_lock_timeout_thread_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_table_monitor)) {
+ } else if (table_name_len == sizeof S_innodb_table_monitor
+ && !memcmp(table_name, S_innodb_table_monitor,
+ sizeof S_innodb_table_monitor)) {
srv_print_innodb_table_monitor = TRUE;
os_event_set(srv_lock_timeout_thread_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_mem_validate)) {
+ } else if (table_name_len == sizeof S_innodb_mem_validate
+ && !memcmp(table_name, S_innodb_mem_validate,
+ sizeof S_innodb_mem_validate)) {
/* We define here a debugging feature intended for
developers */
@@ -3312,66 +3312,6 @@ funct_exit:
return((int) err);
}
-/***********************************************************************
-Drop all foreign keys in a database, see Bug#18942.
-Called at the end of row_drop_database_for_mysql(). */
-static
-ulint
-drop_all_foreign_keys_in_db(
-/*========================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: database name which ends to '/' */
- trx_t* trx) /* in: transaction handle */
-{
- pars_info_t* pinfo;
- ulint err;
-
- ut_a(name[strlen(name) - 1] == '/');
-
- pinfo = pars_info_create();
-
- pars_info_add_str_literal(pinfo, "dbname", name);
-
-/* true if for_name is not prefixed with dbname */
-#define TABLE_NOT_IN_THIS_DB \
-"SUBSTR(for_name, 0, LENGTH(:dbname)) <> :dbname"
-
- err = que_eval_sql(pinfo,
- "PROCEDURE DROP_ALL_FOREIGN_KEYS_PROC () IS\n"
- "foreign_id CHAR;\n"
- "for_name CHAR;\n"
- "found INT;\n"
- "DECLARE CURSOR cur IS\n"
- "SELECT ID, FOR_NAME FROM SYS_FOREIGN\n"
- "WHERE FOR_NAME >= :dbname\n"
- "LOCK IN SHARE MODE\n"
- "ORDER BY FOR_NAME;\n"
- "BEGIN\n"
- "found := 1;\n"
- "OPEN cur;\n"
- "WHILE found = 1 LOOP\n"
- " FETCH cur INTO foreign_id, for_name;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSIF (" TABLE_NOT_IN_THIS_DB ") THEN\n"
- " found := 0;\n"
- " ELSIF (1=1) THEN\n"
- " DELETE FROM SYS_FOREIGN_COLS\n"
- " WHERE ID = foreign_id;\n"
- " DELETE FROM SYS_FOREIGN\n"
- " WHERE ID = foreign_id;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE cur;\n"
- "COMMIT WORK;\n"
- "END;\n",
- FALSE, /* do not reserve dict mutex,
- we are already holding it */
- trx);
-
- return(err);
-}
-
/*************************************************************************
Drops a database for MySQL. */
@@ -3442,19 +3382,6 @@ loop:
}
}
- if (err == DB_SUCCESS) {
- /* after dropping all tables try to drop all leftover
- foreign keys in case orphaned ones exist */
- err = (int) drop_all_foreign_keys_in_db(name, trx);
-
- if (err != DB_SUCCESS) {
- fputs("InnoDB: DROP DATABASE ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, " failed with error %d while "
- "dropping all foreign keys", err);
- }
- }
-
trx_commit_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
@@ -4129,34 +4056,4 @@ row_check_table_for_mysql(
prebuilt->trx->op_info = "";
return(ret);
-}
-
-/*************************************************************************
-Determines if a table is a magic monitor table. */
-
-ibool
-row_is_magic_monitor_table(
-/*=======================*/
- /* out: TRUE if monitor table */
- const char* table_name) /* in: name of the table, in the
- form database/table_name */
-{
- const char* name; /* table_name without database/ */
- ulint len;
-
- name = strchr(table_name, '/');
- ut_a(name != NULL);
- name++;
- len = strlen(name) + 1;
-
- if (STR_EQ(name, len, S_innodb_monitor)
- || STR_EQ(name, len, S_innodb_lock_monitor)
- || STR_EQ(name, len, S_innodb_tablespace_monitor)
- || STR_EQ(name, len, S_innodb_table_monitor)
- || STR_EQ(name, len, S_innodb_mem_validate)) {
-
- return(TRUE);
- }
-
- return(FALSE);
}
diff -Nrup a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
--- a/storage/innobase/row/row0sel.c 2008-05-14 11:24:12 +02:00
+++ b/storage/innobase/row/row0sel.c 2008-05-15 22:39:36 +02:00
@@ -2585,8 +2585,10 @@ row_sel_store_mysql_rec(
rec_t* rec, /* in: Innobase record in the index
which was described in prebuilt's
template */
- const ulint* offsets) /* in: array returned by
+ const ulint* offsets, /* in: array returned by
rec_get_offsets() */
+ ulint start_field_no,
+ ulint end_field_no)
{
mysql_row_templ_t* templ;
mem_heap_t* extern_field_heap = NULL;
@@ -2603,7 +2605,7 @@ row_sel_store_mysql_rec(
prebuilt->blob_heap = NULL;
}
- for (i = 0; i < prebuilt->n_template; i++) {
+ for (i = start_field_no; i < end_field_no /* prebuilt->n_template */ ; i++) {
templ = prebuilt->mysql_template + i;
@@ -3063,7 +3065,9 @@ row_sel_push_cache_row_for_mysql(
/*=============================*/
row_prebuilt_t* prebuilt, /* in: prebuilt struct */
rec_t* rec, /* in: record to push */
- const ulint* offsets) /* in: rec_get_offsets() */
+ const ulint* offsets, /* in: rec_get_offsets() */
+ ulint start_field_no, /* psergey: start from this field */
+ byte* remainder_buf) /* if above !=0 -> where to take prev fields */
{
byte* buf;
ulint i;
@@ -3096,9 +3100,27 @@ row_sel_push_cache_row_for_mysql(
if (UNIV_UNLIKELY(!row_sel_store_mysql_rec(
prebuilt->fetch_cache[
prebuilt->n_fetch_cached],
- prebuilt, rec, offsets))) {
+ prebuilt, rec, offsets, start_field_no,
+ prebuilt->n_template))) {
ut_error;
}
+ if (start_field_no) {
+ for (i=0; i < start_field_no; i++) {
+ register ulint offs;
+ mysql_row_templ_t* templ;
+ templ = prebuilt->mysql_template + i;
+
+ if (templ->mysql_null_bit_mask) {
+ offs= templ->mysql_null_byte_offset;
+ *(prebuilt->fetch_cache[prebuilt->n_fetch_cached] + offs) ^=
+ (*(remainder_buf + offs) & templ->mysql_null_bit_mask);
+ }
+ offs= templ->mysql_col_offset;
+ memcpy(prebuilt->fetch_cache[prebuilt->n_fetch_cached] + offs,
+ remainder_buf + offs,
+ templ->mysql_col_len);
+ }
+ }
prebuilt->n_fetch_cached++;
}
@@ -3238,6 +3260,8 @@ row_search_for_mysql(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
+ ibool some_fields_in_buffer;
+ ibool get_clust_rec= 0;
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
@@ -3490,7 +3514,8 @@ row_search_for_mysql(
rec, offsets));
#endif
if (!row_sel_store_mysql_rec(buf, prebuilt,
- rec, offsets)) {
+ rec, offsets, 0,
+ prebuilt->n_template)) {
err = DB_TOO_BIG_RECORD;
/* We let the main loop to do the
@@ -4090,8 +4115,8 @@ no_gap_lock:
information via the clustered index record. */
ut_ad(index != clust_index);
-
- goto requires_clust_rec;
+ get_clust_rec= TRUE;
+ goto idx_cond_check;
}
}
@@ -4135,18 +4160,36 @@ no_gap_lock:
goto next_rec;
}
+
+idx_cond_check:
+ if (prebuilt->idx_cond_func)
+ {
+ int res;
+ ut_ad(prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE);
+ offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+ row_sel_store_mysql_rec(buf, prebuilt, rec,
+ offsets, 0, prebuilt->n_index_fields);
+ res= prebuilt->idx_cond_func(prebuilt->idx_cond_func_arg);
+ if (res == 0)
+ goto next_rec;
+ if (res == 2)
+ {
+ err = DB_RECORD_NOT_FOUND;
+ goto idx_cond_failed;
+ }
+ }
+
/* Get the clustered index record if needed, if we did not do the
search using the clustered index. */
+ if (get_clust_rec || (index != clust_index &&
+ prebuilt->need_to_access_clustered)) {
- if (index != clust_index && prebuilt->need_to_access_clustered) {
-
-requires_clust_rec:
/* We use a 'goto' to the preceding label if a consistent
read of a secondary index record requires us to look up old
versions of the associated clustered index record. */
ut_ad(rec_offs_validate(rec, index, offsets));
-
+
/* It was a non-clustered index and we must fetch also the
clustered index record */
@@ -4230,9 +4273,14 @@ requires_clust_rec:
are BLOBs in the fields to be fetched. In HANDLER we do
not cache rows because there the cursor is a scrollable
cursor. */
+ some_fields_in_buffer= (index != clust_index &&
+ prebuilt->idx_cond_func);
row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
- offsets);
+ offsets,
+ some_fields_in_buffer?
+ prebuilt->n_index_fields: 0,
+ buf);
if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
goto got_row;
@@ -4248,7 +4296,10 @@ requires_clust_rec:
rec_offs_extra_size(offsets) + 4);
} else {
if (!row_sel_store_mysql_rec(buf, prebuilt,
- result_rec, offsets)) {
+ result_rec, offsets,
+ prebuilt->idx_cond_func?
+ prebuilt->n_index_fields: 0,
+ prebuilt->n_template)) {
err = DB_TOO_BIG_RECORD;
goto lock_wait_or_error;
@@ -4276,6 +4327,9 @@ got_row:
HANDLER command where the user can move the cursor with PREV or NEXT
even after a unique search. */
+ err = DB_SUCCESS;
+
+idx_cond_failed:
if (!unique_search_from_clust_index
|| prebuilt->select_lock_type != LOCK_NONE
|| prebuilt->used_in_HANDLER) {
@@ -4285,12 +4339,11 @@ got_row:
btr_pcur_store_position(pcur, &mtr);
}
- err = DB_SUCCESS;
-
goto normal_return;
next_rec:
/* Reset the old and new "did semi-consistent read" flags. */
+ get_clust_rec= FALSE;
if (UNIV_UNLIKELY(prebuilt->row_read_type
== ROW_READ_DID_SEMI_CONSISTENT)) {
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
@@ -4529,7 +4582,7 @@ row_search_check_if_query_cache_permitte
Read the AUTOINC column from the current row. If the value is less than
0 and the type is not unsigned then we reset the value to 0. */
static
-ib_ulonglong
+ib_longlong
row_search_autoinc_read_column(
/*===========================*/
/* out: value read from the column */
@@ -4540,7 +4593,7 @@ row_search_autoinc_read_column(
{
ulint len;
const byte* data;
- ib_ulonglong value;
+ ib_longlong value;
mem_heap_t* heap = NULL;
/* Our requirement is that dest should be word aligned. */
byte dest[sizeof(value)];
@@ -4567,7 +4620,7 @@ row_search_autoinc_read_column(
and that dest is word aligned. */
switch (len) {
case 8:
- value = *(ib_ulonglong*) dest;
+ value = *(ib_longlong*) dest;
break;
case 4:
@@ -4595,7 +4648,7 @@ row_search_autoinc_read_column(
mem_heap_free(heap);
}
- if (!unsigned_type && (ib_longlong) value < 0) {
+ if (!unsigned_type && value < 0) {
value = 0;
}
@@ -4634,7 +4687,7 @@ row_search_max_autoinc(
column name can't be found in index */
dict_index_t* index, /* in: index to search */
const char* col_name, /* in: name of autoinc column */
- ib_ulonglong* value) /* out: AUTOINC value read */
+ ib_longlong* value) /* out: AUTOINC value read */
{
ulint i;
ulint n_cols;
diff -Nrup a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
--- a/storage/innobase/srv/srv0srv.c 2008-04-03 15:46:22 +02:00
+++ b/storage/innobase/srv/srv0srv.c 2008-05-15 22:39:36 +02:00
@@ -1904,6 +1904,12 @@ loop:
os_thread_sleep(1000000);
+ /* In case mutex_exit is not a memory barrier, it is
+ theoretically possible some threads are left waiting though
+ the semaphore is already released. Wake up those threads: */
+
+ sync_arr_wake_threads_if_sema_free();
+
current_time = time(NULL);
time_elapsed = difftime(current_time, last_monitor_time);
@@ -2100,15 +2106,9 @@ loop:
srv_refresh_innodb_monitor_stats();
}
- /* In case mutex_exit is not a memory barrier, it is
- theoretically possible some threads are left waiting though
- the semaphore is already released. Wake up those threads: */
-
- sync_arr_wake_threads_if_sema_free();
-
if (sync_array_print_long_waits()) {
fatal_cnt++;
- if (fatal_cnt > 10) {
+ if (fatal_cnt > 5) {
fprintf(stderr,
"InnoDB: Error: semaphore wait has lasted"
@@ -2128,7 +2128,7 @@ loop:
fflush(stderr);
- os_thread_sleep(1000000);
+ os_thread_sleep(2000000);
if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
diff -Nrup a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c
--- a/storage/innobase/sync/sync0arr.c 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/sync/sync0arr.c 2008-05-15 22:39:37 +02:00
@@ -40,23 +40,24 @@ because we can do with a very small numb
say 200. In NT 3.51, allocating events seems to be a quadratic
algorithm, because 10 000 events are created fast, but
100 000 events takes a couple of minutes to create.
-
-As of 5.0.30 the above mentioned design is changed. Since now
-OS can handle millions of wait events efficiently, we no longer
-have this concept of each cell of wait array having one event.
-Instead, now the event that a thread wants to wait on is embedded
-in the wait object (mutex or rw_lock). We still keep the global
-wait array for the sake of diagnostics and also to avoid infinite
-wait The error_monitor thread scans the global wait array to signal
-any waiting threads who have missed the signal. */
+*/
/* A cell where an individual thread may wait suspended
until a resource is released. The suspending is implemented
using an operating system event semaphore. */
struct sync_cell_struct {
+ /* State of the cell. SC_WAKING_UP means
+ sync_array_struct->n_reserved has been decremented, but the thread
+ in this cell has not woken up yet. When it does, it will set the
+ state to SC_FREE. Note that this is done without the protection of
+ any mutex. */
+ enum { SC_FREE, SC_RESERVED, SC_WAKING_UP } state;
+
void* wait_object; /* pointer to the object the
- thread is waiting for; if NULL
- the cell is free for use */
+ thread is waiting for; this is not
+ reset to NULL when a cell is
+ freed. */
+
mutex_t* old_wait_mutex; /* the latest wait mutex in cell */
rw_lock_t* old_wait_rw_lock;/* the latest wait rw-lock in cell */
ulint request_type; /* lock type requested on the
@@ -70,23 +71,13 @@ struct sync_cell_struct {
ibool waiting; /* TRUE if the thread has already
called sync_array_event_wait
on this cell */
- ib_longlong signal_count; /* We capture the signal_count
- of the wait_object when we
- reset the event. This value is
- then passed on to os_event_wait
- and we wait only if the event
- has not been signalled in the
- period between the reset and
- wait call. */
+ ibool event_set; /* TRUE if the event is set */
+ os_event_t event; /* operating system event
+ semaphore handle */
time_t reservation_time;/* time when the thread reserved
the wait cell */
};
-/* NOTE: It is allowed for a thread to wait
-for an event allocated for the array without owning the
-protecting mutex (depending on the case: OS or database mutex), but
-all changes (set or reset) to the state of the event must be made
-while owning the mutex. */
struct sync_array_struct {
ulint n_reserved; /* number of currently reserved
cells in the wait array */
@@ -229,9 +220,12 @@ sync_array_create(
for (i = 0; i < n_cells; i++) {
cell = sync_array_get_nth_cell(arr, i);
- cell->wait_object = NULL;
- cell->waiting = FALSE;
- cell->signal_count = 0;
+ cell->state = SC_FREE;
+ cell->wait_object = NULL;
+
+ /* Create an operating system event semaphore with no name */
+ cell->event = os_event_create(NULL);
+ cell->event_set = FALSE; /* it is created in reset state */
}
return(arr);
@@ -245,12 +239,19 @@ sync_array_free(
/*============*/
sync_array_t* arr) /* in, own: sync wait array */
{
+ ulint i;
+ sync_cell_t* cell;
ulint protection;
ut_a(arr->n_reserved == 0);
sync_array_validate(arr);
+ for (i = 0; i < arr->n_cells; i++) {
+ cell = sync_array_get_nth_cell(arr, i);
+ os_event_free(cell->event);
+ }
+
protection = arr->protection;
/* Release the mutex protecting the wait array complex */
@@ -284,7 +285,8 @@ sync_array_validate(
for (i = 0; i < arr->n_cells; i++) {
cell = sync_array_get_nth_cell(arr, i);
- if (cell->wait_object != NULL) {
+
+ if (cell->state == SC_RESERVED) {
count++;
}
}
@@ -294,29 +296,6 @@ sync_array_validate(
sync_array_exit(arr);
}
-/***********************************************************************
-Puts the cell event in reset state. */
-static
-ib_longlong
-sync_cell_event_reset(
-/*==================*/
- /* out: value of signal_count
- at the time of reset. */
- ulint type, /* in: lock type mutex/rw_lock */
- void* object) /* in: the rw_lock/mutex object */
-{
- if (type == SYNC_MUTEX) {
- return(os_event_reset(((mutex_t *) object)->event));
-#ifdef __WIN__
- } else if (type == RW_LOCK_WAIT_EX) {
- return(os_event_reset(
- ((rw_lock_t *) object)->wait_ex_event));
-#endif
- } else {
- return(os_event_reset(((rw_lock_t *) object)->event));
- }
-}
-
/**********************************************************************
Reserves a wait array cell for waiting for an object.
The event of the cell is reset to nonsignalled state. */
@@ -345,9 +324,21 @@ sync_array_reserve_cell(
for (i = 0; i < arr->n_cells; i++) {
cell = sync_array_get_nth_cell(arr, i);
- if (cell->wait_object == NULL) {
+ if (cell->state == SC_FREE) {
+
+ /* We do not check cell->event_set because it is
+ set outside the protection of the sync array mutex
+ and we had a bug regarding it, and since resetting
+ an event when it is not needed does no harm it is
+ safer always to do it. */
+
+ cell->event_set = FALSE;
+ os_event_reset(cell->event);
+
+ cell->state = SC_RESERVED;
+ cell->reservation_time = time(NULL);
+ cell->thread = os_thread_get_curr_id();
- cell->waiting = FALSE;
cell->wait_object = object;
if (type == SYNC_MUTEX) {
@@ -357,6 +348,7 @@ sync_array_reserve_cell(
}
cell->request_type = type;
+ cell->waiting = FALSE;
cell->file = file;
cell->line = line;
@@ -367,16 +359,6 @@ sync_array_reserve_cell(
sync_array_exit(arr);
- /* Make sure the event is reset and also store
- the value of signal_count at which the event
- was reset. */
- cell->signal_count = sync_cell_event_reset(type,
- object);
-
- cell->reservation_time = time(NULL);
-
- cell->thread = os_thread_get_curr_id();
-
return;
}
}
@@ -387,6 +369,68 @@ sync_array_reserve_cell(
}
/**********************************************************************
+Frees the cell. Note that we don't have any mutex reserved when calling
+this. */
+static
+void
+sync_array_free_cell(
+/*=================*/
+ sync_array_t* arr, /* in: wait array */
+ ulint index) /* in: index of the cell in array */
+{
+ sync_cell_t* cell;
+
+ cell = sync_array_get_nth_cell(arr, index);
+
+ ut_a(cell->state == SC_WAKING_UP);
+ ut_a(cell->wait_object != NULL);
+
+ cell->state = SC_FREE;
+}
+
+/**********************************************************************
+Frees the cell safely by reserving the sync array mutex and decrementing
+n_reserved if necessary. Should only be called from mutex_spin_wait. */
+
+void
+sync_array_free_cell_protected(
+/*===========================*/
+ sync_array_t* arr, /* in: wait array */
+ ulint index) /* in: index of the cell in array */
+{
+ sync_cell_t* cell;
+
+ sync_array_enter(arr);
+
+ cell = sync_array_get_nth_cell(arr, index);
+
+ ut_a(cell->state != SC_FREE);
+ ut_a(cell->wait_object != NULL);
+
+ /* We only need to decrement n_reserved if it has not already been
+ done by sync_array_signal_object. */
+ if (cell->state == SC_RESERVED) {
+ ut_a(arr->n_reserved > 0);
+ arr->n_reserved--;
+ } else if (cell->state == SC_WAKING_UP) {
+ /* This is tricky; if we don't wait for the event to be
+ signaled, signal_object can set the state of a cell to
+ SC_WAKING_UP, mutex_spin_wait can call this and set the
+ state to SC_FREE, and then signal_object gets around to
+ calling os_event_set for the cell but since it's already
+ been freed things break horribly. */
+
+ sync_array_exit(arr);
+ os_event_wait(cell->event);
+ sync_array_enter(arr);
+ }
+
+ cell->state = SC_FREE;
+
+ sync_array_exit(arr);
+}
+
+/**********************************************************************
This function should be called when a thread starts to wait on
a wait array cell. In the debug version this function checks
if the wait for a semaphore will result in a deadlock, in which
@@ -403,28 +447,15 @@ sync_array_wait_event(
ut_a(arr);
- sync_array_enter(arr);
-
cell = sync_array_get_nth_cell(arr, index);
+ ut_a((cell->state == SC_RESERVED) || (cell->state == SC_WAKING_UP));
ut_a(cell->wait_object);
ut_a(!cell->waiting);
ut_ad(os_thread_get_curr_id() == cell->thread);
- if (cell->request_type == SYNC_MUTEX) {
- event = ((mutex_t*) cell->wait_object)->event;
-#ifdef __WIN__
- /* On windows if the thread about to wait is the one which
- has set the state of the rw_lock to RW_LOCK_WAIT_EX, then
- it waits on a special event i.e.: wait_ex_event. */
- } else if (cell->request_type == RW_LOCK_WAIT_EX) {
- event = ((rw_lock_t*) cell->wait_object)->wait_ex_event;
-#endif
- } else {
- event = ((rw_lock_t*) cell->wait_object)->event;
- }
-
- cell->waiting = TRUE;
+ event = cell->event;
+ cell->waiting = TRUE;
#ifdef UNIV_SYNC_DEBUG
@@ -433,6 +464,7 @@ sync_array_wait_event(
recursively sync_array routines, leading to trouble.
rw_lock_debug_mutex freezes the debug lists. */
+ sync_array_enter(arr);
rw_lock_debug_mutex_enter();
if (TRUE == sync_array_detect_deadlock(arr, cell, cell, 0)) {
@@ -442,16 +474,16 @@ sync_array_wait_event(
}
rw_lock_debug_mutex_exit();
-#endif
sync_array_exit(arr);
-
- os_event_wait_low(event, cell->signal_count);
+#endif
+ os_event_wait(event);
sync_array_free_cell(arr, index);
}
/**********************************************************************
-Reports info of a wait array cell. */
+Reports info of a wait array cell. Note: sync_array_print_long_waits()
+calls this without mutex protection. */
static
void
sync_array_cell_print(
@@ -471,8 +503,17 @@ sync_array_cell_print(
(ulong) os_thread_pf(cell->thread), cell->file,
(ulong) cell->line,
difftime(time(NULL), cell->reservation_time));
+ fprintf(file, "Wait array cell state %lu\n", (ulong)cell->state);
+
+ /* If the memory area pointed to by old_wait_mutex /
+ old_wait_rw_lock has been freed, this can crash. */
- if (type == SYNC_MUTEX) {
+ if (cell->state != SC_RESERVED) {
+ /* If cell has this state, then even if we are holding the sync
+ array mutex, the wait object may get freed meanwhile. Do not
+ print the wait object then. */
+
+ } else if (type == SYNC_MUTEX) {
/* We use old_wait_mutex in case the cell has already
been freed meanwhile */
mutex = cell->old_wait_mutex;
@@ -490,11 +531,7 @@ sync_array_cell_print(
#endif /* UNIV_SYNC_DEBUG */
(ulong) mutex->waiters);
- } else if (type == RW_LOCK_EX
-#ifdef __WIN__
- || type == RW_LOCK_WAIT_EX
-#endif
- || type == RW_LOCK_SHARED) {
+ } else if (type == RW_LOCK_EX || type == RW_LOCK_SHARED) {
fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
@@ -528,8 +565,8 @@ sync_array_cell_print(
ut_error;
}
- if (!cell->waiting) {
- fputs("wait has ended\n", file);
+ if (cell->event_set) {
+ fputs("wait is ending\n", file);
}
}
@@ -552,7 +589,7 @@ sync_array_find_thread(
cell = sync_array_get_nth_cell(arr, i);
- if (cell->wait_object != NULL
+ if ((cell->state == SC_RESERVED)
&& os_thread_eq(cell->thread, thread)) {
return(cell); /* Found */
@@ -642,7 +679,7 @@ sync_array_detect_deadlock(
depth++;
- if (!cell->waiting) {
+ if (cell->event_set || !cell->waiting) {
return(FALSE); /* No deadlock here */
}
@@ -667,8 +704,10 @@ sync_array_detect_deadlock(
depth);
if (ret) {
fprintf(stderr,
- "Mutex %p owned by thread %lu file %s line %lu\n",
- mutex, (ulong) os_thread_pf(mutex->thread_id),
+ "Mutex %p owned by thread %lu"
+ " file %s line %lu\n",
+ (void*) mutex,
+ (ulong) os_thread_pf(mutex->thread_id),
mutex->file_name, (ulong) mutex->line);
sync_array_cell_print(stderr, cell);
@@ -678,8 +717,7 @@ sync_array_detect_deadlock(
return(FALSE); /* No deadlock */
- } else if (cell->request_type == RW_LOCK_EX
- || cell->request_type == RW_LOCK_WAIT_EX) {
+ } else if (cell->request_type == RW_LOCK_EX) {
lock = cell->wait_object;
@@ -778,8 +816,7 @@ sync_arr_cell_can_wake_up(
return(TRUE);
}
- } else if (cell->request_type == RW_LOCK_EX
- || cell->request_type == RW_LOCK_WAIT_EX) {
+ } else if (cell->request_type == RW_LOCK_EX) {
lock = cell->wait_object;
@@ -808,47 +845,101 @@ sync_arr_cell_can_wake_up(
return(FALSE);
}
-/**********************************************************************
-Frees the cell. NOTE! sync_array_wait_event frees the cell
-automatically! */
+/**************************************************************************
+Looks for the cells in the wait array which refer to the wait object
+specified, and sets their corresponding events to the signaled state. In this
+way it releases the threads waiting for the object to contend for it.
+It is possible that no such cell is found, in which case it does nothing. */
void
-sync_array_free_cell(
-/*=================*/
+sync_array_signal_object(
+/*=====================*/
sync_array_t* arr, /* in: wait array */
- ulint index) /* in: index of the cell in array */
+ void* object) /* in: wait object */
{
sync_cell_t* cell;
+ ulint count;
+ ulint i;
+ ulint res_count;
+
+ /* We store the addresses of cells we need to signal and signal
+ them only after we have released the sync array's mutex (for
+ performance reasons). cell_count is the number of such cells, and
+ cell_ptr points to the first one. If there are no more than
+ UT_ARR_SIZE(cells) of them, cell_ptr == &cells[0], otherwise
+ cell_ptr points to malloc'd memory that we must free. */
+
+ sync_cell_t* cells[100];
+ sync_cell_t** cell_ptr = &cells[0];
+ ulint cell_count = 0;
+ ulint cell_max_count = UT_ARR_SIZE(cells);
+
+ ut_a(100 == cell_max_count);
sync_array_enter(arr);
- cell = sync_array_get_nth_cell(arr, index);
+ arr->sg_count++;
- ut_a(cell->wait_object != NULL);
+ i = 0;
+ count = 0;
- cell->waiting = FALSE;
- cell->wait_object = NULL;
- cell->signal_count = 0;
+ /* We need to store this to a local variable because it is modified
+ inside the loop */
+ res_count = arr->n_reserved;
- ut_a(arr->n_reserved > 0);
- arr->n_reserved--;
+ while (count < res_count) {
- sync_array_exit(arr);
-}
+ cell = sync_array_get_nth_cell(arr, i);
-/**************************************************************************
-Increments the signalled count. */
+ if (cell->state == SC_RESERVED) {
-void
-sync_array_object_signalled(
-/*========================*/
- sync_array_t* arr) /* in: wait array */
-{
- sync_array_enter(arr);
+ count++;
+ if (cell->wait_object == object) {
+ cell->state = SC_WAKING_UP;
- arr->sg_count++;
+ ut_a(arr->n_reserved > 0);
+ arr->n_reserved--;
+
+ if (cell_count == cell_max_count) {
+ sync_cell_t** old_cell_ptr = cell_ptr;
+ size_t old_size, new_size;
+
+ old_size = cell_max_count
+ * sizeof(sync_cell_t*);
+ cell_max_count *= 2;
+ new_size = cell_max_count
+ * sizeof(sync_cell_t*);
+
+ cell_ptr = malloc(new_size);
+ ut_a(cell_ptr);
+ memcpy(cell_ptr, old_cell_ptr,
+ old_size);
+
+ if (old_cell_ptr != &cells[0]) {
+ free(old_cell_ptr);
+ }
+ }
+
+ cell_ptr[cell_count] = cell;
+ cell_count++;
+ }
+ }
+
+ i++;
+ }
sync_array_exit(arr);
+
+ for (i = 0; i < cell_count; i++) {
+ cell = cell_ptr[i];
+
+ cell->event_set = TRUE;
+ os_event_set(cell->event);
+ }
+
+ if (cell_ptr != &cells[0]) {
+ free(cell_ptr);
+ }
}
/**************************************************************************
@@ -868,41 +959,33 @@ sync_arr_wake_threads_if_sema_free(void)
sync_cell_t* cell;
ulint count;
ulint i;
+ ulint res_count;
sync_array_enter(arr);
i = 0;
count = 0;
- while (count < arr->n_reserved) {
+ /* We need to store this to a local variable because it is modified
+ inside the loop */
+
+ res_count = arr->n_reserved;
+
+ while (count < res_count) {
cell = sync_array_get_nth_cell(arr, i);
- if (cell->wait_object != NULL) {
+ if (cell->state == SC_RESERVED) {
count++;
if (sync_arr_cell_can_wake_up(cell)) {
+ cell->state = SC_WAKING_UP;
+ cell->event_set = TRUE;
+ os_event_set(cell->event);
- if (cell->request_type == SYNC_MUTEX) {
- mutex_t* mutex;
-
- mutex = cell->wait_object;
- os_event_set(mutex->event);
-#ifdef __WIN__
- } else if (cell->request_type
- == RW_LOCK_WAIT_EX) {
- rw_lock_t* lock;
-
- lock = cell->wait_object;
- os_event_set(lock->wait_ex_event);
-#endif
- } else {
- rw_lock_t* lock;
-
- lock = cell->wait_object;
- os_event_set(lock->event);
- }
+ ut_a(arr->n_reserved > 0);
+ arr->n_reserved--;
}
}
@@ -932,7 +1015,7 @@ sync_array_print_long_waits(void)
cell = sync_array_get_nth_cell(sync_primary_wait_array, i);
- if (cell->wait_object != NULL && cell->waiting
+ if ((cell->state != SC_FREE)
&& difftime(time(NULL), cell->reservation_time) > 240) {
fputs("InnoDB: Warning: a long semaphore wait:\n",
stderr);
@@ -940,7 +1023,7 @@ sync_array_print_long_waits(void)
noticed = TRUE;
}
- if (cell->wait_object != NULL && cell->waiting
+ if ((cell->state != SC_FREE)
&& difftime(time(NULL), cell->reservation_time)
> fatal_timeout) {
fatal = TRUE;
@@ -989,25 +1072,20 @@ sync_array_output_info(
mutex */
{
sync_cell_t* cell;
- ulint count;
ulint i;
fprintf(file,
- "OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n",
- (long) arr->res_count, (long) arr->sg_count);
- i = 0;
- count = 0;
-
- while (count < arr->n_reserved) {
+ "OS WAIT ARRAY INFO: reservation count %ld,"
+ " signal count %ld\n",
+ (long) arr->res_count,
+ (long) arr->sg_count);
+ for (i = 0; i < arr->n_cells; i++) {
cell = sync_array_get_nth_cell(arr, i);
- if (cell->wait_object != NULL) {
- count++;
+ if (cell->state != SC_FREE) {
sync_array_cell_print(file, cell);
}
-
- i++;
}
}
diff -Nrup a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c
--- a/storage/innobase/sync/sync0rw.c 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/sync/sync0rw.c 2008-05-15 22:39:37 +02:00
@@ -151,11 +151,6 @@ rw_lock_create_func(
lock->last_x_file_name = "not yet reserved";
lock->last_s_line = 0;
lock->last_x_line = 0;
- lock->event = os_event_create(NULL);
-
-#ifdef __WIN__
- lock->wait_ex_event = os_event_create(NULL);
-#endif
mutex_enter(&rw_lock_list_mutex);
@@ -189,11 +184,6 @@ rw_lock_free(
mutex_free(rw_lock_get_mutex(lock));
mutex_enter(&rw_lock_list_mutex);
- os_event_free(lock->event);
-
-#ifdef __WIN__
- os_event_free(lock->wait_ex_event);
-#endif
if (UT_LIST_GET_PREV(list, lock)) {
ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
@@ -554,15 +544,7 @@ lock_loop:
rw_x_system_call_count++;
sync_array_reserve_cell(sync_primary_wait_array,
- lock,
-#ifdef __WIN__
- /* On windows RW_LOCK_WAIT_EX signifies
- that this thread should wait on the
- special wait_ex_event. */
- (state == RW_LOCK_WAIT_EX)
- ? RW_LOCK_WAIT_EX :
-#endif
- RW_LOCK_EX,
+ lock, RW_LOCK_EX,
file_name, line,
&index);
diff -Nrup a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
--- a/storage/innobase/sync/sync0sync.c 2008-03-31 11:36:53 +02:00
+++ b/storage/innobase/sync/sync0sync.c 2008-05-15 22:39:37 +02:00
@@ -95,47 +95,17 @@ have happened that the thread which was
it and did not see the waiters byte set to 1, a case which would lead the
other thread to an infinite wait.
-LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some
-=======
-thread will eventually call os_event_set() on that particular event.
-Thus no infinite wait is possible in this case.
+LEMMA 1: After a thread resets the event of the cell it reserves for waiting
+========
+for a mutex, some thread will eventually call sync_array_signal_object with
+the mutex as an argument. Thus no infinite wait is possible.
Proof: After making the reservation the thread sets the waiters field in the
mutex to 1. Then it checks that the mutex is still reserved by some thread,
or it reserves the mutex for itself. In any case, some thread (which may be
also some earlier thread, not necessarily the one currently holding the mutex)
will set the waiters field to 0 in mutex_exit, and then call
-os_event_set() with the mutex as an argument.
-Q.E.D.
-
-LEMMA 2: If an os_event_set() call is made after some thread has called
-=======
-the os_event_reset() and before it starts wait on that event, the call
-will not be lost to the second thread. This is true even if there is an
-intervening call to os_event_reset() by another thread.
-Thus no infinite wait is possible in this case.
-
-Proof (non-windows platforms): os_event_reset() returns a monotonically
-increasing value of signal_count. This value is increased at every
-call of os_event_set() If thread A has called os_event_reset() followed
-by thread B calling os_event_set() and then some other thread C calling
-os_event_reset(), the is_set flag of the event will be set to FALSE;
-but now if thread A calls os_event_wait_low() with the signal_count
-value returned from the earlier call of os_event_reset(), it will
-return immediately without waiting.
-Q.E.D.
-
-Proof (windows): If there is a writer thread which is forced to wait for
-the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX
-The design of rw_lock ensures that there is one and only one thread
-that is able to change the state to RW_LOCK_WAIT_EX and this thread is
-guaranteed to acquire the lock after it is released by the current
-holders and before any other waiter gets the lock.
-On windows this thread waits on a separate event i.e.: wait_ex_event.
-Since only one thread can wait on this event there is no chance
-of this event getting reset before the writer starts wait on it.
-Therefore, this thread is guaranteed to catch the os_set_event()
-signalled unconditionally at the release of the lock.
+sync_array_signal_object with the mutex as an argument.
Q.E.D. */
/* The number of system calls made in this module. Intended for performance
@@ -247,7 +217,6 @@ mutex_create_func(
os_fast_mutex_init(&(mutex->os_fast_mutex));
mutex->lock_word = 0;
#endif
- mutex->event = os_event_create(NULL);
mutex_set_waiters(mutex, 0);
#ifdef UNIV_DEBUG
mutex->magic_n = MUTEX_MAGIC_N;
@@ -331,8 +300,6 @@ mutex_free(
mutex_exit(&mutex_list_mutex);
}
- os_event_free(mutex->event);
-
#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
os_fast_mutex_free(&(mutex->os_fast_mutex));
#endif
@@ -542,7 +509,8 @@ spin_loop:
if (mutex_test_and_set(mutex) == 0) {
/* Succeeded! Free the reserved wait cell */
- sync_array_free_cell(sync_primary_wait_array, index);
+ sync_array_free_cell_protected(sync_primary_wait_array,
+ index);
ut_d(mutex->thread_id = os_thread_get_curr_id());
#ifdef UNIV_SYNC_DEBUG
@@ -623,8 +591,8 @@ mutex_signal_object(
/* The memory order of resetting the waiters field and
signaling the object is important. See LEMMA 1 above. */
- os_event_set(mutex->event);
- sync_array_object_signalled(sync_primary_wait_array);
+
+ sync_array_signal_object(sync_primary_wait_array, mutex);
}
#ifdef UNIV_SYNC_DEBUG
@@ -1162,7 +1130,6 @@ sync_thread_add_level(
break;
case SYNC_TREE_NODE:
ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
- || sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
|| sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
break;
case SYNC_TREE_NODE_NEW:
diff -Nrup a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
--- a/storage/innobase/trx/trx0trx.c 2008-04-03 15:46:22 +02:00
+++ b/storage/innobase/trx/trx0trx.c 2008-05-15 22:39:38 +02:00
@@ -933,7 +933,6 @@ trx_commit_off_kernel(
trx->rseg = NULL;
trx->undo_no = ut_dulint_zero;
trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
- trx->mysql_query_str = NULL;
ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);
| Thread |
|---|
| • bk commit into 6.0 tree (tsmith:1.2639) | tim | 15 May |