3482 Marko Mäkelä 2011-02-07
WL#5458 clean-up and optimizations
When releasing multiple mutexes, release the coarsest mutex first,
hoping to help performance when a context switch occurs between the
releases. That is, release lock_sys->mutex before trx_sys->lock before
trx->mutex.
Reduce the hold time of trx->mutex.
Replace some if-else if-else checks with switch.
Add UNIV_UNLIKELY hints to or assertion-like checks or tests for rare
conditions.
lock_grant(), lock_rec_cancel(): lock_reset_lock_and_trx_wait() does
not need trx->mutex.
lock_deadlock_occurs(): Remove unnecessary local variable ret. On
LOCK_VICTIM_EXCEED_MAX_DEPTH, release trx->mutex immediately, because
lock_deadlock_trx_print() does not need it.
lock_print_info_all_transactions(): Remove a redundant goto statement.
lock_rec_insert_check_and_lock(): Do not acquire trx->mutex except for
the lock_rec_enqueue_waiting() call, because this code is invoked for
a running transaction by its serving thread.
lock_wait_table_reserve_slot(): Do not test twice for the end-of-loop
condition. Fix some formatting.
lock_wait_check_and_cancel(): Only acquire the mutexes when cancelling
the lock request.
que_thr_end_lock_wait(): Add an assertion and remove a redundant one.
que_thr_stop(): Remove a local variable. Use direct return instead.
row_search_for_mysql(): Replace if-else if-else with switch.
trx_purge_sys_create(): Remove unnecessary acquisition of
purge_sys->trx->mutex. The purge_sys is allocated in this function;
therefore the mutex does not provide any protection.
trx_purge_sys_close(): Assert that purge_sys->sess->trx == purge_sys->trx.
trx_rollback_or_clean_recovered(): Correct a typo.
trx_prepare(), trx_prepare_for_mysql(): Do not acquire or release
trx->mutex. This code is supposed to be invoked on a running
transaction by the serving thread. trx->state changes are protected by
lock_sys->mutex. Add assertions.
rb://546
modified:
storage/innobase/lock/lock0lock.c
storage/innobase/lock/lock0wait.c
storage/innobase/que/que0que.c
storage/innobase/row/row0sel.c
storage/innobase/trx/trx0purge.c
storage/innobase/trx/trx0roll.c
storage/innobase/trx/trx0trx.c
3481 Vasil Dimov 2011-02-07 [merge]
Merge mysql-5.5-innodb -> mysql-trunk-innodb
modified:
mysql-test/valgrind.supp
=== modified file 'storage/innobase/lock/lock0lock.c'
--- a/storage/innobase/lock/lock0lock.c revid:vasil.dimov@stripped10207092103-usuct0u785h0dno2
+++ b/storage/innobase/lock/lock0lock.c revid:marko.makela@strippede0adanjba07bzre4
@@ -2261,14 +2261,13 @@ lock_grant(
{
ut_ad(lock_mutex_own());
- trx_mutex_enter(lock->trx);
-
lock_reset_lock_and_trx_wait(lock);
+ trx_mutex_enter(lock->trx);
if (lock_get_mode(lock) == LOCK_AUTO_INC) {
dict_table_t* table = lock->un_member.tab_lock.table;
- if (table->autoinc_trx == lock->trx) {
+ if (UNIV_UNLIKELY(table->autoinc_trx == lock->trx)) {
fprintf(stderr,
"InnoDB: Error: trx already had"
" an AUTO-INC lock!\n");
@@ -2319,8 +2318,6 @@ lock_rec_cancel(
ut_ad(lock_mutex_own());
ut_ad(lock_get_type_low(lock) == LOCK_REC);
- trx_mutex_enter(lock->trx);
-
/* Reset the bit (there can be only one set bit) in the lock bitmap */
lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
@@ -2330,6 +2327,8 @@ lock_rec_cancel(
/* The following function releases the trx from lock wait */
+ trx_mutex_enter(lock->trx);
+
thr = que_thr_end_lock_wait(lock->trx);
if (thr != NULL) {
@@ -3488,7 +3487,6 @@ lock_deadlock_occurs(
trx_t* trx) /*!< in/out: transaction */
{
trx_t* mark_trx;
- ulint ret;
ulint cost = 0;
ut_ad(trx);
@@ -3520,15 +3518,22 @@ retry:
trx_mutex_enter(trx);
- ret = lock_deadlock_recursive(trx, trx, lock, &cost, 0);
-
- switch (ret) {
+ switch (lock_deadlock_recursive(trx, trx, lock, &cost, 0)) {
case LOCK_VICTIM_IS_OTHER:
/* We chose some other trx as a victim: retry if there still
is a deadlock */
goto retry;
case LOCK_VICTIM_EXCEED_MAX_DEPTH:
+ /* Release the mutex to obey the latching order.
+ This is safe, because lock_deadlock_occurs() is invoked
+ when a lock wait is enqueued for the currently running
+ transaction. Because trx is a running transaction
+ (it is not currently suspended because of a lock wait),
+ its state can only be changed by this thread, which is
+ currently associated with the transaction. */
+ trx_mutex_exit(trx);
+
/* If the lock search exceeds the max step
or the max depth, the current trx will be
the victim. Print its information. */
@@ -3540,32 +3545,31 @@ retry:
" FOLLOWING TRANSACTION \n\n"
"*** TRANSACTION:\n");
- /* To obey the latching order */
- trx_mutex_exit(trx);
-
lock_deadlock_trx_print(trx, 3000);
- trx_mutex_enter(trx);
-
lock_deadlock_fputs(
"*** WAITING FOR THIS LOCK TO BE GRANTED:\n");
lock_deadlock_lock_print(lock);
+ trx_mutex_enter(trx);
break;
case LOCK_VICTIM_IS_START:
lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (2)\n");
break;
- default:
- /* No deadlock detected*/
+ case LOCK_VICTIM_NONE:
+ /* No deadlock detected */
+ ut_ad(trx_mutex_own(trx));
+ ut_ad(lock_mutex_own());
return(FALSE);
}
lock_deadlock_found = TRUE;
ut_ad(trx_mutex_own(trx));
+ ut_ad(lock_mutex_own());
return(TRUE);
}
@@ -3658,7 +3662,6 @@ lock_deadlock_recursive(
if (lock_trx == start) {
- /* To obey the latching order */
trx_mutex_exit(start);
/* We came back to the recursion starting
@@ -4771,9 +4774,8 @@ loop:
if (trx == NULL) {
- rw_lock_s_unlock(&trx_sys->lock);
-
lock_mutex_exit();
+ rw_lock_s_unlock(&trx_sys->lock);
ut_ad(lock_validate());
@@ -4858,9 +4860,8 @@ loop:
goto print_rec;
}
- rw_lock_s_unlock(&trx_sys->lock);
-
lock_mutex_exit();
+ rw_lock_s_unlock(&trx_sys->lock);
mtr_start(&mtr);
@@ -4897,8 +4898,6 @@ print_rec:
nth_trx++;
nth_lock = 0;
-
- goto loop;
}
goto loop;
@@ -5257,10 +5256,10 @@ lock_validate(void)
}
}
- rw_lock_s_unlock(&trx_sys->lock);
-
lock_mutex_exit();
+ rw_lock_s_unlock(&trx_sys->lock);
+
return(TRUE);
}
#endif /* UNIV_DEBUG */
@@ -5307,8 +5306,9 @@ lock_rec_insert_check_and_lock(
next_rec_heap_no = page_rec_get_heap_no(next_rec);
lock_mutex_enter();
-
- trx_mutex_enter(trx);
+ /* Because this code is invoked for a running transaction by
+ the thread that is serving the transaction, it is not necessary
+ to hold trx->mutex here. */
/* When inserting a record into an index, the table must be at
least IX-locked or we must be building an index, in which case
@@ -5322,8 +5322,6 @@ lock_rec_insert_check_and_lock(
if (UNIV_LIKELY(lock == NULL)) {
/* We optimize CPU time usage in the simplest case */
- trx_mutex_exit(trx);
-
lock_mutex_exit();
if (!dict_index_is_clust(index)) {
@@ -5355,16 +5353,16 @@ lock_rec_insert_check_and_lock(
block, next_rec_heap_no, trx)) {
/* Note that we may get DB_SUCCESS also here! */
+ trx_mutex_enter(trx);
err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
| LOCK_INSERT_INTENTION,
block, next_rec_heap_no,
index, thr);
+ trx_mutex_exit(trx);
} else {
err = DB_SUCCESS;
}
- trx_mutex_exit(trx);
-
lock_mutex_exit();
switch (err) {
@@ -6222,9 +6220,8 @@ lock_trx_handle_wait(
err = DB_SUCCESS;
}
- trx_mutex_exit(trx);
-
lock_mutex_exit();
+ trx_mutex_exit(trx);
return(err);
}
=== modified file 'storage/innobase/lock/lock0wait.c'
--- a/storage/innobase/lock/lock0wait.c revid:vasil.dimov@oracle.com-20110207092103-usuct0u785h0dno2
+++ b/storage/innobase/lock/lock0wait.c revid:marko.makela@stripped10207112317-e0adanjba07bzre4
@@ -150,59 +150,48 @@ lock_wait_table_reserve_slot(
slot = lock_sys->waiting_threads;
- for (i = 0; i < OS_THREAD_MAX_N; ++i, ++slot) {
+ for (i = OS_THREAD_MAX_N; i--; ++slot) {
if (!slot->in_use) {
- break;
- }
- }
+ slot->in_use = TRUE;
+ slot->thr = thr;
+ slot->thr->slot = slot;
+ slot->id = os_thread_get_curr_id();
+ slot->handle = os_thread_get_curr();
- /* Check if we have run out of slots. */
- if (slot == lock_sys->waiting_threads+ OS_THREAD_MAX_N) {
+ if (slot->event == NULL) {
+ slot->event = os_event_create(NULL);
+ ut_a(slot->event);
+ }
- ut_print_timestamp(stderr);
+ os_event_reset(slot->event);
+ slot->suspended = TRUE;
+ slot->suspend_time = ut_time();
+ slot->wait_timeout = wait_timeout;
- fprintf(stderr,
- " InnoDB: There appear to be %lu user"
- " threads currently waiting\n"
- "InnoDB: inside InnoDB, which is the"
- " upper limit. Cannot continue operation.\n"
- "InnoDB: We intentionally generate"
- " a seg fault to print a stack trace\n"
- "InnoDB: on Linux. But first we print"
- " a list of waiting threads.\n", (ulong) i);
+ if (slot == lock_sys->last_slot) {
+ ++lock_sys->last_slot;
+ }
- lock_wait_table_print();
-
- ut_error;
- } else {
+ ut_ad(lock_sys->last_slot
+ <= lock_sys->waiting_threads + OS_THREAD_MAX_N);
- ut_a(slot->in_use == FALSE);
-
- slot->in_use = TRUE;
- slot->thr = thr;
- slot->thr->slot = slot;
- slot->id = os_thread_get_curr_id();
- slot->handle = os_thread_get_curr();
-
- if (slot->event == NULL) {
- slot->event = os_event_create(NULL);
- ut_a(slot->event);
+ return(slot);
}
-
- os_event_reset(slot->event);
- slot->suspended = TRUE;
- slot->suspend_time = ut_time();
- slot->wait_timeout = wait_timeout;
}
- if (slot == lock_sys->last_slot) {
- ++lock_sys->last_slot;
- }
+ ut_print_timestamp(stderr);
- ut_ad(lock_sys->last_slot
- <= lock_sys->waiting_threads+ OS_THREAD_MAX_N);
+ fprintf(stderr,
+ " InnoDB: There appear to be %lu user"
+ " threads currently waiting\n"
+ "InnoDB: inside InnoDB, which is the"
+ " upper limit. Cannot continue operation.\n"
+ "InnoDB: As a last thing, we print"
+ " a list of waiting threads.\n", (ulong) OS_THREAD_MAX_N);
- return(slot);
+ lock_wait_table_print();
+
+ ut_error;
}
/***************************************************************//**
@@ -245,7 +234,7 @@ lock_wait_suspend_thread(
if (thr->state == QUE_THR_RUNNING) {
- ut_ad(thr->is_active == TRUE);
+ ut_ad(thr->is_active);
/* The lock has already been released or this transaction
was chosen as a deadlock victim: no need to suspend */
@@ -256,13 +245,12 @@ lock_wait_suspend_thread(
trx->lock.was_chosen_as_deadlock_victim = FALSE;
}
- trx_mutex_exit(trx);
-
lock_wait_mutex_exit();
+ trx_mutex_exit(trx);
return;
}
- ut_ad(thr->is_active == FALSE);
+ ut_ad(!thr->is_active);
slot = lock_wait_table_reserve_slot(thr, lock_wait_timeout);
@@ -283,8 +271,8 @@ lock_wait_suspend_thread(
os_event_set(srv_timeout_event);
- trx_mutex_exit(trx);
lock_wait_mutex_exit();
+ trx_mutex_exit(trx);
if (trx->declared_to_be_inside_innodb) {
@@ -395,7 +383,7 @@ lock_wait_release_thread_if_suspended(
/* We own both the lock mutex and the trx_t::mutex but not the
lock wait mutex. This is OK because other threads will see the state
- of this mutex as being in use and no other thread can change the state
+ of this slot as being in use and no other thread can change the state
of the slot to free unless that thread also owns the lock mutex. */
if (thr->slot != NULL && thr->slot->in_use && thr->slot->thr == thr) {
=== modified file 'storage/innobase/que/que0que.c'
--- a/storage/innobase/que/que0que.c revid:vasil.dimov@oracle.com-20110207092103-usuct0u785h0dno2
+++ b/storage/innobase/que/que0que.c revid:marko.makela@stripped0207112317-e0adanjba07bzre4
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -206,6 +206,7 @@ que_thr_end_lock_wait(
ibool was_active;
ut_ad(lock_mutex_own());
+ ut_ad(trx_mutex_own(trx));
thr = trx->lock.wait_thr;
@@ -215,8 +216,6 @@ que_thr_end_lock_wait(
/* In MySQL this is the only possible state here */
ut_a(thr->state == QUE_THR_LOCK_WAIT);
- ut_ad(thr->state == QUE_THR_LOCK_WAIT);
-
was_active = thr->is_active;
que_thr_move_to_run_state(thr);
@@ -704,7 +703,6 @@ que_thr_stop(
que_thr_t* thr) /*!< in: query thread */
{
que_t* graph;
- ibool ret = TRUE;
trx_t* trx = thr_get_trx(thr);;
graph = thr->graph;
@@ -732,10 +730,10 @@ que_thr_stop(
} else {
ut_ad(graph->state == QUE_FORK_ACTIVE);
- ret = FALSE;
+ return(FALSE);
}
- return(ret);
+ return(TRUE);
}
/**********************************************************************//**
=== modified file 'storage/innobase/row/row0sel.c'
--- a/storage/innobase/row/row0sel.c revid:vasil.dimov@strippedm-20110207092103-usuct0u785h0dno2
+++ b/storage/innobase/row/row0sel.c revid:marko.makela@stripped7-e0adanjba07bzre4
@@ -4366,7 +4366,8 @@ no_gap_lock:
err = lock_trx_handle_wait(trx);
- if (err == DB_SUCCESS) {
+ switch (err) {
+ case DB_SUCCESS:
/* The lock was granted while we were
searching for the last committed version.
Do a normal locking read. */
@@ -4374,12 +4375,14 @@ no_gap_lock:
offsets = rec_get_offsets(
rec, index, offsets, ULINT_UNDEFINED,
&heap);
- break;
- } else if (err == DB_DEADLOCK) {
+ goto locks_ok;
+ case DB_DEADLOCK:
goto lock_wait_or_error;
- } else {
- ut_a(err == DB_LOCK_WAIT);
+ case DB_LOCK_WAIT:
err = DB_SUCCESS;
+ break;
+ default:
+ ut_error;
}
if (old_vers == NULL) {
@@ -4469,6 +4472,7 @@ no_gap_lock:
}
}
+locks_ok:
/* NOTE that at this point rec can be an old version of a clustered
index record built for a consistent read. We cannot assume after this
point that rec is on a buffer pool page. Functions like
=== modified file 'storage/innobase/trx/trx0purge.c'
--- a/storage/innobase/trx/trx0purge.c revid:vasil.dimov@stripped20110207092103-usuct0u785h0dno2
+++ b/storage/innobase/trx/trx0purge.c revid:marko.makela@stripped7-e0adanjba07bzre4
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -160,8 +160,6 @@ trx_purge_sys_create(
ut_a(purge_sys->trx->sess == purge_sys->sess);
- trx_mutex_enter(purge_sys->trx);
-
/* A purge transaction is not a real transaction, we use a transaction
here only because the query threads code requires it. It is otherwise
quite unnecessary. We should get rid of it eventually. */
@@ -169,8 +167,6 @@ trx_purge_sys_create(
purge_sys->trx->start_time = ut_time();
purge_sys->trx->state = TRX_STATE_ACTIVE;
- trx_mutex_exit(purge_sys->trx);
-
purge_sys->query = trx_purge_graph_build(
purge_sys->trx, n_purge_threads);
@@ -187,8 +183,9 @@ trx_purge_sys_close(void)
que_graph_free(purge_sys->query);
ut_a(purge_sys->trx->id == 0);
+ ut_a(purge_sys->sess->trx == purge_sys->trx);
- purge_sys->sess->trx->state = TRX_STATE_NOT_STARTED;
+ purge_sys->trx->state = TRX_STATE_NOT_STARTED;
sess_close(purge_sys->sess);
=== modified file 'storage/innobase/trx/trx0roll.c'
--- a/storage/innobase/trx/trx0roll.c revid:vasil.dimov@strippedom-20110207092103-usuct0u785h0dno2
+++ b/storage/innobase/trx/trx0roll.c revid:marko.makela@stripped317-e0adanjba07bzre4
@@ -743,7 +743,7 @@ trx_rollback_or_clean_recovered(
ut_ad(trx->in_trx_list);
/* If this function does a cleanup or rollback
- then it will release the trx sys mutex, therefore
+ then it will release the trx_sys->lock, therefore
we need to reacquire it before retrying the loop. */
if (trx_rollback_resurrected(trx, all)) {
=== modified file 'storage/innobase/trx/trx0trx.c'
--- a/storage/innobase/trx/trx0trx.c revid:vasil.dimov@stripped0dno2
+++ b/storage/innobase/trx/trx0trx.c revid:marko.makela@stripped
@@ -1445,18 +1445,19 @@ static
void
trx_prepare(
/*========*/
- trx_t* trx) /*!< in: transaction */
+ trx_t* trx) /*!< in/out: transaction */
{
trx_rseg_t* rseg;
- ib_uint64_t lsn = 0;
+ ib_uint64_t lsn;
mtr_t mtr;
rseg = trx->rseg;
+ /* Only fresh user transactions can be prepared.
+ Recovered transactions cannot. */
+ ut_a(!trx->is_recovered);
if (trx->insert_undo != NULL || trx->update_undo != NULL) {
- trx_mutex_exit(trx);
-
mtr_start(&mtr);
/* Change the undo log segment states from TRX_UNDO_ACTIVE
@@ -1489,16 +1490,13 @@ trx_prepare(
world */
/*--------------*/
lsn = mtr.end_lsn;
-
- trx_mutex_enter(trx);
+ ut_ad(lsn);
+ } else {
+ lsn = 0;
}
- ut_ad(trx_mutex_own(trx));
-
- /* Note: This state change is only covered by the trx_t::mutex and
- not the trx_sys_t::lock. */
-
/*--------------------------------------*/
+ ut_a(trx->state == TRX_STATE_ACTIVE);
trx->state = TRX_STATE_PREPARED;
/*--------------------------------------*/
@@ -1520,8 +1518,6 @@ trx_prepare(
TODO: find out if MySQL holds some mutex when calling this.
That would spoil our group prepare algorithm. */
- trx_mutex_exit(trx);
-
if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */
} else if (srv_flush_log_at_trx_commit == 1) {
@@ -1544,8 +1540,6 @@ trx_prepare(
} else {
ut_error;
}
-
- trx_mutex_enter(trx);
}
}
@@ -1559,15 +1553,11 @@ trx_prepare_for_mysql(
{
trx_start_if_not_started_xa(trx);
- trx_mutex_enter(trx);
-
trx->op_info = "preparing";
trx_prepare(trx);
trx->op_info = "";
-
- trx_mutex_exit(trx);
}
/**********************************************************************//**
Attachment: [text/bzr-bundle] bzr/marko.makela@oracle.com-20110207112317-e0adanjba07bzre4.bundle
| Thread |
|---|
| • bzr push into mysql-trunk-innodb branch (marko.makela:3481 to 3482) WL#5458 | marko.makela | 7 Feb |