#At bzr+ssh://bk-internal.mysql.com/bzrroot/server/mysql-6.0-falcon/
2699 Vladislav Vaintroub 2008-06-11
Bug#37251 - Livelock between UPDATE and DELETE threads
The problem was a bug in deadlock detection code.
2 threads with mutual dependencies doing deadlock at exactly
same time both decide there is no deadlock, add themselves
to waitFor list, thus building a cycle in it. Both threads
will wait for each other until lock wait timeout happens
(a deadlock). Meanwhile, any other thread that traverses
waitFor list may enter the cycle described above and will
loop until deadlock is resolved.
Solution here is to modify deadlock detection to add dependency
first, than check, then remove dependency if cycle is found.
Care is taken to prevent compiler optimizations and hopefully
the combination of volatile specifier and COMPARE_EXHANGE will
generate a memory barrier to prevent reading elements while
they are modified.
Note, that instead of this fancy solution, plain old SyncObject
could be used. The loop is short, the lock is cheap,
but I can't estimate contention for the moment. If current lock-free
implementation turns out to be buggy, this alternative can be
considered.
No testcase provided, the situation described in bug report
can not be reliably reproduced.
modified:
storage/falcon/Transaction.cpp
storage/falcon/Transaction.h
per-file messages:
storage/falcon/Transaction.cpp
New overload for waitForTransaction, put all deadlock detection logic
here instead of having the same code on 2 different places.
storage/falcon/Transaction.h
Transaction::waitingFor member has been made volatile (can be used concurrently by
different threads)
New overload for Transaction::waitForTransaction, put all deadlock detection logic
here instead of having the same code on 2 different places.
=== modified file 'storage/falcon/Transaction.cpp'
--- a/storage/falcon/Transaction.cpp 2008-05-21 14:58:08 +0000
+++ b/storage/falcon/Transaction.cpp 2008-06-11 13:03:37 +0000
@@ -44,6 +44,8 @@
#include "SRLSavepointRollback.h"
#include "Bitmap.h"
#include "BackLog.h"
+#include "Interlock.h"
+#include "Error.h"
extern uint falcon_lock_wait_timeout;
@@ -865,19 +867,17 @@ State Transaction::getRelativeState(Tran
if (flags & DO_NOT_WAIT)
return Active;
- // If waiting would cause a deadlock, don't try it
+ Sync syncActiveTransactions(
+ &(database->transactionManager->activeTransactions.syncObject),
+ "Transaction::getRelativeState");
- for (Transaction *trans = transaction->waitingFor; trans; trans =
trans->waitingFor)
- if (trans == this)
- return Deadlock;
+ syncActiveTransactions.lock(Shared);
- // OK, add a reference to the transaction to keep the object around, then wait for it
go go away
+ bool isDeadlock;
+ waitForTransaction(transaction, &syncActiveTransactions, &isDeadlock);
- transaction->addRef();
- waitingFor = transaction;
- transaction->waitForTransaction();
- waitingFor = NULL;
- transaction->release();
+ if (isDeadlock)
+ return Deadlock;
return WasActive; // caller will need to re-fetch
}
@@ -968,31 +968,69 @@ bool Transaction::waitForTransaction(Tra
break;
// If the transction is no longer active, see if it is committed
-
+
if (!transaction || transaction->state == Available)
return true;
if (transaction->state == Committed)
return true;
- // If waiting would cause a deadlock, don't try it
+ bool deadlock;
+ State state = waitForTransaction(transaction, &syncActiveTransactions,
&deadlock);
- for (Transaction *trans = transaction->waitingFor; trans; trans =
trans->waitingFor)
- if (trans == this)
- return true;
+ if (deadlock)
+ return true;
- // OK, add a reference to the transaction to keep the object around, then wait for it to
go away
+ return (state == Committed);
- waitingFor = transaction;
+}
+
+// Wait for transaction, unless it would lead to deadlock.
+// Returns the state of transation.
+//
+// Note:
+// Deadlock check could use locking, because there are potentially concurrent
+// threads checking and modifying the waitFor list.
+// Instead, it implements a fancy lock-free algorithm that works reliably only
+// with full memory barriers. Thus "volatile"-specifier and COMPARE_EXCHANGE
+// are used when traversing and modifying waitFor list. Maybe it is better to
+// use inline assembly or intrinsics to generate memory barrier instead of
+// volatile.
+
+State Transaction::waitForTransaction(Transaction *transaction,Sync *sync, bool
*deadlock)
+{
+
+ *deadlock = false;
transaction->addRef();
- syncActiveTransactions.unlock();
- transaction->waitForTransaction();
+
+ if (!COMPARE_EXCHANGE_POINTER(&waitingFor, NULL, transaction))
+ FATAL("waitingFor was not NULL");
+
+ volatile Transaction *trans;
+ for (trans = transaction->waitingFor; trans; trans = trans->waitingFor)
+ if (trans == this)
+ {
+ *deadlock = true;
+ break;
+ }
+
+ if (!(*deadlock))
+ {
+ if (sync)
+ sync->unlock();
+
+ transaction->waitForTransaction();
+ }
+
+ if (!COMPARE_EXCHANGE_POINTER(&waitingFor, transaction, NULL))
+ FATAL("waitingFor was not %p",transaction);
+
+ State state = (State)transaction->state;
transaction->release();
- waitingFor = NULL;
- return transaction->state == Committed;
-}
+ return state;
+}
void Transaction::waitForTransaction()
{
/***
=== modified file 'storage/falcon/Transaction.h'
--- a/storage/falcon/Transaction.h 2008-05-21 14:58:08 +0000
+++ b/storage/falcon/Transaction.h 2008-06-11 13:03:37 +0000
@@ -102,6 +102,7 @@ public:
void addRef();
void waitForTransaction();
bool waitForTransaction (TransId transId);
+ State waitForTransaction (Transaction *transaction, Sync *sync, bool *deadlock);
void dropTable(Table* table);
void truncateTable(Table* table);
bool hasUncommittedRecords(Table* table);
@@ -145,7 +146,7 @@ public:
int curSavePointId;
Transaction *next; // next in database
Transaction *prior; // next in database
- Transaction *waitingFor;
+ volatile Transaction *waitingFor;
SavePoint *savePoints;
SavePoint *freeSavePoints;
SavePoint localSavePoints[LOCAL_SAVE_POINTS];