#At file:///home/marty/MySQL/mysql-5.1-telco-7.0_new/
2881 Martin Skold 2009-05-05 [merge]
WL#4331 Ensuring resilience against master node failures (Ndb): Fixing failures in testDict -n SchemaTrans
modified:
storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp
storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp
storage/ndb/src/ndbapi/NdbDictionaryImpl.hpp
storage/ndb/test/ndbapi/testDict.cpp
=== modified file 'storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp 2009-04-15 14:08:26 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp 2009-05-05 07:42:07 +0000
@@ -17859,7 +17859,16 @@ void Dbdict::check_takeover_replies(Sign
pending_trans = c_schemaTransList.next(trans_ptr);
}
- masterNodePtr.p->recoveryState = NodeRecord::RS_NORMAL;
+ /*
+ Initialize all node recovery states
+ */
+ for (unsigned i = 1; i < MAX_NDB_NODES; i++) {
+ jam();
+ NodeRecordPtr nodePtr;
+ c_nodes.getPtr(nodePtr, i);
+ nodePtr.p->recoveryState = NodeRecord::RS_NORMAL;
+ }
+
pending_trans = c_schemaTransList.first(trans_ptr);
while (pending_trans)
{
@@ -17875,7 +17884,6 @@ void Dbdict::check_takeover_replies(Sign
{
jam();
c_nodes.getPtr(nodePtr, i);
- nodePtr.p->recoveryState = NodeRecord::RS_NORMAL;
#ifdef VM_TRACE
ndbout_c("Node %u had %u operations, master has %u",i , nodePtr.p->takeOverConf.op_count, masterNodePtr.p->takeOverConf.op_count);
#endif
@@ -17892,7 +17900,6 @@ void Dbdict::check_takeover_replies(Sign
#ifdef VM_TRACE
ndbout_c("Node %u had no operations for transaction %u, ignore it when aborting", i, trans_ptr.p->trans_key);
#endif
- nodePtr.p->recoveryState = NodeRecord::RS_PARTIAL_ROLLBACK;
nodePtr.p->start_op = 0;
nodePtr.p->start_op_state = SchemaOp::OS_PARSED;
}
@@ -22257,6 +22264,9 @@ Dbdict::seizeSchemaTrans(SchemaTransPtr&
c_opRecordSequence = trans_key;
return true;
}
+#ifdef MARTIN
+ ndbout_c("Dbdict::seizeSchemaTrans: Failed to seize schema trans");
+#endif
return false;
}
@@ -23304,7 +23314,7 @@ Dbdict::check_partial_trans_abort_parse_
jam();
c_nodes.getPtr(nodePtr, i);
#ifdef VM_TRACE
- ndbout_c("Checking node %u(%u), %u<%u", nodePtr.i, nodePtr.p->recoveryState, nodePtr.p->start_op, op_ptr.p->op_key);
+ ndbout_c("Checking node %u(%u), %u(%u)<%u", nodePtr.i, nodePtr.p->recoveryState, nodePtr.p->start_op, nodePtr.p->start_op_state, op_ptr.p->op_key);
#endif
if (nodePtr.p->recoveryState == NodeRecord::RS_PARTIAL_ROLLBACK &&
//nodePtr.p->start_op_state == SchemaOp::OS_PARSED &&
@@ -23461,13 +23471,15 @@ Dbdict::check_partial_trans_abort_prepar
{
c_nodes.getPtr(nodePtr, i);
#ifdef VM_TRACE
- ndbout_c("Checking node %u(%u), %u<%u", nodePtr.i, nodePtr.p->recoveryState, nodePtr.p->start_op, op_ptr.p->op_key);
+ ndbout_c("Checking node %u(%u), %u(%u)<%u", nodePtr.i, nodePtr.p->recoveryState, nodePtr.p->start_op, nodePtr.p->start_op_state, op_ptr.p->op_key);
#endif
if (nodePtr.p->recoveryState == NodeRecord::RS_PARTIAL_ROLLBACK &&
+ (nodePtr.p->start_op_state == SchemaOp::OS_PARSED &&
+ nodePtr.p->start_op <= op_ptr.p->op_key) ||
(nodePtr.p->start_op_state == SchemaOp::OS_PREPARED &&
nodePtr.p->start_op < op_ptr.p->op_key) ||
(nodePtr.p->start_op_state == SchemaOp::OS_ABORTED_PREPARE &&
- nodePtr.p->start_op > op_ptr.p->op_key))
+ nodePtr.p->start_op >= op_ptr.p->op_key))
{
#ifdef VM_TRACE
@@ -24857,7 +24869,8 @@ Dbdict::slave_run_flush(Signal *signal,
else
{
jam();
- ndbrequire(trans_ptr.p->m_state == SchemaTrans::TS_STARTED);
+ ndbrequire(trans_ptr.p->m_state == SchemaTrans::TS_STARTED ||
+ trans_ptr.p->m_state == SchemaTrans::TS_ABORTING_PARSE);
trans_ptr.p->m_state = SchemaTrans::TS_FLUSH_PREPARE;
}
do_flush = trans_ptr.p->m_flush_prepare;
=== modified file 'storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp'
--- a/storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp 2009-03-30 13:11:17 +0000
+++ b/storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp 2009-04-17 12:32:02 +0000
@@ -7629,19 +7629,21 @@ int
NdbDictionaryImpl::beginSchemaTrans()
{
DBUG_ENTER("beginSchemaTrans");
- if (m_tx.m_transOn) {
+ if (m_tx.m_state == NdbDictInterface::Tx::Started) {
m_error.code = 4410;
DBUG_RETURN(-1);
}
// TODO real transId
m_tx.m_transId = rand();
+ m_tx.m_state = NdbDictInterface::Tx::Started;
+ m_tx.m_error.code = 0;
if (m_tx.m_transId == 0)
m_tx.m_transId = 1;
int ret = m_receiver.beginSchemaTrans();
if (ret == -1) {
+ m_tx.m_state = NdbDictInterface::Tx::NotStarted;
DBUG_RETURN(-1);
}
- m_tx.m_transOn = true;
DBUG_PRINT("info", ("transId: %x transKey: %x",
m_tx.m_transId, m_tx.m_transKey));
DBUG_RETURN(0);
@@ -7651,40 +7653,48 @@ int
NdbDictionaryImpl::endSchemaTrans(Uint32 flags)
{
DBUG_ENTER("endSchemaTrans");
- if (! m_tx.m_transOn) {
+ if (m_tx.m_state == NdbDictInterface::Tx::NotStarted) {
DBUG_RETURN(0);
}
/*
Check if schema transaction has been aborted
already, for example because of master node failure.
*/
- if (m_error.code == 787)
+ if (m_tx.m_state != NdbDictInterface::Tx::Started)
{
m_tx.m_op.clear();
- if (flags & NdbDictionary::Dictionary::SchemaTransAbort)
+ DBUG_PRINT("info", ("endSchemaTrans: state %u, flags 0x%x\n", m_tx.m_state, flags));
+ if (m_tx.m_state == NdbDictInterface::Tx::Aborted && // rollback at master takeover
+ flags & NdbDictionary::Dictionary::SchemaTransAbort)
{
- m_error.code = 0;
+ m_tx.m_error.code = 0;
DBUG_RETURN(0);
}
+ m_error.code = m_tx.m_error.code;
DBUG_RETURN(-1);
}
DBUG_PRINT("info", ("transId: %x transKey: %x",
m_tx.m_transId, m_tx.m_transKey));
int ret = m_receiver.endSchemaTrans(flags);
- m_tx.m_transOn = false;
- if (ret == -1) {
+ if (ret == -1 || m_tx.m_error.code != 0) {
+ DBUG_PRINT("info", ("endSchemaTrans: state %u, flags 0x%x\n", m_tx.m_state, flags));
+ if (m_tx.m_state == NdbDictInterface::Tx::Committed && // rollforward at master takeover
+ !(flags & NdbDictionary::Dictionary::SchemaTransAbort))
+ goto committed;
m_tx.m_op.clear();
- if (m_error.code == 787)
+ if (m_tx.m_state == NdbDictInterface::Tx::Aborted && // rollback at master takeover
+ flags & NdbDictionary::Dictionary::SchemaTransAbort)
{
- if (flags & NdbDictionary::Dictionary::SchemaTransAbort)
- {
- m_error.code = 0;
- DBUG_RETURN(0);
- }
+ m_error.code = m_tx.m_error.code = 0;
+ m_tx.m_state = NdbDictInterface::Tx::NotStarted;
+ DBUG_RETURN(0);
}
+ if (m_tx.m_error.code != 0)
+ m_error.code = m_tx.m_error.code;
+ m_tx.m_state = NdbDictInterface::Tx::NotStarted;
DBUG_RETURN(-1);
}
-
+committed:
// invalidate old version of altered table
uint i;
for (i = 0; i < m_tx.m_op.size(); i++) {
@@ -7699,6 +7709,7 @@ NdbDictionaryImpl::endSchemaTrans(Uint32
abort();
}
}
+ m_tx.m_state = NdbDictInterface::Tx::NotStarted;
m_tx.m_op.clear();
DBUG_RETURN(0);
}
@@ -7813,6 +7824,7 @@ NdbDictInterface::execSCHEMA_TRANS_END_R
const SchemaTransEndRef* ref =
CAST_CONSTPTR(SchemaTransEndRef, signal->getDataPtr());
m_error.code = ref->errorCode;
+ m_tx.m_error.code = ref->errorCode;
m_masterNodeId = ref->masterNodeId;
m_waiter.signal(NO_WAIT);
}
@@ -7823,7 +7835,11 @@ NdbDictInterface::execSCHEMA_TRANS_END_R
{
const SchemaTransEndRep* rep =
CAST_CONSTPTR(SchemaTransEndRep, signal->getDataPtr());
- m_error.code = rep->errorCode;
+ (rep->errorCode == 0) ?
+ m_tx.m_state = Tx::Committed
+ :
+ m_tx.m_state = Tx::Aborted;
+ m_tx.m_error.code = rep->errorCode;
m_masterNodeId = rep->masterNodeId;
m_waiter.signal(NO_WAIT);
}
=== modified file 'storage/ndb/src/ndbapi/NdbDictionaryImpl.hpp'
--- a/storage/ndb/src/ndbapi/NdbDictionaryImpl.hpp 2009-03-31 14:35:37 +0000
+++ b/storage/ndb/src/ndbapi/NdbDictionaryImpl.hpp 2009-04-03 12:52:34 +0000
@@ -553,20 +553,29 @@ public:
Uint32 m_gsn;
NdbTableImpl* m_impl;
};
- bool m_transOn;
+ enum State {
+ NotStarted,
+ Started,
+ Committed,
+ Aborted
+ };
+ State m_state;
+ NdbError m_error;
Uint32 m_transId; // API
Uint32 m_transKey; // DICT
Vector<Op> m_op;
Tx() :
- m_transOn(false),
+ m_state(NotStarted),
m_transId(0),
m_transKey(0)
- {}
+ {
+ m_error.code = 0;
+ }
Uint32 transId() const {
- return m_transOn ? m_transId : 0;
+ return (m_state == Started) ? m_transId : 0;
}
Uint32 transKey() const {
- return m_transOn ? m_transKey : 0;
+ return (m_state == Started) ? m_transKey : 0;
}
Uint32 requestFlags() const {
Uint32 flags = 0;
@@ -851,7 +860,8 @@ public:
int beginSchemaTrans();
int endSchemaTrans(Uint32 flags);
- bool hasSchemaTrans() const { return m_tx.m_transOn; }
+ bool hasSchemaTrans() const
+ { return (m_tx.m_state == NdbDictInterface::Tx::Started); }
NdbDictInterface::Tx m_tx;
const NdbError & getNdbError() const;
=== modified file 'storage/ndb/test/ndbapi/testDict.cpp'
--- a/storage/ndb/test/ndbapi/testDict.cpp 2009-03-17 15:49:46 +0000
+++ b/storage/ndb/test/ndbapi/testDict.cpp 2009-04-03 12:55:18 +0000
@@ -3958,6 +3958,7 @@ st_do_errins(ST_Con& c, ST_Errins& errin
}
g_info << "errins: " << errins << endl;
chk2(c.restarter->insertErrorInNode(errins.node, errins.value) == 0, errins);
+ c.restarter->get_status(); // do sync call to ensure error has been inserted
return 0;
err:
return -1;
@@ -4531,7 +4532,9 @@ static int
st_end_trans(ST_Con& c, uint flags)
{
g_info << "end trans flags:" << hex << flags << endl;
- chk2(c.dic->endSchemaTrans(flags) == 0, c.dic->getNdbError());
+ int res= c.dic->endSchemaTrans(flags);
+ g_info << "end trans result:" << res << endl;
+ chk2(res == 0, c.dic->getNdbError());
c.tx_on = false;
c.tx_commit = !(flags & ST_AbortFlag);
st_set_commit_all(c);
@@ -4544,10 +4547,12 @@ static int
st_end_trans_aborted(ST_Con& c, uint flags)
{
g_info << "end trans flags:" << hex << flags << endl;
+ int res= c.dic->endSchemaTrans(flags);
+ g_info << "end trans result:" << res << endl;
if (flags & ST_AbortFlag)
- chk1(c.dic->endSchemaTrans(flags) == 0);
+ chk1(res == 0);
else
- chk1(c.dic->endSchemaTrans(flags) != 0);
+ chk1(res != 0);
c.tx_on = false;
c.tx_commit = (flags & ST_AbortFlag);
return 0;
@@ -5708,10 +5713,15 @@ st_test_mnf_prepare(ST_Con& c, int arg =
}
else
chk1(st_end_trans_aborted(c, errins, ST_CommitFlag) == 0);
- st_wait_db_node_up(c, master);
+ chk1(c.restarter->waitClusterStarted() == 0);
+ //st_wait_db_node_up(c, master);
for (i = 0; i < c.tabcount; i++) {
ST_Tab& tab = c.tab(i);
- chk1(st_verify_table(c, tab) == -1);
+ // Verify that table is not in db
+ c.dic->invalidateTable(tab.name);
+ const NdbDictionary::Table* pTab =
+ NDBT_Table::discoverTableFromDb(c.ndb, tab.name);
+ chk1(pTab == NULL);
}
return NDBT_OK;
err:
@@ -5738,7 +5748,8 @@ st_test_mnf_commit1(ST_Con& c, int arg =
}
else
chk1(st_end_trans(c, errins, ST_CommitFlag) == 0);
- st_wait_db_node_up(c, master);
+ chk1(c.restarter->waitClusterStarted() == 0);
+ //st_wait_db_node_up(c, master);
for (i = 0; i < c.tabcount; i++) {
ST_Tab& tab = c.tab(i);
chk1(st_verify_table(c, tab) == 0);
@@ -5769,7 +5780,8 @@ st_test_mnf_commit2(ST_Con& c, int arg =
}
else
chk1(st_end_trans(c, errins, ST_CommitFlag) == 0);
- st_wait_db_node_up(c, master);
+ chk1(c.restarter->waitClusterStarted() == 0);
+ //st_wait_db_node_up(c, master);
chk1(st_verify_all(c) == 0);
for (i = 0; i < c.tabcount; i++) {
ST_Tab& tab = c.tab(i);
@@ -5818,7 +5830,8 @@ st_test_mnf_run_commit(ST_Con& c, int ar
verify:
g_info << "wait for master node to come up" << endl;
- st_wait_db_node_up(c, master);
+ chk1(c.restarter->waitClusterStarted() == 0);
+ //st_wait_db_node_up(c, master);
g_info << "verify all" << endl;
for (i = 0; i < c.tabcount; i++) {
ST_Tab& tab = c.tab(i);
@@ -5864,7 +5877,8 @@ st_test_mnf_run_abort(ST_Con& c, int arg
chk1(st_end_trans_aborted(c, ST_AbortFlag) == 0);
g_info << "wait for master node to come up" << endl;
- st_wait_db_node_up(c, master);
+ chk1(c.restarter->waitClusterStarted() == 0);
+ //st_wait_db_node_up(c, master);
g_info << "verify all" << endl;
for (i = 0; i < c.tabcount; i++) {
ST_Tab& tab = c.tab(i);
| Thread | Author | Date |
|---|---|---|
| • bzr commit into mysql-5.1-telco-7.0 branch (Martin.Skold:2881) WL#4331 | Martin Skold | 5 May |