List: Commits  « Previous Message | Next Message »
From: Martin Skold  Date: May 5 2009 7:42am
Subject: bzr commit into mysql-5.1-telco-7.0 branch (Martin.Skold:2881) WL#4331
View as plain text  
#At file:///home/marty/MySQL/mysql-5.1-telco-7.0_new/

 2881 Martin Skold	2009-05-05 [merge]
      WL#4331 Ensuring resilience against master node failures (Ndb): Fixing failures in testDict -n SchemaTrans
      modified:
        storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp
        storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp
        storage/ndb/src/ndbapi/NdbDictionaryImpl.hpp
        storage/ndb/test/ndbapi/testDict.cpp

=== modified file 'storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp	2009-04-15 14:08:26 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp	2009-05-05 07:42:07 +0000
@@ -17859,7 +17859,16 @@ void Dbdict::check_takeover_replies(Sign
     pending_trans = c_schemaTransList.next(trans_ptr);
   }
 
-  masterNodePtr.p->recoveryState = NodeRecord::RS_NORMAL;
+  /* 
+     Initialize all node recovery states 
+  */
+  for (unsigned i = 1; i < MAX_NDB_NODES; i++) {
+    jam();
+    NodeRecordPtr nodePtr;
+    c_nodes.getPtr(nodePtr, i);
+    nodePtr.p->recoveryState = NodeRecord::RS_NORMAL;
+  }
+
   pending_trans = c_schemaTransList.first(trans_ptr);
   while (pending_trans)
   {
@@ -17875,7 +17884,6 @@ void Dbdict::check_takeover_replies(Sign
       {
         jam();
         c_nodes.getPtr(nodePtr, i);
-        nodePtr.p->recoveryState = NodeRecord::RS_NORMAL;
 #ifdef VM_TRACE
         ndbout_c("Node %u had %u operations, master has %u",i , nodePtr.p->takeOverConf.op_count, masterNodePtr.p->takeOverConf.op_count);
 #endif        
@@ -17892,7 +17900,6 @@ void Dbdict::check_takeover_replies(Sign
 #ifdef VM_TRACE
             ndbout_c("Node %u had no operations for  transaction %u, ignore it when aborting", i, trans_ptr.p->trans_key);
 #endif
-            nodePtr.p->recoveryState = NodeRecord::RS_PARTIAL_ROLLBACK;
             nodePtr.p->start_op = 0;
             nodePtr.p->start_op_state = SchemaOp::OS_PARSED;
           }
@@ -22257,6 +22264,9 @@ Dbdict::seizeSchemaTrans(SchemaTransPtr&
     c_opRecordSequence = trans_key;
     return true;
   }
+#ifdef MARTIN
+  ndbout_c("Dbdict::seizeSchemaTrans: Failed to seize schema trans");
+#endif
   return false;
 }
 
@@ -23304,7 +23314,7 @@ Dbdict::check_partial_trans_abort_parse_
         jam();
         c_nodes.getPtr(nodePtr, i);
 #ifdef VM_TRACE
-        ndbout_c("Checking node %u(%u), %u<%u", nodePtr.i, nodePtr.p->recoveryState, nodePtr.p->start_op, op_ptr.p->op_key);
+        ndbout_c("Checking node %u(%u), %u(%u)<%u", nodePtr.i, nodePtr.p->recoveryState, nodePtr.p->start_op, nodePtr.p->start_op_state, op_ptr.p->op_key);
 #endif
         if (nodePtr.p->recoveryState == NodeRecord::RS_PARTIAL_ROLLBACK &&
             //nodePtr.p->start_op_state == SchemaOp::OS_PARSED &&
@@ -23461,13 +23471,15 @@ Dbdict::check_partial_trans_abort_prepar
       {
         c_nodes.getPtr(nodePtr, i);
 #ifdef VM_TRACE
-        ndbout_c("Checking node %u(%u), %u<%u", nodePtr.i, nodePtr.p->recoveryState, nodePtr.p->start_op, op_ptr.p->op_key);
+        ndbout_c("Checking node %u(%u), %u(%u)<%u", nodePtr.i, nodePtr.p->recoveryState, nodePtr.p->start_op, nodePtr.p->start_op_state, op_ptr.p->op_key);
 #endif
         if (nodePtr.p->recoveryState == NodeRecord::RS_PARTIAL_ROLLBACK &&
+            (nodePtr.p->start_op_state == SchemaOp::OS_PARSED &&
+              nodePtr.p->start_op <= op_ptr.p->op_key) ||
             (nodePtr.p->start_op_state == SchemaOp::OS_PREPARED &&
               nodePtr.p->start_op < op_ptr.p->op_key) ||
             (nodePtr.p->start_op_state == SchemaOp::OS_ABORTED_PREPARE &&
-             nodePtr.p->start_op > op_ptr.p->op_key))
+             nodePtr.p->start_op >= op_ptr.p->op_key))
                
         {
 #ifdef VM_TRACE
@@ -24857,7 +24869,8 @@ Dbdict::slave_run_flush(Signal *signal,
     else
     {
       jam();
-      ndbrequire(trans_ptr.p->m_state == SchemaTrans::TS_STARTED);
+      ndbrequire(trans_ptr.p->m_state == SchemaTrans::TS_STARTED ||
+                 trans_ptr.p->m_state == SchemaTrans::TS_ABORTING_PARSE);
       trans_ptr.p->m_state = SchemaTrans::TS_FLUSH_PREPARE;
     }
     do_flush = trans_ptr.p->m_flush_prepare;

=== modified file 'storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp'
--- a/storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp	2009-03-30 13:11:17 +0000
+++ b/storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp	2009-04-17 12:32:02 +0000
@@ -7629,19 +7629,21 @@ int
 NdbDictionaryImpl::beginSchemaTrans()
 {
   DBUG_ENTER("beginSchemaTrans");
-  if (m_tx.m_transOn) {
+  if (m_tx.m_state == NdbDictInterface::Tx::Started) {
     m_error.code = 4410;
     DBUG_RETURN(-1);
   }
   // TODO real transId
   m_tx.m_transId = rand();
+  m_tx.m_state = NdbDictInterface::Tx::Started;
+  m_tx.m_error.code = 0;
   if (m_tx.m_transId == 0)
     m_tx.m_transId = 1;
   int ret = m_receiver.beginSchemaTrans();
   if (ret == -1) {
+    m_tx.m_state = NdbDictInterface::Tx::NotStarted;
     DBUG_RETURN(-1);
   }
-  m_tx.m_transOn = true;
   DBUG_PRINT("info", ("transId: %x transKey: %x",
                       m_tx.m_transId, m_tx.m_transKey));
   DBUG_RETURN(0);
@@ -7651,40 +7653,48 @@ int
 NdbDictionaryImpl::endSchemaTrans(Uint32 flags)
 {
   DBUG_ENTER("endSchemaTrans");
-  if (! m_tx.m_transOn) {
+  if (m_tx.m_state == NdbDictInterface::Tx::NotStarted) {
     DBUG_RETURN(0);
   }
   /*
     Check if schema transaction has been aborted
     already, for example because of master node failure.
    */
-  if (m_error.code == 787)
+  if (m_tx.m_state != NdbDictInterface::Tx::Started)
   {
     m_tx.m_op.clear();
-    if (flags & NdbDictionary::Dictionary::SchemaTransAbort)
+    DBUG_PRINT("info", ("endSchemaTrans: state %u, flags 0x%x\n", m_tx.m_state, flags));
+    if (m_tx.m_state == NdbDictInterface::Tx::Aborted && // rollback at master takeover
+        flags & NdbDictionary::Dictionary::SchemaTransAbort)
     {
-      m_error.code = 0;
+      m_tx.m_error.code = 0;
       DBUG_RETURN(0);
     }
+    m_error.code = m_tx.m_error.code;
     DBUG_RETURN(-1);
   }
   DBUG_PRINT("info", ("transId: %x transKey: %x",
                       m_tx.m_transId, m_tx.m_transKey));
   int ret = m_receiver.endSchemaTrans(flags);
-  m_tx.m_transOn = false;
-  if (ret == -1) {
+  if (ret == -1 || m_tx.m_error.code != 0) {
+    DBUG_PRINT("info", ("endSchemaTrans: state %u, flags 0x%x\n", m_tx.m_state, flags));
+    if (m_tx.m_state == NdbDictInterface::Tx::Committed && // rollforward at master takeover
+        !(flags & NdbDictionary::Dictionary::SchemaTransAbort))
+      goto committed;
     m_tx.m_op.clear();
-    if (m_error.code == 787)
+    if (m_tx.m_state == NdbDictInterface::Tx::Aborted && // rollback at master takeover
+        flags & NdbDictionary::Dictionary::SchemaTransAbort)
     {
-      if (flags & NdbDictionary::Dictionary::SchemaTransAbort)
-      {
-        m_error.code = 0;
-        DBUG_RETURN(0);
-      }
+      m_error.code = m_tx.m_error.code = 0;
+      m_tx.m_state = NdbDictInterface::Tx::NotStarted;
+      DBUG_RETURN(0);
     }
+    if (m_tx.m_error.code != 0)
+      m_error.code = m_tx.m_error.code;
+    m_tx.m_state = NdbDictInterface::Tx::NotStarted;
     DBUG_RETURN(-1);
   }
-
+committed:
   // invalidate old version of altered table
   uint i;
   for (i = 0; i < m_tx.m_op.size(); i++) {
@@ -7699,6 +7709,7 @@ NdbDictionaryImpl::endSchemaTrans(Uint32
         abort();
     }
   }
+  m_tx.m_state = NdbDictInterface::Tx::NotStarted;
   m_tx.m_op.clear();
   DBUG_RETURN(0);
 }
@@ -7813,6 +7824,7 @@ NdbDictInterface::execSCHEMA_TRANS_END_R
   const SchemaTransEndRef* ref =
     CAST_CONSTPTR(SchemaTransEndRef, signal->getDataPtr());
   m_error.code = ref->errorCode;
+  m_tx.m_error.code = ref->errorCode;
   m_masterNodeId = ref->masterNodeId;
   m_waiter.signal(NO_WAIT);
 }
@@ -7823,7 +7835,11 @@ NdbDictInterface::execSCHEMA_TRANS_END_R
 {
   const SchemaTransEndRep* rep =
     CAST_CONSTPTR(SchemaTransEndRep, signal->getDataPtr());
-  m_error.code = rep->errorCode;
+  (rep->errorCode == 0) ?
+    m_tx.m_state = Tx::Committed
+    :
+    m_tx.m_state = Tx::Aborted;
+  m_tx.m_error.code = rep->errorCode;
   m_masterNodeId = rep->masterNodeId;
   m_waiter.signal(NO_WAIT);
 }

=== modified file 'storage/ndb/src/ndbapi/NdbDictionaryImpl.hpp'
--- a/storage/ndb/src/ndbapi/NdbDictionaryImpl.hpp	2009-03-31 14:35:37 +0000
+++ b/storage/ndb/src/ndbapi/NdbDictionaryImpl.hpp	2009-04-03 12:52:34 +0000
@@ -553,20 +553,29 @@ public:
       Uint32 m_gsn;
       NdbTableImpl* m_impl;
     };
-    bool m_transOn;
+    enum State {
+      NotStarted,
+      Started,
+      Committed,
+      Aborted
+    };
+    State m_state;
+    NdbError m_error;
     Uint32 m_transId;   // API
     Uint32 m_transKey;  // DICT
     Vector<Op> m_op;
     Tx() :
-      m_transOn(false),
+      m_state(NotStarted),
       m_transId(0),
       m_transKey(0)
-    {}
+    {
+      m_error.code = 0;
+    }
     Uint32 transId() const {
-      return m_transOn ? m_transId : 0;
+      return (m_state == Started) ? m_transId : 0;
     }
     Uint32 transKey() const {
-      return m_transOn ? m_transKey : 0;
+      return (m_state == Started) ? m_transKey : 0;
     }
     Uint32 requestFlags() const {
       Uint32 flags = 0;
@@ -851,7 +860,8 @@ public:
 
   int beginSchemaTrans();
   int endSchemaTrans(Uint32 flags);
-  bool hasSchemaTrans() const { return m_tx.m_transOn; }
+  bool hasSchemaTrans() const
+    { return (m_tx.m_state == NdbDictInterface::Tx::Started); }
   NdbDictInterface::Tx m_tx;
 
   const NdbError & getNdbError() const;

=== modified file 'storage/ndb/test/ndbapi/testDict.cpp'
--- a/storage/ndb/test/ndbapi/testDict.cpp	2009-03-17 15:49:46 +0000
+++ b/storage/ndb/test/ndbapi/testDict.cpp	2009-04-03 12:55:18 +0000
@@ -3958,6 +3958,7 @@ st_do_errins(ST_Con& c, ST_Errins& errin
   }
   g_info << "errins: " << errins << endl;
   chk2(c.restarter->insertErrorInNode(errins.node, errins.value) == 0, errins);
+  c.restarter->get_status(); // do sync call to ensure error has been inserted
   return 0;
 err:
   return -1;
@@ -4531,7 +4532,9 @@ static int
 st_end_trans(ST_Con& c, uint flags)
 {
   g_info << "end trans flags:" << hex << flags << endl;
-  chk2(c.dic->endSchemaTrans(flags) == 0, c.dic->getNdbError());
+  int res= c.dic->endSchemaTrans(flags);
+  g_info << "end trans result:" << res << endl;
+  chk2(res == 0, c.dic->getNdbError());
   c.tx_on = false;
   c.tx_commit = !(flags & ST_AbortFlag);
   st_set_commit_all(c);
@@ -4544,10 +4547,12 @@ static int
 st_end_trans_aborted(ST_Con& c, uint flags)
 {
   g_info << "end trans flags:" << hex << flags << endl;
+  int res= c.dic->endSchemaTrans(flags);
+  g_info << "end trans result:" << res << endl;
   if (flags & ST_AbortFlag)
-    chk1(c.dic->endSchemaTrans(flags) == 0);
+    chk1(res == 0);
   else
-    chk1(c.dic->endSchemaTrans(flags) != 0);
+    chk1(res != 0);
   c.tx_on = false;
   c.tx_commit = (flags & ST_AbortFlag);
   return 0;
@@ -5708,10 +5713,15 @@ st_test_mnf_prepare(ST_Con& c, int arg =
   }
   else
     chk1(st_end_trans_aborted(c, errins, ST_CommitFlag) == 0);
-  st_wait_db_node_up(c, master);
+  chk1(c.restarter->waitClusterStarted() == 0);
+  //st_wait_db_node_up(c, master);
   for (i = 0; i < c.tabcount; i++) {
     ST_Tab& tab = c.tab(i);
-    chk1(st_verify_table(c, tab) == -1);
+    // Verify that table is not in db
+    c.dic->invalidateTable(tab.name);
+    const NdbDictionary::Table* pTab =
+      NDBT_Table::discoverTableFromDb(c.ndb, tab.name);
+    chk1(pTab == NULL);
   }
   return NDBT_OK;
 err:
@@ -5738,7 +5748,8 @@ st_test_mnf_commit1(ST_Con& c, int arg =
   }
   else
     chk1(st_end_trans(c, errins, ST_CommitFlag) == 0);
-  st_wait_db_node_up(c, master);
+  chk1(c.restarter->waitClusterStarted() == 0);
+  //st_wait_db_node_up(c, master);
   for (i = 0; i < c.tabcount; i++) {
     ST_Tab& tab = c.tab(i);
     chk1(st_verify_table(c, tab) == 0);
@@ -5769,7 +5780,8 @@ st_test_mnf_commit2(ST_Con& c, int arg =
   }
   else
     chk1(st_end_trans(c, errins, ST_CommitFlag) == 0);
-  st_wait_db_node_up(c, master);
+  chk1(c.restarter->waitClusterStarted() == 0);
+  //st_wait_db_node_up(c, master);
   chk1(st_verify_all(c) == 0);
   for (i = 0; i < c.tabcount; i++) {
     ST_Tab& tab = c.tab(i);
@@ -5818,7 +5830,8 @@ st_test_mnf_run_commit(ST_Con& c, int ar
 
 verify:
   g_info << "wait for master node to come up" << endl;
-  st_wait_db_node_up(c, master);
+  chk1(c.restarter->waitClusterStarted() == 0);
+  //st_wait_db_node_up(c, master);
   g_info << "verify all" << endl;
   for (i = 0; i < c.tabcount; i++) {
     ST_Tab& tab = c.tab(i);
@@ -5864,7 +5877,8 @@ st_test_mnf_run_abort(ST_Con& c, int arg
     chk1(st_end_trans_aborted(c, ST_AbortFlag) == 0);
 
   g_info << "wait for master node to come up" << endl;
-  st_wait_db_node_up(c, master);
+  chk1(c.restarter->waitClusterStarted() == 0);
+  //st_wait_db_node_up(c, master);
   g_info << "verify all" << endl;
   for (i = 0; i < c.tabcount; i++) {
     ST_Tab& tab = c.tab(i);

Thread
bzr commit into mysql-5.1-telco-7.0 branch (Martin.Skold:2881) WL#4331 | Martin Skold | 5 May