List:Commits« Previous MessageNext Message »
From:Jonas Oreland Date:September 21 2009 8:26am
Subject:bzr commit into mysql-5.1-telco-6.3 branch (jonas:3055) Bug#37688
View as plain text  
#At file:///home/jonas/src/telco-6.3/ based on revid:jonas@stripped

 3055 Jonas Oreland	2009-09-21
      ndb - bug#37688 - fix race condition between TAKE_OVERTCCONF and NODE_FAILREP by introducing a generic route-facility

    added:
      storage/ndb/include/kernel/signaldata/LocalRouteOrd.hpp
      storage/ndb/src/common/debugger/signaldata/LocalRouteOrd.cpp
    modified:
      storage/ndb/include/kernel/GlobalSignalNumbers.h
      storage/ndb/include/kernel/signaldata/SignalData.hpp
      storage/ndb/src/common/debugger/signaldata/Makefile.am
      storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp
      storage/ndb/src/common/debugger/signaldata/SignalNames.cpp
      storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
      storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
      storage/ndb/src/kernel/vm/SimulatedBlock.cpp
      storage/ndb/src/kernel/vm/SimulatedBlock.hpp
=== modified file 'storage/ndb/include/kernel/GlobalSignalNumbers.h'
--- a/storage/ndb/include/kernel/GlobalSignalNumbers.h	2009-05-27 12:11:46 +0000
+++ b/storage/ndb/include/kernel/GlobalSignalNumbers.h	2009-09-21 08:26:49 +0000
@@ -388,7 +388,8 @@ extern const GlobalSignalNumber NO_OF_SI
 #define GSN_UPGRADE_PROTOCOL_ORD        285
 
 /* 286 not unused */
-/* 287 unused */
+#define GSN_LOCAL_ROUTE_ORD             287 /* local */
+
 #define GSN_GETGCICONF                  288
 #define GSN_GETGCIREQ                   289
 #define GSN_HOT_SPAREREP                290

=== added file 'storage/ndb/include/kernel/signaldata/LocalRouteOrd.hpp'
--- a/storage/ndb/include/kernel/signaldata/LocalRouteOrd.hpp	1970-01-01 00:00:00 +0000
+++ b/storage/ndb/include/kernel/signaldata/LocalRouteOrd.hpp	2009-09-21 08:26:49 +0000
@@ -0,0 +1,34 @@
+/*
+   Copyright (C) 2003 MySQL AB
+    All rights reserved. Use is subject to license terms.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
+*/
+
+#ifndef LOCAL_ROUTE_ORD_HPP
+#define LOCAL_ROUTE_ORD_HPP
+
+#include "SignalData.hpp"
+
+struct LocalRouteOrd
+{
+  STATIC_CONST( StaticLen = 3 ); // Number of words before path[]
+
+  Uint32 cnt; // Upper 16 bits: #remaining hops, lower 16 bits: #destinations
+  Uint32 gsn; // Final gsn
+  Uint32 prio;// Final prio
+  Uint32 path[1]; // (ref, prio) per hop, then destination refs, then any data
+};
+
+#endif

=== modified file 'storage/ndb/include/kernel/signaldata/SignalData.hpp'
--- a/storage/ndb/include/kernel/signaldata/SignalData.hpp	2009-05-26 18:53:34 +0000
+++ b/storage/ndb/include/kernel/signaldata/SignalData.hpp	2009-09-21 08:26:49 +0000
@@ -226,4 +226,6 @@ GSN_PRINT_SIGNATURE(printCONTINUEB_NDBFS
 GSN_PRINT_SIGNATURE(printCONTINUEB_DBDIH);
 GSN_PRINT_SIGNATURE(printSTART_FRAG_REQ);
 
+GSN_PRINT_SIGNATURE(printLOCAL_ROUTE_ORD);
+
 #endif

=== added file 'storage/ndb/src/common/debugger/signaldata/LocalRouteOrd.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/LocalRouteOrd.cpp	1970-01-01 00:00:00 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/LocalRouteOrd.cpp	2009-09-21 08:26:49 +0000
@@ -0,0 +1,63 @@
+/*
+   Copyright (C) 2003 MySQL AB
+    All rights reserved. Use is subject to license terms.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
+*/
+
+#include <signaldata/LocalRouteOrd.hpp>
+#include <DebuggerNames.hpp>
+#include <RefConvert.hpp>
+
+/** Print LOCAL_ROUTE_ORD: counts, final gsn/prio, hops, destinations, data. */
+bool
+printLOCAL_ROUTE_ORD(FILE* output, const Uint32* theData, Uint32 len,
+                     Uint16 rbn)
+{
+  const LocalRouteOrd * sig = (const LocalRouteOrd*)theData;
+  Uint32 pathcnt = sig->cnt >> 16;
+  Uint32 dstcnt = sig->cnt & 0xFFFF;
+
+  fprintf(output, " pathcnt: %u dstcnt: %u\n", pathcnt, dstcnt);
+  fprintf(output, " gsn: %u(%s) prio: %u\n",
+          sig->gsn, getSignalName(sig->gsn), sig->prio);
+
+  const Uint32 * ptr = sig->path;
+  fprintf(output, " path:");
+  for (Uint32 i = 0; i<pathcnt; i++)
+  {
+    fprintf(output, " [ hop: 0x%x(%s) prio: %u ]",
+            ptr[0], getBlockName(refToMain(ptr[0])), ptr[1]);
+    ptr += 2;
+  }
+
+  fprintf(output, "\n dst:");
+  // Step ptr past each destination so that the data dump starts after them
+  for (Uint32 i = 0; i<dstcnt; i++, ptr++)
+  {
+    fprintf(output, " [ 0x%x(%s) ]", ptr[0], getBlockName(refToMain(ptr[0])));
+  }
+  fprintf(output, "\n");
+
+  if (ptr < (theData + len))
+  {
+    fprintf(output, " data:");
+    while (ptr < (theData + len))
+    {
+      fprintf(output, " %.8x", * ptr++);
+    }
+    fprintf(output, "\n");
+  }
+  return true;
+}

=== modified file 'storage/ndb/src/common/debugger/signaldata/Makefile.am'
--- a/storage/ndb/src/common/debugger/signaldata/Makefile.am	2009-05-26 18:53:34 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/Makefile.am	2009-09-21 08:26:49 +0000
@@ -39,7 +39,8 @@ libsignaldataprint_la_SOURCES = \
           SumaImpl.cpp NdbSttor.cpp CreateFragmentation.cpp \
 	  UtilLock.cpp TuxMaint.cpp AccLock.cpp \
           LqhTrans.cpp ReadNodesConf.cpp CntrStart.cpp \
-          ScanFrag.cpp
+          ScanFrag.cpp \
+          LocalRouteOrd.cpp
 
 include $(top_srcdir)/storage/ndb/config/common.mk.am
 include $(top_srcdir)/storage/ndb/config/type_ndbapi.mk.am

=== modified file 'storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp	2009-05-26 18:53:34 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp	2009-09-21 08:26:49 +0000
@@ -201,6 +201,8 @@ SignalDataPrintFunctions[] = {
   ,{ GSN_LQH_TRANSCONF, printLQH_TRANSCONF }
   ,{ GSN_SCAN_FRAGREQ, printSCAN_FRAGREQ }
   ,{ GSN_START_FRAGREQ, printSTART_FRAG_REQ }
+
+  ,{ GSN_LOCAL_ROUTE_ORD, printLOCAL_ROUTE_ORD }
   ,{ 0, 0 }
 };
 

=== modified file 'storage/ndb/src/common/debugger/signaldata/SignalNames.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp	2009-05-27 12:11:46 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp	2009-09-21 08:26:49 +0000
@@ -655,5 +655,7 @@ const GsnName SignalNames [] = {
   ,{ GSN_START_TOREF, "START_TOREF" }
   ,{ GSN_END_TOREF, "END_TOREF" }
   ,{ GSN_START_PERMREP, "START_PERMREP" }
+
+  ,{ GSN_LOCAL_ROUTE_ORD, "LOCAL_ROUTE_ORD" }
 };
 const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName);

=== modified file 'storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2009-09-14 12:51:36 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2009-09-21 08:26:49 +0000
@@ -7533,11 +7533,25 @@ Dbtc::checkScanFragList(Signal* signal,
 void Dbtc::execTAKE_OVERTCCONF(Signal* signal) 
 {
   jamEntry();
+
+  if (!checkNodeFailSequence(signal))
+  {
+    jam();
+    return;
+  }
+
   tfailedNodeId = signal->theData[0];
   hostptr.i = tfailedNodeId;
   ptrCheckGuard(hostptr, chostFilesize, hostRecord);
 
-  if (signal->getSendersBlockRef() != reference())
+  Uint32 senderRef = signal->theData[1];
+  if (signal->getLength() < 2)
+  {
+    jam();
+    senderRef = 0; // currently only used to see if it's from self
+  }
+
+  if (senderRef != reference())
   {
     jam();
 
@@ -7854,7 +7868,8 @@ void Dbtc::completeTransAtTakeOverDoLast
     /*------------------------------------------------------------*/
     NodeReceiverGroup rg(DBTC, c_alive_nodes);
     signal->theData[0] = tcNodeFailptr.p->takeOverNode;
-    sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
+    signal->theData[1] = reference();
+    sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 2, JBB);
     
     if (tcNodeFailptr.p->queueIndex > 0) {
       jam();

=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2009-09-01 11:07:11 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2009-09-21 08:26:49 +0000
@@ -3801,7 +3801,7 @@ Qmgr::sendCommitFailReq(Signal* signal)
     ptrAss(nodePtr, nodeRec);
 
 #ifdef ERROR_INSERT    
-    if (ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
+    if (false && ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
     {
       ndbout_c("skipping node %d", c_error_insert_extra);
       CLEAR_ERROR_INSERT_VALUE;
@@ -3918,6 +3918,9 @@ void Qmgr::execCOMMIT_FAILREQ(Signal* si
 {
   NodeRecPtr nodePtr;
   jamEntry();
+
+  CRASH_INSERTION(935);
+
   BlockReference Tblockref = signal->theData[0];
   UintR TfailureNr = signal->theData[1];
   if (Tblockref != cpdistref) {

=== modified file 'storage/ndb/src/kernel/vm/SimulatedBlock.cpp'
--- a/storage/ndb/src/kernel/vm/SimulatedBlock.cpp	2009-05-27 12:11:46 +0000
+++ b/storage/ndb/src/kernel/vm/SimulatedBlock.cpp	2009-09-21 08:26:49 +0000
@@ -34,6 +34,7 @@
 #include <signaldata/NodeStateSignalData.hpp>
 #include <signaldata/FsRef.hpp>
 #include <signaldata/SignalDroppedRep.hpp>
+#include <signaldata/LocalRouteOrd.hpp>
 #include <DebuggerNames.hpp>
 #include "LongSignal.hpp"
 
@@ -151,6 +152,7 @@ SimulatedBlock::installSimulatedBlockFun
   a[GSN_FSAPPENDREF]  = &SimulatedBlock::execFSAPPENDREF;
   a[GSN_NODE_START_REP] = &SimulatedBlock::execNODE_START_REP;
   a[GSN_API_START_REP] = &SimulatedBlock::execAPI_START_REP;
+  a[GSN_LOCAL_ROUTE_ORD] = &SimulatedBlock::execLOCAL_ROUTE_ORD;
 }
 
 void
@@ -2377,3 +2379,250 @@ SimulatedBlock::create_distr_key(Uint32 
 }
 
 CArray<KeyDescriptor> g_key_descriptor_pool;
+
+/**
+ * Wrap the signal in a LOCAL_ROUTE_ORD and send it to path[0]; the payload
+ * follows the route header in theData or, if too long, goes in section 0.
+ */
+void
+SimulatedBlock::sendRoutedSignal(RoutePath path[], Uint32 pathcnt,
+                                 Uint32 dst[],
+                                 Uint32 dstcnt,
+                                 Uint32 gsn,
+                                 Signal * signal,
+                                 Uint32 sigLen,
+                                 JobBufferLevel prio,
+                                 SectionHandle * userhandle)
+{
+  ndbrequire(pathcnt > 0); // don't support (now) directly multi-cast
+  pathcnt--; // first hop is made from here
+
+  Uint32 len = LocalRouteOrd::StaticLen + (2 * pathcnt) + dstcnt; // header
+  ndbrequire(len <= 25);
+
+  SectionHandle handle(this, signal);
+  if (userhandle)
+  {
+    ljam();
+    handle.m_cnt = userhandle->m_cnt;
+    for (Uint32 i = 0; i<handle.m_cnt; i++)
+      handle.m_ptr[i] = userhandle->m_ptr[i];
+    userhandle->m_cnt = 0; // the sections are now carried by handle
+  }
+
+  if (len + sigLen > 25)
+  {
+    ljam();
+    // we need to store theData in a section (it becomes section 0)
+    ndbrequire(handle.m_cnt < 3);
+    handle.m_ptr[2] = handle.m_ptr[1];
+    handle.m_ptr[1] = handle.m_ptr[0];
+    Ptr<SectionSegment> tmp;
+    ndbrequire(import(tmp, signal->theData, sigLen));
+    handle.m_ptr[0].p = tmp.p;
+    handle.m_ptr[0].i = tmp.i;
+    handle.m_ptr[0].sz = sigLen;
+    handle.m_cnt ++;
+  }
+  else
+  {
+    ljam();
+    memmove(signal->theData + len, signal->theData, 4 * sigLen);
+    len += sigLen;
+  }
+
+  LocalRouteOrd * ord = (LocalRouteOrd*)signal->getDataPtrSend();
+  ord->cnt = (pathcnt << 16) | (dstcnt);
+  ord->gsn = gsn;
+  ord->prio = Uint32(prio);
+
+  Uint32 * dstptr = ord->path;
+  for (Uint32 i = 1; i <= pathcnt; i++) // path[0] is the direct send below
+  {
+    ndbrequire(refToNode(path[i].ref) == 0 ||
+               refToNode(path[i].ref) == getOwnNodeId());
+
+    * dstptr++ = path[i].ref;
+    * dstptr++ = Uint32(path[i].prio);
+  }
+
+  for (Uint32 i = 0; i<dstcnt; i++)
+  {
+    ndbrequire(refToNode(dst[i]) == 0 ||
+               refToNode(dst[i]) == getOwnNodeId());
+
+    * dstptr++ = dst[i];
+  }
+
+  sendSignal(path[0].ref, GSN_LOCAL_ROUTE_ORD, signal, len,
+             path[0].prio, &handle);
+}
+
+/**
+ * Handle one step of a locally routed signal (see sendRoutedSignal):
+ * forward it to the next hop while hops remain, otherwise restore the
+ * payload and deliver it as the final gsn to the destination(s).
+ */
+void
+SimulatedBlock::execLOCAL_ROUTE_ORD(Signal* signal)
+{
+  ljamEntry();
+
+  if (!assembleFragments(signal))
+  {
+    ljam();
+    return;
+  }
+
+  if (ERROR_INSERTED(1001))
+  {
+    // NDBCNTR error code 1001: resend to ourselves with a delay
+    ljam();
+    SectionHandle handle(this, signal);
+    sendSignalWithDelay(reference(), GSN_LOCAL_ROUTE_ORD, signal, 200,
+                        signal->getLength(), &handle);
+    return;
+  }
+
+  LocalRouteOrd* ord = (LocalRouteOrd*)signal->getDataPtr();
+  Uint32 pathcnt = ord->cnt >> 16;
+  Uint32 dstcnt = ord->cnt & 0xFFFF;
+  Uint32 sigLen = signal->getLength();
+
+  if (pathcnt == 0)
+  {
+    // Send to final destination(s)
+    ljam();
+    Uint32 gsn = ord->gsn;
+    Uint32 prio = ord->prio;
+    // Park the destination refs at word 25+; the header is overwritten below
+    memcpy(signal->theData+25, ord->path, 4*dstcnt);
+    SectionHandle handle(this, signal);
+    if (sigLen > LocalRouteOrd::StaticLen + dstcnt)
+    {
+      ljam();
+      // Payload follows the route header inline: move it to the front
+      memmove(signal->theData,
+              signal->theData + LocalRouteOrd::StaticLen + dstcnt,
+              4 * (sigLen - (LocalRouteOrd::StaticLen + dstcnt)));
+      sigLen = sigLen - (LocalRouteOrd::StaticLen + dstcnt);
+    }
+    else
+    {
+      ljam();
+      // Payload was stored in section 0: put it back in signal->theData.
+      // Without any section handle.m_ptr[0] is not valid, so fail fast.
+      ndbrequire(handle.m_cnt > 0);
+      sigLen = handle.m_ptr[0].sz;
+      ndbrequire(sigLen <= 25);
+      copy(signal->theData, handle.m_ptr[0]);
+      release(handle.m_ptr[0]);
+
+      for (Uint32 i = 0; i < handle.m_cnt - 1; i++)
+        handle.m_ptr[i] = handle.m_ptr[i+1];
+      handle.m_cnt--;
+    }
+
+    /*
+     * The extra if-statement is as sendSignalNoRelease will copy sections
+     *   which is not necessary if only sending to one destination
+     */
+    if (dstcnt > 1)
+    {
+      ljam();
+      /** 6.3 has not yet impl. sendSignalNoRelease */
+#if NDB_VERSION_D >= NDB_MAKE_VERSION(6,4,0)
+      for (Uint32 i = 0; i<dstcnt; i++)
+      {
+        ljam();
+        sendSignalNoRelease(signal->theData[25+i], gsn, signal, sigLen,
+                            JobBufferLevel(prio), &handle);
+      }
+      releaseSections(handle);
+#else
+      if (handle.m_cnt == 0)
+      {
+        ljam();
+        for (Uint32 i = 0; i<dstcnt; i++)
+        {
+          ljam();
+          sendSignal(signal->theData[25+i], gsn, signal, sigLen,
+                     JobBufferLevel(prio));
+        }
+      }
+      else
+      {
+        /**
+         * This path is (out of laziness?) not implemented in 6.3
+         */
+        ndbrequire(false);
+      }
+#endif
+    }
+    else
+    {
+      ljam();
+      sendSignal(signal->theData[25+0], gsn, signal, sigLen,
+                 JobBufferLevel(prio), &handle);
+    }
+  }
+  else
+  {
+    /**
+     * Reroute: drop the first (ref, prio) pair and send on to that hop
+     */
+    ljam();
+    SectionHandle handle(this, signal);
+    Uint32 ref = ord->path[0];
+    Uint32 prio = ord->path[1];
+    Uint32 len = sigLen - 2;
+    ord->cnt = ((pathcnt - 1) << 16) | dstcnt;
+    memmove(ord->path, ord->path+2, 4 * (len - LocalRouteOrd::StaticLen));
+    sendSignal(ref, GSN_LOCAL_ROUTE_ORD, signal, len,
+               JobBufferLevel(prio), &handle);
+  }
+}
+
+
+/**
+ * Check whether a node-failure related signal may be processed now.
+ *
+ * @return true if the caller should go on handling the signal, false if it
+ *         has been re-routed and will be delivered again later.
+ */
+bool
+SimulatedBlock::checkNodeFailSequence(Signal* signal)
+{
+  /**
+   * A signal that is part of node-failure handling and comes from a
+   *   remote node must not reach us before we have got NODE_FAILREP
+   *   (this avoids tricky state handling).
+   *
+   * To guarantee that, such a signal is re-sent to ourselves via
+   *   QMGR (GSN_COMMIT_FAILREQ) and NDBCNTR (which sends NODE_FAILREP).
+   *   The extra time should be negligible.
+   *
+   * Signals sent by ourselves are exempt, as they are only sent as a
+   *   consequence of NODE_FAILREP; so are signals from the local NDBCNTR.
+   */
+  const Uint32 senderRef = signal->getSendersBlockRef();
+  if (senderRef != reference() &&
+      !(refToNode(senderRef) == getOwnNodeId() &&
+        refToMain(senderRef) == NDBCNTR))
+  {
+    // Not yet serialized: take the detour and come back later
+    RoutePath hops[2] = { { QMGR_REF, JBB }, { NDBCNTR_REF, JBB } };
+    Uint32 finalDst[1] = { reference() };
+
+    SectionHandle sections(this, signal);
+    const Uint32 finalGsn = signal->header.theVerId_signalNumber;
+    const Uint32 payloadLen = signal->getLength();
+
+    sendRoutedSignal(hops, 2, finalDst, 1, finalGsn, signal, payloadLen,
+                     JBB, &sections);
+    return false;
+  }
+
+  ljam();
+  return true;
+}

=== modified file 'storage/ndb/src/kernel/vm/SimulatedBlock.hpp'
--- a/storage/ndb/src/kernel/vm/SimulatedBlock.hpp	2009-05-27 12:11:46 +0000
+++ b/storage/ndb/src/kernel/vm/SimulatedBlock.hpp	2009-09-21 08:26:49 +0000
@@ -218,6 +218,33 @@ protected:
   void handle_lingering_sections_after_execute(Signal*) const;
   void handle_lingering_sections_after_execute(SectionHandle*) const;
 
+  /**
+   * Send routed signals (ONLY LOCALLY)
+   *
+   * NOTE: Only localhost is allowed!
+   */
+  struct RoutePath
+  {
+    Uint32 ref;          // Block reference of the hop (own node only)
+    JobBufferLevel prio; // Job buffer level used when sending to this hop
+  };
+  void sendRoutedSignal(RoutePath path[],
+                        Uint32 pathcnt,      // #hops
+                        Uint32 dst[],        // Final destination(s)
+                        Uint32 dstcnt,       // #final destination(s)
+                        Uint32 gsn,          // Final GSN
+                        Signal*,
+                        Uint32 len,
+                        JobBufferLevel prio, // Final prio
+                        SectionHandle * handle = 0);
+
+
+  /**
+   * Check that signal sent from remote node
+   *   is guaranteed to be correctly serialized wrt to NODE_FAILREP
+   */
+  bool checkNodeFailSequence(Signal*);
+
   /**********************************************************
    * Fragmented signals
    */
@@ -485,6 +512,8 @@ protected:  
   void execCONTINUE_FRAGMENTED(Signal* signal);
   void execAPI_START_REP(Signal* signal);
   void execNODE_START_REP(Signal* signal);
+
+  void execLOCAL_ROUTE_ORD(Signal*);
 private:
   /**
    * Node state


Attachment: [text/bzr-bundle] bzr/jonas@mysql.com-20090921082649-rgbc1jftgr8r114l.bundle
Thread
bzr commit into mysql-5.1-telco-6.3 branch (jonas:3055) Bug#37688Jonas Oreland21 Sep