List: Commits « Previous Message | Next Message »
From: Jonas Oreland  Date: November 7 2008 2:50pm
Subject: bzr commit into mysql-5.1 branch (jonas:3057) Bug#18621
View as plain text  
#At file:///home/jonas/src/telco-6.4/

 3057 Jonas Oreland	2008-11-07
      ndb - bug#18621 - fix >2 replicas wrt suma/ndbeventoperation
modified:
  storage/ndb/include/kernel/signaldata/DictLock.hpp
  storage/ndb/include/ndb_version.h.in
  storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp
  storage/ndb/src/kernel/blocks/suma/Suma.cpp
  storage/ndb/src/kernel/blocks/suma/Suma.hpp

=== modified file 'storage/ndb/include/kernel/signaldata/DictLock.hpp'
--- a/storage/ndb/include/kernel/signaldata/DictLock.hpp	2008-02-20 14:45:18 +0000
+++ b/storage/ndb/include/kernel/signaldata/DictLock.hpp	2008-11-07 14:50:21 +0000
@@ -37,6 +37,7 @@ public:
     ,DropFileLock    = 10
     ,DropFilegroupLock = 11
     ,SumaStartMe = 12
+    ,SumaHandOver = 13
   };
 private:
   Uint32 userPtr;
@@ -47,6 +48,7 @@ private:
 class DictLockConf {
   friend class Dbdict;
   friend class Dbdih;
+  friend class Suma;
 public:
   STATIC_CONST( SignalLength = 3 );
 private:

=== modified file 'storage/ndb/include/ndb_version.h.in'
--- a/storage/ndb/include/ndb_version.h.in	2008-09-17 13:23:21 +0000
+++ b/storage/ndb/include/ndb_version.h.in	2008-11-07 14:50:21 +0000
@@ -189,7 +189,7 @@ ndb_pnr(Uint32 version)
 static
 inline
 int
-ndbd_suma_dictlock(Uint32 x)
+ndbd_suma_dictlock_startme(Uint32 x)
 {
   const Uint32 major = (x >> 16) & 0xFF;
   const Uint32 minor = (x >>  8) & 0xFF;
@@ -272,4 +272,14 @@ ndb_scan_distributionkey(Uint32 version)
 #define NDBD_FILTER_INSTANCE_63 NDB_MAKE_VERSION(6,3,16)
 #define NDBD_COPY_GCI_RESTART_NR NDB_MAKE_VERSION(6,3,18)
 
+#define NDBD_SUMA_DICTLOCK_HANDOVER NDB_MAKE_VERSION(6,4,0)
+
+static
+inline
+int
+ndbd_suma_dictlock_handover(Uint32 x) // version check: does version x support the SumaHandOver dict lock?
+{
+  return (x >= NDBD_SUMA_DICTLOCK_HANDOVER); // nonzero iff x is at least NDBD_SUMA_DICTLOCK_HANDOVER
+}
+
 #endif

=== modified file 'storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp	2008-11-06 16:52:59 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp	2008-11-07 14:50:21 +0000
@@ -17035,7 +17035,8 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal
   const DictLockType* lt = getDictLockType(req.lockType);
 
   Uint32 err;
-  if (req.lockType == DictLockReq::SumaStartMe)
+  if (req.lockType == DictLockReq::SumaStartMe ||
+      req.lockType == DictLockReq::SumaHandOver)
   {
     jam();
     
@@ -17047,6 +17048,14 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal
       goto ref;
     }
     
+    if (req.lockType == DictLockReq::SumaHandOver &&
+        !c_sub_startstop_lock.isclear())
+    {
+      g_eventLogger->info("refing dict lock to %u", refToNode(req.userRef));
+      err = DictLockRef::TooManyRequests;
+      goto ref;
+    }
+
     c_sub_startstop_lock.set(refToNode(req.userRef));
     
     g_eventLogger->info("granting dict lock to %u", refToNode(req.userRef));
@@ -17158,7 +17167,8 @@ Dbdict::execDICT_UNLOCK_ORD(Signal* sign
     req.userRef = signal->getSendersBlockRef();
   }
 
-  if (ord->lockType ==  DictLockReq::SumaStartMe)
+  if (ord->lockType == DictLockReq::SumaStartMe ||
+      ord->lockType == DictLockReq::SumaHandOver)
   {
     jam();
     g_eventLogger->info("clearing dict lock for %u", refToNode(ord->senderRef));

=== modified file 'storage/ndb/src/kernel/blocks/suma/Suma.cpp'
--- a/storage/ndb/src/kernel/blocks/suma/Suma.cpp	2008-11-06 16:52:59 +0000
+++ b/storage/ndb/src/kernel/blocks/suma/Suma.cpp	2008-11-07 14:50:21 +0000
@@ -85,6 +85,8 @@ extern EventLogger * g_eventLogger;
 #define DBUG_VOID_RETURN { ndbout_c("%s:%d <", __FILE__, __LINE__); return; }
 #endif
 
+#define DBG_3R 0
+
 /**
  * @todo:
  * SUMA crashes if an index is created at the same time as
@@ -292,7 +294,7 @@ Suma::execSTTOR(Signal* signal) {
       }
       
       ndbassert(tmp.get(getOwnNodeId()));
-      m_gcp_complete_rep_count = tmp.count();// I contribute 1 gcp complete rep
+      m_gcp_complete_rep_count = m_active_buckets.count();
     }
     else
       m_gcp_complete_rep_count = 0; // I contribute 1 gcp complete rep
@@ -342,34 +344,62 @@ Suma::execSTTOR(Signal* signal) {
 #include <ndb_version.h>
 
 void
-Suma::send_dict_lock_req(Signal* signal)
+Suma::send_dict_lock_req(Signal* signal, Uint32 state) // request dict lock of type `state` (a DictLockReq lock type) from DICT on the master node
 {
-  if (ndbd_suma_dictlock(getNodeInfo(c_masterNodeId).m_version))
+  if (state == DictLockReq::SumaStartMe && // master's version lacks the SumaStartMe lock
+      !ndbd_suma_dictlock_startme(getNodeInfo(c_masterNodeId).m_version))
+  {
+    jam();
+    goto notsupported;
+  }
+  else if (state == DictLockReq::SumaHandOver && // master's version lacks the SumaHandOver lock
+           !ndbd_suma_dictlock_handover(getNodeInfo(c_masterNodeId).m_version))
+  {
+    jam();
+    goto notsupported;
+  }
+
   {
     jam();
     DictLockReq* req = (DictLockReq*)signal->getDataPtrSend();
-    req->lockType = DictLockReq::SumaStartMe;
-    req->userPtr = 0;
+    req->lockType = state;
+    req->userPtr = state; // execDICT_LOCK_CONF/REF read userPtr back as the lock type (presumably echoed by DICT - confirm)
     req->userRef = reference();
     sendSignal(calcDictBlockRef(c_masterNodeId),
                GSN_DICT_LOCK_REQ, signal, DictLockReq::SignalLength, JBB);
   }
-  else
-  {
-    jam();
-    c_startup.m_restart_server_node_id = 0;
-    send_start_me_req(signal);
-  }
+  return;
+
+notsupported: // no lock request is sent: build a CONF locally and continue as if the lock was granted
+  DictLockConf* conf = (DictLockConf*)signal->getDataPtrSend();
+  conf->userPtr = state; // only field execDICT_LOCK_CONF reads
+  execDICT_LOCK_CONF(signal); // direct call, no signal is sent
 }
 
 void
 Suma::execDICT_LOCK_CONF(Signal* signal)
 {
   jamEntry();
-  c_startup.m_restart_server_node_id = 0;
 
-  CRASH_INSERTION(13039);
-  send_start_me_req(signal);
+  DictLockConf* conf = (DictLockConf*)signal->getDataPtr(); // also reached via a direct call from send_dict_lock_req when the master lacks lock support
+  Uint32 state = conf->userPtr; // lock type that send_dict_lock_req stored in userPtr
+
+  switch(state){ // continue the operation the lock was requested for
+  case DictLockReq::SumaStartMe:
+    jam();
+    c_startup.m_restart_server_node_id = 0;
+    CRASH_INSERTION(13039);
+    send_start_me_req(signal);
+    return;
+  case DictLockReq::SumaHandOver:
+    jam();
+    send_handover_req(signal);
+    return;
+  default:
+    jam();
+    jamLine(state); // record the unexpected value before failing
+    ndbrequire(false); // userPtr did not carry a lock type handled here
+  }
 }
 
 void
@@ -378,10 +408,38 @@ Suma::execDICT_LOCK_REF(Signal* signal)
   jamEntry();
 
   DictLockRef* ref = (DictLockRef*)signal->getDataPtr();
+  Uint32 state = ref->userPtr;
 
   ndbrequire(ref->errorCode == DictLockRef::TooManyRequests);
   signal->theData[0] = SumaContinueB::RETRY_DICT_LOCK;
-  sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 300, 1);
+  signal->theData[1] = state;
+  sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 300, 2);
+}
+
+void
+Suma::send_dict_unlock_ord(Signal* signal, Uint32 state) // send DICT_UNLOCK_ORD for lock type `state` to DICT on the master node
+{
+  if (state == DictLockReq::SumaStartMe && // same version checks as send_dict_lock_req
+      !ndbd_suma_dictlock_startme(getNodeInfo(c_masterNodeId).m_version))
+  {
+    jam();
+    return; // send_dict_lock_req sends no lock request in this case, so nothing to release
+  }
+  else if (state == DictLockReq::SumaHandOver &&
+           !ndbd_suma_dictlock_handover(getNodeInfo(c_masterNodeId).m_version))
+  {
+    jam();
+    return; // send_dict_lock_req sends no lock request in this case, so nothing to release
+  }
+
+  jam();
+  DictUnlockOrd* ord = (DictUnlockOrd*)signal->getDataPtrSend();
+  ord->lockPtr = 0;
+  ord->lockType = state; // Dbdict::execDICT_UNLOCK_ORD branches on lockType for SumaStartMe/SumaHandOver
+  ord->senderData = state;
+  ord->senderRef = reference();
+  sendSignal(calcDictBlockRef(c_masterNodeId),
+             GSN_DICT_UNLOCK_ORD, signal, DictUnlockOrd::SignalLength, JBB);
 }
 
 void
@@ -441,18 +499,7 @@ Suma::execSUMA_START_ME_CONF(Signal* sig
   infoEvent("Suma: node %d has completed restoring me", 
 	    c_startup.m_restart_server_node_id);
   sendSTTORRY(signal);  
-
-  if (ndbd_suma_dictlock(getNodeInfo(c_masterNodeId).m_version))
-  {
-    jam();
-    DictUnlockOrd* ord = (DictUnlockOrd*)signal->getDataPtrSend();
-    ord->lockPtr = 0;
-    ord->lockType = DictLockReq::SumaStartMe;
-    ord->senderData = 0;
-    ord->senderRef = reference();
-    sendSignal(calcDictBlockRef(c_masterNodeId),
-               GSN_DICT_UNLOCK_ORD, signal, DictUnlockOrd::SignalLength, JBB);
-  }
+  send_dict_unlock_ord(signal, DictLockReq::SumaStartMe);
   c_startup.m_restart_server_node_id= 0;
 }
 
@@ -518,6 +565,15 @@ Suma::execREAD_NODESCONF(Signal* signal)
     tmp.assign(NdbNodeBitmask::Size, conf->startedNodes);
     ndbrequire(tmp.isclear()); // No nodes can be started during SR
   }
+
+  if (DBG_3R)
+  {
+    for (Uint32 i = 0; i<MAX_NDB_NODES; i++)
+    {
+      if (c_alive_nodes.get(i))
+        ndbout_c("%u c_alive_nodes.set(%u)", __LINE__, i);
+    }
+  }
   
   c_masterNodeId = conf->masterNodeId;
   
@@ -542,6 +598,29 @@ Suma::getNodeGroupMembers(Signal* signal
   DBUG_VOID_RETURN;
 }
 
+static
+bool
+valid_seq(Uint32 n, Uint32 r, Uint16 dst[]) // read n as r base-r digits; true iff all digits are distinct
+{
+  Uint16 tmp[MAX_REPLICAS]; // digits of n, least significant first; r must not exceed MAX_REPLICAS
+  for (Uint32 i = 0; i<r; i++)
+  {
+    tmp[i] = n % r; // next base-r digit
+    for (Uint32 j = 0; j<i; j++)
+      if (tmp[j] == tmp[i])
+        return false; // repeated digit: not a permutation; dst is left untouched
+    n /= r;
+  }
+
+  /**
+   * reverse order for backward compatibility (with 2 replica)
+   */
+  for (Uint32 i = 0; i<r; i++)
+    dst[i] = tmp[r-i-1]; // dst gets the digits most significant first
+
+  return true;
+}
+
 void
 Suma::fix_nodegroup()
 {
@@ -563,15 +642,40 @@ Suma::fix_nodegroup()
     for(i = 1; i <= replicas; i++)
       buckets *= i;
 
-    for(i = 0; i<buckets; i++)
+    Uint32 tot = 0;
+    switch(replicas){ // tot = replicas^replicas, the number of candidate sequences scanned below
+    case 1:
+      tot = 1;
+      break;
+    case 2:
+      tot = 4; // 2^2
+      break;
+    case 3:
+      tot = 27; // 3^3
+      break;
+    case 4:
+      tot = 256; // 4^4
+      break;
+    default: ndbrequire(false); // unsupported replica count (was unreachable: no default label)
+    }
+    Uint32 cnt = 0;
+    for (i = 0; i<tot; i++)
     {
-      Bucket* ptr= c_buckets+i;
-      for(Uint32 j= 0; j< replicas; j++)
+      Bucket* ptr= c_buckets + cnt;
+      if (valid_seq(i, replicas, ptr->m_nodes))
       {
-        ptr->m_nodes[j] = c_nodesInGroup[(i + j) % replicas];
+        jam();
+        if (DBG_3R) printf("bucket %u : ", cnt);
+        for (Uint32 j = 0; j<replicas; j++)
+        {
+          ptr->m_nodes[j] = c_nodesInGroup[ptr->m_nodes[j]];
+          if (DBG_3R) printf("%u ", ptr->m_nodes[j]);
+        }
+        if (DBG_3R) printf("\n");
+        cnt++;
       }
     }
-
+    ndbrequire(cnt == buckets);
     c_no_of_buckets= buckets;
   }
   else
@@ -610,7 +714,7 @@ Suma::execCHECKNODEGROUPSCONF(Signal *si
   {
     jam();
     
-    send_dict_lock_req(signal);
+    send_dict_lock_req(signal, DictLockReq::SumaStartMe);
 
     return;
   }
@@ -648,7 +752,7 @@ Suma::check_start_handover(Signal* signa
     if (c_no_of_buckets)
     {
       jam();
-      send_handover_req(signal);
+      send_dict_lock_req(signal, DictLockReq::SumaHandOver);
     }
     else
     {
@@ -746,7 +850,7 @@ Suma::execCONTINUEB(Signal* signal){
     return;
   case SumaContinueB::RETRY_DICT_LOCK:
     jam();
-    send_dict_lock_req(signal);
+    send_dict_lock_req(signal, signal->theData[1]);
     return;
   }
 }
@@ -1118,7 +1222,27 @@ Suma::execINCL_NODEREQ(Signal* signal){
   const Uint32 nodeId  = signal->theData[1];
 
   ndbrequire(!c_alive_nodes.get(nodeId));
-  c_alive_nodes.set(nodeId);
+  if (c_nodes_in_nodegroup_mask.get(nodeId))
+  {
+    /**
+     *
+     * XXX TODO: This should be removed
+     *           But, other nodes are (incorrectly) reported as started
+     *                even if they're not "started", but only INCL_NODEREQ'ed
+     */
+    c_alive_nodes.set(nodeId);
+
+    /**
+     *
+     * Nodes in nodegroup will be "alive" when
+     *   sending SUMA_HANDOVER_REQ
+     */
+  }
+  else
+  {
+    jam();
+    c_alive_nodes.set(nodeId);
+  }
   
   signal->theData[0] = nodeId;
   signal->theData[1] = reference();
@@ -3666,20 +3790,16 @@ Suma::get_responsible_node(Uint32 bucket
     node= ptr->m_nodes[i];
     if(c_alive_nodes.get(node))
     {
-      break;
+#ifdef NODEFAIL_DEBUG2
+      theCounts[node]++;
+      ndbout_c("Suma:responsible n=%u, D=%u, id = %u, count=%u",
+               n,D, id, theCounts[node]);
+#endif
+      return node;
     }
   }
   
-  
-#ifdef NODEFAIL_DEBUG2
-  if(node != 0)
-  {
-    theCounts[node]++;
-    ndbout_c("Suma:responsible n=%u, D=%u, id = %u, count=%u",
-	     n,D, id, theCounts[node]);
-  }
-#endif
-  return node;
+  return 0;
 }
 
 Uint32 
@@ -4032,7 +4152,7 @@ found:
    */
   if(!m_switchover_buckets.isclear())
   {
-    NdbNodeBitmask takeover_nodes;
+    bool unlock = false;
     NdbNodeBitmask handover_nodes;
     Uint32 i = m_switchover_buckets.find(0);
     for(; i != Bucket_mask::NotFound; i = m_switchover_buckets.find(i + 1))
@@ -4057,7 +4177,8 @@ found:
 	  m_active_buckets.set(i);
 	  c_buckets[i].m_state &= ~(Uint32)Bucket::BUCKET_STARTING;
 	  ndbout_c("starting");
-	  m_gcp_complete_rep_count = 1;
+	  m_gcp_complete_rep_count++;
+          unlock = true;
 	}
 	else if(state & Bucket::BUCKET_TAKEOVER)
 	{
@@ -4081,8 +4202,8 @@ found:
 	  bucket->m_buffer_head.m_page_pos = Buffer_page::DATA_WORDS + 1;
 
 	  m_active_buckets.set(i);
+          m_gcp_complete_rep_count++;
 	  c_buckets[i].m_state &= ~(Uint32)Bucket::BUCKET_TAKEOVER;
-	  takeover_nodes.set(c_buckets[i].m_switchover_node);
 	}
 	else if (state & Bucket::BUCKET_HANDOVER)
 	{
@@ -4092,6 +4213,7 @@ found:
           jam();
 	  c_buckets[i].m_state &= ~(Uint32)Bucket::BUCKET_HANDOVER;
 	  handover_nodes.set(c_buckets[i].m_switchover_node);
+          m_gcp_complete_rep_count--;
 	  ndbout_c("handover");
 	}
         else if (state & Bucket::BUCKET_CREATED)
@@ -4122,10 +4244,6 @@ found:
         }
       }
     }
-    ndbassert(handover_nodes.count() == 0 || 
-	      m_gcp_complete_rep_count > handover_nodes.count());
-    m_gcp_complete_rep_count -= handover_nodes.count();
-    m_gcp_complete_rep_count += takeover_nodes.count();
 
     if(getNodeState().startLevel == NodeState::SL_STARTING && 
        m_switchover_buckets.isclear() && 
@@ -4134,6 +4252,12 @@ found:
       jam();
       sendSTTORRY(signal);
     }
+
+    if (unlock)
+    {
+      jam();
+      send_dict_unlock_ord(signal, DictLockReq::SumaHandOver);
+    }
   }
 
   if(ERROR_INSERTED(13010))
@@ -4842,7 +4966,7 @@ Suma::sendSubCreateReq(Signal* signal, P
   {
     jam();
     c_restart.m_waiting_on_self = 0;
-    if (!ndbd_suma_dictlock(getNodeInfo(refToNode(c_restart.m_ref)).m_version))
+    if (!ndbd_suma_dictlock_startme(getNodeInfo(refToNode(c_restart.m_ref)).m_version))
     {
       jam();
       /**
@@ -5044,7 +5168,9 @@ Suma::execSUMA_HANDOVER_REQ(Signal* sign
   // mark all active buckets really belonging to restarting SUMA
 
   c_alive_nodes.set(nodeId);
-  
+  if (DBG_3R)
+    ndbout_c("%u c_alive_nodes.set(%u)", __LINE__, nodeId);
+
   Bucket_mask tmp;
   for( Uint32 i = 0; i < c_no_of_buckets; i++) 
   {
@@ -5100,10 +5226,20 @@ Suma::execSUMA_HANDOVER_CONF(Signal* sig
   ndbout_c("Suma::execSUMA_HANDOVER_CONF, gci = %u", gci);
 #endif
 
+  char buf[255];
+  tmp.getText(buf);
+  infoEvent("Suma: handover from node %d gci: %d buckets: %s (%d)",
+	    nodeId, gci, buf, c_no_of_buckets);
+  g_eventLogger->info("Suma: handover from node %d gci: %d buckets: %s (%d)",
+                      nodeId, gci, buf, c_no_of_buckets);
+
   for( Uint32 i = 0; i < c_no_of_buckets; i++) 
   {
     if (tmp.get(i))
     {
+      if (DBG_3R)
+        ndbout_c("%u : %u %u", i, get_responsible_node(i), getOwnNodeId());
+
       ndbrequire(get_responsible_node(i) == getOwnNodeId());
       // We should run this bucket, but _nodeId_ is
       c_buckets[i].m_switchover_gci = (Uint64(gci) << 32) - 1;
@@ -5111,10 +5247,6 @@ Suma::execSUMA_HANDOVER_CONF(Signal* sig
     }
   }
   
-  char buf[255];
-  tmp.getText(buf);
-  infoEvent("Suma: handover from node %d gci: %d buckets: %s (%d)",
-	    nodeId, gci, buf, c_no_of_buckets);
   m_switchover_buckets.bitOR(tmp);
   c_startup.m_handover_nodes.clear(nodeId);
   DBUG_VOID_RETURN;

=== modified file 'storage/ndb/src/kernel/blocks/suma/Suma.hpp'
--- a/storage/ndb/src/kernel/blocks/suma/Suma.hpp	2008-11-06 16:52:59 +0000
+++ b/storage/ndb/src/kernel/blocks/suma/Suma.hpp	2008-11-07 14:50:21 +0000
@@ -529,7 +529,8 @@ private:
   Uint32 c_nodesInGroup[MAX_REPLICAS];
   NdbNodeBitmask c_nodes_in_nodegroup_mask;  // NodeId's of nodes in nodegroup
 
-  void send_dict_lock_req(Signal* signal);
+  void send_dict_lock_req(Signal* signal, Uint32 state);
+  void send_dict_unlock_ord(Signal* signal, Uint32 state);
   void send_start_me_req(Signal* signal);
   void check_start_handover(Signal* signal);
   void send_handover_req(Signal* signal);

Thread
bzr commit into mysql-5.1 branch (jonas:3057) Bug#18621 - Jonas Oreland, 7 Nov