MySQL Lists are EOL. Please join:

List: Commits « Previous Message | Next Message »
From: pekka  Date: June 8 2006 2:16pm
Subject: bk commit into 5.0 tree (pekka:1.2168) BUG#18781
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of pekka. When pekka does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2168 06/06/08 16:16:07 pekka@stripped +19 -0
  ndb - bug#18781 lock DICT during node restart

  ndb/src/ndbapi/ndberror.c
    1.44 06/06/08 16:13:09 pekka@stripped +1 -0
    locking of master DICT against schema ops, used by slave DIH under NR

  ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
    1.50 06/06/08 16:13:09 pekka@stripped +128 -5
    locking of master DICT against schema ops, used by slave DIH under NR

  ndb/src/kernel/blocks/dbdih/DbdihInit.cpp
    1.11 06/06/08 16:13:08 pekka@stripped +6 -0
    locking of master DICT against schema ops, used by slave DIH under NR

  ndb/src/kernel/blocks/dbdih/Dbdih.hpp
    1.11 06/06/08 16:13:08 pekka@stripped +28 -0
    locking of master DICT against schema ops, used by slave DIH under NR

  ndb/src/kernel/blocks/dbdict/Dbdict.hpp
    1.17 06/06/08 16:13:08 pekka@stripped +96 -1
    locking of master DICT against schema ops, used by slave DIH under NR

  ndb/src/kernel/blocks/dbdict/Dbdict.cpp
    1.62 06/06/08 16:13:08 pekka@stripped +244 -1
    locking of master DICT against schema ops, used by slave DIH under NR

  ndb/src/kernel/blocks/ERROR_codes.txt
    1.19 06/06/08 16:13:08 pekka@stripped +3 -1
    locking of master DICT against schema ops, used by slave DIH under NR

  ndb/src/common/debugger/signaldata/SignalNames.cpp
    1.7 06/06/08 16:13:08 pekka@stripped +6 -0
    locking of master DICT against schema ops, used by slave DIH under NR

  ndb/include/kernel/signaldata/DropTable.hpp
    1.4 06/06/08 16:13:08 pekka@stripped +1 -0
    locking of master DICT against schema ops, used by slave DIH under NR

  ndb/include/kernel/signaldata/DictLock.hpp
    1.2 06/06/08 16:13:08 pekka@stripped +76 -0
    locking of master DICT against schema ops, used by slave DIH under NR

  ndb/include/kernel/signaldata/CreateTable.hpp
    1.4 06/06/08 16:13:08 pekka@stripped +1 -0
    locking of master DICT against schema ops, used by slave DIH under NR

  ndb/include/kernel/signaldata/AlterTable.hpp
    1.5 06/06/08 16:13:08 pekka@stripped +1 -0
    locking of master DICT against schema ops, used by slave DIH under NR

  ndb/include/kernel/GlobalSignalNumbers.h
    1.11 06/06/08 16:13:08 pekka@stripped +6 -4
    locking of master DICT against schema ops, used by slave DIH under NR

  ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
    1.32 06/06/08 16:12:47 pekka@stripped +2 -2
    spelling

  ndb/src/common/debugger/SignalLoggerManager.cpp
    1.9 06/06/08 16:11:10 pekka@stripped +1 -1
    block no fix

  ndb/src/kernel/vm/pc.hpp
    1.10 06/06/08 16:08:33 pekka@stripped +2 -0
    ERROR_INSERTED_CLEAR(x) test and clear if set

  ndb/src/kernel/vm/DLFifoList.hpp
    1.4 06/06/08 16:07:46 pekka@stripped +14 -0
    add hasPrev

  ndb/test/ndbapi/testDict.cpp
    1.24 06/06/08 16:07:16 pekka@stripped +304 -0
    test NF/NR + dict ops

  ndb/src/kernel/main.cpp
    1.61 06/06/08 16:06:44 pekka@stripped +4 -0
    signal log from start (#if 0-ed)

  ndb/include/kernel/signaldata/DictLock.hpp
    1.1 06/06/08 15:21:00 pekka@stripped +0 -0

  ndb/include/kernel/signaldata/DictLock.hpp
    1.0 06/06/08 15:21:00 pekka@stripped +0 -0
    BitKeeper file /space/pekka/ndb/version/my50-bug18781/ndb/include/kernel/signaldata/DictLock.hpp

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	pekka
# Host:	orca.ndb.mysql.com
# Root:	/space/pekka/ndb/version/my50-bug18781

--- 1.10/ndb/include/kernel/GlobalSignalNumbers.h	2005-09-20 09:34:43 +02:00
+++ 1.11/ndb/include/kernel/GlobalSignalNumbers.h	2006-06-08 16:13:08 +02:00
@@ -507,16 +507,12 @@ extern const GlobalSignalNumber NO_OF_SI
 #define GSN_TEST_ORD                    407
 #define GSN_TESTSIG                     408
 #define GSN_TIME_SIGNAL                 409
-/* 410 unused  */
-/* 411 unused  */
-/* 412 unused */
 #define GSN_TUP_ABORTREQ                414
 #define GSN_TUP_ADD_ATTCONF             415
 #define GSN_TUP_ADD_ATTRREF             416
 #define GSN_TUP_ADD_ATTRREQ             417
 #define GSN_TUP_ATTRINFO                418
 #define GSN_TUP_COMMITREQ               419
-/* 420 unused */
 #define GSN_TUP_LCPCONF                 421
 #define GSN_TUP_LCPREF                  422
 #define GSN_TUP_LCPREQ                  423
@@ -937,5 +933,11 @@ extern const GlobalSignalNumber NO_OF_SI
 
 #define GSN_ACC_LOCKREQ			711
 #define GSN_READ_PSUEDO_REQ             712
+
+/* DICT LOCK signals */
+#define GSN_DICT_LOCK_REQ               410
+#define GSN_DICT_LOCK_CONF              411
+#define GSN_DICT_LOCK_REF               412
+#define GSN_DICT_UNLOCK_ORD             420
 
 #endif

--- 1.4/ndb/include/kernel/signaldata/AlterTable.hpp	2005-08-18 14:02:19 +02:00
+++ 1.5/ndb/include/kernel/signaldata/AlterTable.hpp	2006-06-08 16:13:08 +02:00
@@ -114,6 +114,7 @@ public:
     InvalidTableVersion = 241,
     DropInProgress      = 283,
     Busy = 701,
+    BusyWithNR = 711,
     NotMaster = 702,
     InvalidFormat = 703,
     AttributeNameTooLong = 704,

--- 1.3/ndb/include/kernel/signaldata/CreateTable.hpp	2004-12-22 06:19:17 +01:00
+++ 1.4/ndb/include/kernel/signaldata/CreateTable.hpp	2006-06-08 16:13:08 +02:00
@@ -77,6 +77,7 @@ public:
   enum ErrorCode {
     NoError = 0,
     Busy = 701,
+    BusyWithNR = 711,
     NotMaster = 702,
     InvalidFormat = 703,
     AttributeNameTooLong = 704,

--- 1.3/ndb/include/kernel/signaldata/DropTable.hpp	2005-07-20 13:21:46 +02:00
+++ 1.4/ndb/include/kernel/signaldata/DropTable.hpp	2006-06-08 16:13:08 +02:00
@@ -53,6 +53,7 @@ public:
   
   enum ErrorCode {
     Busy = 701,
+    BusyWithNR = 711,
     NotMaster = 702,
     NoSuchTable         = 709,
     InvalidTableVersion = 241,

--- 1.8/ndb/src/common/debugger/SignalLoggerManager.cpp	2005-05-26 11:35:56 +02:00
+++ 1.9/ndb/src/common/debugger/SignalLoggerManager.cpp	2006-06-08 16:11:10 +02:00
@@ -139,7 +139,7 @@ SignalLoggerManager::log(LogMode logMode
   } else {
     for (int i = 0; i < count; ++i){
       BlockNumber number = getBlockNo(blocks[i]);
-      cnt += log(SLM_ON, number-MIN_BLOCK_NO, logMode);
+      cnt += log(SLM_ON, number, logMode);
     }
   }
   for(int i = 0; i<count; i++){

--- 1.6/ndb/src/common/debugger/signaldata/SignalNames.cpp	2005-01-12 22:09:34 +01:00
+++ 1.7/ndb/src/common/debugger/signaldata/SignalNames.cpp	2006-06-08 16:13:08 +02:00
@@ -647,6 +647,12 @@ const GsnName SignalNames [] = {
   ,{ GSN_TUX_MAINT_REF,  "TUX_MAINT_REF" }
   ,{ GSN_TUX_BOUND_INFO,  "TUX_BOUND_INFO" }
   ,{ GSN_ACC_LOCKREQ,  "ACC_LOCKREQ" }
+
+  /* DICT LOCK */
+  ,{ GSN_DICT_LOCK_REQ,          "DICT_LOCK_REQ" }
+  ,{ GSN_DICT_LOCK_CONF,         "DICT_LOCK_CONF" }
+  ,{ GSN_DICT_LOCK_REF,          "DICT_LOCK_REF" }
+  ,{ GSN_DICT_UNLOCK_ORD,        "DICT_UNLOCK_ORD" }
   
 };
 const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName);

--- 1.18/ndb/src/kernel/blocks/ERROR_codes.txt	2006-06-01 08:28:55 +02:00
+++ 1.19/ndb/src/kernel/blocks/ERROR_codes.txt	2006-06-08 16:13:08 +02:00
@@ -5,7 +5,7 @@ Next DBACC 3002
 Next DBTUP 4013
 Next DBLQH 5043
 Next DBDICT 6007
-Next DBDIH 7174
+Next DBDIH 7175
 Next DBTC 8037
 Next CMVMI 9000
 Next BACKUP 10022
@@ -311,6 +311,8 @@ Test Crashes in handling node restarts
 7132: Crash when receiving START_COPYCONF in starting node
 
 7170: Crash when receiving START_PERMREF (InitialStartRequired)
+
+7174: Send one fake START_PERMREF (ZNODE_ALREADY_STARTING_ERROR)
 
 DICT:
 6000  Crash during NR when receiving DICTSTARTREQ

--- 1.61/ndb/src/kernel/blocks/dbdict/Dbdict.cpp	2006-02-10 10:10:49 +01:00
+++ 1.62/ndb/src/kernel/blocks/dbdict/Dbdict.cpp	2006-06-08 16:13:08 +02:00
@@ -203,6 +203,11 @@ void Dbdict::execCONTINUEB(Signal* signa
     sendGetTabResponse(signal);
     break;
 
+  case ZDICT_LOCK_POLL:
+    jam();
+    checkDictLockQueue(signal);
+    break;
+
   default :
     ndbrequire(false);
     break;
@@ -1208,7 +1213,9 @@ Dbdict::Dbdict(const class Configuration
   c_opCreateTrigger(c_opRecordPool),
   c_opDropTrigger(c_opRecordPool),
   c_opAlterTrigger(c_opRecordPool),
-  c_opRecordSequence(0)
+  c_opRecordSequence(0),
+  c_dictLockQueue(c_dictLockPool),
+  c_dictLockPoll(false)
 {
   BLOCK_CONSTRUCTOR(Dbdict);
   
@@ -1352,6 +1359,9 @@ Dbdict::Dbdict(const class Configuration
   addRecSignal(GSN_DROP_TAB_CONF, &Dbdict::execDROP_TAB_CONF);
 
   addRecSignal(GSN_BACKUP_FRAGMENT_REQ, &Dbdict::execBACKUP_FRAGMENT_REQ);
+
+  addRecSignal(GSN_DICT_LOCK_REQ, &Dbdict::execDICT_LOCK_REQ);
+  addRecSignal(GSN_DICT_UNLOCK_ORD, &Dbdict::execDICT_UNLOCK_ORD);
 }//Dbdict::Dbdict()
 
 Dbdict::~Dbdict() 
@@ -1764,6 +1774,8 @@ void Dbdict::execREAD_CONFIG_REQ(Signal*
   c_opCreateTrigger.setSize(8);
   c_opDropTrigger.setSize(8);
   c_opAlterTrigger.setSize(8);
+
+  c_dictLockPool.setSize(32);
   
   // Initialize schema file copies
   c_schemaFile[0].schemaPage =
@@ -2821,6 +2833,11 @@ void Dbdict::execNODE_FAILREP(Signal* si
     c_blockState = BS_NODE_FAILURE;
     ok = true;
     break;
+  case BS_NODE_RESTART:
+    jam();
+    ok = true;
+    removeStaleDictLocks(signal, theFailedNodes);
+    break;
   }
   ndbrequire(ok);
   
@@ -2911,6 +2928,12 @@ Dbdict::execCREATE_TABLE_REQ(Signal* sig
       break;
     }
     
+    if (c_blockState == BS_NODE_RESTART){
+      jam();
+      parseRecord.errorCode = CreateTableRef::BusyWithNR;
+      break;
+    }
+    
     if (c_blockState != BS_IDLE){
       jam();
       parseRecord.errorCode = CreateTableRef::Busy;
@@ -3060,6 +3083,12 @@ Dbdict::execALTER_TABLE_REQ(Signal* sign
     return;
   }
   
+  if(c_blockState == BS_NODE_RESTART){
+    jam();
+    alterTableRef(signal, req, AlterTableRef::BusyWithNR);
+    return;
+  }
+  
   if(c_blockState != BS_IDLE){
     jam();
     alterTableRef(signal, req, AlterTableRef::Busy);
@@ -5372,6 +5401,12 @@ Dbdict::execDROP_TABLE_REQ(Signal* signa
     return;
   }
 
+  if(c_blockState == BS_NODE_RESTART){
+    jam();
+    dropTableRef(signal, req, DropTableRef::BusyWithNR);
+    return;
+  }
+
   if(c_blockState != BS_IDLE){
     jam();
     dropTableRef(signal, req, DropTableRef::Busy);
@@ -12169,6 +12204,214 @@ Dbdict::getIndexAttrMask(TableRecordPtr 
     itAttr = iaRec->nextAttrInTable;
   }
 }
+
+// DICT lock master
+
+const Dbdict::DictLockType*
+Dbdict::getDictLockType(Uint32 lockType)
+{
+  static DictLockType lt[] = {
+    { DictLockReq::NodeRestartLock, BS_NODE_RESTART, "NodeRestart" }
+  };
+  for (int i = 0; i < sizeof(lt)/sizeof(lt[0]); i++) {
+    if (lt[i].lockType == lockType)
+      return &lt[i];
+  }
+  return NULL;
+}
+
+void
+Dbdict::sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text)
+{
+  infoEvent("DICT: %s %u for %s",
+      text,
+      (unsigned int)refToNode(lockPtr.p->req.userRef), lockPtr.p->lt->text);
+}
+
+void
+Dbdict::execDICT_LOCK_REQ(Signal* signal)
+{
+  jamEntry();
+  const DictLockReq* req = (const DictLockReq*)&signal->theData[0];
+
+  if (getOwnNodeId() != c_masterNodeId) {
+    jam();
+    sendDictLockRef(signal, *req, DictLockRef::NotMaster);
+    return;
+  }
+
+  const DictLockType* lt = getDictLockType(req->lockType);
+  if (lt == NULL) {
+    jam();
+    sendDictLockRef(signal, *req, DictLockRef::InvalidLockType);
+    return;
+  }
+
+  DictLockPtr lockPtr;
+  if (! c_dictLockQueue.seize(lockPtr)) {
+    jam();
+    sendDictLockRef(signal, *req, DictLockRef::TooManyRequests);
+    return;
+  }
+
+  lockPtr.p->req = *req;
+  lockPtr.p->locked = false;
+  lockPtr.p->lt = lt;
+
+  checkDictLockQueue(signal);
+
+  if (! lockPtr.p->locked)
+    sendDictLockInfoEvent(lockPtr, "lock request by node");
+}
+
+void
+Dbdict::checkDictLockQueue(Signal* signal)
+{
+  DictLockPtr lockPtr;
+
+  do {
+    if (! c_dictLockQueue.first(lockPtr)) {
+      jam();
+      setDictLockPoll(signal, false);
+      return;
+    }
+
+    if (lockPtr.p->locked) {
+      jam();
+      ndbrequire(c_blockState == lockPtr.p->lt->blockState);
+      break;
+    }
+
+    if (c_opRecordPool.getNoOfFree() != c_opRecordPool.getSize()) {
+      jam();
+      break;
+    }
+
+    ndbrequire(c_blockState == BS_IDLE);
+    lockPtr.p->locked = true;
+    c_blockState = lockPtr.p->lt->blockState;
+    sendDictLockConf(signal, lockPtr);
+
+    sendDictLockInfoEvent(lockPtr, "locked by node");
+  } while (0);
+
+  // poll while first request is open
+  // this routine is called again when it is removed for any reason
+
+  bool on = ! lockPtr.p->locked;
+  setDictLockPoll(signal, on);
+}
+
+void
+Dbdict::execDICT_UNLOCK_ORD(Signal* signal)
+{
+  jamEntry();
+  const DictUnlockOrd* ord = (const DictUnlockOrd*)&signal->theData[0];
+
+  DictLockPtr lockPtr;
+  c_dictLockQueue.getPtr(lockPtr, ord->lockPtr);
+  ndbrequire(lockPtr.p->lt->lockType == ord->lockType);
+
+  if (lockPtr.p->locked) {
+    jam();
+    ndbrequire(c_blockState == lockPtr.p->lt->blockState);
+    ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize());
+    ndbrequire(! c_dictLockQueue.hasPrev(lockPtr));
+
+    c_blockState = BS_IDLE;
+    sendDictLockInfoEvent(lockPtr, "unlocked by node");
+  } else {
+    sendDictLockInfoEvent(lockPtr, "lock request removed by node");
+  }
+
+  c_dictLockQueue.release(lockPtr);
+
+  checkDictLockQueue(signal);
+}
+
+void
+Dbdict::sendDictLockConf(Signal* signal, DictLockPtr lockPtr)
+{
+  DictLockConf* conf = (DictLockConf*)&signal->theData[0];
+  const DictLockReq& req = lockPtr.p->req;
+
+  conf->userPtr = req.userPtr;
+  conf->lockType = req.lockType;
+  conf->lockPtr = lockPtr.i;
+
+  sendSignal(req.userRef, GSN_DICT_LOCK_CONF, signal,
+      DictLockConf::SignalLength, JBB);
+}
+
+void
+Dbdict::sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode)
+{
+  DictLockRef* ref = (DictLockRef*)&signal->theData[0];
+
+  ref->userPtr = req.userPtr;
+  ref->lockType = req.lockType;
+  ref->errorCode = errorCode;
+
+  sendSignal(req.userRef, GSN_DICT_LOCK_REF, signal,
+      DictLockRef::SignalLength, JBB);
+}
+
+// control polling
+
+void
+Dbdict::setDictLockPoll(Signal* signal, bool on)
+{
+  if (on) {
+    jam();
+    signal->theData[0] = ZDICT_LOCK_POLL;
+    sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 1);
+  }
+
+  if (c_dictLockPoll != on) {
+    jam();
+#ifdef VM_TRACE
+    infoEvent("DICT: lock polling %s", on ? "On" : "Off");
+#endif
+    c_dictLockPoll = on;
+  }
+}
+
+// NF handling
+
+void
+Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes)
+{
+  DictLockPtr loopPtr;
+  c_dictLockQueue.first(loopPtr);
+
+  while (loopPtr.i != RNIL) {
+    jam();
+    DictLockPtr lockPtr = loopPtr;
+    c_dictLockQueue.next(loopPtr);
+
+    Uint32 nodeId = refToNode(lockPtr.p->req.userRef);
+
+    if (NodeBitmask::get(theFailedNodes, nodeId)) {
+      if (lockPtr.p->locked) {
+        jam();
+        ndbrequire(c_blockState == lockPtr.p->lt->blockState);
+        ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize());
+        ndbrequire(! c_dictLockQueue.hasPrev(lockPtr));
+
+        c_blockState = BS_IDLE;
+
+        sendDictLockInfoEvent(lockPtr, "remove lock by failed node");
+      } else {
+        sendDictLockInfoEvent(lockPtr, "remove lock request by failed node");
+      }
+
+      c_dictLockQueue.release(lockPtr);
+    }
+  }
+
+  checkDictLockQueue(signal);
+}
+
 
 /* **************************************************************** */
 /* ---------------------------------------------------------------- */

--- 1.16/ndb/src/kernel/blocks/dbdict/Dbdict.hpp	2005-09-15 15:00:32 +02:00
+++ 1.17/ndb/src/kernel/blocks/dbdict/Dbdict.hpp	2006-06-08 16:13:08 +02:00
@@ -26,6 +26,7 @@
 #include <pc.hpp>
 #include <ArrayList.hpp>
 #include <DLHashTable.hpp>
+#include <DLFifoList.hpp>
 #include <CArray.hpp>
 #include <KeyTable2.hpp>
 #include <SimulatedBlock.hpp>
@@ -50,6 +51,7 @@
 #include <signaldata/CreateTrig.hpp>
 #include <signaldata/DropTrig.hpp>
 #include <signaldata/AlterTrig.hpp>
+#include <signaldata/DictLock.hpp>
 #include "SchemaFile.hpp"
 #include <blocks/mutexes.hpp>
 #include <SafeCounter.hpp>
@@ -63,6 +65,7 @@
 /*--------------------------------------------------------------*/
 #define ZPACK_TABLE_INTO_PAGES 0
 #define ZSEND_GET_TAB_RESPONSE 3
+#define ZDICT_LOCK_POLL 4
 
 
 /*--------------------------------------------------------------*/
@@ -587,6 +590,9 @@ private:
   void execALTER_TAB_CONF(Signal* signal);
   bool check_ndb_versions() const;
 
+  void execDICT_LOCK_REQ(Signal* signal);
+  void execDICT_UNLOCK_ORD(Signal* signal);
+
   /*
    *  2.4 COMMON STORED VARIABLES
    */
@@ -817,12 +823,43 @@ private:
   // State variables
   /* ----------------------------------------------------------------------- */
   
+#ifndef ndb_dbdict_log_block_state
   enum BlockState {
     BS_IDLE = 0,
     BS_CREATE_TAB = 1,
     BS_BUSY = 2,
-    BS_NODE_FAILURE = 3
+    BS_NODE_FAILURE = 3,
+    BS_NODE_RESTART = 4
+  };
+#else // quick hack to log changes
+  enum {
+    BS_IDLE = 0,
+    BS_CREATE_TAB = 1,
+    BS_BUSY = 2,
+    BS_NODE_FAILURE = 3,
+    BS_NODE_RESTART = 4
+  };
+  struct BlockState;
+  friend struct BlockState;
+  struct BlockState {
+    BlockState() :
+      m_value(BS_IDLE) {
+    }
+    BlockState(int value) :
+      m_value(value) {
+    }
+    operator int() const {
+      return m_value;
+    }
+    BlockState& operator=(const BlockState& bs) {
+      Dbdict* dict = (Dbdict*)globalData.getBlock(DBDICT);
+      dict->infoEvent("DICT: bs %d->%d", m_value, bs.m_value);
+      m_value = bs.m_value;
+      return *this;
+    }
+    int m_value;
   };
+#endif
   BlockState c_blockState;
 
   struct PackTable {
@@ -1721,6 +1758,64 @@ private:
 
   // Unique key for operation  XXX move to some system table
   Uint32 c_opRecordSequence;
+
+  /*
+   * Master DICT can be locked in 2 mutually exclusive ways:
+   *
+   * 1) for schema ops, via operation records
+   * 2) against schema ops, via a lock queue
+   *
+   * Current use of 2) is by a starting node, to prevent schema ops
+   * until started.  The ops are refused (BlockState != BS_IDLE),
+   * not queued.
+   *
+   * Master failure is not handled, in node start case the starting
+   * node will crash too anyway.  Use lock table in future..
+   *
+   * The lock queue is "serial" but other behaviour is possible
+   * by checking lock types e.g. to allow parallel node starts.
+   *
+   * Checking release of last op record is not convenient with
+   * current structure (5.0).  Instead we poll via continueB.
+   *
+   * XXX only table ops check BlockState
+   */
+
+  struct DictLockType {
+    DictLockReq::LockType lockType;
+    BlockState blockState;
+    const char* text;
+  };
+
+  struct DictLockRecord {
+    DictLockReq req;
+    const DictLockType* lt;
+    bool locked;
+  union {
+    Uint32 nextPool;
+    Uint32 nextList;
+    };
+    Uint32 prevList;
+  };
+
+  typedef Ptr<DictLockRecord> DictLockPtr;
+  ArrayPool<DictLockRecord> c_dictLockPool;
+  DLFifoList<DictLockRecord> c_dictLockQueue;
+  bool c_dictLockPoll;
+
+  static const DictLockType* getDictLockType(Uint32 lockType);
+  void sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text);
+
+  void checkDictLockQueue(Signal* signal);
+  void sendDictLockConf(Signal* signal, DictLockPtr lockPtr);
+  void sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode);
+
+  // control polling i.e. continueB loop
+  void setDictLockPoll(Signal* signal, bool on);
+
+  // NF handling
+  void removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes);
+
 
   // Statement blocks
 

--- 1.10/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2006-03-22 15:10:34 +01:00
+++ 1.11/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2006-06-08 16:13:08 +02:00
@@ -718,6 +718,9 @@ private:
   void checkPrepDropTabComplete(Signal *, TabRecordPtr tabPtr);
   void checkWaitDropTabFailedLqh(Signal *, Uint32 nodeId, Uint32 tableId);
 
+  void execDICT_LOCK_CONF(Signal* signal);
+  void execDICT_LOCK_REF(Signal* signal);
+
   // Statement blocks
 //------------------------------------
 // Methods that send signals
@@ -935,6 +938,7 @@ private:
   void initialStartCompletedLab(Signal *);
   void allNodesLcpCompletedLab(Signal *);
   void nodeRestartPh2Lab(Signal *);
+  void nodeRestartPh2Lab2(Signal *);
   void initGciFilesLab(Signal *);
   void dictStartConfLab(Signal *);
   void nodeDictStartConfLab(Signal *);
@@ -1594,6 +1598,30 @@ private:
    * Reply from nodeId
    */
   void startInfoReply(Signal *, Uint32 nodeId);
+
+  /*
+   * Lock master DICT.  Only current use is by starting node
+   * during NR.  A pool of slave records is convenient anyway.
+   */
+  struct DictLockSlaveRecord {
+    Uint32 lockPtr;
+    Uint32 lockType;
+    bool locked;
+    Callback callback;
+    Uint32 nextPool;
+  };
+
+  typedef Ptr<DictLockSlaveRecord> DictLockSlavePtr;
+  ArrayPool<DictLockSlaveRecord> c_dictLockSlavePool;
+
+  // slave
+  void sendDictLockReq(Signal* signal, Uint32 lockType, Callback c);
+  void recvDictLockConf(Signal* signal);
+  void sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI);
+
+  // NR
+  Uint32 c_dictLockSlavePtrI_nodeRestart; // userPtr for NR
+  void recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret);
 };
 
 #if (DIH_CDATA_SIZE < _SYSFILE_SIZE32)

--- 1.10/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp	2006-04-06 11:43:28 +02:00
+++ 1.11/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp	2006-06-08 16:13:08 +02:00
@@ -66,6 +66,9 @@ void Dbdih::initData() 
   waitGCPProxyPool.setSize(ZPROXY_FILE_SIZE);
   waitGCPMasterPool.setSize(ZPROXY_MASTER_FILE_SIZE);
 
+  c_dictLockSlavePool.setSize(1); // assert single usage
+  c_dictLockSlavePtrI_nodeRestart = RNIL;
+
   cgcpOrderBlocked = 0;
   c_lcpState.ctcCounter = 0;
   cwaitLcpSr       = false;
@@ -263,6 +266,9 @@ Dbdih::Dbdih(const class Configuration &
 
   addRecSignal(GSN_CREATE_FRAGMENTATION_REQ, 
 	       &Dbdih::execCREATE_FRAGMENTATION_REQ);
+
+  addRecSignal(GSN_DICT_LOCK_CONF, &Dbdih::execDICT_LOCK_CONF);
+  addRecSignal(GSN_DICT_LOCK_REF, &Dbdih::execDICT_LOCK_REF);
 
   apiConnectRecord = 0;  
   connectRecord = 0;  

--- 1.49/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2006-06-01 08:28:55 +02:00
+++ 1.50/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2006-06-08 16:13:09 +02:00
@@ -67,6 +67,7 @@
 #include <signaldata/CreateFragmentation.hpp>
 #include <signaldata/LqhFrag.hpp>
 #include <signaldata/FsOpenReq.hpp>
+#include <signaldata/DictLock.hpp>
 #include <DebuggerNames.hpp>
 
 #include <EventLogger.hpp>
@@ -544,7 +545,7 @@ void Dbdih::execCONTINUEB(Signal* signal
     break;
   case DihContinueB::ZSTART_PERMREQ_AGAIN:
     jam();
-    nodeRestartPh2Lab(signal);
+    nodeRestartPh2Lab2(signal);
     return;
     break;
   case DihContinueB::SwitchReplica:
@@ -1284,6 +1285,7 @@ void Dbdih::execNDB_STTOR(Signal* signal
     case NodeState::ST_INITIAL_NODE_RESTART:
     case NodeState::ST_NODE_RESTART:
       jam();
+
       /***********************************************************************
        * When starting nodes while system is operational we must be controlled
        * by the master since only one node restart is allowed at a time. 
@@ -1294,7 +1296,7 @@ void Dbdih::execNDB_STTOR(Signal* signal
       req->startingRef = reference();
       req->startingVersion = 0; // Obsolete
       sendSignal(cmasterdihref, GSN_START_MEREQ, signal, 
-		 StartMeReq::SignalLength, JBB);
+                 StartMeReq::SignalLength, JBB);
       return;
     }
     ndbrequire(false);
@@ -1354,6 +1356,24 @@ void Dbdih::execNDB_STTOR(Signal* signal
     }
     ndbrequire(false);
     break;
+  case ZNDB_SPH7:
+    jam();
+    switch (typestart) {
+    case NodeState::ST_INITIAL_START:
+    case NodeState::ST_SYSTEM_RESTART:
+      jam();
+      ndbsttorry10Lab(signal, __LINE__);
+      return;
+    case NodeState::ST_NODE_RESTART:
+    case NodeState::ST_INITIAL_NODE_RESTART:
+      jam();
+      sendDictUnlockOrd(signal, c_dictLockSlavePtrI_nodeRestart);
+      c_dictLockSlavePtrI_nodeRestart = RNIL;
+      ndbsttorry10Lab(signal, __LINE__);
+      return;
+    }
+    ndbrequire(false);
+    break;
   default:
     jam();
     ndbsttorry10Lab(signal, __LINE__);
@@ -1564,6 +1584,31 @@ void Dbdih::execREAD_NODESCONF(Signal* s
 /*---------------------------------------------------------------------------*/
 void Dbdih::nodeRestartPh2Lab(Signal* signal) 
 {
+  /*
+   * Lock master DICT to avoid metadata operations during INR/NR.
+   * Done just before START_PERMREQ.
+   *
+   * It would be more elegant to do this just before START_MEREQ.
+   * The problem is, on INR we end up in massive invalidateNodeLCP
+   * which is not fully protected against metadata ops.
+   */
+  ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL);
+
+  Uint32 lockType = DictLockReq::NodeRestartLock;
+  Callback c = { safe_cast(&Dbdih::recvDictLockConf_nodeRestart), 0 };
+  sendDictLockReq(signal, lockType, c);
+}
+
+void Dbdih::recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret)
+{
+  ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL);
+  c_dictLockSlavePtrI_nodeRestart = data;
+
+  nodeRestartPh2Lab2(signal);
+}
+
+void Dbdih::nodeRestartPh2Lab2(Signal* signal)
+{
   /*------------------------------------------------------------------------*/
   // REQUEST FOR PERMISSION FROM MASTER TO START A NODE IN AN ALREADY
   // RUNNING SYSTEM.
@@ -1574,7 +1619,7 @@ void Dbdih::nodeRestartPh2Lab(Signal* si
   req->nodeId    = cownNodeId;
   req->startType = cstarttype;
   sendSignal(cmasterdihref, GSN_START_PERMREQ, signal, 3, JBB);
-}//Dbdih::nodeRestartPh2Lab()
+}
 
 void Dbdih::execSTART_PERMCONF(Signal* signal) 
 {
@@ -1696,12 +1741,12 @@ void Dbdih::execSTART_PERMREQ(Signal* si
   const BlockReference retRef = req->blockRef;
   const Uint32 nodeId   = req->nodeId;
   const Uint32 typeStart = req->startType;
-  
   CRASH_INSERTION(7122);
   ndbrequire(isMaster());
   ndbrequire(refToNode(retRef) == nodeId);
   if ((c_nodeStartMaster.activeState) ||
-      (c_nodeStartMaster.wait != ZFALSE)) {
+      (c_nodeStartMaster.wait != ZFALSE) ||
+      ERROR_INSERTED_CLEAR(7174)) {
     jam();
     signal->theData[0] = nodeId;
     signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR;
@@ -10448,6 +10493,10 @@ void Dbdih::crashSystemAtGcpStop(Signal*
 	     c_copyGCIMaster.m_copyReason,
 	     c_copyGCIMaster.m_waiting);
     break;
+  case GCP_READY: // shut up lint
+  case GCP_PREPARE_SENT:
+  case GCP_COMMIT_SENT:
+    break;
   }
   
   ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d",
@@ -14638,4 +14687,78 @@ Dbdih::NodeRecord::NodeRecord(){
   useInTransactions = false;
   copyCompleted = false;
   allowNodeStart = true;
+}
+
+// DICT lock slave
+
+void
+Dbdih::sendDictLockReq(Signal* signal, Uint32 lockType, Callback c)
+{
+  DictLockReq* req = (DictLockReq*)&signal->theData[0];
+  DictLockSlavePtr lockPtr;
+
+  c_dictLockSlavePool.seize(lockPtr);
+  ndbrequire(lockPtr.i != RNIL);
+
+  req->userPtr = lockPtr.i;
+  req->lockType = lockType;
+  req->userRef = reference();
+
+  lockPtr.p->lockPtr = RNIL;
+  lockPtr.p->lockType = lockType;
+  lockPtr.p->locked = false;
+  lockPtr.p->callback = c;
+
+  BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId);
+  sendSignal(dictMasterRef, GSN_DICT_LOCK_REQ, signal,
+      DictLockReq::SignalLength, JBB);
+}
+
+void
+Dbdih::execDICT_LOCK_CONF(Signal* signal)
+{
+  jamEntry();
+  recvDictLockConf(signal);
+}
+
+void
+Dbdih::execDICT_LOCK_REF(Signal* signal)
+{
+  jamEntry();
+  ndbrequire(false);
+}
+
+void
+Dbdih::recvDictLockConf(Signal* signal)
+{
+  const DictLockConf* conf = (const DictLockConf*)&signal->theData[0];
+
+  DictLockSlavePtr lockPtr;
+  c_dictLockSlavePool.getPtr(lockPtr, conf->userPtr);
+  
+  lockPtr.p->lockPtr = conf->lockPtr;
+  ndbrequire(lockPtr.p->lockType == conf->lockType);
+  ndbrequire(lockPtr.p->locked == false);
+  lockPtr.p->locked = true;
+
+  lockPtr.p->callback.m_callbackData = lockPtr.i;
+  execute(signal, lockPtr.p->callback, 0);
+}
+
+void
+Dbdih::sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI)
+{
+  DictUnlockOrd* ord = (DictUnlockOrd*)&signal->theData[0];
+
+  DictLockSlavePtr lockPtr;
+  c_dictLockSlavePool.getPtr(lockPtr, lockSlavePtrI);
+
+  ord->lockPtr = lockPtr.p->lockPtr;
+  ord->lockType = lockPtr.p->lockType;
+
+  c_dictLockSlavePool.release(lockPtr);
+
+  BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId);
+  sendSignal(dictMasterRef, GSN_DICT_UNLOCK_ORD, signal,
+      DictUnlockOrd::SignalLength, JBB);
 }

--- 1.31/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2006-04-07 10:29:45 +02:00
+++ 1.32/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2006-06-08 16:12:47 +02:00
@@ -2477,7 +2477,7 @@ void Qmgr::execDISCONNECT_REP(Signal* si
   {
     jam();
     CRASH_INSERTION(932);
-    BaseString::snprintf(buf, 100, "Node %u disconected", nodeId);    
+    BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);    
     progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
     ndbrequire(false);
   }
@@ -2500,7 +2500,7 @@ void Qmgr::execDISCONNECT_REP(Signal* si
     ndbrequire(false);
   case ZAPI_INACTIVE:
   {
-    BaseString::snprintf(buf, 100, "Node %u disconected", nodeId);    
+    BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);    
     progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
     ndbrequire(false);
   }

--- 1.60/ndb/src/kernel/main.cpp	2006-02-07 18:09:44 +01:00
+++ 1.61/ndb/src/kernel/main.cpp	2006-06-08 16:06:44 +02:00
@@ -420,6 +420,10 @@ int main(int argc, char** argv)
   FILE * signalLog = fopen(buf, "a");
   globalSignalLoggers.setOwnNodeId(globalData.ownId);
   globalSignalLoggers.setOutputStream(signalLog);
+#if 0 // to log startup
+  globalSignalLoggers.log(SignalLoggerManager::LogInOut, "BLOCK=DBDICT,DBDIH");
+  globalData.testOn = 1;
+#endif
 #endif
   
   catchsigs(false);

--- 1.3/ndb/src/kernel/vm/DLFifoList.hpp	2004-06-23 23:33:11 +02:00
+++ 1.4/ndb/src/kernel/vm/DLFifoList.hpp	2006-06-08 16:07:46 +02:00
@@ -115,6 +115,13 @@ public:
    */
   bool hasNext(const Ptr<T> &) const;
 
+  /**
+   * Check if prev exists i.e. this is not first
+   *
+   * NOTE ptr must be both p & i
+   */
+  bool hasPrev(const Ptr<T> &) const;
+
   Uint32 noOfElements() const {
     Uint32 c = 0;
     Uint32 i = head.firstItem;
@@ -355,6 +362,13 @@ inline
 bool
 DLFifoList<T>::hasNext(const Ptr<T> & p) const {
   return p.p->nextList != RNIL;
+}
+
+template <class T>
+inline
+bool
+DLFifoList<T>::hasPrev(const Ptr<T> & p) const {
+  return p.p->prevList != RNIL;
 }
 
 #endif

--- 1.9/ndb/src/kernel/vm/pc.hpp	2005-10-03 20:04:27 +02:00
+++ 1.10/ndb/src/kernel/vm/pc.hpp	2006-06-08 16:08:33 +02:00
@@ -125,11 +125,13 @@
 #ifdef ERROR_INSERT
 #define ERROR_INSERT_VARIABLE UintR cerrorInsert
 #define ERROR_INSERTED(x) (cerrorInsert == (x))
+#define ERROR_INSERTED_CLEAR(x) (cerrorInsert == (x) ? (cerrorInsert = 0, true) : false)
 #define SET_ERROR_INSERT_VALUE(x) cerrorInsert = x
 #define CLEAR_ERROR_INSERT_VALUE cerrorInsert = 0
 #else
 #define ERROR_INSERT_VARIABLE typedef void * cerrorInsert // Will generate compiler error if used
 #define ERROR_INSERTED(x) false
+#define ERROR_INSERTED_CLEAR(x) false
 #define SET_ERROR_INSERT_VALUE(x)
 #define CLEAR_ERROR_INSERT_VALUE
 #endif

--- 1.23/ndb/test/ndbapi/testDict.cpp	2006-05-17 12:30:57 +02:00
+++ 1.24/ndb/test/ndbapi/testDict.cpp	2006-06-08 16:07:16 +02:00
@@ -1551,6 +1551,282 @@ end:
   return result;
 }
 
+// NFNR
+
+// The restarter step controls the dict ops step with commands: 1-run 2-pause 3-stop.
+// The two steps synchronize by polling the DictOps_CMD / DictOps_ACK test properties.
+
+static bool
+send_dict_ops_cmd(NDBT_Context* ctx, Uint32 cmd)
+{ // Posts cmd in "DictOps_CMD", then polls until "DictOps_ACK" equals it; false if the test stops first.
+  ctx->setProperty("DictOps_CMD", cmd);
+  while (1) {
+    if (ctx->isTestStopped())
+      return false; // test stopped before the command was acknowledged
+    if (ctx->getProperty("DictOps_ACK") == cmd)
+      break; // the other step echoed the command
+    NdbSleep_MilliSleep(100); // poll interval in ms
+  }
+  return true;
+}
+
+static bool
+recv_dict_ops_run(NDBT_Context* ctx)
+{ // Acks every command it reads; returns true on run (1), false on stop (3) or when the test is stopped.
+  while (1) {
+    if (ctx->isTestStopped())
+      return false;
+    Uint32 cmd = ctx->getProperty("DictOps_CMD");
+    ctx->setProperty("DictOps_ACK", cmd); // echo the current command back, whatever it is
+    if (cmd == 1)
+      break; // run
+    if (cmd == 3)
+      return false; // stop
+    NdbSleep_MilliSleep(100); // any other value (e.g. 2-pause): keep polling
+  }
+  return true;
+}
+
+int
+runRestarts(NDBT_Context* ctx, NDBT_Step* step)
+{ // Restarter step: each loop restarts one or two non-master data nodes and steers the dict ops step.
+  static int err_master[] = {   // non-crashing
+    0,          // 0 = no error insert (see the err != 0 checks below)
+    7174        // send one fake START_PERMREF
+  };
+  static int err_node[] = {
+    0,          // 0 = no error insert
+    7121,       // crash on START_PERMCONF
+    7130        // crash on START_MECONF
+  };
+  const uint err_master_cnt = sizeof(err_master)/sizeof(err_master[0]);
+  const uint err_node_cnt = sizeof(err_node)/sizeof(err_node[0]);
+
+  myRandom48Init(NdbTick_CurrentMillisecond());
+  NdbRestarter restarter;
+  int result = NDBT_OK;
+  const int loops = ctx->getNumLoops();
+
+  for (int l = 0; l < loops && result == NDBT_OK; l++) {
+    g_info << "1: === loop " << l << " ===" << endl;
+
+    // assuming 2-way replicated
+
+    int numnodes = restarter.getNumDbNodes();
+    CHECK(numnodes >= 1);
+    if (numnodes == 1)
+      break; // single data node: leave the restart loop without restarting anything
+
+    int masterNodeId = restarter.getMasterNodeId();
+    CHECK(masterNodeId != -1);
+
+    // for more complex cases need more restarter support methods
+
+    int nodeIdList[2] = { 0, 0 }; // at most two nodes are restarted per loop
+    int nodeIdCnt = 0;
+
+    if (numnodes >= 2) {
+      int rand = myRandom48(numnodes);
+      int nodeId = restarter.getRandomNotMasterNodeId(rand);
+      CHECK(nodeId != -1);
+      nodeIdList[nodeIdCnt++] = nodeId;
+    }
+
+    if (numnodes >= 4) {
+      int rand = myRandom48(numnodes);
+      int nodeId = restarter.getRandomNodeOtherNodeGroup(nodeIdList[0], rand);
+      CHECK(nodeId != -1);
+      if (nodeId != masterNodeId) // the second node is only added when it is not the master
+        nodeIdList[nodeIdCnt++] = nodeId;
+    }
+
+    g_info << "1: master=" << masterNodeId << " nodes=" << nodeIdList[0] << "," << nodeIdList[1] << endl;
+
+    const unsigned maxsleep = 2000; //ms
+
+    bool NF_ops = ctx->getProperty("Restart_NF_ops"); // run (true) or pause (false) dict ops during node failure
+    uint NF_type = ctx->getProperty("Restart_NF_type"); // selects the abort flag below
+    bool NR_ops = ctx->getProperty("Restart_NR_ops"); // run (true) or pause (false) dict ops during node restart
+    bool NR_error = ctx->getProperty("Restart_NR_error"); // enables the error inserts after startNodes
+
+    g_info << "1: " << (NF_ops ? "run" : "pause") << " dict ops" << endl;
+    if (! send_dict_ops_cmd(ctx, NF_ops ? 1 : 2))
+      break;
+    NdbSleep_MilliSleep(myRandom48(maxsleep));
+
+    {
+      int i = 0;
+      while (i < nodeIdCnt) {
+        int nodeId = nodeIdList[i++];
+
+        bool nostart = true;
+        bool abort = NF_type == 0 ? myRandom48(2) : (NF_type == 2); // 0: random, 2: abort, any other value: no abort
+        bool initial = myRandom48(2);
+
+        char flags[40]; // longest text built below is "flags: nostart,abort,initial" (28 chars)
+        strcpy(flags, "flags: nostart");
+        if (abort)
+          strcat(flags, ",abort");
+        if (initial)
+          strcat(flags, ",initial");
+
+        g_info << "1: restart " << nodeId << " " << flags << endl;
+        CHECK(restarter.restartOneDbNode(nodeId, initial, nostart, abort) == 0);
+      }
+    }
+
+    g_info << "1: wait for nostart" << endl;
+    CHECK(restarter.waitNodesNoStart(nodeIdList, nodeIdCnt) == 0);
+    NdbSleep_MilliSleep(myRandom48(maxsleep));
+
+    g_info << "1: " << (NR_ops ? "run" : "pause") << " dict ops" << endl;
+    if (! send_dict_ops_cmd(ctx, NR_ops ? 1 : 2))
+      break;
+    NdbSleep_MilliSleep(myRandom48(maxsleep));
+
+    g_info << "1: start nodes" << endl;
+    CHECK(restarter.startNodes(nodeIdList, nodeIdCnt) == 0);
+
+    if (NR_error) {
+      {
+        int rand = myRandom48(err_master_cnt);
+        int err = err_master[rand];
+        if (err != 0) {
+          g_info << "1: insert master error " << err << endl;
+          CHECK(restarter.insertErrorInNode(masterNodeId, err) == 0);
+        }
+      }
+
+      // limitation: cannot have 2 node restarts and crash_insert
+      // one node may die for real (NF during startup)
+
+      int i = 0;
+      while (i < nodeIdCnt && nodeIdCnt == 1) { // body runs only when exactly one node is restarting
+        int nodeId = nodeIdList[i++];
+
+        int rand = myRandom48(err_node_cnt);
+        int err = err_node[rand];
+        if (err != 0) {
+          g_info << "1: insert node " << nodeId << " error " << err << endl;
+          CHECK(restarter.insertErrorInNode(nodeId, err) == 0);
+        }
+      }
+    }
+    NdbSleep_MilliSleep(myRandom48(maxsleep));
+
+    g_info << "1: wait cluster started" << endl;
+    CHECK(restarter.waitClusterStarted() == 0);
+    NdbSleep_MilliSleep(myRandom48(maxsleep));
+
+    g_info << "1: restart done" << endl;
+  }
+
+  g_info << "1: stop dict ops" << endl;
+  send_dict_ops_cmd(ctx, 3); // 3 = stop; the return value is deliberately not checked here
+
+  return result;
+}
+
+int
+runDictOps(NDBT_Context* ctx, NDBT_Step* step)
+{ // Dict ops step: per loop create, verify, load, drop and verify-drop the test table until told to stop.
+  myRandom48Init(NdbTick_CurrentMillisecond());
+  int result = NDBT_OK;
+
+  for (int l = 0; result == NDBT_OK; l++) {
+    if (! recv_dict_ops_run(ctx)) // blocks while paused; false means stop or test stopped
+      break;
+    
+    g_info << "2: === loop " << l << " ===" << endl;
+
+    Ndb* pNdb = GETNDB(step);
+    NdbDictionary::Dictionary* pDic = pNdb->getDictionary();
+    const NdbDictionary::Table* pTab = ctx->getTab();
+    const char* tabName = pTab->getName();
+
+    const unsigned long maxsleep = 100; //ms
+
+    g_info << "2: create table" << endl;
+    {
+      uint count = 0;
+    try_create:
+      count++;
+      if (pDic->createTable(*pTab) != 0) {
+        const NdbError err = pDic->getNdbError();
+        if (count == 1) // log only the first failed attempt
+          g_err << "2: " << tabName << ": create failed: " << err << endl;
+        if (err.code != 711) { // 711 (busy with node restart) is retried; any other error fails the step
+          result = NDBT_FAILED;
+          break; // leaves the enclosing for loop
+        }
+        NdbSleep_MilliSleep(myRandom48(maxsleep));
+        goto try_create;
+      }
+    }
+    NdbSleep_MilliSleep(myRandom48(maxsleep));
+
+    g_info << "2: verify create" << endl;
+    const NdbDictionary::Table* pTab2 = pDic->getTable(tabName);
+    if (pTab2 == NULL) {
+      const NdbError err = pDic->getNdbError();
+      g_err << "2: " << tabName << ": verify create: " << err << endl;
+      result = NDBT_FAILED;
+      break;
+    }
+    NdbSleep_MilliSleep(myRandom48(maxsleep));
+
+    // replace by the retrieved table
+    pTab = pTab2;
+
+    int records = myRandom48(ctx->getNumRecords());
+    g_info << "2: load " << records << " records" << endl;
+    HugoTransactions hugoTrans(*pTab);
+    if (hugoTrans.loadTable(pNdb, records) != 0) {
+      // XXX get error code from hugo
+      g_err << "2: " << tabName << ": load failed" << endl;
+      result = NDBT_FAILED;
+      break;
+    }
+    NdbSleep_MilliSleep(myRandom48(maxsleep));
+
+    g_info << "2: drop" << endl;
+    {
+      uint count = 0;
+    try_drop:
+      count++;
+      if (pDic->dropTable(tabName) != 0) {
+        const NdbError err = pDic->getNdbError();
+        if (count == 1) // log only the first failed attempt
+          g_err << "2: " << tabName << ": drop failed: " << err << endl;
+        if (err.code != 711) { // same retry rule as for create
+          result = NDBT_FAILED;
+          break; // leaves the enclosing for loop
+        }
+        NdbSleep_MilliSleep(myRandom48(maxsleep));
+        goto try_drop;
+      }
+    }
+    NdbSleep_MilliSleep(myRandom48(maxsleep));
+
+    g_info << "2: verify drop" << endl;
+    const NdbDictionary::Table* pTab3 = pDic->getTable(tabName);
+    if (pTab3 != NULL) {
+      g_err << "2: " << tabName << ": verify drop: table exists" << endl;
+      result = NDBT_FAILED;
+      break;
+    }
+    if (pDic->getNdbError().code != 709) { // NOTE(review): presumably 709 is "no such table" - confirm in ndberror.c
+      const NdbError err = pDic->getNdbError();
+      g_err << "2: " << tabName << ": verify drop: " << err << endl;
+      result = NDBT_FAILED;
+      break;
+    }
+    NdbSleep_MilliSleep(myRandom48(maxsleep));
+  }
+
+  return result;
+}
+
 NDBT_TESTSUITE(testDict);
 TESTCASE("CreateAndDrop", 
 	 "Try to create and drop the table loop number of times\n"){
@@ -1655,6 +1931,34 @@ TESTCASE("FailAddFragment",
          "Fail add fragment or attribute in ACC or TUP or TUX\n"){
   INITIALIZER(runFailAddFragment);
 }
+TESTCASE("Restart_NF1",
+         "DICT ops during node graceful shutdown (not master)"){
+  TC_PROPERTY("Restart_NF_ops", 1);
+  TC_PROPERTY("Restart_NF_type", 1);
+  STEP(runRestarts);
+  STEP(runDictOps);
+}
+TESTCASE("Restart_NF2",
+         "DICT ops during node shutdown abort (not master)"){
+  TC_PROPERTY("Restart_NF_ops", 1);
+  TC_PROPERTY("Restart_NF_type", 2);
+  STEP(runRestarts);
+  STEP(runDictOps);
+}
+TESTCASE("Restart_NR1",
+         "DICT ops during node startup (not master)"){
+  TC_PROPERTY("Restart_NR_ops", 1);
+  STEP(runRestarts);
+  STEP(runDictOps);
+}
+TESTCASE("Restart_NR2",
+         "DICT ops during node startup with crash inserts (not master)"){
+  TC_PROPERTY("Restart_NR_ops", 1);
+  TC_PROPERTY("Restart_NR_error", 1);
+  STEP(runRestarts);
+  STEP(runDictOps);
+}
+
 NDBT_TESTSUITE_END(testDict);
 
 int main(int argc, const char** argv){
--- New file ---
+++ ndb/include/kernel/signaldata/DictLock.hpp	06/06/08 15:21:00
/* Copyright (C) 2003 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#ifndef DICT_LOCK_HPP
#define DICT_LOCK_HPP

#include "SignalData.hpp"

// see comments in Dbdict.hpp

/**
 * Signal data for a DICT lock request.
 *
 * SignalLength is 3, matching the three Uint32 words declared below.
 * The fields are private; only the friend blocks Dbdict and Dbdih
 * can access them.  See comments in Dbdict.hpp for the protocol.
 */
class DictLockReq {
  friend class Dbdict;
  friend class Dbdih;
public:
  STATIC_CONST( SignalLength = 3 );
  enum LockType {
    NoLock = 0,
    NodeRestartLock = 1  // NOTE(review): presumably the lock taken during node restart - confirm in Dbdict.hpp
  };
private:
  Uint32 userPtr;   // NOTE(review): presumably the requester's own reference, echoed in the reply - confirm
  Uint32 lockType;  // NOTE(review): presumably a LockType value - confirm
  Uint32 userRef;   // NOTE(review): presumably the requester's block reference - confirm
};

/**
 * Signal data for the positive reply to a DICT lock request.
 *
 * SignalLength is 3, matching the three Uint32 words declared below.
 * The fields are private; only the friend blocks Dbdict and Dbdih
 * can access them.
 */
class DictLockConf {
  friend class Dbdict;
  friend class Dbdih;
public:
  STATIC_CONST( SignalLength = 3 );
private:
  Uint32 userPtr;   // NOTE(review): presumably copied from the request - confirm
  Uint32 lockType;  // NOTE(review): presumably a DictLockReq::LockType value - confirm
  Uint32 lockPtr;   // NOTE(review): presumably the handle to return in DictUnlockOrd::lockPtr - confirm
};

/**
 * Signal data for the negative reply to a DICT lock request.
 *
 * SignalLength is 3, matching the three Uint32 words declared below.
 * The fields are private; only the friend blocks Dbdict and Dbdih
 * can access them.
 */
class DictLockRef {
  friend class Dbdict;
  friend class Dbdih;
public:
  STATIC_CONST( SignalLength = 3 );
  // Reasons for refusal; values start at 1, so 0 is not a defined error code.
  enum ErrorCode {
    NotMaster = 1,
    InvalidLockType = 2,
    TooManyRequests = 3
  };
private:
  Uint32 userPtr;    // NOTE(review): presumably copied from the request - confirm
  Uint32 lockType;   // NOTE(review): presumably a DictLockReq::LockType value - confirm
  Uint32 errorCode;  // NOTE(review): presumably an ErrorCode value - confirm
};

/**
 * Signal data for the order that releases a DICT lock.
 *
 * SignalLength is 2, matching the two Uint32 words declared below.
 * The fields are private; only the friend blocks Dbdict and Dbdih
 * can access them.
 */
class DictUnlockOrd {
  friend class Dbdict;
  friend class Dbdih;
public:
  STATIC_CONST( SignalLength = 2 );
private:
  Uint32 lockPtr;   // NOTE(review): presumably the value received in DictLockConf::lockPtr - confirm
  Uint32 lockType;  // NOTE(review): presumably a DictLockReq::LockType value - confirm
};

#endif


--- 1.43/ndb/src/ndbapi/ndberror.c	2005-12-14 10:57:06 +01:00
+++ 1.44/ndb/src/ndbapi/ndberror.c	2006-06-08 16:13:09 +02:00
@@ -325,6 +325,7 @@ ErrorBundle ErrorCodes[] = {
    * SchemaError
    */
   { 701,  SE, "System busy with other schema operation" },
+  { 711,  SE, "System busy with node restart, schema operations not allowed" },
   { 703,  SE, "Invalid table format" },
   { 704,  SE, "Attribute name too long" },
   { 705,  SE, "Table name too long" },
Thread
bk commit into 5.0 tree (pekka:1.2168) BUG#18781pekka8 Jun