List:Commits« Previous MessageNext Message »
From:tomas Date:March 27 2006 4:53pm
Subject:bk commit into 5.1 tree (tomas:1.2232) BUG#18491
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of tomas. When tomas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2232 06/03/27 18:53:23 tomas@stripped +5 -0
  Bug #18491 cluster: node restart with pending dropped events causes failed restart

  storage/ndb/test/run-test/daily-devel-tests.txt
    1.27 06/03/27 18:53:16 tomas@stripped +5 -0
    Bug #18491 cluster: node restart with pending dropped events causes failed restart

  storage/ndb/test/ndbapi/test_event.cpp
    1.19 06/03/27 18:53:16 tomas@stripped +145 -1
    Bug #18491 cluster: node restart with pending dropped events causes failed restart

  storage/ndb/src/ndbapi/ndberror.c
    1.57 06/03/27 18:53:16 tomas@stripped +2 -0
    Bug #18491 cluster: node restart with pending dropped events causes failed restart

  storage/ndb/src/kernel/blocks/suma/Suma.cpp
    1.41 06/03/27 18:53:16 tomas@stripped +118 -13
    Bug #18491 cluster: node restart with pending dropped events causes failed restart
    - recreate subscriptions in correct state (dropped if dropped...)
    - handle recreated subscribers with dropped table (tab_inforef)

  storage/ndb/include/kernel/signaldata/SumaImpl.hpp
    1.8 06/03/27 18:53:16 tomas@stripped +2 -0
    Bug #18491 cluster: node restart with pending dropped events causes failed restart

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	tomas
# Host:	poseidon.ndb.mysql.com
# Root:	/home/tomas/mysql-5.1-new

--- 1.26/storage/ndb/test/run-test/daily-devel-tests.txt	2006-01-11 09:26:04 +01:00
+++ 1.27/storage/ndb/test/run-test/daily-devel-tests.txt	2006-03-27 18:53:16 +02:00
@@ -218,6 +218,11 @@
 cmd: test_event
 args: -n Multi
 
+#
+max-time: 2500
+cmd: test_event
+args: -n CreateDropNR -l 2
+
 max-time: 600
 cmd: testBasic
 args: -n PkRead T1

--- 1.7/storage/ndb/include/kernel/signaldata/SumaImpl.hpp	2006-01-31 23:20:32 +01:00
+++ 1.8/storage/ndb/include/kernel/signaldata/SumaImpl.hpp	2006-03-27 18:53:16 +02:00
@@ -30,6 +30,7 @@
   
   friend bool printSUB_CREATE_REQ(FILE *, const Uint32 *, Uint32, Uint16);
   STATIC_CONST( SignalLength = 6 );
+  STATIC_CONST( SignalLength2 = 7 );
   
   enum SubscriptionType {
     SingleTableScan  = 1,  // 
@@ -50,6 +51,7 @@
   Uint32 subscriptionKey;
   Uint32 subscriptionType;
   Uint32 tableId;
+  Uint32 state;
 };
 
 struct SubCreateRef {

--- 1.40/storage/ndb/src/kernel/blocks/suma/Suma.cpp	2006-03-22 18:10:50 +01:00
+++ 1.41/storage/ndb/src/kernel/blocks/suma/Suma.cpp	2006-03-27 18:53:16 +02:00
@@ -1040,6 +1040,15 @@
   const Uint32 reportSubscribe = (flags & SubCreateReq::ReportSubscribe) ?
     Subscription::REPORT_SUBSCRIBE : 0;
   const Uint32 tableId = req.tableId;
+  Subscription::State state = (Subscription::State) req.state;
+  if (signal->getLength() != SubCreateReq::SignalLength2)
+  {
+    /*
+      api or restarted by older version
+      if restarted by old version, do the best we can
+    */
+    state = Subscription::DEFINED;
+  }
 
   Subscription key;
   key.m_subscriptionId  = subId;
@@ -1067,6 +1076,17 @@
       addTableId(req.tableId, subPtr, 0);
     }
   } else {
+    if (c_startup.m_restart_server_node_id && 
+        refToNode(subRef) != c_startup.m_restart_server_node_id)
+    {
+      /**
+       * only allow "restart_server" Suma's to come through 
+       * for restart purposes
+       */
+      jam();
+      sendSubStartRef(signal, 1405);
+      DBUG_VOID_RETURN;
+    }
     // Check that id/key is unique
     if(c_subscriptions.find(subPtr, key)) {
       jam();
@@ -1090,7 +1110,7 @@
     subPtr.p->m_options          = reportSubscribe | reportAll;
     subPtr.p->m_tableId          = tableId;
     subPtr.p->m_table_ptrI       = RNIL;
-    subPtr.p->m_state            = Subscription::DEFINED;
+    subPtr.p->m_state            = state;
     subPtr.p->n_subscribers      = 0;
     subPtr.p->m_current_sync_ptrI = RNIL;
 
@@ -1446,7 +1466,9 @@
   jam();
   DBUG_ENTER("Suma::completeOneSubscriber");
 
-  if (tabPtr.p->m_error)
+  if (tabPtr.p->m_error &&
+      (c_startup.m_restart_server_node_id == 0 ||
+       tabPtr.p->m_state != Table::DROPPED))
   {
     sendSubStartRef(signal,subbPtr,tabPtr.p->m_error,
 		    SubscriptionData::TableData);
@@ -1531,8 +1553,44 @@
 void
 Suma::execGET_TABINFOREF(Signal* signal){
   jamEntry();
-  /* ToDo handle this */
-  ndbrequire(false);
+  GetTabInfoRef* ref = (GetTabInfoRef*)signal->getDataPtr();
+  Uint32 tableId = ref->tableId;
+  Uint32 senderData = ref->senderData;
+  GetTabInfoRef::ErrorCode errorCode =
+    (GetTabInfoRef::ErrorCode) ref->errorCode;
+  int do_resend_request = 0;
+  TablePtr tabPtr;
+  c_tablePool.getPtr(tabPtr, senderData);
+  switch (errorCode)
+  {
+  case GetTabInfoRef::TableNotDefined:
+    // wrong state
+    break;
+  case GetTabInfoRef::InvalidTableId:
+    // no such table
+    break;
+  case GetTabInfoRef::Busy:
+    do_resend_request = 1;
+    break;
+  case GetTabInfoRef::TableNameTooLong:
+    ndbrequire(false);
+  }
+  if (do_resend_request)
+  {
+    GetTabInfoReq * req = (GetTabInfoReq *)signal->getDataPtrSend();
+    req->senderRef = reference();
+    req->senderData = senderData;
+    req->requestType = 
+      GetTabInfoReq::RequestById | GetTabInfoReq::LongSignalConf;
+    req->tableId = tableId;
+    sendSignalWithDelay(DBDICT_REF, GSN_GET_TABINFOREQ, signal,
+                        30, GetTabInfoReq::SignalLength);
+    return;
+  }
+  tabPtr.p->m_state = Table::DROPPED;
+  tabPtr.p->m_error = errorCode;
+  completeAllSubscribers(signal, tabPtr);
+  completeInitTable(signal, tabPtr);
 }
 
 void
@@ -2153,7 +2211,7 @@
   Subscription key; 
   key.m_subscriptionId        = req->subscriptionId;
   key.m_subscriptionKey       = req->subscriptionKey;
-  
+
   if (c_startup.m_restart_server_node_id && 
       refToNode(senderRef) != c_startup.m_restart_server_node_id)
   {
@@ -2173,13 +2231,24 @@
     DBUG_VOID_RETURN;
   }
   
-  if (subPtr.p->m_state != Subscription::DEFINED) {
+  if (subPtr.p->m_state == Subscription::LOCKED) {
     jam();
     DBUG_PRINT("info",("Locked"));
     sendSubStartRef(signal, 1411);
     DBUG_VOID_RETURN;
   }
 
+  if (subPtr.p->m_state == Subscription::DROPPED &&
+      c_startup.m_restart_server_node_id == 0) {
+    jam();
+    DBUG_PRINT("info",("Dropped"));
+    sendSubStartRef(signal, 1418);
+    DBUG_VOID_RETURN;
+  }
+
+  ndbrequire(subPtr.p->m_state == Subscription::DEFINED ||
+             c_startup.m_restart_server_node_id);
+
   SubscriberPtr subbPtr;
   if(!c_subscriberPool.seize(subbPtr)){
     jam();
@@ -2193,7 +2262,8 @@
   c_subscriber_nodes.set(refToNode(subscriberRef));
 
   // setup subscription record
-  subPtr.p->m_state = Subscription::LOCKED;
+  if (subPtr.p->m_state == Subscription::DEFINED)
+    subPtr.p->m_state = Subscription::LOCKED;
   // store these here for later use
   subPtr.p->m_senderRef  = senderRef;
   subPtr.p->m_senderData = senderData;
@@ -2241,8 +2311,14 @@
 
   SubscriptionPtr subPtr;
   c_subscriptions.getPtr(subPtr, subbPtr.p->m_subPtrI);
-  ndbrequire( subPtr.p->m_state == Subscription::LOCKED )
-  subPtr.p->m_state = Subscription::DEFINED;
+  ndbrequire(subPtr.p->m_state == Subscription::LOCKED ||
+             (subPtr.p->m_state == Subscription::DROPPED &&
+              c_startup.m_restart_server_node_id));
+  if (subPtr.p->m_state == Subscription::LOCKED)
+  {
+    jam();
+    subPtr.p->m_state = Subscription::DEFINED;
+  }
   subPtr.p->n_subscribers++;
 
   DBUG_PRINT("info",("subscriber: %u[%u,%u] subscription: %u[%u,%u] "
@@ -2293,8 +2369,14 @@
   SubscriptionPtr subPtr;
   c_subscriptions.getPtr(subPtr, subbPtr.p->m_subPtrI);
 
-  ndbrequire( subPtr.p->m_state == Subscription::LOCKED );
-  subPtr.p->m_state = Subscription::DEFINED;
+  ndbrequire(subPtr.p->m_state == Subscription::LOCKED ||
+             (subPtr.p->m_state == Subscription::DROPPED &&
+              c_startup.m_restart_server_node_id));
+  if (subPtr.p->m_state == Subscription::LOCKED)
+  {
+    jam();
+    subPtr.p->m_state = Subscription::DEFINED;
+  }
 
   SubStartRef * ref= (SubStartRef *)signal->getDataPtrSend();
   ref->senderRef        = reference();
@@ -2360,6 +2442,18 @@
     DBUG_VOID_RETURN;
   }
   
+  if (c_startup.m_restart_server_node_id && 
+      refToNode(senderRef) != c_startup.m_restart_server_node_id)
+  {
+    /**
+     * only allow "restart_server" Suma's to come through 
+     * for restart purposes
+     */
+    jam();
+    sendSubStopRef(signal, 1405);
+    DBUG_VOID_RETURN;
+  }
+
   if (subPtr.p->m_state == Subscription::LOCKED) {
     jam();
     DBUG_PRINT("error", ("locked"));
@@ -3668,7 +3762,17 @@
     sendSubRemoveRef(signal, req, 1413);
     DBUG_VOID_RETURN;
   }
-  
+  if (subPtr.p->m_state == Subscription::DROPPED)
+  {
+    /**
+     * already dropped
+     */
+    jam();
+    sendSubRemoveRef(signal, req, 1419);
+    DBUG_VOID_RETURN;
+  }
+
+  ndbrequire(subPtr.p->m_state == Subscription::DEFINED);
   DBUG_PRINT("info",("n_subscribers: %u", subPtr.p->n_subscribers));
 
   if (subPtr.p->n_subscribers == 0)
@@ -3981,8 +4085,9 @@
   case SubCreateReq::TableEvent:
     jam();
     req->tableId = subPtr.p->m_tableId;
+    req->state = subPtr.p->m_state;
     suma.sendSignal(sumaRef, GSN_SUB_CREATE_REQ, signal,
-		    SubCreateReq::SignalLength, JBB);
+		    SubCreateReq::SignalLength2, JBB);
     DBUG_VOID_RETURN;
   case SubCreateReq::SingleTableScan:
     jam();

--- 1.18/storage/ndb/test/ndbapi/test_event.cpp	2006-02-06 21:30:34 +01:00
+++ 1.19/storage/ndb/test/ndbapi/test_event.cpp	2006-03-27 18:53:16 +02:00
@@ -101,6 +101,40 @@
   return NDBT_OK;
 }
  
+static
+NdbEventOperation *createEventOperation(Ndb *ndb,
+                                        const NdbDictionary::Table &tab,
+                                        int do_report_error = 1)
+{
+  char buf[1024];
+  sprintf(buf, "%s_EVENT", tab.getName());
+  NdbEventOperation *pOp= ndb->createEventOperation(buf);
+  if (pOp == 0)
+  {
+    if (do_report_error)
+      g_err << "createEventOperation: "
+            << ndb->getNdbError().code << " "
+            << ndb->getNdbError().message << endl;
+    return 0;
+  }
+  int n_columns= tab.getNoOfColumns();
+  for (int j = 0; j < n_columns; j++)
+  {
+    pOp->getValue(tab.getColumn(j)->getName());
+    pOp->getPreValue(tab.getColumn(j)->getName());
+  }
+  if ( pOp->execute() )
+  {
+    if (do_report_error)
+      g_err << "pOp->execute(): "
+            << pOp->getNdbError().code << " "
+            << pOp->getNdbError().message << endl;
+    ndb->dropEventOperation(pOp);
+    return 0;
+  }
+  return pOp;
+}
+
 static int runCreateEvent(NDBT_Context* ctx, NDBT_Step* step)
 {
   if (createEvent(GETNDB(step),* ctx->getTab()) != 0){
@@ -870,7 +904,7 @@
 
 static int dropAllEvents(NDBT_Context* ctx, NDBT_Step* step)
 {
-  DBUG_ENTER("createAllEvents");
+  DBUG_ENTER("dropAllEvents");
   Ndb * ndb= GETNDB(step);
   int i;
 
@@ -1212,6 +1246,18 @@
   DBUG_RETURN(NDBT_OK);
 }
 
+static int createAllEventOperations(NDBT_Context* ctx, NDBT_Step* step)
+{
+  DBUG_ENTER("createAllEventOperations");
+  Ndb * ndb= GETNDB(step);
+  int r= createEventOperations(ndb);
+  if (r != NDBT_OK)
+  {
+    DBUG_RETURN(NDBT_FAILED);
+  }
+  DBUG_RETURN(NDBT_OK);
+}
+
 static int dropEventOperations(Ndb * ndb)
 {
   DBUG_ENTER("dropEventOperations");
@@ -1228,6 +1274,18 @@
   DBUG_RETURN(NDBT_OK);
 }
 
+static int dropAllEventOperations(NDBT_Context* ctx, NDBT_Step* step)
+{
+  DBUG_ENTER("dropAllEventOperations");
+  Ndb * ndb= GETNDB(step);
+  int r= dropEventOperations(ndb);
+  if (r != NDBT_OK)
+  {
+    DBUG_RETURN(NDBT_FAILED);
+  }
+  DBUG_RETURN(NDBT_OK);
+}
+
 static int runMulti(NDBT_Context* ctx, NDBT_Step* step)
 {
   DBUG_ENTER("runMulti");
@@ -1409,6 +1467,87 @@
   DBUG_RETURN(NDBT_OK);
 }
 
+static int restartAllNodes()
+{
+  NdbRestarter restarter;
+  int id = 0;
+  do {
+    int nodeId = restarter.getDbNodeId(id++);
+    ndbout << "Restart node " << nodeId << endl; 
+    if(restarter.restartOneDbNode(nodeId, false, false, true) != 0){
+      g_err << "Failed to restartNextDbNode" << endl;
+      break;
+    }    
+    if(restarter.waitClusterStarted(60) != 0){
+      g_err << "Cluster failed to start" << endl;
+      break;
+    }
+    id = id % restarter.getNumDbNodes();
+  } while (id);
+  return id != 0;
+}
+
+static int runCreateDropNR(NDBT_Context* ctx, NDBT_Step* step)
+{
+  DBUG_ENTER("runCreateDropNR");
+  Ndb * ndb= GETNDB(step);
+  int result = NDBT_OK;
+  NdbRestarter restarter;
+  int loops = ctx->getNumLoops();
+
+  if (restarter.getNumDbNodes() < 2)
+  {
+    ctx->stopTest();
+    return NDBT_OK;
+  }
+  do
+  {
+    result = NDBT_FAILED;
+    const NdbDictionary::Table* pTab = ctx->getTab();
+    if (createEvent(ndb, *pTab))
+    {
+      g_err << "createEvent failed" << endl;
+      break;
+    }
+    NdbEventOperation *pOp= createEventOperation(ndb, *pTab);
+    if (pOp == 0)
+    {
+      g_err << "Failed to createEventOperation" << endl;
+      break;
+    }
+    if (dropEvent(ndb, *pTab))
+    {
+      g_err << "Failed to dropEvent()" << endl;
+      break;
+    }
+    ndbout << "Restarting with dropped events with subscribers" << endl;
+    if (restartAllNodes())
+      break;
+    if (ndb->getDictionary()->dropTable(pTab->getName()) != 0){
+      g_err << "Failed to drop " << pTab->getName() <<" in db" << endl;
+      break;
+    }
+    ndbout << "Restarting with dropped events and dropped "
+           << "table with subscribers" << endl;
+    if (restartAllNodes())
+      break;
+    if (ndb->dropEventOperation(pOp))
+    {
+      g_err << "Failed dropEventOperation" << endl;
+      break;
+    }
+    NdbDictionary::Table tmp(*pTab);
+    tmp.setNodeGroupIds(0, 0);
+    if (ndb->getDictionary()->createTable(tmp) != 0){
+      g_err << "createTable failed: "
+            << ndb->getDictionary()->getNdbError() << endl;
+      break;
+    }
+    result = NDBT_OK;
+  } while (--loops);
+
+  DBUG_RETURN(result);
+}
 
 NDBT_TESTSUITE(test_event);
 TESTCASE("BasicEventOperation", 
@@ -1491,6 +1630,11 @@
   STEP(runMulti_NR);
   FINALIZER(dropAllShadows);
   FINALIZER(dropAllEvents);
+}
+TESTCASE("CreateDropNR", 
+	 "Verify that we can Create and Drop in any order"
+	 "NOTE! No errors are allowed!" ){
+  FINALIZER(runCreateDropNR);
 }
 NDBT_TESTSUITE_END(test_event);
 

--- 1.56/storage/ndb/src/ndbapi/ndberror.c	2006-03-11 07:02:03 +01:00
+++ 1.57/storage/ndb/src/ndbapi/ndberror.c	2006-03-27 18:53:16 +02:00
@@ -475,6 +475,8 @@
   { 1415, DMEC, SE, "Subscription not unique in subscriber manager" },
   { 1416, DMEC, IS, "Can't accept more subscriptions, out of space in pool" },
   { 1417, DMEC, SE, "Table in suscription not defined, probably dropped" },
+  { 1418, DMEC, SE, "Subscription dropped, no new subscribers allowed" },
+  { 1419, DMEC, SE, "Subscription already dropped" },
 
   { 4004, DMEC, AE, "Attribute name not found in the Table" },
   
Thread
bk commit into 5.1 tree (tomas:1.2232) BUG#18491tomas27 Mar