From: Date: December 8 2005 3:28pm Subject: bk commit into 4.1 tree (jonas:1.2456) BUG#15587 List-Archive: http://lists.mysql.com/internals/33170 X-Bug: 15587 Message-Id: <20051208142819.6713A21C85B@perch.ndb.mysql.com> Below is the list of changes that have just been committed into a local 4.1 repository of jonas. When jonas does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet 1.2456 05/12/08 15:28:15 jonas@stripped +7 -0 bug#15587 - ndb Fix error in NF during NR ndb/test/run-test/daily-basic-tests.txt 1.22 05/12/08 15:28:13 jonas@stripped +4 -0 Add testcase for bug#15587 ndb/test/ndbapi/testNodeRestart.cpp 1.11 05/12/08 15:28:13 jonas@stripped +45 -0 Add testcase for bug#15587 ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 1.57 05/12/08 15:28:13 jonas@stripped +7 -2 Add dump for ERROR 5002 with specified table ndb/src/kernel/blocks/dblqh/Dblqh.hpp 1.30 05/12/08 15:28:13 jonas@stripped +1 -0 Add dump for ERROR 5002 with specified table ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 1.26 05/12/08 15:28:13 jonas@stripped +12 -2 Run updateNodeInfo if failed node is not in list of storedReplicas ndb/src/kernel/blocks/ERROR_codes.txt 1.10 05/12/08 15:28:13 jonas@stripped +3 -0 Add dump for ERROR 5002 with specified table ndb/include/kernel/signaldata/DumpStateOrd.hpp 1.5 05/12/08 15:28:13 jonas@stripped +2 -0 Add dump for ERROR 5002 with specified table # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. 
# User: jonas # Host: perch.ndb.mysql.com # Root: /home/jonas/src/mysql-4.1 --- 1.21/ndb/test/run-test/daily-basic-tests.txt 2005-07-20 12:39:42 +02:00 +++ 1.22/ndb/test/run-test/daily-basic-tests.txt 2005-12-08 15:28:13 +01:00 @@ -434,6 +434,10 @@ cmd: testScan args: -l 100 -n Scan-bug8262 T7 +max-time: 500 +cmd: testNodeRestart +args: -n Bug15587 T1 + # OLD FLEX max-time: 500 cmd: flexBench --- 1.4/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2005-10-21 06:41:46 +02:00 +++ 1.5/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2005-12-08 15:28:13 +01:00 @@ -78,6 +78,8 @@ LqhDumpAllScanRec = 2301, LqhDumpAllActiveScanRec = 2302, LqhDumpLcpState = 2303, + LqhErrorInsert5042 = 2315, + AccDumpOneScanRec = 2400, AccDumpAllScanRec = 2401, AccDumpAllActiveScanRec = 2402, --- 1.9/ndb/src/kernel/blocks/ERROR_codes.txt 2005-09-15 14:47:14 +02:00 +++ 1.10/ndb/src/kernel/blocks/ERROR_codes.txt 2005-12-08 15:28:13 +01:00 @@ -155,6 +155,9 @@ 5006: Insert node failure handling when receiving ABORTREQ. +5042: +As 5002, but with specified table (see DumpStateOrd) + These error code can be combined with error codes for testing time-out handling in DBTC to ensure that node failures are also well handled in time-out handling. 
They can also be used to test multiple node failure --- 1.25/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2005-09-02 11:47:09 +02:00 +++ 1.26/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2005-12-08 15:28:13 +01:00 @@ -5187,15 +5187,16 @@ /** * For each of replica record */ - Uint32 replicaNo = 0; + bool found = false; ReplicaRecordPtr replicaPtr; for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL; - replicaPtr.i = replicaPtr.p->nextReplica, replicaNo++) { + replicaPtr.i = replicaPtr.p->nextReplica) { jam(); ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord); if(replicaPtr.p->procNode == nodeId){ jam(); + found = true; noOfRemovedReplicas++; removeNodeFromStored(nodeId, fragPtr, replicaPtr); if(replicaPtr.p->lcpOngoingFlag){ @@ -5210,6 +5211,15 @@ replicaPtr.p->lcpOngoingFlag = false; } } + } + if (!found) + { + jam(); + /** + * Run updateNodeInfo to remove any dead nodes from list of activeNodes + * see bug#15587 + */ + updateNodeInfo(fragPtr); } noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas; } --- 1.29/ndb/src/kernel/blocks/dblqh/Dblqh.hpp 2005-09-08 18:24:34 +02:00 +++ 1.30/ndb/src/kernel/blocks/dblqh/Dblqh.hpp 2005-12-08 15:28:13 +01:00 @@ -2881,6 +2881,7 @@ UintR ctransidHash[1024]; Uint32 c_diskless; + Uint32 c_error_insert_table_id; public: /** --- 1.56/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2005-09-08 18:24:34 +02:00 +++ 1.57/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2005-12-08 15:28:13 +01:00 @@ -3532,6 +3532,7 @@ jam(); regTcPtr->activeCreat = ZTRUE; CRASH_INSERTION(5002); + CRASH_INSERTION2(5042, tabptr.i == c_error_insert_table_id); } else { regTcPtr->activeCreat = ZFALSE; }//if @@ -18402,8 +18403,12 @@ return; } - - + if (dumpState->args[0] == DumpStateOrd::LqhErrorInsert5042 && signal->getLength() == 2) + { + c_error_insert_table_id = dumpState->args[1]; + SET_ERROR_INSERT_VALUE(5042); + } + }//Dblqh::execDUMP_STATE_ORD() void Dblqh::execSET_VAR_REQ(Signal* signal) --- 1.10/ndb/test/ndbapi/testNodeRestart.cpp 2005-05-05 
11:08:57 +02:00 +++ 1.11/ndb/test/ndbapi/testNodeRestart.cpp 2005-12-08 15:28:13 +01:00 @@ -21,6 +21,7 @@ #include #include #include +#include int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){ @@ -409,6 +410,43 @@ return NDBT_OK; } +int runBug15587(NDBT_Context* ctx, NDBT_Step* step){ + int result = NDBT_OK; + int loops = ctx->getNumLoops(); + int records = ctx->getNumRecords(); + NdbRestarter restarter; + + Uint32 tableId = ctx->getTab()->getTableId(); + int dump[2] = { DumpStateOrd::LqhErrorInsert5042, 0 }; + dump[1] = tableId; + + int nodeId = restarter.getDbNodeId(1); + + ndbout << "Restart node " << nodeId << endl; + + if (restarter.restartOneDbNode(nodeId, + /** initial */ false, + /** nostart */ true, + /** abort */ true)) + return NDBT_FAILED; + + if (restarter.waitNodesNoStart(&nodeId, 1)) + return NDBT_FAILED; + + if (restarter.dumpStateOneNode(nodeId, dump, 2)) + return NDBT_FAILED; + + if (restarter.startNodes(&nodeId, 1)) + return NDBT_FAILED; + + if (restarter.waitNodesStarted(&nodeId, 1)) + return NDBT_FAILED; + + ctx->stopTest(); + return NDBT_OK; +} + + NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", "Test that one node at a time can be stopped and then restarted "\ @@ -669,6 +707,13 @@ "Test commit after node failure"){ INITIALIZER(runLoadTable); STEP(runLateCommit); + FINALIZER(runClearTable); +} +TESTCASE("Bug15587", + "Test bug with NF during NR"){ + INITIALIZER(runLoadTable); + STEP(runScanUpdateUntilStopped); + STEP(runBug15587); FINALIZER(runClearTable); } NDBT_TESTSUITE_END(testNodeRestart);