From: Date: February 6 2008 11:28am Subject: bk commit into 5.1 tree (jonas:1.2679) BUG#34216 List-Archive: http://lists.mysql.com/commits/41756 X-Bug: 34216 Message-Id: <20080206102847.7C14A90B035@perch.localdomain> Below is the list of changes that have just been committed into a local 5.1 repository of jonas. When jonas does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet@stripped, 2008-02-06 11:28:43+01:00, jonas@stripped +6 -0 ndb - bug#34216 testcases storage/ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2008-02-06 11:28:41+01:00, jonas@stripped +7 -2 new error codes storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp@stripped, 2008-02-06 11:28:41+01:00, jonas@stripped +6 -0 new error codes storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp@stripped, 2008-02-06 11:28:41+01:00, jonas@stripped +4 -1 new error codes storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp@stripped, 2008-02-06 11:28:41+01:00, jonas@stripped +1 -1 remove assert storage/ndb/test/ndbapi/testNodeRestart.cpp@stripped, 2008-02-06 11:28:41+01:00, jonas@stripped +253 -9 new testcase 1) -n Bug34216 Which tests node dying during multi-op commit Very controlled 2) -n mixedmultiop Runs several threads "load" of same scenario...not very controlled storage/ndb/test/run-test/daily-basic-tests.txt@stripped, 2008-02-06 11:28:41+01:00, jonas@stripped +8 -0 new testcases diff -Nrup a/storage/ndb/src/kernel/blocks/ERROR_codes.txt b/storage/ndb/src/kernel/blocks/ERROR_codes.txt --- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2008-01-23 14:42:54 +01:00 +++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2008-02-06 11:28:41 +01:00 @@ -3,10 +3,10 @@ Next NDBCNTR 1002 Next NDBFS 2000 Next DBACC 3002 Next DBTUP 4029 -Next DBLQH 5047 +Next DBLQH 5050 Next DBDICT 6008 Next DBDIH 7195 -Next DBTC 8054 
+Next DBTC 8058 Next CMVMI 9000 Next BACKUP 10038 Next DBUTIL 11002 @@ -263,6 +263,9 @@ Delay execution of ABORTCONF signal 2 se 8053: Crash in timeOutFoundLab, state CS_WAIT_COMMIT_CONF +5048: Crash in execCOMMIT +5049: SET_ERROR_INSERT_VALUE(5048) + ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC ------------------------------------------------- @@ -318,6 +321,8 @@ ABORT OF TCKEYREQ ------ 8038 : Simulate API disconnect just after SCAN_TAB_REQ + +8057 : Send only 1 COMMIT per timeslice 8052 : Simulate failure of TransactionBufferMemory allocation for OI lookup diff -Nrup a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp --- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2007-12-03 14:31:14 +01:00 +++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2008-02-06 11:28:41 +01:00 @@ -5959,6 +5959,12 @@ void Dblqh::execCOMMIT(Signal* signal) TcConnectionrec * const regTcPtr = tcConnectptr.p; TRACE_OP(regTcPtr, "COMMIT"); + + CRASH_INSERTION(5048); + if (ERROR_INSERTED(5049)) + { + SET_ERROR_INSERT_VALUE(5048); + } commitReqLab(signal, gci); return; diff -Nrup a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp --- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2007-12-13 21:32:40 +01:00 +++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2008-02-06 11:28:41 +01:00 @@ -4495,7 +4495,7 @@ void Dbtc::commit020Lab(Signal* signal) if (localTcConnectptr.i != RNIL) { Tcount = Tcount + 1; - if (Tcount < 16) { + if (Tcount < 16 && !ERROR_INSERTED(8057)) { ptrCheckGuard(localTcConnectptr, TtcConnectFilesize, localTcConnectRecord); jam(); @@ -4514,6 +4514,9 @@ void Dbtc::commit020Lab(Signal* signal) }//if } else { jam(); + if (ERROR_INSERTED(8057)) + CLEAR_ERROR_INSERT_VALUE; + regApiPtr->apiConnectstate = CS_COMMIT_SENT; return; }//if diff -Nrup a/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp --- 
a/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp 2008-01-24 15:31:40 +01:00 +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp 2008-02-06 11:28:41 +01:00 @@ -486,7 +486,7 @@ void Dbtup::execTUP_COMMITREQ(Signal* si */ fix_commit_order(regOperPtr); } - ndbassert(regOperPtr.p->is_first_operation()); + //ndbassert(regOperPtr.p->is_first_operation()); regFragPtr.i= regOperPtr.p->fragmentPtr; trans_state= get_trans_state(regOperPtr.p); diff -Nrup a/storage/ndb/test/ndbapi/testNodeRestart.cpp b/storage/ndb/test/ndbapi/testNodeRestart.cpp --- a/storage/ndb/test/ndbapi/testNodeRestart.cpp 2007-12-03 14:31:14 +01:00 +++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp 2008-02-06 11:28:41 +01:00 @@ -23,6 +23,7 @@ #include #include #include +#include int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){ @@ -121,15 +122,57 @@ int runPkReadUntilStopped(NDBT_Context* int runPkUpdateUntilStopped(NDBT_Context* ctx, NDBT_Step* step){ int result = NDBT_OK; int records = ctx->getNumRecords(); + int multiop = ctx->getProperty("MULTI_OP", 1); + Ndb* pNdb = GETNDB(step); int i = 0; - HugoTransactions hugoTrans(*ctx->getTab()); - while (ctx->isTestStopped() == false) { + + HugoOperations hugoOps(*ctx->getTab()); + while (ctx->isTestStopped() == false) + { g_info << i << ": "; - int rows = (rand()%records)+1; - int batch = (rand()%rows)+1; - if (hugoTrans.pkUpdateRecords(GETNDB(step), rows, batch) != 0){ - return NDBT_FAILED; + int batch = (rand()%records)+1; + int row = rand() % records; + + if (batch > 25) + batch = 25; + + if(row + batch > records) + batch = records - row; + + if(hugoOps.startTransaction(pNdb) != 0) + goto err; + + if(hugoOps.pkUpdateRecord(pNdb, row, batch, rand()) != 0) + goto err; + + for (int j = 1; jgetNdbError(); + hugoOps.closeTransaction(pNdb); + if (error.status == NdbError::TemporaryError){ + NdbSleep_MilliSleep(50); + continue; } + return NDBT_FAILED; + i++; } return result; @@ -230,7 +273,7 @@ int runRestarter(NDBT_Context* ctx, NDBT return 
NDBT_OK; } - if(restarter.waitClusterStarted(60) != 0){ + if(restarter.waitClusterStarted() != 0){ g_err << "Cluster failed to start" << endl; return NDBT_FAILED; } @@ -241,13 +284,27 @@ int runRestarter(NDBT_Context* ctx, NDBT int id = lastId % restarter.getNumDbNodes(); int nodeId = restarter.getDbNodeId(id); ndbout << "Restart node " << nodeId << endl; - if(restarter.restartOneDbNode(nodeId, false, false, true) != 0){ + if(restarter.restartOneDbNode(nodeId, false, true, true) != 0){ g_err << "Failed to restartNextDbNode" << endl; result = NDBT_FAILED; break; } - if(restarter.waitClusterStarted(60) != 0){ + if (restarter.waitNodesNoStart(&nodeId, 1)) + { + g_err << "Failed to waitNodesNoStart" << endl; + result = NDBT_FAILED; + break; + } + + if (restarter.startNodes(&nodeId, 1)) + { + g_err << "Failed to start node" << endl; + result = NDBT_FAILED; + break; + } + + if(restarter.waitClusterStarted() != 0){ g_err << "Cluster failed to start" << endl; result = NDBT_FAILED; break; @@ -1883,6 +1940,178 @@ runBug32160(NDBT_Context* ctx, NDBT_Step return NDBT_OK; } +int +runBug34216(NDBT_Context* ctx, NDBT_Step* step) +{ + int result = NDBT_OK; + int loops = ctx->getNumLoops(); + NdbRestarter restarter; + int i = 0; + int lastId = 0; + HugoOperations hugoOps(*ctx->getTab()); + int records = ctx->getNumRecords(); + Ndb* pNdb = GETNDB(step); + + if (restarter.getNumDbNodes() < 2) + { + ctx->stopTest(); + return NDBT_OK; + } + + if(restarter.waitClusterStarted() != 0){ + g_err << "Cluster failed to start" << endl; + return NDBT_FAILED; + } + + char buf[100]; + const char * off = NdbEnv_GetEnv("NDB_ERR_OFFSET", buf, sizeof(buf)); + int offset = off ? 
atoi(off) : 0; + + while(iisTestStopped()) + { + int id = lastId % restarter.getNumDbNodes(); + int nodeId = restarter.getDbNodeId(id); + int err = 5048 + ((i+offset) % 2); + + int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + + if(hugoOps.startTransaction(pNdb) != 0) + goto err; + + nodeId = hugoOps.getTransaction()->getConnectedNodeId(); + ndbout << "Restart node " << nodeId << " " << err <stopTest(); + + return result; +err: + return NDBT_FAILED; +} + + +int +runNF_commit(NDBT_Context* ctx, NDBT_Step* step) +{ + int result = NDBT_OK; + int loops = ctx->getNumLoops(); + NdbRestarter restarter; + if (restarter.getNumDbNodes() < 2) + { + ctx->stopTest(); + return NDBT_OK; + } + + if(restarter.waitClusterStarted() != 0){ + g_err << "Cluster failed to start" << endl; + return NDBT_FAILED; + } + + int i = 0; + while(iisTestStopped()) + { + int nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes()); + int err = 5048; + + ndbout << "Restart node " << nodeId << " " << err <stopTest(); + + return result; +} + + NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", "Test that one node at a time can be stopped and then restarted "\ @@ -2254,6 +2483,21 @@ TESTCASE("Bug29364", ""){ } TESTCASE("Bug32160", ""){ INITIALIZER(runBug32160); +} +TESTCASE("Bug34216", ""){ + INITIALIZER(runCheckAllNodesStarted); + INITIALIZER(runLoadTable); + STEP(runBug34216); + FINALIZER(runClearTable); +} +TESTCASE("mixedmultiop", ""){ + TC_PROPERTY("MULTI_OP", 5); + INITIALIZER(runCheckAllNodesStarted); + INITIALIZER(runLoadTable); + STEP(runNF_commit); + STEP(runPkUpdateUntilStopped); + STEP(runPkUpdateUntilStopped); + FINALIZER(runClearTable); } NDBT_TESTSUITE_END(testNodeRestart); diff -Nrup a/storage/ndb/test/run-test/daily-basic-tests.txt b/storage/ndb/test/run-test/daily-basic-tests.txt --- a/storage/ndb/test/run-test/daily-basic-tests.txt 2008-01-31 23:49:22 +01:00 +++ b/storage/ndb/test/run-test/daily-basic-tests.txt 2008-02-06 11:28:41 +01:00 @@ -1050,3 +1050,11 
@@ max-time: 300 cmd: test_event args: -n Bug33793 T1 +max-time: 600 +cmd: testNodeRestart +args: --nologging -n Bug34216 -l 10 T1 I3 D2 + +max-time: 1200 +cmd: testNodeRestart +args: -n mixedmultiop -l 10 T1 I2 I3 D2 +