Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes
will be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2008-02-06 11:28:43+01:00, jonas@stripped +6 -0
ndb - bug#34216
testcases
storage/ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2008-02-06 11:28:41+01:00,
jonas@stripped +7 -2
new error codes
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp@stripped, 2008-02-06 11:28:41+01:00,
jonas@stripped +6 -0
new error codes
storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp@stripped, 2008-02-06 11:28:41+01:00,
jonas@stripped +4 -1
new error codes
storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp@stripped, 2008-02-06 11:28:41+01:00,
jonas@stripped +1 -1
remove assert
storage/ndb/test/ndbapi/testNodeRestart.cpp@stripped, 2008-02-06 11:28:41+01:00,
jonas@stripped +253 -9
new testcase
1) -n Bug34216
Which tests a node dying during a multi-op commit
Very controlled
2) -n mixedmultiop
Runs several "load" threads against the same scenario; not very controlled
storage/ndb/test/run-test/daily-basic-tests.txt@stripped, 2008-02-06 11:28:41+01:00,
jonas@stripped +8 -0
new testcases
diff -Nrup a/storage/ndb/src/kernel/blocks/ERROR_codes.txt
b/storage/ndb/src/kernel/blocks/ERROR_codes.txt
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2008-01-23 14:42:54 +01:00
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2008-02-06 11:28:41 +01:00
@@ -3,10 +3,10 @@ Next NDBCNTR 1002
Next NDBFS 2000
Next DBACC 3002
Next DBTUP 4029
-Next DBLQH 5047
+Next DBLQH 5050
Next DBDICT 6008
Next DBDIH 7195
-Next DBTC 8054
+Next DBTC 8058
Next CMVMI 9000
Next BACKUP 10038
Next DBUTIL 11002
@@ -263,6 +263,9 @@ Delay execution of ABORTCONF signal 2 se
8053: Crash in timeOutFoundLab, state CS_WAIT_COMMIT_CONF
+5048: Crash in execCOMMIT
+5049: SET_ERROR_INSERT_VALUE(5048)
+
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC
-------------------------------------------------
@@ -318,6 +321,8 @@ ABORT OF TCKEYREQ
------
8038 : Simulate API disconnect just after SCAN_TAB_REQ
+
+8057 : Send only 1 COMMIT per timeslice
8052 : Simulate failure of TransactionBufferMemory allocation for OI lookup
diff -Nrup a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2007-12-03 14:31:14 +01:00
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2008-02-06 11:28:41 +01:00
@@ -5959,6 +5959,12 @@ void Dblqh::execCOMMIT(Signal* signal)
TcConnectionrec * const regTcPtr = tcConnectptr.p;
TRACE_OP(regTcPtr, "COMMIT");
+
+ CRASH_INSERTION(5048);
+ if (ERROR_INSERTED(5049))
+ {
+ SET_ERROR_INSERT_VALUE(5048);
+ }
commitReqLab(signal, gci);
return;
diff -Nrup a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
--- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2007-12-13 21:32:40 +01:00
+++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2008-02-06 11:28:41 +01:00
@@ -4495,7 +4495,7 @@ void Dbtc::commit020Lab(Signal* signal)
if (localTcConnectptr.i != RNIL) {
Tcount = Tcount + 1;
- if (Tcount < 16) {
+ if (Tcount < 16 && !ERROR_INSERTED(8057)) {
ptrCheckGuard(localTcConnectptr,
TtcConnectFilesize, localTcConnectRecord);
jam();
@@ -4514,6 +4514,9 @@ void Dbtc::commit020Lab(Signal* signal)
}//if
} else {
jam();
+ if (ERROR_INSERTED(8057))
+ CLEAR_ERROR_INSERT_VALUE;
+
regApiPtr->apiConnectstate = CS_COMMIT_SENT;
return;
}//if
diff -Nrup a/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp
b/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp
--- a/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp 2008-01-24 15:31:40 +01:00
+++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp 2008-02-06 11:28:41 +01:00
@@ -486,7 +486,7 @@ void Dbtup::execTUP_COMMITREQ(Signal* si
*/
fix_commit_order(regOperPtr);
}
- ndbassert(regOperPtr.p->is_first_operation());
+ //ndbassert(regOperPtr.p->is_first_operation());
regFragPtr.i= regOperPtr.p->fragmentPtr;
trans_state= get_trans_state(regOperPtr.p);
diff -Nrup a/storage/ndb/test/ndbapi/testNodeRestart.cpp
b/storage/ndb/test/ndbapi/testNodeRestart.cpp
--- a/storage/ndb/test/ndbapi/testNodeRestart.cpp 2007-12-03 14:31:14 +01:00
+++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp 2008-02-06 11:28:41 +01:00
@@ -23,6 +23,7 @@
#include <signaldata/DumpStateOrd.hpp>
#include <Bitmask.hpp>
#include <RefConvert.hpp>
+#include <NdbEnv.h>
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
@@ -121,15 +122,57 @@ int runPkReadUntilStopped(NDBT_Context*
int runPkUpdateUntilStopped(NDBT_Context* ctx, NDBT_Step* step){
int result = NDBT_OK;
int records = ctx->getNumRecords();
+ int multiop = ctx->getProperty("MULTI_OP", 1);
+ Ndb* pNdb = GETNDB(step);
int i = 0;
- HugoTransactions hugoTrans(*ctx->getTab());
- while (ctx->isTestStopped() == false) {
+
+ HugoOperations hugoOps(*ctx->getTab());
+ while (ctx->isTestStopped() == false)
+ {
g_info << i << ": ";
- int rows = (rand()%records)+1;
- int batch = (rand()%rows)+1;
- if (hugoTrans.pkUpdateRecords(GETNDB(step), rows, batch) != 0){
- return NDBT_FAILED;
+ int batch = (rand()%records)+1;
+ int row = rand() % records;
+
+ if (batch > 25)
+ batch = 25;
+
+ if(row + batch > records)
+ batch = records - row;
+
+ if(hugoOps.startTransaction(pNdb) != 0)
+ goto err;
+
+ if(hugoOps.pkUpdateRecord(pNdb, row, batch, rand()) != 0)
+ goto err;
+
+ for (int j = 1; j<multiop; j++)
+ {
+ if(hugoOps.execute_NoCommit(pNdb) != 0)
+ goto err;
+
+ if(hugoOps.pkUpdateRecord(pNdb, row, batch, rand()) != 0)
+ goto err;
+ }
+
+ if(hugoOps.execute_Commit(pNdb) != 0)
+ goto err;
+
+ hugoOps.closeTransaction(pNdb);
+
+ continue;
+
+err:
+ NdbConnection* pCon = hugoOps.getTransaction();
+ if(pCon == 0)
+ continue;
+ NdbError error = pCon->getNdbError();
+ hugoOps.closeTransaction(pNdb);
+ if (error.status == NdbError::TemporaryError){
+ NdbSleep_MilliSleep(50);
+ continue;
}
+ return NDBT_FAILED;
+
i++;
}
return result;
@@ -230,7 +273,7 @@ int runRestarter(NDBT_Context* ctx, NDBT
return NDBT_OK;
}
- if(restarter.waitClusterStarted(60) != 0){
+ if(restarter.waitClusterStarted() != 0){
g_err << "Cluster failed to start" << endl;
return NDBT_FAILED;
}
@@ -241,13 +284,27 @@ int runRestarter(NDBT_Context* ctx, NDBT
int id = lastId % restarter.getNumDbNodes();
int nodeId = restarter.getDbNodeId(id);
ndbout << "Restart node " << nodeId << endl;
- if(restarter.restartOneDbNode(nodeId, false, false, true) != 0){
+ if(restarter.restartOneDbNode(nodeId, false, true, true) != 0){
g_err << "Failed to restartNextDbNode" << endl;
result = NDBT_FAILED;
break;
}
- if(restarter.waitClusterStarted(60) != 0){
+ if (restarter.waitNodesNoStart(&nodeId, 1))
+ {
+ g_err << "Failed to waitNodesNoStart" << endl;
+ result = NDBT_FAILED;
+ break;
+ }
+
+ if (restarter.startNodes(&nodeId, 1))
+ {
+ g_err << "Failed to start node" << endl;
+ result = NDBT_FAILED;
+ break;
+ }
+
+ if(restarter.waitClusterStarted() != 0){
g_err << "Cluster failed to start" << endl;
result = NDBT_FAILED;
break;
@@ -1883,6 +1940,178 @@ runBug32160(NDBT_Context* ctx, NDBT_Step
return NDBT_OK;
}
+int
+runBug34216(NDBT_Context* ctx, NDBT_Step* step)
+{
+ int result = NDBT_OK;
+ int loops = ctx->getNumLoops();
+ NdbRestarter restarter;
+ int i = 0;
+ int lastId = 0;
+ HugoOperations hugoOps(*ctx->getTab());
+ int records = ctx->getNumRecords();
+ Ndb* pNdb = GETNDB(step);
+
+ if (restarter.getNumDbNodes() < 2)
+ {
+ ctx->stopTest();
+ return NDBT_OK;
+ }
+
+ if(restarter.waitClusterStarted() != 0){
+ g_err << "Cluster failed to start" << endl;
+ return NDBT_FAILED;
+ }
+
+ char buf[100];
+ const char * off = NdbEnv_GetEnv("NDB_ERR_OFFSET", buf, sizeof(buf));
+ int offset = off ? atoi(off) : 0;
+
+ while(i<loops && result != NDBT_FAILED && !ctx->isTestStopped())
+ {
+ int id = lastId % restarter.getNumDbNodes();
+ int nodeId = restarter.getDbNodeId(id);
+ int err = 5048 + ((i+offset) % 2);
+
+ int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+
+ if(hugoOps.startTransaction(pNdb) != 0)
+ goto err;
+
+ nodeId = hugoOps.getTransaction()->getConnectedNodeId();
+ ndbout << "Restart node " << nodeId << " " << err
<<endl;
+
+ if (restarter.dumpStateOneNode(nodeId, val2, 2))
+ return NDBT_FAILED;
+
+ if(restarter.insertErrorInNode(nodeId, err) != 0){
+ g_err << "Failed to restartNextDbNode" << endl;
+ result = NDBT_FAILED;
+ break;
+ }
+
+ if (restarter.insertErrorInNode(nodeId, 8057) != 0)
+ {
+ g_err << "Failed to insert error 8057" << endl;
+ result = NDBT_FAILED;
+ break;
+ }
+
+ int rows = 1;
+ int batch = 1;
+ int row = (records - rows) ? rand() % (records - rows) : 0;
+
+ if(hugoOps.pkUpdateRecord(pNdb, row, batch, rand()) != 0)
+ goto err;
+
+ for (int l = 1; l<5; l++)
+ {
+ if (hugoOps.execute_NoCommit(pNdb) != 0)
+ goto err;
+
+ if(hugoOps.pkUpdateRecord(pNdb, row, batch, rand()) != 0)
+ goto err;
+ }
+
+ hugoOps.execute_Commit(pNdb);
+ hugoOps.closeTransaction(pNdb);
+
+ if (restarter.waitNodesNoStart(&nodeId, 1))
+ {
+ g_err << "Failed to waitNodesNoStart" << endl;
+ result = NDBT_FAILED;
+ break;
+ }
+
+ if (restarter.startNodes(&nodeId, 1))
+ {
+ g_err << "Failed to startNodes" << endl;
+ result = NDBT_FAILED;
+ break;
+ }
+
+ if(restarter.waitClusterStarted() != 0){
+ g_err << "Cluster failed to start" << endl;
+ result = NDBT_FAILED;
+ break;
+ }
+
+ lastId++;
+ i++;
+ }
+
+ ctx->stopTest();
+
+ return result;
+err:
+ return NDBT_FAILED;
+}
+
+
+int
+runNF_commit(NDBT_Context* ctx, NDBT_Step* step)
+{
+ int result = NDBT_OK;
+ int loops = ctx->getNumLoops();
+ NdbRestarter restarter;
+ if (restarter.getNumDbNodes() < 2)
+ {
+ ctx->stopTest();
+ return NDBT_OK;
+ }
+
+ if(restarter.waitClusterStarted() != 0){
+ g_err << "Cluster failed to start" << endl;
+ return NDBT_FAILED;
+ }
+
+ int i = 0;
+ while(i<loops && result != NDBT_FAILED && !ctx->isTestStopped())
+ {
+ int nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
+ int err = 5048;
+
+ ndbout << "Restart node " << nodeId << " " << err
<<endl;
+
+ int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+ if (restarter.dumpStateOneNode(nodeId, val2, 2))
+ return NDBT_FAILED;
+
+ if(restarter.insertErrorInNode(nodeId, err) != 0){
+ g_err << "Failed to restartNextDbNode" << endl;
+ result = NDBT_FAILED;
+ break;
+ }
+
+ if (restarter.waitNodesNoStart(&nodeId, 1))
+ {
+ g_err << "Failed to waitNodesNoStart" << endl;
+ result = NDBT_FAILED;
+ break;
+ }
+
+ if (restarter.startNodes(&nodeId, 1))
+ {
+ g_err << "Failed to startNodes" << endl;
+ result = NDBT_FAILED;
+ break;
+ }
+
+ if(restarter.waitClusterStarted() != 0){
+ g_err << "Cluster failed to start" << endl;
+ result = NDBT_FAILED;
+ break;
+ }
+
+ i++;
+ }
+
+ ctx->stopTest();
+
+ return result;
+}
+
+
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
@@ -2254,6 +2483,21 @@ TESTCASE("Bug29364", ""){
}
TESTCASE("Bug32160", ""){
INITIALIZER(runBug32160);
+}
+TESTCASE("Bug34216", ""){
+ INITIALIZER(runCheckAllNodesStarted);
+ INITIALIZER(runLoadTable);
+ STEP(runBug34216);
+ FINALIZER(runClearTable);
+}
+TESTCASE("mixedmultiop", ""){
+ TC_PROPERTY("MULTI_OP", 5);
+ INITIALIZER(runCheckAllNodesStarted);
+ INITIALIZER(runLoadTable);
+ STEP(runNF_commit);
+ STEP(runPkUpdateUntilStopped);
+ STEP(runPkUpdateUntilStopped);
+ FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart);
diff -Nrup a/storage/ndb/test/run-test/daily-basic-tests.txt
b/storage/ndb/test/run-test/daily-basic-tests.txt
--- a/storage/ndb/test/run-test/daily-basic-tests.txt 2008-01-31 23:49:22 +01:00
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt 2008-02-06 11:28:41 +01:00
@@ -1050,3 +1050,11 @@ max-time: 300
cmd: test_event
args: -n Bug33793 T1
+max-time: 600
+cmd: testNodeRestart
+args: --nologging -n Bug34216 -l 10 T1 I3 D2
+
+max-time: 1200
+cmd: testNodeRestart
+args: -n mixedmultiop -l 10 T1 I2 I3 D2
+
| Thread |
|---|
| • bk commit into 5.1 tree (jonas:1.2679) BUG#34216 | jonas | 6 Feb 2008 |