From: Date: August 13 2007 9:22am Subject: bk commit into 5.0 tree (jonas:1.2473) BUG#28804 List-Archive: http://lists.mysql.com/commits/32429 X-Bug: 28804 Message-Id: <20070813072247.11E5160D30@perch.ndb.mysql.com> Below is the list of changes that have just been committed into a local 5.0 repository of jonas. When jonas does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet@stripped, 2007-08-13 09:22:42+02:00, jonas@stripped +8 -0 ndb - bug#28804 Handle out of transaction buffer in TC for INDX lookups ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2007-08-13 09:22:39+02:00, jonas@stripped +5 -1 Add new error codes for simulating out of transaction buffer memory ndb/src/kernel/blocks/dbtc/Dbtc.hpp@stripped, 2007-08-13 09:22:39+02:00, jonas@stripped +5 -5 Change signature to handle out of buffer ndb/src/kernel/blocks/dbtc/DbtcMain.cpp@stripped, 2007-08-13 09:22:40+02:00, jonas@stripped +122 -41 Handle otu of transaction buffers in index operations (TCINDXREQ++) ndb/src/ndbapi/NdbTransaction.cpp@stripped, 2007-08-13 09:22:40+02:00, jonas@stripped +32 -6 Give more info on 4012 ndb/src/ndbapi/ndberror.c@stripped, 2007-08-13 09:22:40+02:00, jonas@stripped +2 -0 Add new error code ndb/test/ndbapi/testIndex.cpp@stripped, 2007-08-13 09:22:40+02:00, jonas@stripped +117 -0 add tests ndb/test/run-test/daily-basic-tests.txt@stripped, 2007-08-13 09:22:40+02:00, jonas@stripped +8 -0 add tests sql/ha_ndbcluster.cc@stripped, 2007-08-13 09:22:40+02:00, jonas@stripped +8 -2 Set correct status # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. # User: jonas # Host: perch.ndb.mysql.com # Root: /home/jonas/src/50-bug28804 --- 1.55/ndb/test/run-test/daily-basic-tests.txt 2007-08-13 09:22:46 +02:00 +++ 1.56/ndb/test/run-test/daily-basic-tests.txt 2007-08-13 09:22:46 +02:00 @@ -779,3 +779,11 @@ args: -time 60 -p 1 -proc 25 type: bench +max-time: 180 +cmd: testIndex +args: -n Bug28804 T1 T3 + +max-time: 180 +cmd: testIndex +args: -n Bug28804_ATTRINFO T1 T3 + --- 1.29/ndb/src/kernel/blocks/ERROR_codes.txt 2007-08-13 09:22:46 +02:00 +++ 1.30/ndb/src/kernel/blocks/ERROR_codes.txt 2007-08-13 09:22:46 +02:00 @@ -6,7 +6,7 @@ Next DBLQH 5043 Next DBDICT 6007 Next DBDIH 7183 -Next DBTC 8039 +Next DBTC 8052 Next CMVMI 9000 Next BACKUP 10022 Next DBUTIL 11002 @@ -295,6 +295,10 @@ ------ 8038 : Simulate API disconnect just after SCAN_TAB_REQ + +8039 : Simulate failure of TransactionBufferMemory allocation for OI lookup + +8051 : Simulate failure of allocation for saveINDXKEYINFO CMVMI --- 1.45/ndb/src/kernel/blocks/dbtc/Dbtc.hpp 2007-08-13 09:22:46 +02:00 +++ 1.46/ndb/src/kernel/blocks/dbtc/Dbtc.hpp 2007-08-13 09:22:46 +02:00 @@ -1497,12 +1497,12 @@ void clearCommitAckMarker(ApiConnectRecord * const regApiPtr, TcConnectRecord * const regTcPtr); // Trigger and index handling - bool saveINDXKEYINFO(Signal* signal, - TcIndexOperation* indexOp, - const Uint32 *src, - Uint32 len); + int saveINDXKEYINFO(Signal* signal, + TcIndexOperation* indexOp, + const Uint32 *src, + Uint32 len); bool receivedAllINDXKEYINFO(TcIndexOperation* indexOp); - bool saveINDXATTRINFO(Signal* signal, + int saveINDXATTRINFO(Signal* signal, TcIndexOperation* indexOp, const Uint32 *src, Uint32 len); --- 1.111/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2007-08-13 09:22:46 +02:00 +++ 1.112/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2007-08-13 09:22:46 +02:00 @@ -1789,9 +1789,18 @@ }//switch } +static +inline +bool +compare_transid(Uint32* val0, Uint32* val1) +{ + Uint32 tmp0 = val0[0] ^ val1[0]; + Uint32 tmp1 = val0[1] ^ val1[1]; + return (tmp0 | tmp1) == 0; +} + void Dbtc::execKEYINFO(Signal* signal) { - UintR compare_transid1, compare_transid2; jamEntry(); apiConnectptr.i = signal->theData[0]; tmaxData = 20; @@ -1801,10 +1810,8 @@ }//if ptrAss(apiConnectptr, apiConnectRecord); ttransid_ptr = 1; - compare_transid1 = apiConnectptr.p->transid[0] ^ signal->theData[1]; - compare_transid2 = apiConnectptr.p->transid[1] ^ signal->theData[2]; - compare_transid1 = compare_transid1 | compare_transid2; - if (compare_transid1 != 0) { + if (compare_transid(apiConnectptr.p->transid, signal->theData+1) == false) + { TCKEY_abort(signal, 19); return; }//if @@ -2105,7 +2112,6 @@ void Dbtc::execATTRINFO(Signal* signal) { - UintR compare_transid1, compare_transid2; UintR Tdata1 = signal->theData[0]; UintR Tlength = signal->length(); UintR TapiConnectFilesize = capiConnectFilesize; @@ -2120,17 +2126,13 @@ return; }//if - UintR Tdata2 = signal->theData[1]; - UintR Tdata3 = signal->theData[2]; ApiConnectRecord * const regApiPtr = &localApiConnectRecord[Tdata1]; - compare_transid1 = regApiPtr->transid[0] ^ Tdata2; - compare_transid2 = regApiPtr->transid[1] ^ Tdata3; apiConnectptr.p = regApiPtr; - compare_transid1 = compare_transid1 | compare_transid2; - if (compare_transid1 != 0) { + if (compare_transid(regApiPtr->transid, signal->theData+1) == false) + { DEBUG("Drop ATTRINFO, wrong transid, lenght="<theData[1]<<", "<theData[2]); TCKEY_abort(signal, 19); return; }//if @@ -5456,11 +5458,32 @@ } }//Dbtc::execTC_COMMITREQ() +/** + * TCROLLBACKREQ + * + * Format is: + * + * thedata[0] = apiconnectptr + * thedata[1] = transid[0] + * thedata[2] = transid[1] + * OPTIONAL thedata[3] = flags + * + * Flags: + * 0x1 = potentiallyBad data from API (try not to assert) + */ void Dbtc::execTCROLLBACKREQ(Signal* signal) { + bool potentiallyBad= false; UintR compare_transid1, compare_transid2; jamEntry(); + + if(unlikely((signal->getLength() >= 4) && (signal->theData[3] & 0x1))) + { + ndbout_c("Trying to roll back potentially bad txn\n"); + potentiallyBad= true; + } + apiConnectptr.i = signal->theData[0]; if (apiConnectptr.i >= capiConnectFilesize) { goto TC_ROLL_warning; @@ -5547,12 +5570,14 @@ TC_ROLL_warning: jam(); - warningHandlerLab(signal, __LINE__); + if(likely(potentiallyBad==false)) + warningHandlerLab(signal, __LINE__); return; TC_ROLL_system_error: jam(); - systemErrorLab(signal, __LINE__); + if(likely(potentiallyBad==false)) + systemErrorLab(signal, __LINE__); return; }//Dbtc::execTCROLLBACKREQ() @@ -11559,6 +11584,7 @@ // This is a newly started transaction, clean-up releaseAllSeizedIndexOperations(regApiPtr); + regApiPtr->apiConnectstate = CS_STARTED; regApiPtr->transid[0] = tcIndxReq->transId1; regApiPtr->transid[1] = tcIndxReq->transId2; }//if @@ -11599,20 +11625,29 @@ Uint32 includedIndexLength = MIN(indexLength, indexBufSize); indexOp->expectedAttrInfo = attrLength; Uint32 includedAttrLength = MIN(attrLength, attrBufSize); - if (saveINDXKEYINFO(signal, - indexOp, - dataPtr, - includedIndexLength)) { + + int ret; + if ((ret = saveINDXKEYINFO(signal, + indexOp, + dataPtr, + includedIndexLength)) == 0) + { jam(); // We have received all we need readIndexTable(signal, regApiPtr, indexOp); return; } + else if (ret == -1) + { + jam(); + return; + } + dataPtr += includedIndexLength; if (saveINDXATTRINFO(signal, indexOp, dataPtr, - includedAttrLength)) { + includedAttrLength) == 0) { jam(); // We have received all we need readIndexTable(signal, regApiPtr, indexOp); @@ -11715,13 +11750,25 @@ TcIndexOperationPtr indexOpPtr; TcIndexOperation* indexOp; + if (compare_transid(regApiPtr->transid, indxKeyInfo->transId) == false) + { + TCKEY_abort(signal, 19); + return; + } + + if (regApiPtr->apiConnectstate == CS_ABORTING) + { + jam(); + return; + } + if((indexOpPtr.i = regApiPtr->accumulatingIndexOp) != RNIL) { indexOp = c_theIndexOperationPool.getPtr(indexOpPtr.i); if (saveINDXKEYINFO(signal, indexOp, src, - keyInfoLength)) { + keyInfoLength) == 0) { jam(); // We have received all we need readIndexTable(signal, regApiPtr, indexOp); @@ -11748,17 +11795,31 @@ TcIndexOperationPtr indexOpPtr; TcIndexOperation* indexOp; + if (compare_transid(regApiPtr->transid, indxAttrInfo->transId) == false) + { + TCKEY_abort(signal, 19); + return; + } + + if (regApiPtr->apiConnectstate == CS_ABORTING) + { + jam(); + return; + } + if((indexOpPtr.i = regApiPtr->accumulatingIndexOp) != RNIL) { indexOp = c_theIndexOperationPool.getPtr(indexOpPtr.i); if (saveINDXATTRINFO(signal, indexOp, src, - attrInfoLength)) { + attrInfoLength) == 0) { jam(); // We have received all we need readIndexTable(signal, regApiPtr, indexOp); + return; } + return; } } @@ -11766,12 +11827,13 @@ * Save signal INDXKEYINFO * Return true if we have received all needed data */ -bool Dbtc::saveINDXKEYINFO(Signal* signal, - TcIndexOperation* indexOp, - const Uint32 *src, - Uint32 len) +int +Dbtc::saveINDXKEYINFO(Signal* signal, + TcIndexOperation* indexOp, + const Uint32 *src, + Uint32 len) { - if (!indexOp->keyInfo.append(src, len)) { + if (ERROR_INSERTED(8039) || !indexOp->keyInfo.append(src, len)) { jam(); // Failed to seize keyInfo, abort transaction #ifdef VM_TRACE @@ -11781,15 +11843,17 @@ apiConnectptr.i = indexOp->connectionIndex; ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord); releaseIndexOperation(apiConnectptr.p, indexOp); - terrorCode = 4000; + terrorCode = 289; + if(TcKeyReq::getExecuteFlag(indexOp->tcIndxReq.requestInfo)) + apiConnectptr.p->m_exec_flag= 1; abortErrorLab(signal); - return false; + return -1; } if (receivedAllINDXKEYINFO(indexOp) && receivedAllINDXATTRINFO(indexOp)) { jam(); - return true; + return 0; } - return false; + return 1; } bool Dbtc::receivedAllINDXKEYINFO(TcIndexOperation* indexOp) @@ -11801,12 +11865,13 @@ * Save signal INDXATTRINFO * Return true if we have received all needed data */ -bool Dbtc::saveINDXATTRINFO(Signal* signal, - TcIndexOperation* indexOp, - const Uint32 *src, - Uint32 len) +int +Dbtc::saveINDXATTRINFO(Signal* signal, + TcIndexOperation* indexOp, + const Uint32 *src, + Uint32 len) { - if (!indexOp->attrInfo.append(src, len)) { + if (ERROR_INSERTED(8051) || !indexOp->attrInfo.append(src, len)) { jam(); #ifdef VM_TRACE ndbout_c("Dbtc::saveINDXATTRINFO: Failed to seize attrInfo\n"); @@ -11814,15 +11879,17 @@ apiConnectptr.i = indexOp->connectionIndex; ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord); releaseIndexOperation(apiConnectptr.p, indexOp); - terrorCode = 4000; + terrorCode = 289; + if(TcKeyReq::getExecuteFlag(indexOp->tcIndxReq.requestInfo)) + apiConnectptr.p->m_exec_flag= 1; abortErrorLab(signal); - return false; + return -1; } if (receivedAllINDXKEYINFO(indexOp) && receivedAllINDXATTRINFO(indexOp)) { jam(); - return true; + return 0; } - return false; + return 1; } bool Dbtc::receivedAllINDXATTRINFO(TcIndexOperation* indexOp) @@ -12006,6 +12073,9 @@ tcIndxRef->transId[0] = tcKeyRef->transId[0]; tcIndxRef->transId[1] = tcKeyRef->transId[1]; tcIndxRef->errorCode = tcKeyRef->errorCode; + + releaseIndexOperation(regApiPtr, indexOp); + sendSignal(regApiPtr->ndbapiBlockref, GSN_TCINDXREF, signal, TcKeyRef::SignalLength, JBB); return; @@ -12538,7 +12608,18 @@ bool Dbtc::seizeIndexOperation(ApiConnectRecord* regApiPtr, TcIndexOperationPtr& indexOpPtr) { - return regApiPtr->theSeizedIndexOperations.seize(indexOpPtr); + if (regApiPtr->theSeizedIndexOperations.seize(indexOpPtr)) + { + ndbassert(indexOpPtr.p->expectedKeyInfo == 0); + ndbassert(indexOpPtr.p->keyInfo.getSize() == 0); + ndbassert(indexOpPtr.p->expectedAttrInfo == 0); + ndbassert(indexOpPtr.p->attrInfo.getSize() == 0); + ndbassert(indexOpPtr.p->expectedTransIdAI == 0); + ndbassert(indexOpPtr.p->transIdAI.getSize() == 0); + return true; + } + + return false; } void Dbtc::releaseIndexOperation(ApiConnectRecord* regApiPtr, --- 1.59/ndb/src/ndbapi/NdbTransaction.cpp 2007-08-13 09:22:46 +02:00 +++ 1.60/ndb/src/ndbapi/NdbTransaction.cpp 2007-08-13 09:22:46 +02:00 @@ -481,12 +481,27 @@ while (1) { int noOfComp = tNdb->sendPollNdb(3 * timeout, 1, forceSend); if (noOfComp == 0) { - /** - * This timeout situation can occur if NDB crashes. + /* + * Just for fun, this is only one of two places where + * we could hit this error... It's quite possible we + * hit it in Ndbif.cpp in Ndb::check_send_timeout() + * + * We behave rather similarly in both places. + * Hitting this is certainly a bug though... */ - ndbout << "This timeout should never occur, execute(..)" << endl; - theError.code = 4012; - setOperationErrorCodeAbort(4012); // Error code for "Cluster Failure" + g_eventLogger.error("WARNING: Timeout in executeNoBlobs() waiting for " + "response from NDB data nodes. This should NEVER " + "occur. You have likely hit a NDB Bug. Please " + "file a bug."); + DBUG_PRINT("error",("This timeout should never occure, execute()")); + g_eventLogger.error("Forcibly trying to rollback txn (%p" + ") to try to clean up data node resources.", + this); + executeNoBlobs(NdbTransaction::Rollback); + theError.code = 4012; + theError.status= NdbError::PermanentError; + theError.classification= NdbError::TimeoutExpired; + setOperationErrorCodeAbort(4012); // ndbd timeout DBUG_RETURN(-1); }//if @@ -550,7 +565,12 @@ */ if (theError.code != 0) DBUG_PRINT("enter", ("Resetting error %d on execute", theError.code)); - theError.code = 0; + /** + * for timeout (4012) we want sendROLLBACK to behave differently. + * Else, normal behaviour of reset errcode + */ + if (theError.code != 4012) + theError.code = 0; NdbScanOperation* tcOp = m_theFirstScanOperation; if (tcOp != 0){ // Execute any cursor operations @@ -873,6 +893,12 @@ tSignal.setData(theTCConPtr, 1); tSignal.setData(tTransId1, 2); tSignal.setData(tTransId2, 3); + if(theError.code == 4012) + { + g_eventLogger.error("Sending TCROLLBACKREQ with Bad flag"); + tSignal.setLength(tSignal.getLength() + 1); // + flags + tSignal.setData(0x1, 4); // potentially bad data + } tReturnCode = tp->sendSignal(&tSignal,theDBnode); if (tReturnCode != -1) { theSendStatus = sendTC_ROLLBACK; --- 1.19/ndb/test/ndbapi/testIndex.cpp 2007-08-13 09:22:46 +02:00 +++ 1.20/ndb/test/ndbapi/testIndex.cpp 2007-08-13 09:22:46 +02:00 @@ -1297,6 +1297,102 @@ return res; } +int tcSaveINDX_test(NDBT_Context* ctx, NDBT_Step* step, int inject_err) +{ + int result= NDBT_OK; + Ndb* pNdb = GETNDB(step); + NdbDictionary::Dictionary * dict = pNdb->getDictionary(); + const NdbDictionary::Index * idx = dict->getIndex(pkIdxName, *ctx->getTab()); + + HugoOperations ops(*ctx->getTab(), idx); + + g_err << "Using INDEX: " << pkIdxName << endl; + + NdbRestarter restarter; + + int loops = ctx->getNumLoops(); + const int rows = ctx->getNumRecords(); + const int batchsize = ctx->getProperty("BatchSize", 1); + + for(int bs=1; bs < loops; bs++) + { + int c= 0; + while (c++ < loops) + { + g_err << "BS " << bs << " LOOP #" << c << endl; + + g_err << "inserting error on op#" << c << endl; + + CHECK(ops.startTransaction(pNdb) == 0); + for(int i=1;i<=c;i++) + { + if(i==c) + { + if(restarter.insertErrorInAllNodes(inject_err)!=0) + { + g_err << "**** FAILED to insert error" << endl; + result= NDBT_FAILED; + break; + } + } + CHECK(ops.indexReadRecords(pNdb, pkIdxName, i,false,1) == 0); + if(i%bs==0 || i==c) + { + if(istatus= STATUS_NOT_FOUND; - DBUG_RETURN(ndb_err(trans)); + int err= ndb_err(trans); + if(err==HA_ERR_KEY_NOT_FOUND) + table->status= STATUS_NOT_FOUND; + else + table->status= STATUS_GARBAGE; + + DBUG_RETURN(err); } + // The value have now been fetched from NDB unpack_record(buf); table->status= 0; --- 1.56/ndb/src/ndbapi/ndberror.c 2007-08-13 09:22:46 +02:00 +++ 1.57/ndb/src/ndbapi/ndberror.c 2007-08-13 09:22:46 +02:00 @@ -173,6 +173,8 @@ { 4022, TR, "Out of Send Buffer space in NDB API" }, { 4032, TR, "Out of Send Buffer space in NDB API" }, { 288, TR, "Out of index operations in transaction coordinator (increase MaxNoOfConcurrentIndexOperations)" }, + { 289, TR, "Out of transaction buffer memory in TC (increase TransactionBufferMemory)" }, + /** * InsufficientSpace */