From: Date: December 5 2006 3:11pm Subject: bk commit into 5.1 tree (jonas:1.2343) BUG#24664 List-Archive: http://lists.mysql.com/commits/16456 X-Bug: 24664 Message-Id: <20061205141101.2A1D75276E2@perch.ndb.mysql.com> Below is the list of changes that have just been committed into a local 5.1 repository of jonas. When jonas does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet@stripped, 2006-12-05 15:10:56+01:00, jonas@stripped +7 -0 ndb - bug#24664 1) run lcp snapshot for both MM and DD tables (so I don't have to change restore to use WRITE) 2) fix >= and > bug in lcp skip/keep handling 3) very cool test prog for this :-) storage/ndb/include/kernel/signaldata/BackupContinueB.hpp@stripped, 2006-12-05 15:10:54+01:00, jonas@stripped +2 -1 Add new error insert storage/ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2006-12-05 15:10:54+01:00, jonas@stripped +4 -1 Add new error insert storage/ndb/src/kernel/blocks/backup/Backup.cpp@stripped, 2006-12-05 15:10:54+01:00, jonas@stripped +35 -0 Add new error insert storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp@stripped, 2006-12-05 15:10:54+01:00, jonas@stripped +16 -17 1) get >= and > correct for lcp keep/skip 2) always run lcp snapshot impl. (previously only for dd tables) storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp@stripped, 2006-12-05 15:10:54+01:00, jonas@stripped +18 -22 1) get >= and > correct for lcp keep/skip 2) always run lcp snapshot impl. (previously only for dd tables) storage/ndb/test/ndbapi/testSystemRestart.cpp@stripped, 2006-12-05 15:10:54+01:00, jonas@stripped +66 -0 add testcase storage/ndb/test/run-test/daily-basic-tests.txt@stripped, 2006-12-05 15:10:54+01:00, jonas@stripped +4 -0 add testcase # This is a BitKeeper patch. 
What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. # User: jonas # Host: perch.ndb.mysql.com # Root: /home/jonas/src/51-work --- 1.57/storage/ndb/test/run-test/daily-basic-tests.txt 2006-12-05 15:11:01 +01:00 +++ 1.58/storage/ndb/test/run-test/daily-basic-tests.txt 2006-12-05 15:11:01 +01:00 @@ -752,6 +752,10 @@ cmd: testNodeRestart args: -n Bug24543 T1 +max-time: 1500 +cmd: testSystemRestart +args: -n Bug24664 + # OLD FLEX max-time: 500 cmd: flexBench --- 1.4/storage/ndb/include/kernel/signaldata/BackupContinueB.hpp 2006-12-05 15:11:01 +01:00 +++ 1.5/storage/ndb/include/kernel/signaldata/BackupContinueB.hpp 2006-12-05 15:11:01 +01:00 @@ -33,7 +33,8 @@ BUFFER_FULL_FRAG_COMPLETE = 3, BUFFER_FULL_META = 4, BACKUP_FRAGMENT_INFO = 5, - RESET_DISK_SPEED_COUNTER = 6 + RESET_DISK_SPEED_COUNTER = 6, + ZDELAY_SCAN_NEXT = 7 }; }; --- 1.30/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2006-12-05 15:11:01 +01:00 +++ 1.31/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2006-12-05 15:11:01 +01:00 @@ -8,7 +8,7 @@ Next DBDIH 7178 Next DBTC 8039 Next CMVMI 9000 -Next BACKUP 10036 +Next BACKUP 10038 Next DBUTIL 11002 Next DBTUX 12008 Next SUMA 13001 @@ -424,6 +424,9 @@ 10033: backup checkscan 10034: define backup reply error 10035: Fail to allocate buffers + +10036: Halt backup for table >= 2 +10037: Resume backup (from 10036) 11001: Send UTIL_SEQUENCE_REF (in master) --- 1.56/storage/ndb/src/kernel/blocks/backup/Backup.cpp 2006-12-05 15:11:01 +01:00 +++ 1.57/storage/ndb/src/kernel/blocks/backup/Backup.cpp 2006-12-05 15:11:01 +01:00 @@ -356,6 +356,25 @@ GetTabInfoReq::SignalLength, JBB); return; } + case BackupContinueB::ZDELAY_SCAN_NEXT: + if (ERROR_INSERTED(10036)) + { + jam(); + sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 300, + signal->getLength()); + return; + } + else + { + jam(); + CLEAR_ERROR_INSERT_VALUE; + ndbout_c("Resuming backup"); + 
memmove(signal->theData, signal->theData + 1, + 4*ScanFragNextReq::SignalLength); + sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, + ScanFragNextReq::SignalLength, JBB); + return ; + } default: ndbrequire(0); }//switch @@ -3920,6 +3939,22 @@ req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8); req->batch_size_rows= 16; req->batch_size_bytes= 0; + + if (ERROR_INSERTED(10036) && + filePtr.p->tableId >= 2 && + filePtr.p->operation.noOfRecords > 0) + { + ndbout_c("halting backup for table %d fragment: %d after %d records", + filePtr.p->tableId, + filePtr.p->fragmentNo, + filePtr.p->operation.noOfRecords); + memmove(signal->theData+1, signal->theData, + 4*ScanFragNextReq::SignalLength); + signal->theData[0] = BackupContinueB::ZDELAY_SCAN_NEXT; + sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, + 300, 1+ScanFragNextReq::SignalLength); + return; + } if(ERROR_INSERTED(10032)) sendSignalWithDelay(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, 100, ScanFragNextReq::SignalLength); --- 1.16/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp 2006-12-05 15:11:01 +01:00 +++ 1.17/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp 2006-12-05 15:11:01 +01:00 @@ -152,10 +152,10 @@ static inline bool -operator>=(const Local_key& key1, const Local_key& key2) +operator>(const Local_key& key1, const Local_key& key2) { return key1.m_page_no > key2.m_page_no || - (key1.m_page_no == key2.m_page_no && key1.m_page_idx >= key2.m_page_idx); + (key1.m_page_no == key2.m_page_no && key1.m_page_idx > key2.m_page_idx); } void @@ -187,7 +187,7 @@ Local_key rowid = regOperPtr->m_tuple_location; Local_key scanpos = scanOp.p->m_scanPos.m_key; rowid.m_page_no = page->frag_page_id; - if (rowid >= scanpos) + if (rowid > scanpos) { extra_bits = Tuple_header::LCP_KEEP; // Note REMOVE FREE ptr->m_operation_ptr_i = lcp_keep_list; @@ -215,6 +215,7 @@ { ndbassert(regOperPtr->op_struct.op_type != ZDELETE); + Uint32 lcpScan_ptr_i= regFragPtr->m_lcp_scan_op; Uint32 save= tuple_ptr->m_operation_ptr_i; Uint32 bits= 
tuple_ptr->m_header_bits; @@ -264,7 +265,6 @@ Local_key key; memcpy(&key, copy->get_disk_ref_ptr(regTabPtr), sizeof(Local_key)); Uint32 logfile_group_id= regFragPtr->m_logfile_group_id; - Uint32 lcpScan_ptr_i= regFragPtr->m_lcp_scan_op; PagePtr diskPagePtr = *(PagePtr*)&m_pgman.m_ptr; ndbassert(diskPagePtr.p->m_page_no == key.m_page_no); @@ -273,19 +273,6 @@ if(copy_bits & Tuple_header::DISK_ALLOC) { disk_page_alloc(signal, regTabPtr, regFragPtr, &key, diskPagePtr, gci); - - if(lcpScan_ptr_i != RNIL) - { - ScanOpPtr scanOp; - c_scanOpPool.getPtr(scanOp, lcpScan_ptr_i); - Local_key rowid = regOperPtr->m_tuple_location; - Local_key scanpos = scanOp.p->m_scanPos.m_key; - rowid.m_page_no = pagePtr.p->frag_page_id; - if(rowid >= scanpos) - { - copy_bits |= Tuple_header::LCP_SKIP; - } - } } if(regTabPtr->m_attributes[DD].m_no_of_varsize == 0) @@ -312,6 +299,18 @@ copy_bits |= Tuple_header::DISK_PART; } + if(lcpScan_ptr_i != RNIL) + { + ScanOpPtr scanOp; + c_scanOpPool.getPtr(scanOp, lcpScan_ptr_i); + Local_key rowid = regOperPtr->m_tuple_location; + Local_key scanpos = scanOp.p->m_scanPos.m_key; + rowid.m_page_no = pagePtr.p->frag_page_id; + if(rowid > scanpos) + { + copy_bits |= Tuple_header::LCP_SKIP; + } + } Uint32 clear= Tuple_header::ALLOC | Tuple_header::FREE | --- 1.11/storage/ndb/test/ndbapi/testSystemRestart.cpp 2006-12-05 15:11:01 +01:00 +++ 1.12/storage/ndb/test/ndbapi/testSystemRestart.cpp 2006-12-05 15:11:01 +01:00 @@ -1162,6 +1162,64 @@ return result; } +int +runBug24664(NDBT_Context* ctx, NDBT_Step* step) +{ + int result = NDBT_OK; + NdbRestarter restarter; + Ndb* pNdb = GETNDB(step); + const Uint32 nodeCount = restarter.getNumDbNodes(); + + int records = ctx->getNumRecords(); + UtilTransactions utilTrans(*ctx->getTab()); + HugoTransactions hugoTrans(*ctx->getTab()); + + int args[] = { DumpStateOrd::DihMaxTimeBetweenLCP }; + int dump[] = { DumpStateOrd::DihStartLcpImmediately }; + + int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 }; + 
NdbLogEventHandle handle = + ndb_mgm_create_logevent_handle(restarter.handle, filter); + + struct ndb_logevent event; + + do { + CHECK(restarter.dumpStateAllNodes(args, 1) == 0); + CHECK(restarter.dumpStateAllNodes(dump, 1) == 0); + while(ndb_logevent_get_next(handle, &event, 0) >= 0 && + event.type != NDB_LE_LocalCheckpointStarted); + while(ndb_logevent_get_next(handle, &event, 0) >= 0 && + event.type != NDB_LE_LocalCheckpointCompleted); + + if (hugoTrans.loadTable(GETNDB(step), records) != 0){ + return NDBT_FAILED; + } + + restarter.insertErrorInAllNodes(10036); // Hang LCP + CHECK(restarter.dumpStateAllNodes(dump, 1) == 0); + while(ndb_logevent_get_next(handle, &event, 0) >= 0 && + event.type != NDB_LE_LocalCheckpointStarted); + NdbSleep_SecSleep(3); + CHECK(utilTrans.clearTable(pNdb, records) == 0); + if (hugoTrans.loadTable(GETNDB(step), records) != 0){ + return NDBT_FAILED; + } + + restarter.insertErrorInAllNodes(10037); // Resume LCP + while(ndb_logevent_get_next(handle, &event, 0) >= 0 && + event.type != NDB_LE_LocalCheckpointCompleted); + + while(ndb_logevent_get_next(handle, &event, 0) >= 0 && + event.type != NDB_LE_GlobalCheckpointCompleted); + while(ndb_logevent_get_next(handle, &event, 0) >= 0 && + event.type != NDB_LE_GlobalCheckpointCompleted); + restarter.restartAll(false, false, true); + CHECK(restarter.waitClusterStarted() == 0); + } while(false); + + return result; +} + NDBT_TESTSUITE(testSystemRestart); TESTCASE("SR1", "Basic system restart test. 
Focus on testing restart from REDO log.\n" @@ -1332,6 +1390,14 @@ INITIALIZER(runWaitStarted); INITIALIZER(runClearTable); STEP(runBug21536); + FINALIZER(runClearTable); +} +TESTCASE("Bug24664", + "Check handling of LCP skip/keep") +{ + INITIALIZER(runWaitStarted); + INITIALIZER(runClearTable); + STEP(runBug24664); FINALIZER(runClearTable); } NDBT_TESTSUITE_END(testSystemRestart); --- 1.16/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp 2006-12-05 15:11:01 +01:00 +++ 1.17/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp 2006-12-05 15:11:01 +01:00 @@ -54,8 +54,7 @@ // flags Uint32 bits = 0; - if (!AccScanReq::getLcpScanFlag(req->requestInfo) || - tablePtr.p->m_no_of_disk_attributes == 0) + if (!AccScanReq::getLcpScanFlag(req->requestInfo)) { // seize from pool and link to per-fragment list LocalDLList list(c_scanOpPool, frag.m_scanList); @@ -1052,24 +1051,21 @@ tablePtr.i = req->tableId; ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); - if(tablePtr.p->m_no_of_disk_attributes) - { - jam(); - FragrecordPtr fragPtr; - Uint32 fragId = req->fragmentId; - fragPtr.i = RNIL; - getFragmentrec(fragPtr, fragId, tablePtr.p); - ndbrequire(fragPtr.i != RNIL); - Fragrecord& frag = *fragPtr.p; - - ndbrequire(frag.m_lcp_scan_op == RNIL && c_lcp_scan_op != RNIL); - frag.m_lcp_scan_op = c_lcp_scan_op; - ScanOpPtr scanPtr; - c_scanOpPool.getPtr(scanPtr, frag.m_lcp_scan_op); - ndbrequire(scanPtr.p->m_fragPtrI == RNIL); - scanPtr.p->m_fragPtrI = fragPtr.i; - - scanFirst(signal, scanPtr); - scanPtr.p->m_state = ScanOp::First; - } + jam(); + FragrecordPtr fragPtr; + Uint32 fragId = req->fragmentId; + fragPtr.i = RNIL; + getFragmentrec(fragPtr, fragId, tablePtr.p); + ndbrequire(fragPtr.i != RNIL); + Fragrecord& frag = *fragPtr.p; + + ndbrequire(frag.m_lcp_scan_op == RNIL && c_lcp_scan_op != RNIL); + frag.m_lcp_scan_op = c_lcp_scan_op; + ScanOpPtr scanPtr; + c_scanOpPool.getPtr(scanPtr, frag.m_lcp_scan_op); + ndbrequire(scanPtr.p->m_fragPtrI == RNIL); + scanPtr.p->m_fragPtrI = 
fragPtr.i; + + scanFirst(signal, scanPtr); + scanPtr.p->m_state = ScanOp::First; }