From: Date: February 21 2007 8:46am Subject: bk commit into 5.1 tree (tomas:1.2425) BUG#25801 List-Archive: http://lists.mysql.com/commits/20248 X-Bug: 25801 Message-Id: <20070221074632.BB95D45E6DA@poseidon.mysql.com> Below is the list of changes that have just been committed into a local 5.1 repository of tomas. When tomas does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet 1.2425 07/02/21 14:45:58 tomas@stripped +5 -0 ndb - bug#25801 - improve error message if starting without enough REDO - decrease likelihood of trying to start too early storage/ndb/src/kernel/error/ndbd_exit_codes.c 1.17 07/02/21 14:45:43 tomas@stripped +1 -0 Add new error code (that maybe should have been there a long time ago) storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 1.53 07/02/21 14:45:43 tomas@stripped +83 -22 Add new check (during SR) that sufficient REDO is present before continuing SR storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 1.20 07/02/21 14:45:43 tomas@stripped +1 -0 Add list of GCIs of nodes so that we can check for sufficient REDO during an SR storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 1.104 07/02/21 14:45:43 tomas@stripped +53 -6 Add check for REDO during SR so that 1) cluster is not trying to start too soon 2) a better error message (than internal error) is provided if not enough REDO is present storage/ndb/include/mgmapi/ndbd_exit_codes.h 1.15 07/02/21 14:45:43 tomas@stripped +1 -0 Add new error code (that maybe should have been there a long time ago) # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. 
# User: tomas # Host: poseidon.mysql.com # Root: /home/tomas/mysql-5.1-telco-6.1_2 --- 1.103/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2007-02-20 03:29:16 +07:00 +++ 1.104/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2007-02-21 14:45:43 +07:00 @@ -1200,11 +1200,58 @@ void Dbdih::execTAB_COMMITREQ(Signal* si void Dbdih::execDIH_RESTARTREQ(Signal* signal) { jamEntry(); - cntrlblockref = signal->theData[0]; - if(m_ctx.m_config.getInitialStart()){ - sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB); - } else { - readGciFileLab(signal); + if (signal->theData[0]) + { + jam(); + cntrlblockref = signal->theData[0]; + if(m_ctx.m_config.getInitialStart()){ + sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB); + } else { + readGciFileLab(signal); + } + } + else + { + /** + * Precondition, (not checked) + * atleast 1 node in each node group + */ + Uint32 i; + NdbNodeBitmask mask; + mask.assign(NdbNodeBitmask::Size, signal->theData + 1); + Uint32 *node_gcis = signal->theData+1+NdbNodeBitmask::Size; + Uint32 node_group_gcis[MAX_NDB_NODES+1]; + bzero(node_group_gcis, sizeof(node_group_gcis)); + for (i = 0; inodeGroups); + ndbrequire(ng < MAX_NDB_NODES); + Uint32 gci = node_gcis[i]; + if (gci > node_group_gcis[ng]) + { + jam(); + node_group_gcis[ng] = gci; + } + } + } + for (i = 0; itheData[0] = i; + return; + } + } + signal->theData[0] = MAX_NDB_NODES; + return; } return; }//Dbdih::execDIH_RESTARTREQ() @@ -12424,7 +12471,7 @@ void Dbdih::makeNodeGroups(Uint32 nodeAr (buf, sizeof(buf), "Illegal initial start, no alive node in nodegroup %u", i); progError(__LINE__, - NDBD_EXIT_SR_RESTARTCONFLICT, + NDBD_EXIT_INSUFFICENT_NODES, buf); } --- 1.19/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2007-01-24 12:20:36 +07:00 +++ 1.20/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2007-02-21 14:45:43 +07:00 @@ -128,6 +128,7 @@ public: Uint32 m_president_candidate_gci; Uint16 m_regReqReqSent; Uint16 m_regReqReqRecv; + Uint32 m_node_gci[MAX_NDB_NODES]; } c_start; 
NdbNodeBitmask c_definedNodes; // DB nodes in config --- 1.52/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2007-01-29 03:57:31 +07:00 +++ 1.53/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2007-02-21 14:45:43 +07:00 @@ -1107,7 +1107,8 @@ void Qmgr::execCM_REGREF(Signal* signal) jam(); c_start.m_starting_nodes_w_log.set(TaddNodeno); } - + c_start.m_node_gci[TaddNodeno] = node_gci; + skip_nodes.bitAND(c_definedNodes); c_start.m_skip_nodes.bitOR(skip_nodes); @@ -1256,6 +1257,7 @@ Qmgr::check_startup(Signal* signal) wait.bitANDC(tmp); Uint32 retVal = 0; + Uint32 incompleteng = MAX_NDB_NODES; // Illegal value NdbNodeBitmask report_mask; if ((c_start.m_latest_gci == 0) || @@ -1341,7 +1343,7 @@ Qmgr::check_startup(Signal* signal) report_mask.assign(c_definedNodes); report_mask.bitANDC(c_start.m_starting_nodes); retVal = 1; - goto start_report; + goto check_log; case CheckNodeGroups::Partitioning: ndbrequire(result != CheckNodeGroups::Lose); signal->theData[1] = @@ -1349,7 +1351,7 @@ Qmgr::check_startup(Signal* signal) report_mask.assign(c_definedNodes); report_mask.bitANDC(c_start.m_starting_nodes); retVal = 1; - goto start_report; + goto check_log; } } @@ -1373,12 +1375,7 @@ Qmgr::check_startup(Signal* signal) case CheckNodeGroups::Partitioning: if (now < partitioned_timeout && result != CheckNodeGroups::Win) { - signal->theData[1] = c_restartPartionedTimeout == (Uint32) ~0 ? 4 : 5; - signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000); - report_mask.assign(c_definedNodes); - report_mask.bitANDC(c_start.m_starting_nodes); - retVal = 0; - goto start_report; + goto missinglog; } // Fall through... case CheckNodeGroups::Win: @@ -1386,12 +1383,61 @@ Qmgr::check_startup(Signal* signal) all ? 0x8001 : (result == CheckNodeGroups::Win ? 
0x8002 : 0x8003); report_mask.assign(c_definedNodes); report_mask.bitANDC(c_start.m_starting_nodes); - retVal = 1; - goto start_report; + retVal = 2; + goto check_log; } } ndbrequire(false); +check_log: + jam(); + { + Uint32 save[4+4*NdbNodeBitmask::Size]; + memcpy(save, signal->theData, sizeof(save)); + + signal->theData[0] = 0; + c_start.m_starting_nodes.copyto(NdbNodeBitmask::Size, signal->theData+1); + memcpy(signal->theData+1+NdbNodeBitmask::Size, c_start.m_node_gci, + 4*MAX_NDB_NODES); + EXECUTE_DIRECT(DBDIH, GSN_DIH_RESTARTREQ, signal, + 1+NdbNodeBitmask::Size+MAX_NDB_NODES); + + incompleteng = signal->theData[0]; + memcpy(signal->theData, save, sizeof(save)); + + if (incompleteng != MAX_NDB_NODES) + { + jam(); + if (retVal == 1) + { + jam(); + goto incomplete_log; + } + else if (retVal == 2) + { + if (now <= partitioned_timeout) + { + jam(); + goto missinglog; + } + else + { + goto incomplete_log; + } + } + ndbrequire(false); + } + } + goto start_report; + +missinglog: + signal->theData[1] = c_restartPartionedTimeout == (Uint32) ~0 ? 4 : 5; + signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 0; + goto start_report; + start_report: jam(); { @@ -1410,17 +1456,32 @@ start_report: missing_nodegroup: jam(); - char buf[100], mask1[100], mask2[100]; - c_start.m_starting_nodes.getText(mask1); - tmp.assign(c_start.m_starting_nodes); - tmp.bitANDC(c_start.m_starting_nodes_w_log); - tmp.getText(mask2); - BaseString::snprintf(buf, sizeof(buf), - "Unable to start missing node group! 
" - " starting: %s (missing fs for: %s)", - mask1, mask2); - progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf); - return 0; // Deadcode + { + char buf[100], mask1[100], mask2[100]; + c_start.m_starting_nodes.getText(mask1); + tmp.assign(c_start.m_starting_nodes); + tmp.bitANDC(c_start.m_starting_nodes_w_log); + tmp.getText(mask2); + BaseString::snprintf(buf, sizeof(buf), + "Unable to start missing node group! " + " starting: %s (missing fs for: %s)", + mask1, mask2); + progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf); + return 0; // Deadcode + } + +incomplete_log: + jam(); + { + char buf[100], mask1[100]; + c_start.m_starting_nodes.getText(mask1); + BaseString::snprintf(buf, sizeof(buf), + "Incomplete log for node group: %d! " + " starting nodes: %s", + incompleteng, mask1); + progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf); + return 0; // Deadcode + } } void --- 1.14/storage/ndb/include/mgmapi/ndbd_exit_codes.h 2006-12-24 02:20:07 +07:00 +++ 1.15/storage/ndb/include/mgmapi/ndbd_exit_codes.h 2007-02-21 14:45:43 +07:00 @@ -146,6 +146,7 @@ typedef ndbd_exit_classification_enum nd #define NDBD_EXIT_AFS_READ_UNDERFLOW 2816 #define NDBD_EXIT_INVALID_LCP_FILE 2352 +#define NDBD_EXIT_INSUFFICENT_NODES 2353 const char * ndbd_exit_message(int faultId, ndbd_exit_classification *cl); --- 1.16/storage/ndb/src/kernel/error/ndbd_exit_codes.c 2006-12-24 02:20:19 +07:00 +++ 1.17/storage/ndb/src/kernel/error/ndbd_exit_codes.c 2007-02-21 14:45:43 +07:00 @@ -160,6 +160,7 @@ static const ErrStruct errArray[] = {NDBD_EXIT_AFS_READ_UNDERFLOW , XFI, "Read underflow"}, {NDBD_EXIT_INVALID_LCP_FILE, XFI, "Invalid LCP" }, + {NDBD_EXIT_INSUFFICENT_NODES, XRE, "Insufficent nodes for system restart" }, /* Sentinel */ {0, XUE,