From: Date: February 5 2007 7:47pm Subject: bk commit into 5.1 tree (jonas:1.2420) BUG#25801 List-Archive: http://lists.mysql.com/commits/19334 X-Bug: 25801 Message-Id: <20070205184728.8175141D412@eel.mysql.com> Below is the list of changes that have just been committed into a local 5.1 repository of jonas. When jonas does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet@stripped, 2007-02-05 19:47:19+01:00, jonas@eel.(none) +5 -0 ndb - bug#25801 - improve error message if starting without enough REDO - decrease likelihood of trying to start too early storage/ndb/include/mgmapi/ndbd_exit_codes.h@stripped, 2007-02-05 19:47:15+01:00, jonas@eel.(none) +1 -0 Add new error code (that maybe should have been there a looong time) storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp@stripped, 2007-02-05 19:47:16+01:00, jonas@eel.(none) +53 -6 Add new check (during SR) that sufficient REDO is present before continuing SR storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp@stripped, 2007-02-05 19:47:16+01:00, jonas@eel.(none) +1 -0 Add list of GCIs of nodes so that we can check for sufficient REDO during a SR storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp@stripped, 2007-02-05 19:47:16+01:00, jonas@eel.(none) +83 -22 Add check for REDO during SR so that 1) cluster is not trying to start too soon 2) a better error message (than internal error) is provided if not enough REDO is present storage/ndb/src/kernel/error/ndbd_exit_codes.c@stripped, 2007-02-05 19:47:16+01:00, jonas@eel.(none) +1 -0 Add new error code (that maybe should have been there a looong time) # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. 
# User: jonas # Host: eel.(none) # Root: /home/jonas/src/51-work --- 1.101/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2007-02-05 19:47:28 +01:00 +++ 1.102/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2007-02-05 19:47:28 +01:00 @@ -1194,11 +1194,58 @@ void Dbdih::execDIH_RESTARTREQ(Signal* signal) { jamEntry(); - cntrlblockref = signal->theData[0]; - if(m_ctx.m_config.getInitialStart()){ - sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB); - } else { - readGciFileLab(signal); + if (signal->theData[0]) + { + jam(); + cntrlblockref = signal->theData[0]; + if(m_ctx.m_config.getInitialStart()){ + sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB); + } else { + readGciFileLab(signal); + } + } + else + { + /** + * Precondition, (not checked) + * atleast 1 node in each node group + */ + Uint32 i; + NdbNodeBitmask mask; + mask.assign(NdbNodeBitmask::Size, signal->theData + 1); + Uint32 *node_gcis = signal->theData+1+NdbNodeBitmask::Size; + Uint32 node_group_gcis[MAX_NDB_NODES+1]; + bzero(node_group_gcis, sizeof(node_group_gcis)); + for (i = 0; inodeGroups); + ndbrequire(ng < MAX_NDB_NODES); + Uint32 gci = node_gcis[i]; + if (gci > node_group_gcis[ng]) + { + jam(); + node_group_gcis[ng] = gci; + } + } + } + for (i = 0; itheData[0] = i; + return; + } + } + signal->theData[0] = MAX_NDB_NODES; + return; } return; }//Dbdih::execDIH_RESTARTREQ() @@ -12391,7 +12438,7 @@ (buf, sizeof(buf), "Illegal initial start, no alive node in nodegroup %u", i); progError(__LINE__, - NDBD_EXIT_SR_RESTARTCONFLICT, + NDBD_EXIT_INSUFFICENT_NODES, buf); } --- 1.18/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2007-02-05 19:47:28 +01:00 +++ 1.19/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2007-02-05 19:47:28 +01:00 @@ -128,6 +128,7 @@ Uint32 m_president_candidate_gci; Uint16 m_regReqReqSent; Uint16 m_regReqReqRecv; + Uint32 m_node_gci[MAX_NDB_NODES]; } c_start; NdbNodeBitmask c_definedNodes; // DB nodes in config --- 
1.49/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2007-02-05 19:47:28 +01:00 +++ 1.50/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2007-02-05 19:47:28 +01:00 @@ -1093,7 +1093,8 @@ jam(); c_start.m_starting_nodes_w_log.set(TaddNodeno); } - + c_start.m_node_gci[TaddNodeno] = node_gci; + skip_nodes.bitAND(c_definedNodes); c_start.m_skip_nodes.bitOR(skip_nodes); @@ -1242,6 +1243,7 @@ wait.bitANDC(tmp); Uint32 retVal = 0; + Uint32 incompleteng = MAX_NDB_NODES; // Illegal value NdbNodeBitmask report_mask; if ((c_start.m_latest_gci == 0) || @@ -1327,7 +1329,7 @@ report_mask.assign(c_definedNodes); report_mask.bitANDC(c_start.m_starting_nodes); retVal = 1; - goto start_report; + goto check_log; case CheckNodeGroups::Partitioning: ndbrequire(result != CheckNodeGroups::Lose); signal->theData[1] = @@ -1335,7 +1337,7 @@ report_mask.assign(c_definedNodes); report_mask.bitANDC(c_start.m_starting_nodes); retVal = 1; - goto start_report; + goto check_log; } } @@ -1359,12 +1361,7 @@ case CheckNodeGroups::Partitioning: if (now < partitioned_timeout && result != CheckNodeGroups::Win) { - signal->theData[1] = c_restartPartionedTimeout == (Uint32) ~0 ? 4 : 5; - signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000); - report_mask.assign(c_definedNodes); - report_mask.bitANDC(c_start.m_starting_nodes); - retVal = 0; - goto start_report; + goto missinglog; } // Fall through... case CheckNodeGroups::Win: @@ -1372,12 +1369,61 @@ all ? 0x8001 : (result == CheckNodeGroups::Win ? 
0x8002 : 0x8003); report_mask.assign(c_definedNodes); report_mask.bitANDC(c_start.m_starting_nodes); - retVal = 1; - goto start_report; + retVal = 2; + goto check_log; } } ndbrequire(false); +check_log: + jam(); + { + Uint32 save[4+4*NdbNodeBitmask::Size]; + memcpy(save, signal->theData, sizeof(save)); + + signal->theData[0] = 0; + c_start.m_starting_nodes.copyto(NdbNodeBitmask::Size, signal->theData+1); + memcpy(signal->theData+1+NdbNodeBitmask::Size, c_start.m_node_gci, + 4*MAX_NDB_NODES); + EXECUTE_DIRECT(DBDIH, GSN_DIH_RESTARTREQ, signal, + 1+NdbNodeBitmask::Size+MAX_NDB_NODES); + + incompleteng = signal->theData[0]; + memcpy(signal->theData, save, sizeof(save)); + + if (incompleteng != MAX_NDB_NODES) + { + jam(); + if (retVal == 1) + { + jam(); + goto incomplete_log; + } + else if (retVal == 2) + { + if (now <= partitioned_timeout) + { + jam(); + goto missinglog; + } + else + { + goto incomplete_log; + } + } + ndbrequire(false); + } + } + goto start_report; + +missinglog: + signal->theData[1] = c_restartPartionedTimeout == (Uint32) ~0 ? 4 : 5; + signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 0; + goto start_report; + start_report: jam(); { @@ -1396,17 +1442,32 @@ missing_nodegroup: jam(); - char buf[100], mask1[100], mask2[100]; - c_start.m_starting_nodes.getText(mask1); - tmp.assign(c_start.m_starting_nodes); - tmp.bitANDC(c_start.m_starting_nodes_w_log); - tmp.getText(mask2); - BaseString::snprintf(buf, sizeof(buf), - "Unable to start missing node group! 
" - " starting: %s (missing fs for: %s)", - mask1, mask2); - progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf); - return 0; // Deadcode + { + char buf[100], mask1[100], mask2[100]; + c_start.m_starting_nodes.getText(mask1); + tmp.assign(c_start.m_starting_nodes); + tmp.bitANDC(c_start.m_starting_nodes_w_log); + tmp.getText(mask2); + BaseString::snprintf(buf, sizeof(buf), + "Unable to start missing node group! " + " starting: %s (missing fs for: %s)", + mask1, mask2); + progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf); + return 0; // Deadcode + } + +incomplete_log: + jam(); + { + char buf[100], mask1[100]; + c_start.m_starting_nodes.getText(mask1); + BaseString::snprintf(buf, sizeof(buf), + "Incomplete log for node group: %d! " + " starting nodes: %s", + incompleteng, mask1); + progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf); + return 0; // Deadcode + } } void --- 1.14/storage/ndb/include/mgmapi/ndbd_exit_codes.h 2007-02-05 19:47:28 +01:00 +++ 1.15/storage/ndb/include/mgmapi/ndbd_exit_codes.h 2007-02-05 19:47:28 +01:00 @@ -146,6 +146,7 @@ #define NDBD_EXIT_AFS_READ_UNDERFLOW 2816 #define NDBD_EXIT_INVALID_LCP_FILE 2352 +#define NDBD_EXIT_INSUFFICENT_NODES 2353 const char * ndbd_exit_message(int faultId, ndbd_exit_classification *cl); --- 1.16/storage/ndb/src/kernel/error/ndbd_exit_codes.c 2007-02-05 19:47:28 +01:00 +++ 1.17/storage/ndb/src/kernel/error/ndbd_exit_codes.c 2007-02-05 19:47:28 +01:00 @@ -160,6 +160,7 @@ {NDBD_EXIT_AFS_READ_UNDERFLOW , XFI, "Read underflow"}, {NDBD_EXIT_INVALID_LCP_FILE, XFI, "Invalid LCP" }, + {NDBD_EXIT_INSUFFICENT_NODES, XRE, "Insufficent nodes for system restart" }, /* Sentinel */ {0, XUE,