Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-02-05 19:47:19+01:00, jonas@eel.(none) +5 -0
ndb - bug#25801
- improve error message if starting without enough REDO
- decrease likelihood of trying to start too early
storage/ndb/include/mgmapi/ndbd_exit_codes.h@stripped, 2007-02-05 19:47:15+01:00,
jonas@eel.(none) +1 -0
Add new error code (that maybe should have been there a looong time)
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp@stripped, 2007-02-05 19:47:16+01:00,
jonas@eel.(none) +53 -6
Add new check (during SR) that sufficient REDO is present
before continuing SR
storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp@stripped, 2007-02-05 19:47:16+01:00,
jonas@eel.(none) +1 -0
Add list of GCIs of nodes so that we can check for sufficient REDO during an SR
storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp@stripped, 2007-02-05 19:47:16+01:00,
jonas@eel.(none) +83 -22
Add check for REDO during SR
so that
1) cluster is not trying to start too soon
2) a better error message (than internal error) is provided if not enough REDO is
present
storage/ndb/src/kernel/error/ndbd_exit_codes.c@stripped, 2007-02-05 19:47:16+01:00,
jonas@eel.(none) +1 -0
Add new error code (that maybe should have been there a looong time)
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: jonas
# Host: eel.(none)
# Root: /home/jonas/src/51-work
--- 1.101/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2007-02-05 19:47:28 +01:00
+++ 1.102/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2007-02-05 19:47:28 +01:00
@@ -1194,11 +1194,58 @@
void Dbdih::execDIH_RESTARTREQ(Signal* signal)
{
jamEntry();
- cntrlblockref = signal->theData[0];
- if(m_ctx.m_config.getInitialStart()){
- sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
- } else {
- readGciFileLab(signal);
+ if (signal->theData[0])
+ {
+ jam();
+ cntrlblockref = signal->theData[0];
+ if(m_ctx.m_config.getInitialStart()){
+ sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
+ } else {
+ readGciFileLab(signal);
+ }
+ }
+ else
+ {
+ /**
+ * Precondition, (not checked)
+ * atleast 1 node in each node group
+ */
+ Uint32 i;
+ NdbNodeBitmask mask;
+ mask.assign(NdbNodeBitmask::Size, signal->theData + 1);
+ Uint32 *node_gcis = signal->theData+1+NdbNodeBitmask::Size;
+ Uint32 node_group_gcis[MAX_NDB_NODES+1];
+ bzero(node_group_gcis, sizeof(node_group_gcis));
+ for (i = 0; i<MAX_NDB_NODES; i++)
+ {
+ if (mask.get(i))
+ {
+ jam();
+ Uint32 ng = Sysfile::getNodeGroup(i, SYSFILE->nodeGroups);
+ ndbrequire(ng < MAX_NDB_NODES);
+ Uint32 gci = node_gcis[i];
+ if (gci > node_group_gcis[ng])
+ {
+ jam();
+ node_group_gcis[ng] = gci;
+ }
+ }
+ }
+ for (i = 0; i<MAX_NDB_NODES && node_group_gcis[i] == 0; i++);
+
+ Uint32 gci = node_group_gcis[i];
+ for (i++ ; i<MAX_NDB_NODES; i++)
+ {
+ jam();
+ if (node_group_gcis[i] && node_group_gcis[i] != gci)
+ {
+ jam();
+ signal->theData[0] = i;
+ return;
+ }
+ }
+ signal->theData[0] = MAX_NDB_NODES;
+ return;
}
return;
}//Dbdih::execDIH_RESTARTREQ()
@@ -12391,7 +12438,7 @@
(buf, sizeof(buf),
"Illegal initial start, no alive node in nodegroup %u", i);
progError(__LINE__,
- NDBD_EXIT_SR_RESTARTCONFLICT,
+ NDBD_EXIT_INSUFFICENT_NODES,
buf);
}
--- 1.18/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2007-02-05 19:47:28 +01:00
+++ 1.19/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2007-02-05 19:47:28 +01:00
@@ -128,6 +128,7 @@
Uint32 m_president_candidate_gci;
Uint16 m_regReqReqSent;
Uint16 m_regReqReqRecv;
+ Uint32 m_node_gci[MAX_NDB_NODES];
} c_start;
NdbNodeBitmask c_definedNodes; // DB nodes in config
--- 1.49/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2007-02-05 19:47:28 +01:00
+++ 1.50/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2007-02-05 19:47:28 +01:00
@@ -1093,7 +1093,8 @@
jam();
c_start.m_starting_nodes_w_log.set(TaddNodeno);
}
-
+ c_start.m_node_gci[TaddNodeno] = node_gci;
+
skip_nodes.bitAND(c_definedNodes);
c_start.m_skip_nodes.bitOR(skip_nodes);
@@ -1242,6 +1243,7 @@
wait.bitANDC(tmp);
Uint32 retVal = 0;
+ Uint32 incompleteng = MAX_NDB_NODES; // Illegal value
NdbNodeBitmask report_mask;
if ((c_start.m_latest_gci == 0) ||
@@ -1327,7 +1329,7 @@
report_mask.assign(c_definedNodes);
report_mask.bitANDC(c_start.m_starting_nodes);
retVal = 1;
- goto start_report;
+ goto check_log;
case CheckNodeGroups::Partitioning:
ndbrequire(result != CheckNodeGroups::Lose);
signal->theData[1] =
@@ -1335,7 +1337,7 @@
report_mask.assign(c_definedNodes);
report_mask.bitANDC(c_start.m_starting_nodes);
retVal = 1;
- goto start_report;
+ goto check_log;
}
}
@@ -1359,12 +1361,7 @@
case CheckNodeGroups::Partitioning:
if (now < partitioned_timeout && result != CheckNodeGroups::Win)
{
- signal->theData[1] = c_restartPartionedTimeout == (Uint32) ~0 ? 4 : 5;
- signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000);
- report_mask.assign(c_definedNodes);
- report_mask.bitANDC(c_start.m_starting_nodes);
- retVal = 0;
- goto start_report;
+ goto missinglog;
}
// Fall through...
case CheckNodeGroups::Win:
@@ -1372,12 +1369,61 @@
all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
report_mask.assign(c_definedNodes);
report_mask.bitANDC(c_start.m_starting_nodes);
- retVal = 1;
- goto start_report;
+ retVal = 2;
+ goto check_log;
}
}
ndbrequire(false);
+check_log:
+ jam();
+ {
+ Uint32 save[4+4*NdbNodeBitmask::Size];
+ memcpy(save, signal->theData, sizeof(save));
+
+ signal->theData[0] = 0;
+ c_start.m_starting_nodes.copyto(NdbNodeBitmask::Size, signal->theData+1);
+ memcpy(signal->theData+1+NdbNodeBitmask::Size, c_start.m_node_gci,
+ 4*MAX_NDB_NODES);
+ EXECUTE_DIRECT(DBDIH, GSN_DIH_RESTARTREQ, signal,
+ 1+NdbNodeBitmask::Size+MAX_NDB_NODES);
+
+ incompleteng = signal->theData[0];
+ memcpy(signal->theData, save, sizeof(save));
+
+ if (incompleteng != MAX_NDB_NODES)
+ {
+ jam();
+ if (retVal == 1)
+ {
+ jam();
+ goto incomplete_log;
+ }
+ else if (retVal == 2)
+ {
+ if (now <= partitioned_timeout)
+ {
+ jam();
+ goto missinglog;
+ }
+ else
+ {
+ goto incomplete_log;
+ }
+ }
+ ndbrequire(false);
+ }
+ }
+ goto start_report;
+
+missinglog:
+ signal->theData[1] = c_restartPartionedTimeout == (Uint32) ~0 ? 4 : 5;
+ signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000);
+ report_mask.assign(c_definedNodes);
+ report_mask.bitANDC(c_start.m_starting_nodes);
+ retVal = 0;
+ goto start_report;
+
start_report:
jam();
{
@@ -1396,17 +1442,32 @@
missing_nodegroup:
jam();
- char buf[100], mask1[100], mask2[100];
- c_start.m_starting_nodes.getText(mask1);
- tmp.assign(c_start.m_starting_nodes);
- tmp.bitANDC(c_start.m_starting_nodes_w_log);
- tmp.getText(mask2);
- BaseString::snprintf(buf, sizeof(buf),
- "Unable to start missing node group! "
- " starting: %s (missing fs for: %s)",
- mask1, mask2);
- progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf);
- return 0; // Deadcode
+ {
+ char buf[100], mask1[100], mask2[100];
+ c_start.m_starting_nodes.getText(mask1);
+ tmp.assign(c_start.m_starting_nodes);
+ tmp.bitANDC(c_start.m_starting_nodes_w_log);
+ tmp.getText(mask2);
+ BaseString::snprintf(buf, sizeof(buf),
+ "Unable to start missing node group! "
+ " starting: %s (missing fs for: %s)",
+ mask1, mask2);
+ progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
+ return 0; // Deadcode
+ }
+
+incomplete_log:
+ jam();
+ {
+ char buf[100], mask1[100];
+ c_start.m_starting_nodes.getText(mask1);
+ BaseString::snprintf(buf, sizeof(buf),
+ "Incomplete log for node group: %d! "
+ " starting nodes: %s",
+ incompleteng, mask1);
+ progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
+ return 0; // Deadcode
+ }
}
void
--- 1.14/storage/ndb/include/mgmapi/ndbd_exit_codes.h 2007-02-05 19:47:28 +01:00
+++ 1.15/storage/ndb/include/mgmapi/ndbd_exit_codes.h 2007-02-05 19:47:28 +01:00
@@ -146,6 +146,7 @@
#define NDBD_EXIT_AFS_READ_UNDERFLOW 2816
#define NDBD_EXIT_INVALID_LCP_FILE 2352
+#define NDBD_EXIT_INSUFFICENT_NODES 2353
const char *
ndbd_exit_message(int faultId, ndbd_exit_classification *cl);
--- 1.16/storage/ndb/src/kernel/error/ndbd_exit_codes.c 2007-02-05 19:47:28 +01:00
+++ 1.17/storage/ndb/src/kernel/error/ndbd_exit_codes.c 2007-02-05 19:47:28 +01:00
@@ -160,6 +160,7 @@
{NDBD_EXIT_AFS_READ_UNDERFLOW , XFI, "Read underflow"},
{NDBD_EXIT_INVALID_LCP_FILE, XFI, "Invalid LCP" },
+ {NDBD_EXIT_INSUFFICENT_NODES, XRE, "Insufficent nodes for system restart" },
/* Sentinel */
{0, XUE,
| Thread |
|---|
| • bk commit into 5.1 tree (jonas:1.2420) BUG#25801 | jonas | 5 Feb |