#At file:///home/jonas/src/telco-7.0/ based on revid:jonas@stripped
3880 Jonas Oreland 2010-10-20 [merge]
ndb - merge 63 to 70
modified:
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
=== modified file 'storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2010-10-20 07:12:58 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2010-10-20 09:49:55 +0000
@@ -14769,6 +14769,54 @@ bool Dbdih::findStartGci(ConstPtr<Replic
return false;
}//Dbdih::findStartGci()
+static
+Uint32
+count_db_nodes(ndb_mgm_configuration_iterator * iter)
+{
+ Uint32 cnt = 0;
+ for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
+ {
+ Uint32 nodeId = 0;
+ Uint32 type = ~Uint32(0);
+ if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId) == 0 &&
+ ndb_mgm_get_int_parameter(iter,CFG_TYPE_OF_SECTION, &type) == 0 &&
+ type == NodeInfo::DB)
+ {
+ cnt++;
+ }
+ }
+ return cnt;
+}
+
+/**
+ * Compute max time it can take to "resolve" cascading node-failures
+ * given hb-interval, arbit timeout and #db-nodes
+ */
+static
+Uint32
+compute_max_failure_time(const ndb_mgm_configuration_iterator * p,
+ ndb_mgm_configuration_iterator * cluster)
+{
+ Uint32 dbnodes = count_db_nodes(cluster);
+
+ Uint32 hbDBDB = 1500;
+ Uint32 arbitTimeout = 1000;
+ ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
+ ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
+
+ /*
+ * Max time for 1 node failure is
+ */
+ Uint32 max_time_one_failure = arbitTimeout + 4 * hbDBDB;
+
+ /**
+ * And worst case...this can be cascading failure with all but self
+ */
+ Uint32 max_time_total_failure = (dbnodes - 1) * max_time_one_failure;
+
+ return max_time_total_failure;
+}
+
void Dbdih::initCommonData()
{
c_blockCommit = false;
@@ -14841,19 +14889,25 @@ void Dbdih::initCommonData()
"Only up to four replicas are supported. Check NoOfReplicas.");
}
+ Uint32 max_failure_time = compute_max_failure_time
+ (p, m_ctx.m_config.getClusterConfigIterator());
+
bzero(&m_gcp_save, sizeof(m_gcp_save));
bzero(&m_micro_gcp, sizeof(m_micro_gcp));
{
- Uint32 tmp = 2000;
- ndb_mgm_get_int_parameter(p, CFG_DB_GCP_INTERVAL, &tmp);
- tmp = tmp > 60000 ? 60000 : (tmp < 10 ? 10 : tmp);
- m_gcp_save.m_master.m_time_between_gcp = tmp;
-
+ { // Set time-between global checkpoint
+ Uint32 tmp = 2000;
+ ndb_mgm_get_int_parameter(p, CFG_DB_GCP_INTERVAL, &tmp);
+ tmp = tmp > 60000 ? 60000 : (tmp < 10 ? 10 : tmp);
+ m_gcp_save.m_master.m_time_between_gcp = tmp;
+ }
+
+ Uint32 tmp = 0;
if (ndb_mgm_get_int_parameter(p, CFG_DB_MICRO_GCP_INTERVAL, &tmp) == 0 &&
tmp)
{
/**
- * A value is set for micro gcp...run new protocol when applicable
+ * Set time-between epochs
*/
if (tmp > m_gcp_save.m_master.m_time_between_gcp)
tmp = m_gcp_save.m_master.m_time_between_gcp;
@@ -14862,24 +14916,19 @@ void Dbdih::initCommonData()
m_micro_gcp.m_master.m_time_between_gcp = tmp;
}
- /**
- * No config...hard code...
- */
- m_gcp_monitor.m_gcp_save.m_max_lag =
- (m_gcp_save.m_master.m_time_between_gcp + 120000) / 100; // 2 minutes
+ { // Set time-between global checkpoint timeout
+ Uint32 tmp = 120000; // No config, hard code 2 minutes
+ tmp += max_failure_time; //
+ m_gcp_monitor.m_gcp_save.m_max_lag =
+ (m_gcp_save.m_master.m_time_between_gcp + tmp) / 100;
+ }
- {
+ { // Set time-between epochs timeout
Uint32 tmp = 4000;
- Uint32 hbDBDB = 1500;
- Uint32 arbitTimeout = 1000;
ndb_mgm_get_int_parameter(p, CFG_DB_MICRO_GCP_TIMEOUT, &tmp);
- ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
- ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
- Uint32 max_lag = tmp + 4 * hbDBDB;
- if (tmp + arbitTimeout > max_lag)
- max_lag = tmp + arbitTimeout;
+ tmp += max_failure_time;
m_gcp_monitor.m_micro_gcp.m_max_lag =
- (m_micro_gcp.m_master.m_time_between_gcp + max_lag) / 100;
+ (m_micro_gcp.m_master.m_time_between_gcp + tmp) / 100;
}
}
}//Dbdih::initCommonData()
=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2010-10-13 12:22:51 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2010-10-20 09:49:55 +0000
@@ -13173,7 +13173,7 @@ void Dblqh::execBACKUP_FRAGMENT_CONF(Sig
/**
* Update maxGciInLcp after scan has been performed
*/
-#if defined VM_TRACE || defined ERROR_INSERTED
+#if defined VM_TRACE || defined ERROR_INSERT
if (fragptr.p->newestGci != fragptr.p->maxGciInLcp)
{
ndbout_c("tab: %u frag: %u increasing maxGciInLcp from %u to %u",
Attachment: [text/bzr-bundle] bzr/jonas@mysql.com-20101020094955-fr3nxe2j2h106p12.bundle
| Thread |
|---|
| • bzr commit into mysql-5.1-telco-7.0 branch (jonas:3880) | Jonas Oreland | 20 Oct |