#At file:///home/jonas/src/telco-6.2/
2622 jonas@stripped 2008-06-11
ndb - bug#37338
Fix corner case where a node failure combined with an API failure
causes a subsequent node restart to fail
modified:
storage/ndb/src/kernel/blocks/suma/Suma.cpp
storage/ndb/test/ndbapi/test_event.cpp
storage/ndb/test/run-test/daily-basic-tests.txt
per-file comments:
storage/ndb/src/kernel/blocks/suma/Suma.cpp
1) put gci in buffer if resend is ongoing
2) use the correct GCI interval for resend
(previously incorrect if there was no active subscriber)
storage/ndb/test/ndbapi/test_event.cpp
testcase
storage/ndb/test/run-test/daily-basic-tests.txt
testcase
=== modified file 'storage/ndb/src/kernel/blocks/suma/Suma.cpp'
--- a/storage/ndb/src/kernel/blocks/suma/Suma.cpp 2008-06-11 08:23:36 +0000
+++ b/storage/ndb/src/kernel/blocks/suma/Suma.cpp 2008-06-11 19:49:15 +0000
@@ -3781,12 +3781,13 @@ Suma::execSUB_GCP_COMPLETE_REP(Signal* s
/**
* Add GCP COMPLETE REP to buffer
*/
+ bool subscribers = !c_subscriber_nodes.isclear();
for(Uint32 i = 0; i<c_no_of_buckets; i++)
{
if(m_active_buckets.get(i))
continue;
- if (!c_subscriber_nodes.isclear())
+ if (subscribers || (c_buckets[i].m_state & Bucket::BUCKET_RESEND))
{
//Uint32* dst;
get_buffer_ptr(signal, i, gci, 0);
@@ -5020,7 +5021,7 @@ Suma::start_resend(Signal* signal, Uint3
}
Uint64 min= bucket->m_max_acked_gci + 1;
- Uint64 max = pos.m_max_gci;
+ Uint64 max = m_max_seen_gci;
ndbrequire(max <= m_max_seen_gci);
@@ -5029,7 +5030,9 @@ Suma::start_resend(Signal* signal, Uint3
ndbrequire(pos.m_page_id == bucket->m_buffer_tail);
m_active_buckets.set(buck);
m_gcp_complete_rep_count ++;
- ndbout_c("empty bucket -> active");
+ ndbout_c("empty bucket (%u/%u %u/%u) -> active",
+ Uint32(min >> 32), Uint32(min),
+ Uint32(max >> 32), Uint32(max));
return;
}
=== modified file 'storage/ndb/test/ndbapi/test_event.cpp'
--- a/storage/ndb/test/ndbapi/test_event.cpp 2008-06-11 08:23:36 +0000
+++ b/storage/ndb/test/ndbapi/test_event.cpp 2008-06-11 19:49:15 +0000
@@ -2579,6 +2579,72 @@ runBug37279(NDBT_Context* ctx, NDBT_Step
return NDBT_OK;
}
+int // returns NDBT_OK or NDBT_FAILED
+runBug37338(NDBT_Context* ctx, NDBT_Step* step) // test step for bug#37338: drop a table that still has an event operation, then restart a data node
+{
+ NdbRestarter res;
+ if (res.getNumDbNodes() < 2) // fewer than two data nodes: stop the test and report OK without running the scenario
+ {
+ ctx->stopTest();
+ return NDBT_OK;
+ }
+ // Pick one data node at random; the same node is restarted in every loop iteration.
+ int nodeId = res.getDbNodeId(rand() % res.getNumDbNodes());
+ // Build a copy of the context table's definition under the scratch name "BugXXX".
+ Ndb* pNdb = GETNDB(step);
+ NdbDictionary::Dictionary* dict = pNdb->getDictionary();
+ const NdbDictionary::Table* tab = dict->getTable(ctx->getTab()->getName()); // NOTE(review): tab is dereferenced below without a null check
+
+ const char * name = "BugXXX";
+ NdbDictionary::Table copy = * tab;
+ copy.setName(name);
+ dict->dropTable(name); // result ignored; presumably clears a table left over from an earlier run
+
+ for (int i = 0; i<ctx->getNumLoops(); i++)
+ {
+ Ndb* ndb0;
+ Ndb_cluster_connection *con0;
+ NdbEventOperation* pOp0;
+ NdbDictionary::Dictionary * dict0;
+ // cc() is defined elsewhere in the file; presumably it creates a separate cluster connection and Ndb object - TODO confirm
+ cc(&con0, &ndb0);
+ dict0 = ndb0->getDictionary();
+ if (dict0->createTable(copy) != 0)
+ {
+ ndbout << dict0->getNdbError() << endl;
+ return NDBT_FAILED; // NOTE(review): ndb0 and con0 are not deleted on this path
+ }
+
+ const NdbDictionary::Table * copyptr = dict0->getTable(name);
+ if (copyptr == 0)
+ {
+ return NDBT_FAILED; // NOTE(review): ndb0 and con0 are not deleted on this path
+ }
+ createEvent(ndb0, *copyptr, ctx);
+ pOp0 = createEventOperation(ndb0, *copyptr); // NOTE(review): pOp0 is used below without being checked
+ dict0 = ndb0->getDictionary();dict->dropTable(name); // table is dropped (via the step's dict, not dict0) while pOp0 still exists
+ // Restart the chosen node with nostart and abort set, then waitNodesNoStart and startNodes; none of these return values are checked.
+ res.restartOneDbNode(nodeId,
+ /** initial */ false,
+ /** nostart */ true,
+ /** abort */ true);
+
+ res.waitNodesNoStart(&nodeId, 1);
+ res.startNodes(&nodeId, 1);
+ if (res.waitClusterStarted()) // a non-zero result is treated as failure
+ {
+ return NDBT_FAILED; // NOTE(review): pOp0, ndb0 and con0 are not released on this path
+ }
+ // Release the event operation first, then the Ndb object and its connection.
+ ndb0->dropEventOperation(pOp0);
+
+ delete ndb0;
+ delete con0;
+ }
+
+ return NDBT_OK;
+}
+
NDBT_TESTSUITE(test_event);
TESTCASE("BasicEventOperation",
"Verify that we can listen to Events"
@@ -2762,6 +2828,10 @@ TESTCASE("Bug37279", "")
{
INITIALIZER(runBug37279);
}
+TESTCASE("Bug37338", "") // test case for bug#37338; the second argument is left as an empty string
+{
+ INITIALIZER(runBug37338); // runBug37338 is the only step listed for this case
+}
NDBT_TESTSUITE_END(test_event);
int main(int argc, const char** argv){
=== modified file 'storage/ndb/test/run-test/daily-basic-tests.txt'
--- a/storage/ndb/test/run-test/daily-basic-tests.txt 2008-06-11 08:23:36 +0000
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt 2008-06-11 19:49:15 +0000
@@ -1102,6 +1102,10 @@ max-time: 300
cmd: test_event
args: -n Bug37279 T1
+max-time: 300
+cmd: test_event
+args: -n Bug37338 T1
+
# 2008-04-22
max-time: 1500
cmd: testNodeRestart
| Thread |
|---|
| • bzr commit into mysql-5.1-telco-6.2 branch (jonas:2622) Bug#37338 | jonas | 11 Jun |