#At file:///home/frazer/bzr/mysql-5.1-telco-6.2/
2724 Frazer Clement 2008-11-08
Bug#39867 MySQL Cluster : Failures during Blob part operations not always
detected
2 parts :
1) The Ndb SQL handler (ha_ndbcluster) reported the error from the NdbBlob
object
rather than from the NdbTransaction object. This results in inconsistent
error
messages in some cases
2) The NDB kernel bypassed the TC block when reporting primary key 'simple
read'
failure in some scenarios. This resulted in the API node not detecting
operation failures in some scenarios, and eventual transaction timeouts.
Fixes :
Change NDB kernel to send LQHKEYREF to TC for early simple read failure.
Direct send
of TCKEYREF to API remains for 'dirty' read.
Change ha_ndbcluster to obtain error information from the NdbTransaction
object rather than the Blob object.
Tests :
Re-enable simple read testing in testOperations and testTransactions.
Extend testing to include Simple Reads in testNdbApi.
Add Blob read transporter overload testcase to MTR test_blob testcase.
modified:
mysql-test/suite/ndb/r/ndb_blob.result
mysql-test/suite/ndb/t/ndb_blob.test
sql/ha_ndbcluster.cc
storage/ndb/src/common/debugger/signaldata/SignalNames.cpp
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
storage/ndb/test/ndbapi/testNdbApi.cpp
storage/ndb/test/ndbapi/testOperations.cpp
storage/ndb/test/ndbapi/testTransactions.cpp
per-file messages:
mysql-test/suite/ndb/r/ndb_blob.result
Blob transporter overload testcase
mysql-test/suite/ndb/t/ndb_blob.test
Blob transporter overload testcase
sql/ha_ndbcluster.cc
Modify handler to get error from Transaction object if not available on Blob or
Operation.
storage/ndb/src/common/debugger/signaldata/SignalNames.cpp
Add missing signal name.
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
Modify LQH to send LQHKEYREF to TC rather than TCKEYREF to API for simple read failure
scenario.
storage/ndb/test/ndbapi/testNdbApi.cpp
Add error to test behaviour of simple reads with AbortOnError.
storage/ndb/test/ndbapi/testOperations.cpp
Add SimpleRead behaviour verification testcases
storage/ndb/test/ndbapi/testTransactions.cpp
Add SimpleRead behaviour verification testcases
=== modified file 'mysql-test/suite/ndb/r/ndb_blob.result'
--- a/mysql-test/suite/ndb/r/ndb_blob.result 2008-06-03 10:00:31 +0000
+++ b/mysql-test/suite/ndb/r/ndb_blob.result 2008-11-08 20:45:48 +0000
@@ -636,3 +636,16 @@ select (giga = repeat(@stuff, 2000)) fro
(giga = repeat(@stuff, 2000))
1
drop table t1;
+create table t1 (
+id int,
+data longblob,
+primary key(id))
+engine=ndb;
+set @blurb= repeat('B', 1000000);
+set @blurbHash= sha1(@blurb);
+set autocommit = 1;
+insert into t1 values (1, @blurb);
+create table result_space (k int primary key, result int) engine=myisam;
+set autocommit = 0;
+drop table result_space;
+drop table t1;
=== modified file 'mysql-test/suite/ndb/t/ndb_blob.test'
--- a/mysql-test/suite/ndb/t/ndb_blob.test 2008-06-03 10:00:31 +0000
+++ b/mysql-test/suite/ndb/t/ndb_blob.test 2008-11-08 20:45:48 +0000
@@ -589,3 +589,114 @@ select sha1(giga) from t1;
select (giga = repeat(@stuff, 2000)) from t1 where a=0;
drop table t1;
+
+# bug# 39867 : Blob part operation error handling
+# We attempt to generate a transporter overload
+# and check that an error is generated, rather than
+# rubbish being silently returned.
+# Since transporter overload is not easy to reliably
+# reproduce, this testcase passes when all SELECTS
+# return either the correct result, or fail with
+# MySQL error 1297, and warnings about transporter
+# overload from Ndb. (1218)
+# The testcase will fail if there is no error, and
+# incorrect data/NULL, or if there is an error other
+# than 1297 due to 1218.
+#
+
+create table t1 (
+ id int,
+ data longblob,
+ primary key(id))
+ engine=ndb;
+
+set @blurb= repeat('B', 1000000); # ~1MB of stuff
+set @blurbHash= sha1(@blurb);
+
+set autocommit = 1;
+
+let $mysqltest_loopcounter= 500;
+let $success_count=0;
+let $fail_count=0;
+
+
+insert into t1 values (1, @blurb);
+
+create table result_space (k int primary key, result int) engine=myisam;
+
+--disable_result_log
+--disable_query_log
+
+while ($mysqltest_loopcounter)
+{
+ # Allow success or unmapped error type (1297).
+ --error 0,1297
+ insert into result_space select 1, (sha1(data) = @blurbHash) from t1 where id=1;
+
+ let $orig_errno= $mysql_errno;
+
+ # Statement execution success path
+ if (!$orig_errno)
+ {
+ # Test that the SHA1 of the data read was as expected
+ # (e.g. comparison returned true)
+ let $is_right_answer=`SELECT result from result_space where k=1`;
+
+ if (!$is_right_answer)
+ {
+ --enable_result_log
+ echo SELECT succeeded but gave bad result.
+ SHOW WARNINGS;
+ die "Bad data returned";
+ }
+ #echo Success gave correct result;
+ inc $success_count;
+ }
+
+ # Statement execution failure path
+ if ($orig_errno)
+ {
+ inc $fail_count;
+ #echo FAIL;
+
+ # For the error we are interested in (1218), MySQLD returns a generic error
+ # code (1297). We use the warning info to check the Ndb error.
+ # Note that sometimes the end of the warning states 'from NDB', sometimes
+ # 'from NDBCLUSTER'. Probably it should be made consistent.
+ #
+ let $warning_message= query_get_value("SHOW WARNINGS", Message, 1);
+ let $is_correct_message= `SELECT "$warning_message" LIKE "Got temporary error 1218
'Send Buffers overloaded in NDB kernel' from NDB%"`;
+
+ if (!$is_correct_message)
+ {
+ --enable_result_log
+ echo SELECT failed with incorrect error ($mysql_errno);
+ SHOW WARNINGS;
+ die "Incorrect error from statement";
+ }
+ #echo Failed as expected;
+ }
+
+ delete from result_space;
+
+ #echo $mysqltest_loopcounter;
+
+ dec $mysqltest_loopcounter;
+
+ if ($fail_count = 20)
+ {
+ # That's enough punishment
+ #echo Exiting loop early at iteration $mysqltest_loopcounter;
+ let $mysqltest_loopcounter= 0;
+ }
+}
+
+--enable_result_log
+--enable_query_log
+
+# Interesting, but can't output as it's different every time
+# echo Successes $success_count;
+# echo Failures $fail_count;
+set autocommit = 0;
+drop table result_space;
+drop table t1;
=== modified file 'sql/ha_ndbcluster.cc'
--- a/sql/ha_ndbcluster.cc 2008-10-29 13:09:15 +0000
+++ b/sql/ha_ndbcluster.cc 2008-11-08 20:45:48 +0000
@@ -655,6 +655,33 @@ ha_ndbcluster::copy_row_to_buffer(Thd_nd
return row;
}
+/**
+ * findBlobError
+ * This method attempts to find an error in the hierarchy of runtime
+ * NDBAPI objects from Blob up to transaction.
+ * It will return -1 if no error is found, 0 if an error is found.
+ */
+int findBlobError(NdbError& error, NdbBlob* pBlob)
+{
+ error= pBlob->getNdbError();
+ if (error.code != 0)
+ return 0;
+
+ const NdbOperation* pOp= pBlob->getNdbOperation();
+ error= pOp->getNdbError();
+ if (error.code != 0)
+ return 0;
+
+ NdbTransaction* pTrans= pOp->getNdbTransaction();
+ error= pTrans->getNdbError();
+ if (error.code != 0)
+ return 0;
+
+ /* No error on any of the objects */
+ return -1;
+}
+
+
int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg)
{
ha_ndbcluster *ha= (ha_ndbcluster *)arg;
@@ -733,7 +760,19 @@ int g_get_ndb_blobs_value(NdbBlob *ndb_b
uchar *buf= ha->m_blobs_buffer + offset;
uint32 len= ha->m_blobs_buffer_size - offset;
if (ndb_blob->readData(buf, len) != 0)
- ERR_RETURN(ndb_blob->getNdbError());
+ {
+ NdbError err;
+ if (findBlobError(err, ndb_blob) == 0)
+ {
+ ERR_RETURN(err);
+ }
+ else
+ {
+ /* Should always have some error code set */
+ assert(err.code != 0);
+ ERR_RETURN(err);
+ }
+ }
DBUG_PRINT("info", ("[%u] offset: %u buf: 0x%lx len=%u",
i, offset, (long) buf, len));
DBUG_ASSERT(len == len64);
=== modified file 'storage/ndb/src/common/debugger/signaldata/SignalNames.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp 2007-10-23 09:38:20 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp 2008-11-08 20:45:48 +0000
@@ -646,5 +646,7 @@ const GsnName SignalNames [] = {
,{ GSN_PREPARE_COPY_FRAG_CONF, "PREPARE_COPY_FRAG_CONF" }
,{ GSN_UPGRADE_PROTOCOL_ORD, "UPGRADE_PROTOCOL_ORD" }
+
+ ,{ GSN_TC_HBREP, "TC_HBREP" }
};
const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName);
=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2008-11-07 08:01:33 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2008-11-08 20:45:48 +0000
@@ -2284,9 +2284,10 @@ void Dblqh::noFreeRecordLab(Signal* sign
releaseTcrec(signal, tcConnectptr);
}
- if (LqhKeyReq::getSimpleFlag(reqInfo) &&
+ if (LqhKeyReq::getDirtyFlag(reqInfo) &&
LqhKeyReq::getOperation(reqInfo) == ZREAD){
jam();
+ /* Dirty read sends TCKEYREF direct to client, and nothing to TC */
ndbrequire(LqhKeyReq::getApplicationAddressFlag(reqInfo));
const Uint32 apiRef = lqhKeyReq->variableData[0];
const Uint32 apiOpRec = lqhKeyReq->variableData[1];
@@ -2300,6 +2301,9 @@ void Dblqh::noFreeRecordLab(Signal* sign
sendTCKEYREF(signal, apiRef, signal->getSendersBlockRef(), 0);
} else {
jam();
+ /* All ops apart from dirty read send LQHKEYREF to TC
+ * (This includes simple read)
+ */
const Uint32 clientPtr = lqhKeyReq->clientConnectPtr;
Uint32 TcOprec = clientPtr;
=== modified file 'storage/ndb/test/ndbapi/testNdbApi.cpp'
--- a/storage/ndb/test/ndbapi/testNdbApi.cpp 2008-06-03 12:37:17 +0000
+++ b/storage/ndb/test/ndbapi/testNdbApi.cpp 2008-11-08 20:45:48 +0000
@@ -26,10 +26,8 @@
#define MAX_NDB_OBJECTS 32678
#define CHECK(b) if (!(b)) { \
- ndbout << "ERR: "<< step->getName() \
- << " failed on line " << __LINE__ << endl; \
- result = NDBT_FAILED; \
- continue; }
+ ndbout << "ERR: failed on line " << __LINE__ << endl; \
+ return -1; }
#define CHECKE(b) if (!(b)) { \
errors++; \
@@ -1326,6 +1324,7 @@ op_row(NdbTransaction* pTrans, HugoOpera
case 3:
case 4:
case 5:
+ case 12:
pOp = pTrans->getNdbOperation(pTab->getName());
break;
case 9:
@@ -1365,10 +1364,13 @@ op_row(NdbTransaction* pTrans, HugoOpera
case 11:
pOp->deleteTuple();
break;
+ case 12:
+ CHECK(!pOp->simpleRead());
+ break;
default:
abort();
}
-
+
for(int a = 0; a<pTab->getNoOfColumns(); a++){
if (pTab->getColumn(a)->getPrimaryKey() == true){
if(hugoOps.equalForAttr(pOp, a, row) != 0){
@@ -1384,8 +1386,9 @@ op_row(NdbTransaction* pTrans, HugoOpera
case 6:
case 7:
case 8:
+ case 12:
for(int a = 0; a<pTab->getNoOfColumns(); a++){
- pOp->getValue(a);
+ CHECK(pOp->getValue(a));
}
break;
case 3:
@@ -1427,6 +1430,8 @@ static void print(int op)
case 9: str = "noop "; break;
case 10: str = "uk update "; break;
case 11: str = "uk delete "; break;
+ case 12: str = "pk read-si"; break;
+
default:
abort();
}
@@ -1457,9 +1462,10 @@ runTestIgnoreError(NDBT_Context* ctx, ND
printf("case: <op1> <op2> c/nc ao/ie\n");
Uint32 tno = 0;
- for (Uint32 op1 = 0; op1 < 12; op1++)
+ for (Uint32 op1 = 0; op1 < 13; op1++)
{
- for (Uint32 op2 = op1; op2 < 12; op2++)
+ // NOTE : I get a node crash if the following loop starts from 0!
+ for (Uint32 op2 = op1; op2 < 13; op2++)
{
int ret;
NdbTransaction* pTrans = 0;
@@ -1486,25 +1492,25 @@ runTestIgnoreError(NDBT_Context* ctx, ND
hugoTrans.loadTable(pNdb, 1);
- pTrans = pNdb->startTransaction();
- op_row(pTrans, hugoOps, pTab, op1, 0);
+ CHECK(pTrans = pNdb->startTransaction());
+ CHECK(!op_row(pTrans, hugoOps, pTab, op1, 0));
ret = pTrans->execute(et, ao);
pTrans->close();
printf("%d ", ret);
hugoTrans.clearTable(pNdb);
hugoTrans.loadTable(pNdb, 1);
- pTrans = pNdb->startTransaction();
- op_row(pTrans, hugoOps, pTab, op1, 1);
+ CHECK(pTrans = pNdb->startTransaction());
+ CHECK(!op_row(pTrans, hugoOps, pTab, op1, 1));
ret = pTrans->execute(et, ao);
pTrans->close();
printf("%d ", ret);
hugoTrans.clearTable(pNdb);
hugoTrans.loadTable(pNdb, 1);
- pTrans = pNdb->startTransaction();
- op_row(pTrans, hugoOps, pTab, op1, 0);
- op_row(pTrans, hugoOps, pTab, op2, 1);
+ CHECK(pTrans = pNdb->startTransaction());
+ CHECK(!op_row(pTrans, hugoOps, pTab, op1, 0));
+ CHECK(!op_row(pTrans, hugoOps, pTab, op2, 1));
ret = pTrans->execute(et, ao);
pTrans->close();
printf("%d\n", ret);
@@ -1728,6 +1734,85 @@ done:
return result;
}
+int
+simpleReadAbortOnError(NDBT_Context* ctx, NDBT_Step* step)
+{
+ /* Simple read has some error handling issues
+ * Setting the operation to be AbortOnError can expose these
+ */
+ Ndb* pNdb = GETNDB(step);
+ const NdbDictionary::Table* pTab= ctx->getTab();
+ HugoOperations hugoOps(*pTab);
+ NdbRestarter restarter;
+
+ hugoOps.startTransaction(pNdb);
+ CHECK(!hugoOps.pkWriteRecord(pNdb,0));
+ CHECK(!hugoOps.execute_Commit(pNdb, AbortOnError));
+
+ NdbTransaction* trans;
+
+ CHECK(trans= pNdb->startTransaction());
+
+ /* Insert error 5047 which causes next LQHKEYREQ to fail due
+ * to 'transporter overload'
+ * Error insert is self-clearing
+ */
+ restarter.insertErrorInAllNodes(5047);
+
+ /* Create SimpleRead on row 0, which exists (though we'll get
+ * 'transporter overload for this'
+ */
+ NdbOperation* op;
+ CHECK(op= trans->getNdbOperation(pTab));
+
+ CHECK(!op->simpleRead());
+
+ for(int a = 0; a<pTab->getNoOfColumns(); a++){
+ if (pTab->getColumn(a)->getPrimaryKey() == true){
+ if(hugoOps.equalForAttr(op, a, 0) != 0){
+ restarter.insertErrorInAllNodes(0);
+ return NDBT_FAILED;
+ }
+ }
+ }
+ for(int a = 0; a<pTab->getNoOfColumns(); a++){
+ CHECK(op->getValue(a));
+ }
+
+ CHECK(!op->setAbortOption(NdbOperation::AbortOnError));
+
+ /* Create normal read on row 0 which will succeed */
+ NdbOperation* op2;
+ CHECK(op2= trans->getNdbOperation(pTab));
+
+ CHECK(!op2->readTuple());
+
+ for(int a = 0; a<pTab->getNoOfColumns(); a++){
+ if (pTab->getColumn(a)->getPrimaryKey() == true){
+ if(hugoOps.equalForAttr(op2, a, 0) != 0){
+ restarter.insertErrorInAllNodes(0);
+ return NDBT_FAILED;
+ }
+ }
+ }
+ for(int a = 0; a<pTab->getNoOfColumns(); a++){
+ CHECK(op2->getValue(a));
+ }
+
+ CHECK(!op2->setAbortOption(NdbOperation::AbortOnError));
+
+
+ CHECK(trans->execute(NoCommit) == -1);
+
+ CHECK(trans->getNdbError().code == 1218); // Transporter Overload
+
+ restarter.insertErrorInAllNodes(0);
+
+ return NDBT_OK;
+
+}
+
+
NDBT_TESTSUITE(testNdbApi);
TESTCASE("MaxNdb",
"Create Ndb objects until no more can be created\n"){
@@ -1840,6 +1925,10 @@ TESTCASE("Bug37158",
""){
INITIALIZER(runBug37158);
}
+TESTCASE("SimpleReadAbortOnError",
+ "Test behaviour of Simple reads with Abort On Error"){
+ INITIALIZER(simpleReadAbortOnError);
+}
NDBT_TESTSUITE_END(testNdbApi);
int main(int argc, const char** argv){
=== modified file 'storage/ndb/test/ndbapi/testOperations.cpp'
--- a/storage/ndb/test/ndbapi/testOperations.cpp 2007-08-01 03:07:58 +0000
+++ b/storage/ndb/test/ndbapi/testOperations.cpp 2008-11-08 20:45:48 +0000
@@ -59,6 +59,20 @@ OperationTestCase matrix[] = {
{ "FReadUpdate", false, "READ", 626, 0, "UPDATE", 626, 0, 626, 0 },
{ "FReadDelete", false, "READ", 626, 0, "DELETE", 626, 0, 626, 0 },
+ { "FSimpleReadRead", false, "S-READ", 626, 0, "READ", 626, 0, 626, 0 },
+ { "FSimpleReadReadEx",
+ false, "S-READ", 626, 0, "READ-EX", 626, 0, 626, 0 },
+ { "FSimpleReadSimpleRead",
+ false, "S-READ", 626, 0, "S-READ", 626, 0, 626, 0 },
+ { "FSimpleReadDirtyRead",
+ false, "S-READ", 626, 0, "D-READ", 626, 0, 626, 0 },
+ { "FSimpleReadInsert",
+ false, "S-READ", 626, 0, "INSERT", 0, 1, 0, 1 },
+ { "FSimpleReadUpdate",
+ false, "S-READ", 626, 0, "UPDATE", 626, 0, 626, 0 },
+ { "FSimpleReadDelete",
+ false, "S-READ", 626, 0, "DELETE", 626, 0, 626, 0 },
+
{ "ReadExRead", true, "READ-EX", 0, 0, "READ", 0, 0, 0, 0 },
{ "ReadExReadEx", true, "READ-EX", 0, 0, "READ-EX", 0, 0, 0, 0 },
{ "ReadExSimpleRead", true, "READ-EX", 0, 0, "S-READ", 0, 0, 0, 0 },
@@ -118,7 +132,7 @@ runOp(HugoOperations & hugoOps,
} else if(strcmp(op, "READ-EX") == 0){
C2(hugoOps.pkReadRecord(pNdb, 1, 1, NdbOperation::LM_Exclusive), 0);
} else if(strcmp(op, "S-READ") == 0){
- C2(hugoOps.pkReadRecord(pNdb, 1, 1, NdbOperation::LM_Read), 0);
+ C2(hugoOps.pkReadRecord(pNdb, 1, 1, NdbOperation::LM_SimpleRead), 0);
} else if(strcmp(op, "D-READ") == 0){
C2(hugoOps.pkReadRecord(pNdb, 1, 1, NdbOperation::LM_CommittedRead), 0);
} else if(strcmp(op, "INSERT") == 0){
=== modified file 'storage/ndb/test/ndbapi/testTransactions.cpp'
--- a/storage/ndb/test/ndbapi/testTransactions.cpp 2006-12-23 19:20:40 +0000
+++ b/storage/ndb/test/ndbapi/testTransactions.cpp 2008-11-08 20:45:48 +0000
@@ -109,6 +109,23 @@ OperationTestCase matrix[] = {
{ "ScanExScanDe", true, "SCAN-EX",1, "SCAN-DE", 266, X, 0, 1 },
#endif
+ { "SimpleReadRead", true, "S-READ", 1, "READ", 0, 1, 0, 1 },
+ { "SimpleReadReadEx", true, "S-READ", 1, "READ-EX", 0, 1, 0, 1 }, // no lock held
+ { "SimpleReadSimpleRead",
+ true, "S-READ", 1, "S-READ", 0, 1, 0, 1 },
+ { "SimpleReadDirtyRead",
+ true, "S-READ", 1, "D-READ", 0, 1, 0, 1 },
+ { "SimpleReadInsert", true, "S-READ", 1, "INSERT", 630, X, 0, 1 }, // no lock held
+ { "SimpleReadUpdate", true, "S-READ", 1, "UPDATE", 0, 2, 0, 2 }, // no lock held
+ { "SimpleReadDelete", true, "S-READ", 1, "DELETE", 0, X, 626, X }, // no lock held
+ { "SimpleReadScan", true, "S-READ", 1, "SCAN", 0, 1, 0, 1 },
+ { "SimpleReadScanHl", true, "S-READ", 1, "SCAN-HL", 0, 1, 0, 1 },
+ { "SimpleReadScanEx", true, "S-READ", 1, "SCAN-EX", 0, 1, 0, 1 }, // no lock held
+#if 0
+ { "SimpleReadScanUp", true, "S-READ", 1, "SCAN-UP", 0, 1, 0, 2 }, // no lock held
+ { "SimpleReadScanDe", true, "S-READ", 1, "SCAN-DE", 0, X, 626, X }, // no lock held
+#endif
+
{ "ReadExRead", true, "READ-EX",1, "READ", 266, X, 0, 1 },
{ "ReadExReadEx", true, "READ-EX",1, "READ-EX", 266, X, 0, 1 },
{ "ReadExSimpleRead", true, "READ-EX",1, "S-READ", 266, X, 0, 1 },
@@ -169,6 +186,9 @@ OperationTestCase matrix[] = {
{ "DeleteScanDe", true, "DELETE", X, "SCAN-DE", 266, X, 626, X }
#endif
+
+
+
};
#define CHECK(a, b) { int x = a; int y = b; if (x != y) { \
@@ -193,7 +213,7 @@ runOp(HugoOperations & hugoOps,
} else if(strcmp(op, "READ-EX") == 0){
C2(hugoOps.pkReadRecord(pNdb, 1, 1, NdbOperation::LM_Exclusive) == 0);
} else if(strcmp(op, "S-READ") == 0){
- C2(hugoOps.pkReadRecord(pNdb, 1, 1, NdbOperation::LM_Read) == 0);
+ C2(hugoOps.pkReadRecord(pNdb, 1, 1, NdbOperation::LM_SimpleRead) == 0);
} else if(strcmp(op, "D-READ") == 0){
C2(hugoOps.pkReadRecord(pNdb, 1, 1, NdbOperation::LM_CommittedRead) == 0);
} else if(strcmp(op, "INSERT") == 0){
| Thread |
|---|
| • bzr commit into mysql-5.1 branch (frazer:2724) Bug#39867 | Frazer Clement | 8 Nov |