Below is the list of changes that have just been committed into a local
5.1 repository of tomas. When tomas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet
1.2149 06/05/16 20:56:45 tomas@stripped +6 -0
Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
- adaptations to handle binlog
storage/ndb/src/ndbapi/Ndb.cpp
1.69 06/05/16 20:56:38 tomas@stripped +3 -0
Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
- adaptations to handle binlog
storage/ndb/src/ndbapi/ClusterMgr.hpp
1.11 06/05/16 20:56:38 tomas@stripped +6 -1
Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
- adaptations to handle binlog
storage/ndb/src/ndbapi/ClusterMgr.cpp
1.26 06/05/16 20:56:38 tomas@stripped +13 -0
Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
- adaptations to handle binlog
sql/ha_ndbcluster_binlog.cc
1.53 06/05/16 20:56:38 tomas@stripped +100 -4
Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
- adaptations to handle binlog
mysql-test/t/ndb_autodiscover3.test
1.2 06/05/16 20:56:38 tomas@stripped +6 -3
Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
- adaptations to handle binlog
mysql-test/r/ndb_autodiscover3.result
1.3 06/05/16 20:56:38 tomas@stripped +6 -6
Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
- adaptations to handle binlog
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: tomas
# Host: poseidon.ndb.mysql.com
# Root: /home/tomas/mysql-5.1-new-ndb
--- 1.25/storage/ndb/src/ndbapi/ClusterMgr.cpp 2006-05-16 10:25:46 +02:00
+++ 1.26/storage/ndb/src/ndbapi/ClusterMgr.cpp 2006-05-16 20:56:38 +02:00
@@ -71,6 +71,7 @@
noOfConnectedNodes= 0;
theClusterMgrThread= 0;
m_connect_count = 0;
+ m_cluster_state = CS_waiting_for_clean_cache;
DBUG_VOID_RETURN;
}
@@ -175,6 +176,16 @@
int send_heartbeat_now= global_flag_send_heartbeat_now;
global_flag_send_heartbeat_now= 0;
+ if (m_cluster_state == CS_waiting_for_clean_cache)
+ {
+ theFacade.m_globalDictCache.lock();
+ unsigned sz= theFacade.m_globalDictCache.get_size();
+ theFacade.m_globalDictCache.unlock();
+ if (sz)
+ goto next;
+ m_cluster_state = CS_waiting_for_first_connect;
+ }
+
theFacade.lock_mutex();
for (int i = 1; i < MAX_NODES; i++){
/**
@@ -223,6 +234,7 @@
*/
theFacade.unlock_mutex();
+next:
// Sleep for 100 ms between each Registration Heartbeat
Uint64 before = now;
NdbSleep_MilliSleep(100);
@@ -450,6 +462,7 @@
theFacade.m_globalDictCache.invalidate_all();
theFacade.m_globalDictCache.unlock();
m_connect_count ++;
+ m_cluster_state = CS_waiting_for_clean_cache;
NFCompleteRep rep;
for(Uint32 i = 1; i<MAX_NODES; i++){
if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){
--- 1.10/storage/ndb/src/ndbapi/ClusterMgr.hpp 2006-05-16 10:25:46 +02:00
+++ 1.11/storage/ndb/src/ndbapi/ClusterMgr.hpp 2006-05-16 20:56:38 +02:00
@@ -57,6 +57,11 @@
class TransporterFacade & theFacade;
public:
+ enum Cluster_state {
+ CS_waiting_for_clean_cache = 0,
+ CS_waiting_for_first_connect,
+ CS_connected
+ };
struct Node {
Node();
bool defined;
@@ -86,7 +91,7 @@
Uint32 noOfConnectedNodes;
Node theNodes[MAX_NODES];
NdbThread* theClusterMgrThread;
-
+ enum Cluster_state m_cluster_state;
/**
* Used for controlling start/stop of the thread
*/
--- 1.68/storage/ndb/src/ndbapi/Ndb.cpp 2006-05-04 13:58:06 +02:00
+++ 1.69/storage/ndb/src/ndbapi/Ndb.cpp 2006-05-16 20:56:38 +02:00
@@ -1287,6 +1287,7 @@
int Ndb::dropEventOperation(NdbEventOperation* tOp)
{
DBUG_ENTER("Ndb::dropEventOperation");
+ DBUG_PRINT("info", ("name: %s", tOp->getEvent()->getTable()->getName()));
// remove it from list
NdbEventOperationImpl *op=
NdbEventBuffer::getEventOperationImpl(tOp);
@@ -1297,6 +1298,8 @@
else
theImpl->m_ev_op= op->m_next;
+ DBUG_PRINT("info", ("first: %s",
+ theImpl->m_ev_op ?
theImpl->m_ev_op->getEvent()->getTable()->getName() : "<empty>"));
assert(theImpl->m_ev_op == 0 || theImpl->m_ev_op->m_prev == 0);
theEventBuffer->dropEventOperation(tOp);
--- 1.2/mysql-test/r/ndb_autodiscover3.result 2006-05-15 17:16:18 +02:00
+++ 1.3/mysql-test/r/ndb_autodiscover3.result 2006-05-16 20:56:38 +02:00
@@ -3,9 +3,9 @@
begin;
insert into t1 values (1);
insert into t1 values (2);
-ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from ndbcluster
+ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from NDBCLUSTER
commit;
-ERROR HY000: Got error 4350 'Transaction already aborted' from ndbcluster
+ERROR HY000: Got error 4350 'Transaction already aborted' from NDBCLUSTER
drop table t1;
create table t2 (a int, b int, primary key(a,b)) engine=ndbcluster;
insert into t2 values (1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1);
@@ -14,6 +14,8 @@
1 1
2 1
3 1
+show tables like 't2';
+Tables_in_test (t2)
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
@@ -22,14 +24,12 @@
2
3
select * from t2 order by a limit 3;
-ERROR HY000: Can't lock file (errno: 241)
-select * from t2 order by a limit 3;
a
1
2
3
-show tables;
-Tables_in_test
+show tables like 't2';
+Tables_in_test (t2)
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
--- 1.1/mysql-test/t/ndb_autodiscover3.test 2006-05-15 16:23:55 +02:00
+++ 1.2/mysql-test/t/ndb_autodiscover3.test 2006-05-16 20:56:38 +02:00
@@ -36,23 +36,26 @@
--exec $NDB_MGM --no-defaults -e "all restart -i" >> $NDB_TOOLS_OUTPUT
--exec $NDB_TOOLS_DIR/ndb_waiter --no-defaults >> $NDB_TOOLS_OUTPUT
+# to ensure mysqld has connected again, and recreated system tables
+--sleep 3
--connection server2
+show tables like 't2';
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
# server 1 should have a stale cache, and in this case wrong frm, transaction must be retried
--connection server1
---error 1015
-select * from t2 order by a limit 3;
select * from t2 order by a limit 3;
--exec $NDB_MGM --no-defaults -e "all restart -i" >> $NDB_TOOLS_OUTPUT
--exec $NDB_TOOLS_DIR/ndb_waiter --no-defaults >> $NDB_TOOLS_OUTPUT
+# to ensure mysqld has connected again, and recreated system tables
+--sleep 3
--connection server1
-show tables;
+show tables like 't2';
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
--- 1.52/sql/ha_ndbcluster_binlog.cc 2006-05-10 16:54:21 +02:00
+++ 1.53/sql/ha_ndbcluster_binlog.cc 2006-05-16 20:56:38 +02:00
@@ -1775,6 +1775,8 @@
// skip
break;
case NDBEVENT::TE_CLUSTER_FAILURE:
+ if (ndb_extra_logging)
+ sql_print_information("NDB Binlog: cluster failure for %s.",
schema_share->key);
// fall through
case NDBEVENT::TE_DROP:
if (ndb_extra_logging &&
@@ -1784,6 +1786,7 @@
free_share(&schema_share);
schema_share= 0;
ndb_binlog_tables_inited= FALSE;
+ close_cached_tables((THD*) 0, 0, (TABLE_LIST*) 0, FALSE);
// fall through
case NDBEVENT::TE_ALTER:
ndb_handle_schema_change(thd, ndb, pOp, tmp_share);
@@ -2101,7 +2104,14 @@
Functions for start, stop, wait for ndbcluster binlog thread
*********************************************************************/
-static int do_ndbcluster_binlog_close_connection= 0;
+enum Binlog_thread_state
+{
+ BCCC_running= 0,
+ BCCC_exit= 1,
+ BCCC_restart= 2
+};
+
+static enum Binlog_thread_state do_ndbcluster_binlog_close_connection= BCCC_restart;
int ndbcluster_binlog_start()
{
@@ -2139,7 +2149,7 @@
DBUG_ENTER("ndbcluster_binlog_close_connection");
const char *save_info= thd->proc_info;
thd->proc_info= "ndbcluster_binlog_close_connection";
- do_ndbcluster_binlog_close_connection= 1;
+ do_ndbcluster_binlog_close_connection= BCCC_exit;
while (ndb_binlog_thread_running > 0)
sleep(1);
thd->proc_info= save_info;
@@ -3296,10 +3306,48 @@
thd->db= db;
}
- for ( ; !((abort_loop || do_ndbcluster_binlog_close_connection) &&
- ndb_latest_handled_binlog_epoch >= g_latest_trans_gci); )
+restart:
{
+ // wait for the first event
+ thd->proc_info= "Waiting for first event from ndbcluster";
+ DBUG_PRINT("info", ("Waiting for the first event"));
+ int schema_res= 0;
+ Uint64 schema_gci= 0;
+ while (schema_res == 0 && !abort_loop)
+ {
+ schema_res= s_ndb->pollEvents(100, &schema_gci);
+ }
+ // now check that we have epochs consistant with what we had before the restart
+ if (schema_res > 0)
+ {
+ if (schema_gci < ndb_latest_handled_binlog_epoch)
+ {
+ sql_print_error("NDB Binlog: cluster has been restarted --initial or with older
filesystem. "
+ "ndb_latest_handled_binlog_epoch: %u, while current epoch: %u. "
+ "RESET MASTER should be issued. Resetting
ndb_latest_handled_binlog_epoch.",
+ (unsigned) ndb_latest_handled_binlog_epoch, (unsigned)
schema_gci);
+ g_latest_trans_gci= 0;
+ ndb_latest_handled_binlog_epoch= 0;
+ ndb_latest_applied_binlog_epoch= 0;
+ ndb_latest_received_binlog_epoch= 0;
+ }
+ }
+ }
+ do_ndbcluster_binlog_close_connection= BCCC_running;
+ for ( ; !((abort_loop || do_ndbcluster_binlog_close_connection) &&
+ ndb_latest_handled_binlog_epoch >= g_latest_trans_gci) &&
+ do_ndbcluster_binlog_close_connection != BCCC_restart; )
+ {
+#ifndef DBUG_OFF
+ if (do_ndbcluster_binlog_close_connection)
+ {
+ DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection: %d, "
+ "ndb_latest_handled_binlog_epoch: %llu, "
+ "g_latest_trans_gci: %llu",
do_ndbcluster_binlog_close_connection,
+ ndb_latest_handled_binlog_epoch, g_latest_trans_gci));
+ }
+#endif
#ifdef RUN_NDB_BINLOG_TIMER
main_timer.stop();
sql_print_information("main_timer %ld ms", main_timer.elapsed_ms());
@@ -3324,7 +3372,13 @@
ndb_latest_received_binlog_epoch= gci;
while (gci > schema_gci && schema_res >= 0)
+ {
+ static char buf[64];
+ thd->proc_info= "Waiting for schema epoch";
+ my_snprintf(buf, sizeof(buf), "%s %u(%u)", thd->proc_info, (unsigned)
schema_gci, (unsigned) gci);
+ thd->proc_info= buf;
schema_res= s_ndb->pollEvents(10, &schema_gci);
+ }
if ((abort_loop || do_ndbcluster_binlog_close_connection) &&
(ndb_latest_handled_binlog_epoch >= g_latest_trans_gci ||
@@ -3360,10 +3414,31 @@
while (pOp != NULL)
{
if (!pOp->hasError())
+ {
ndb_binlog_thread_handle_schema_event(thd, s_ndb, pOp,
&post_epoch_log_list,
&post_epoch_unlock_list,
&mem_root);
+ DBUG_PRINT("info", ("s_ndb first: %s", s_ndb->getEventOperation() ?
+
s_ndb->getEventOperation()->getEvent()->getTable()->getName() :
+ "<empty>"));
+ DBUG_PRINT("info", ("i_ndb first: %s", i_ndb->getEventOperation() ?
+
i_ndb->getEventOperation()->getEvent()->getTable()->getName() :
+ "<empty>"));
+ if (i_ndb->getEventOperation() == NULL &&
+ s_ndb->getEventOperation() == NULL &&
+ do_ndbcluster_binlog_close_connection == BCCC_running)
+ {
+ DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection= BCCC_restart"));
+ do_ndbcluster_binlog_close_connection= BCCC_restart;
+ if (ndb_latest_received_binlog_epoch < g_latest_trans_gci &&
ndb_binlog_running)
+ {
+ sql_print_error("NDB Binlog: latest transaction in epoch %lld not in binlog
"
+ "as latest received epoch is %lld",
+ g_latest_trans_gci, ndb_latest_received_binlog_epoch);
+ }
+ }
+ }
else
sql_print_error("NDB: error %lu (%s) on handling "
"binlog schema event",
@@ -3532,6 +3607,25 @@
ndb_binlog_thread_handle_non_data_event(thd, i_ndb, pOp, row);
// reset to catch errors
i_ndb->setDatabaseName("");
+ DBUG_PRINT("info", ("s_ndb first: %s", s_ndb->getEventOperation() ?
+
s_ndb->getEventOperation()->getEvent()->getTable()->getName() :
+ "<empty>"));
+ DBUG_PRINT("info", ("i_ndb first: %s", i_ndb->getEventOperation() ?
+
i_ndb->getEventOperation()->getEvent()->getTable()->getName() :
+ "<empty>"));
+ if (i_ndb->getEventOperation() == NULL &&
+ s_ndb->getEventOperation() == NULL &&
+ do_ndbcluster_binlog_close_connection == BCCC_running)
+ {
+ DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection=
BCCC_restart"));
+ do_ndbcluster_binlog_close_connection= BCCC_restart;
+ if (ndb_latest_received_binlog_epoch < g_latest_trans_gci &&
ndb_binlog_running)
+ {
+ sql_print_error("NDB Binlog: latest transaction in epoch %lld not in
binlog "
+ "as latest received epoch is %lld",
+ g_latest_trans_gci, ndb_latest_received_binlog_epoch);
+ }
+ }
}
pOp= i_ndb->nextEvent();
@@ -3587,6 +3681,8 @@
*root_ptr= old_root;
ndb_latest_handled_binlog_epoch= ndb_latest_received_binlog_epoch;
}
+ if (do_ndbcluster_binlog_close_connection != BCCC_exit)
+ goto restart;
err:
DBUG_PRINT("info",("Shutting down cluster binlog thread"));
thd->proc_info= "Shutting down";
| Thread | Author | Date |
|---|---|---|
| • bk commit into 5.1 tree (tomas:1.2149) BUG#16875 | tomas | 16 May |