MySQL Lists are EOL. Please join:

List: Commits    « Previous Message | Next Message »
From: tomas    Date: May 16 2006 6:56pm
Subject: bk commit into 5.1 tree (tomas:1.2149) BUG#16875
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of tomas. When tomas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2149 06/05/16 20:56:45 tomas@stripped +6 -0
  Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
  - adaptations to handle binlog

  storage/ndb/src/ndbapi/Ndb.cpp
    1.69 06/05/16 20:56:38 tomas@stripped +3 -0
    Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
    - adaptations to handle binlog

  storage/ndb/src/ndbapi/ClusterMgr.hpp
    1.11 06/05/16 20:56:38 tomas@stripped +6 -1
    Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
    - adaptations to handle binlog

  storage/ndb/src/ndbapi/ClusterMgr.cpp
    1.26 06/05/16 20:56:38 tomas@stripped +13 -0
    Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
    - adaptations to handle binlog

  sql/ha_ndbcluster_binlog.cc
    1.53 06/05/16 20:56:38 tomas@stripped +100 -4
    Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
    - adaptations to handle binlog

  mysql-test/t/ndb_autodiscover3.test
    1.2 06/05/16 20:56:38 tomas@stripped +6 -3
    Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
    - adaptations to handle binlog

  mysql-test/r/ndb_autodiscover3.result
    1.3 06/05/16 20:56:38 tomas@stripped +6 -6
    Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
    - adaptations to handle binlog

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	tomas
# Host:	poseidon.ndb.mysql.com
# Root:	/home/tomas/mysql-5.1-new-ndb

--- 1.25/storage/ndb/src/ndbapi/ClusterMgr.cpp	2006-05-16 10:25:46 +02:00
+++ 1.26/storage/ndb/src/ndbapi/ClusterMgr.cpp	2006-05-16 20:56:38 +02:00
@@ -71,6 +71,7 @@
   noOfConnectedNodes= 0;
   theClusterMgrThread= 0;
   m_connect_count = 0;
+  m_cluster_state = CS_waiting_for_clean_cache;
   DBUG_VOID_RETURN;
 }
 
@@ -175,6 +176,16 @@
     int send_heartbeat_now= global_flag_send_heartbeat_now;
     global_flag_send_heartbeat_now= 0;
 
+    if (m_cluster_state == CS_waiting_for_clean_cache)
+    {
+      theFacade.m_globalDictCache.lock();
+      unsigned sz= theFacade.m_globalDictCache.get_size();
+      theFacade.m_globalDictCache.unlock();
+      if (sz)
+        goto next;
+      m_cluster_state = CS_waiting_for_first_connect;
+    }
+
     theFacade.lock_mutex();
     for (int i = 1; i < MAX_NODES; i++){
       /**
@@ -223,6 +234,7 @@
      */
     theFacade.unlock_mutex();
     
+next:
     // Sleep for 100 ms between each Registration Heartbeat
     Uint64 before = now;
     NdbSleep_MilliSleep(100); 
@@ -450,6 +462,7 @@
     theFacade.m_globalDictCache.invalidate_all();
     theFacade.m_globalDictCache.unlock();
     m_connect_count ++;
+    m_cluster_state = CS_waiting_for_clean_cache;
     NFCompleteRep rep;
     for(Uint32 i = 1; i<MAX_NODES; i++){
       if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){

--- 1.10/storage/ndb/src/ndbapi/ClusterMgr.hpp	2006-05-16 10:25:46 +02:00
+++ 1.11/storage/ndb/src/ndbapi/ClusterMgr.hpp	2006-05-16 20:56:38 +02:00
@@ -57,6 +57,11 @@
   class TransporterFacade & theFacade;
   
 public:
+  enum Cluster_state {
+    CS_waiting_for_clean_cache = 0,
+    CS_waiting_for_first_connect,
+    CS_connected
+  };
   struct Node {
     Node();
     bool defined;
@@ -86,7 +91,7 @@
   Uint32        noOfConnectedNodes;
   Node          theNodes[MAX_NODES];
   NdbThread*    theClusterMgrThread;
-  
+  enum Cluster_state m_cluster_state;
   /**
    * Used for controlling start/stop of the thread
    */

--- 1.68/storage/ndb/src/ndbapi/Ndb.cpp	2006-05-04 13:58:06 +02:00
+++ 1.69/storage/ndb/src/ndbapi/Ndb.cpp	2006-05-16 20:56:38 +02:00
@@ -1287,6 +1287,7 @@
 int Ndb::dropEventOperation(NdbEventOperation* tOp)
 {
   DBUG_ENTER("Ndb::dropEventOperation");
+  DBUG_PRINT("info", ("name: %s", tOp->getEvent()->getTable()->getName()));
   // remove it from list
   NdbEventOperationImpl *op=
     NdbEventBuffer::getEventOperationImpl(tOp);
@@ -1297,6 +1298,8 @@
   else
     theImpl->m_ev_op= op->m_next;
 
+  DBUG_PRINT("info", ("first: %s",
+                      theImpl->m_ev_op ? theImpl->m_ev_op->getEvent()->getTable()->getName() : "<empty>"));
   assert(theImpl->m_ev_op == 0 || theImpl->m_ev_op->m_prev == 0);
 
   theEventBuffer->dropEventOperation(tOp);

--- 1.2/mysql-test/r/ndb_autodiscover3.result	2006-05-15 17:16:18 +02:00
+++ 1.3/mysql-test/r/ndb_autodiscover3.result	2006-05-16 20:56:38 +02:00
@@ -3,9 +3,9 @@
 begin;
 insert into t1 values (1);
 insert into t1 values (2);
-ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from ndbcluster
+ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from NDBCLUSTER
 commit;
-ERROR HY000: Got error 4350 'Transaction already aborted' from ndbcluster
+ERROR HY000: Got error 4350 'Transaction already aborted' from NDBCLUSTER
 drop table t1;
 create table t2 (a int, b int, primary key(a,b)) engine=ndbcluster;
 insert into t2 values (1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1);
@@ -14,6 +14,8 @@
 1	1
 2	1
 3	1
+show tables like 't2';
+Tables_in_test (t2)
 create table t2 (a int key) engine=ndbcluster;
 insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
 select * from t2 order by a limit 3;
@@ -22,14 +24,12 @@
 2
 3
 select * from t2 order by a limit 3;
-ERROR HY000: Can't lock file (errno: 241)
-select * from t2 order by a limit 3;
 a
 1
 2
 3
-show tables;
-Tables_in_test
+show tables like 't2';
+Tables_in_test (t2)
 create table t2 (a int key) engine=ndbcluster;
 insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
 select * from t2 order by a limit 3;

--- 1.1/mysql-test/t/ndb_autodiscover3.test	2006-05-15 16:23:55 +02:00
+++ 1.2/mysql-test/t/ndb_autodiscover3.test	2006-05-16 20:56:38 +02:00
@@ -36,23 +36,26 @@
 
 --exec $NDB_MGM --no-defaults -e "all restart -i" >> $NDB_TOOLS_OUTPUT
 --exec $NDB_TOOLS_DIR/ndb_waiter --no-defaults >> $NDB_TOOLS_OUTPUT
+# to ensure mysqld has connected again, and recreated system tables
+--sleep 3
 
 --connection server2
+show tables like 't2';
 create table t2 (a int key) engine=ndbcluster;
 insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
 select * from t2 order by a limit 3;
 
 # server 1 should have a stale cache, and in this case wrong frm, transaction must be retried
 --connection server1
---error 1015
-select * from t2 order by a limit 3;
 select * from t2 order by a limit 3;
 
 --exec $NDB_MGM --no-defaults -e "all restart -i" >> $NDB_TOOLS_OUTPUT
 --exec $NDB_TOOLS_DIR/ndb_waiter --no-defaults >> $NDB_TOOLS_OUTPUT
+# to ensure mysqld has connected again, and recreated system tables
+--sleep 3
 
 --connection server1
-show tables;
+show tables like 't2';
 create table t2 (a int key) engine=ndbcluster;
 insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
 select * from t2 order by a limit 3;

--- 1.52/sql/ha_ndbcluster_binlog.cc	2006-05-10 16:54:21 +02:00
+++ 1.53/sql/ha_ndbcluster_binlog.cc	2006-05-16 20:56:38 +02:00
@@ -1775,6 +1775,8 @@
       // skip
       break;
     case NDBEVENT::TE_CLUSTER_FAILURE:
+      if (ndb_extra_logging)
+        sql_print_information("NDB Binlog: cluster failure for %s.", schema_share->key);
       // fall through
     case NDBEVENT::TE_DROP:
       if (ndb_extra_logging &&
@@ -1784,6 +1786,7 @@
       free_share(&schema_share);
       schema_share= 0;
       ndb_binlog_tables_inited= FALSE;
+      close_cached_tables((THD*) 0, 0, (TABLE_LIST*) 0, FALSE);
       // fall through
     case NDBEVENT::TE_ALTER:
       ndb_handle_schema_change(thd, ndb, pOp, tmp_share);
@@ -2101,7 +2104,14 @@
   Functions for start, stop, wait for ndbcluster binlog thread
 *********************************************************************/
 
-static int do_ndbcluster_binlog_close_connection= 0;
+enum Binlog_thread_state
+{
+  BCCC_running= 0,
+  BCCC_exit= 1,
+  BCCC_restart= 2
+};
+
+static enum Binlog_thread_state do_ndbcluster_binlog_close_connection= BCCC_restart;
 
 int ndbcluster_binlog_start()
 {
@@ -2139,7 +2149,7 @@
   DBUG_ENTER("ndbcluster_binlog_close_connection");
   const char *save_info= thd->proc_info;
   thd->proc_info= "ndbcluster_binlog_close_connection";
-  do_ndbcluster_binlog_close_connection= 1;
+  do_ndbcluster_binlog_close_connection= BCCC_exit;
   while (ndb_binlog_thread_running > 0)
     sleep(1);
   thd->proc_info= save_info;
@@ -3296,10 +3306,48 @@
     thd->db= db;
   }
 
-  for ( ; !((abort_loop || do_ndbcluster_binlog_close_connection) &&
-            ndb_latest_handled_binlog_epoch >= g_latest_trans_gci); )
+restart:
   {
+    // wait for the first event
+    thd->proc_info= "Waiting for first event from ndbcluster";
+    DBUG_PRINT("info", ("Waiting for the first event"));
+    int schema_res= 0;
+    Uint64 schema_gci= 0;
+    while (schema_res == 0 && !abort_loop)
+    {
+      schema_res= s_ndb->pollEvents(100, &schema_gci);
+    }
+    // now check that we have epochs consistent with what we had before the restart
+    if (schema_res > 0)
+    {
+      if (schema_gci < ndb_latest_handled_binlog_epoch)
+      {
+        sql_print_error("NDB Binlog: cluster has been restarted --initial or with older filesystem. "
+                        "ndb_latest_handled_binlog_epoch: %u, while current epoch: %u. "
+                        "RESET MASTER should be issued. Resetting ndb_latest_handled_binlog_epoch.",
+                        (unsigned) ndb_latest_handled_binlog_epoch, (unsigned) schema_gci);
+        g_latest_trans_gci= 0;
+        ndb_latest_handled_binlog_epoch= 0;
+        ndb_latest_applied_binlog_epoch= 0;
+        ndb_latest_received_binlog_epoch= 0;
+      }
+    }
+  }
 
+  do_ndbcluster_binlog_close_connection= BCCC_running;
+  for ( ; !((abort_loop || do_ndbcluster_binlog_close_connection) &&
+            ndb_latest_handled_binlog_epoch >= g_latest_trans_gci) &&
+          do_ndbcluster_binlog_close_connection != BCCC_restart; )
+  {
+#ifndef DBUG_OFF
+    if (do_ndbcluster_binlog_close_connection)
+    {
+      DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection: %d, "
+                          "ndb_latest_handled_binlog_epoch: %llu, "
+                          "g_latest_trans_gci: %llu", do_ndbcluster_binlog_close_connection,
+                          ndb_latest_handled_binlog_epoch, g_latest_trans_gci));
+    }
+#endif
 #ifdef RUN_NDB_BINLOG_TIMER
     main_timer.stop();
     sql_print_information("main_timer %ld ms",  main_timer.elapsed_ms());
@@ -3324,7 +3372,13 @@
     ndb_latest_received_binlog_epoch= gci;
 
     while (gci > schema_gci && schema_res >= 0)
+    {
+      static char buf[64];
+      thd->proc_info= "Waiting for schema epoch";
+      my_snprintf(buf, sizeof(buf), "%s %u(%u)", thd->proc_info, (unsigned) schema_gci, (unsigned) gci);
+      thd->proc_info= buf;
       schema_res= s_ndb->pollEvents(10, &schema_gci);
+    }
 
     if ((abort_loop || do_ndbcluster_binlog_close_connection) &&
         (ndb_latest_handled_binlog_epoch >= g_latest_trans_gci ||
@@ -3360,10 +3414,31 @@
       while (pOp != NULL)
       {
         if (!pOp->hasError())
+        {
           ndb_binlog_thread_handle_schema_event(thd, s_ndb, pOp,
                                                 &post_epoch_log_list,
                                                 &post_epoch_unlock_list,
                                                 &mem_root);
+          DBUG_PRINT("info", ("s_ndb first: %s", s_ndb->getEventOperation() ?
+                              s_ndb->getEventOperation()->getEvent()->getTable()->getName() :
+                              "<empty>"));
+          DBUG_PRINT("info", ("i_ndb first: %s", i_ndb->getEventOperation() ?
+                              i_ndb->getEventOperation()->getEvent()->getTable()->getName() :
+                              "<empty>"));
+          if (i_ndb->getEventOperation() == NULL &&
+              s_ndb->getEventOperation() == NULL &&
+              do_ndbcluster_binlog_close_connection == BCCC_running)
+          {
+            DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection= BCCC_restart"));
+            do_ndbcluster_binlog_close_connection= BCCC_restart;
+            if (ndb_latest_received_binlog_epoch < g_latest_trans_gci && ndb_binlog_running)
+            {
+              sql_print_error("NDB Binlog: latest transaction in epoch %lld not in binlog "
+                              "as latest received epoch is %lld",
+                              g_latest_trans_gci, ndb_latest_received_binlog_epoch);
+            }
+          }
+        }
         else
           sql_print_error("NDB: error %lu (%s) on handling "
                           "binlog schema event",
@@ -3532,6 +3607,25 @@
             ndb_binlog_thread_handle_non_data_event(thd, i_ndb, pOp, row);
             // reset to catch errors
             i_ndb->setDatabaseName("");
+            DBUG_PRINT("info", ("s_ndb first: %s", s_ndb->getEventOperation() ?
+                                s_ndb->getEventOperation()->getEvent()->getTable()->getName() :
+                                "<empty>"));
+            DBUG_PRINT("info", ("i_ndb first: %s", i_ndb->getEventOperation() ?
+                                i_ndb->getEventOperation()->getEvent()->getTable()->getName() :
+                                "<empty>"));
+            if (i_ndb->getEventOperation() == NULL &&
+                s_ndb->getEventOperation() == NULL &&
+                do_ndbcluster_binlog_close_connection == BCCC_running)
+            {
+              DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection= BCCC_restart"));
+              do_ndbcluster_binlog_close_connection= BCCC_restart;
+              if (ndb_latest_received_binlog_epoch < g_latest_trans_gci && ndb_binlog_running)
+              {
+                sql_print_error("NDB Binlog: latest transaction in epoch %lld not in binlog "
+                                "as latest received epoch is %lld",
+                                g_latest_trans_gci, ndb_latest_received_binlog_epoch);
+              }
+            }
           }
 
           pOp= i_ndb->nextEvent();
@@ -3587,6 +3681,8 @@
     *root_ptr= old_root;
     ndb_latest_handled_binlog_epoch= ndb_latest_received_binlog_epoch;
   }
+  if (do_ndbcluster_binlog_close_connection != BCCC_exit)
+    goto restart;
 err:
   DBUG_PRINT("info",("Shutting down cluster binlog thread"));
   thd->proc_info= "Shutting down";
Thread:
bk commit into 5.1 tree (tomas:1.2149) BUG#16875 - tomas - 16 May