List: Internals  « Previous Message | Next Message »
From: jonas.oreland    Date: July 6 2005 9:16am
Subject: bk commit into 5.0 tree (joreland:1.1864) BUG#9961
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.1864 05/07/06 11:16:36 joreland@stripped +2 -0
  bug#9961 - ndb gcp stop
    add lots of printouts when stop is detected

  ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
    1.68 05/07/06 11:15:15 joreland@stripped +72 -1
    Add lots of printouts when crashing due to GCP stop

  ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
    1.28 05/07/06 11:15:15 joreland@stripped +18 -1
    If waiting for GCP_SAVE_REQ
      just kill nodes we're waiting for

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	joreland
# Host:	eel.(none)
# Root:	/home/jonas/src/mysql-5.0

--- 1.27/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2005-06-07 18:06:45 +02:00
+++ 1.28/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2005-07-06 11:15:15 +02:00
@@ -10290,7 +10290,24 @@
  * GCP stop detected, 
  * send SYSTEM_ERROR to all other alive nodes
  */
-void Dbdih::crashSystemAtGcpStop(Signal* signal){
+void Dbdih::crashSystemAtGcpStop(Signal* signal)
+{
+  if(cgcpStatus == GCP_NODE_FINISHED)
+  {
+    /**
+     * We're waiting for a GCP save conf
+     */
+    ndbrequire(!c_GCP_SAVEREQ_Counter.done());
+    NodeReceiverGroup rg(DBLQH, c_GCP_SAVEREQ_Counter);
+    signal->theData[0] = 2305;
+    sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBB);
+
+    infoEvent("Detected GCP stop...sending kill to %s", 
+	      c_GCP_SAVEREQ_Counter.getText());
+    ndbout_c("Detected GCP stop...sending kill to %s", 
+	     c_GCP_SAVEREQ_Counter.getText());
+    return;
+  }
   NodeRecordPtr nodePtr;
   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
     jam();

--- 1.67/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2005-06-08 16:55:04 +02:00
+++ 1.68/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2005-07-06 11:15:15 +02:00
@@ -169,6 +169,8 @@
 /* ------------------------------------------------------------------------- */
 void Dblqh::systemError(Signal* signal) 
 {
+  signal->theData[0] = 2304;
+  execDUMP_STATE_ORD(signal);
   progError(0, 0);
 }//Dblqh::systemError()
 
@@ -12598,6 +12600,22 @@
 
 void Dblqh::writePageZeroLab(Signal* signal) 
 {
+  if (false && logPartPtr.p->logPartState == LogPartRecord::FILE_CHANGE_PROBLEM) 
+  {
+    if (logPartPtr.p->firstLogQueue == RNIL) 
+    {
+      jam();
+      logPartPtr.p->logPartState = LogPartRecord::IDLE;
+      ndbout_c("resetting logPartState to IDLE");
+    } 
+    else 
+    {
+      jam();
+      logPartPtr.p->logPartState = LogPartRecord::ACTIVE;
+      ndbout_c("resetting logPartState to ACTIVE");
+    }
+  }
+  
   logFilePtr.p->fileChangeState = LogFileRecord::NOT_ONGOING;
 /*---------------------------------------------------------------------------*/
 /* IT COULD HAVE ARRIVED PAGE WRITES TO THE CURRENT FILE WHILE WE WERE       */
@@ -15661,6 +15679,7 @@
 
 void Dblqh::systemErrorLab(Signal* signal) 
 {
+  systemError(signal);
   progError(0, 0);
 /*************************************************************************>*/
 /*       WE WANT TO INVOKE AN IMMEDIATE ERROR HERE SO WE GET THAT BY       */
@@ -18526,8 +18545,60 @@
     return;
   }
 
+  Uint32 arg= dumpState->args[0];
+  if(arg == 2304 || arg == 2305)
+  {
+    jam();
+    Uint32 i;
+    GcpRecordPtr gcp; gcp.i = RNIL;
+    for(i = 0; i<4; i++)
+    {
+      logPartPtr.i = i;
+      ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
+      ndbout_c("LP %d state: %d WW_Gci: %d gcprec: %d flq: %d currfile: %d tailFileNo: %d", 
+		i,
+		logPartPtr.p->logPartState,
+		logPartPtr.p->waitWriteGciLog,
+		logPartPtr.p->gcprec,
+		logPartPtr.p->firstLogQueue,
+		logPartPtr.p->currentLogfile,
+		logPartPtr.p->logTailFileNo);
+
+      if(gcp.i == RNIL && logPartPtr.p->gcprec != RNIL)
+	gcp.i = logPartPtr.p->gcprec;
+
+      LogFileRecordPtr logFilePtr;
+      Uint32 first= logFilePtr.i= logPartPtr.p->firstLogfile;
+      do
+      {
+	ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
+	ndbout_c("  file %d(%d) FileChangeState: %d logFileStatus: %d", 
+		  logFilePtr.p->fileNo,
+		  logFilePtr.i,
+		  logFilePtr.p->fileChangeState,
+		  logFilePtr.p->logFileStatus);
+	logFilePtr.i = logFilePtr.p->nextLogFile;
+      } while(logFilePtr.i != first);
+    }
+    
+    if(gcp.i != RNIL)
+    {
+      ptrCheckGuard(gcp, cgcprecFileSize, gcpRecord);
+      for(i = 0; i<4; i++)
+      {
+	ndbout_c("  GCP %d file: %d state: %d sync: %d",
+		  i, gcp.p->gcpFilePtr[i], gcp.p->gcpLogPartState[i],
+		  gcp.p->gcpSyncReady[i]);      
+      }
+    }
 
-
+    if(arg== 2305)
+    {
+      progError(__LINE__, ERR_SYSTEM_ERROR, 
+		"Shutting down node due to failed handling of GCP_SAVEREQ");
+      
+    }
+  }
 }//Dblqh::execDUMP_STATE_ORD()
 
 void Dblqh::execSET_VAR_REQ(Signal* signal) 
Thread:
  bk commit into 5.0 tree (joreland:1.1864) BUG#9961 - jonas.oreland, 6 Jul