List:Internals« Previous MessageNext Message »
From:sasha Date:March 30 2002 7:36pm
Subject:bk commit into 4.0 tree
View as plain text  
Below is the list of changes that have just been committed into a
4.0 repository of sasha. When sasha does a push, they will be propagated to 
the main repository and, within 24 hours after the push, to the public repository. 
For information on how to access the public repository
see http://www.mysql.com/doc/I/n/Installing_source_tree.html

ChangeSet@stripped, 2002-03-30 12:36:05-07:00, sasha@stripped
  replication updates. This changeset seems to be working fine on test systems.
  If no problems are discovered in the next week, this will make the replication
  code ready for 4.0.2 release.

  dbug/dbug.c
    1.12 02/03/30 12:36:04 sasha@stripped +4 -4
    cleanup of my previous fix

  sql/mysqld.cc
    1.292 02/03/30 12:36:04 sasha@stripped +40 -39
    fixed a REALLY NASTY BUG - slave threads were being launched before 
    initialization of global thread keys. Thus if the slave thread was slow
    to start everything worked fine, but if it started quickly, we would get
    into trouble using the uninitialized keys

  sql/net_pkg.cc
    1.20 02/03/30 12:36:04 sasha@stripped +5 -1
    make net_printf() work with 0 error code taking the third argument as 
    format string in that case

  sql/slave.cc
    1.163 02/03/30 12:36:04 sasha@stripped +50 -15
    misc fix-ups and debugging instrumentations

  sql/slave.h
    1.35 02/03/30 12:36:04 sasha@stripped +4 -1
    added skip_log_purge member

  sql/sql_class.cc
    1.74 02/03/30 12:36:04 sasha@stripped +8 -0
    debugging instrumentation to track down random memory corruption

  sql/sql_class.h
    1.114 02/03/30 12:36:04 sasha@stripped +8 -0
    added debugging sentry to THD to track down memory corruption

  sql/sql_repl.cc
    1.74 02/03/30 12:36:04 sasha@stripped +12 -6
    fixed bugs in CHANGE MASTER

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	sasha
# Host:	mysql.sashanet.com
# Root:	/reiser-data/mysql-4.0

--- 1.11/dbug/dbug.c	Tue Mar 26 22:19:22 2002
+++ 1.12/dbug/dbug.c	Sat Mar 30 12:36:04 2002
@@ -706,8 +706,6 @@
   if (!_no_db_)
   {
     int save_errno=errno;
-    if (!init_done)
-      _db_push_ (_DBUG_START_CONDITION_);
     /* Sasha: the test below is so we could call functions with DBUG_ENTER
        before my_thread_init(). I needed this because I suspected corruption
        of a block allocated by my_thread_init() itself, so I wanted to use
@@ -715,6 +713,8 @@
     */
     if (!(state=code_state()))
       return;
+    if (!init_done)
+      _db_push_ (_DBUG_START_CONDITION_);
 
     *_sfunc_ = state->func;
     *_sfile_ = state->file;
@@ -794,10 +794,10 @@
   if (!_no_db_)
   {
     int save_errno=errno;
+    if (!(state=code_state()))
+      return;				
     if (!init_done)
       _db_push_ ("");
-    if (!(state=code_state()))
-      return;				/* Only happens at end of program */
     if (stack->flags & (TRACE_ON | DEBUG_ON | PROFILE_ON))
     {
       if (!state->locked)

--- 1.291/sql/mysqld.cc	Tue Mar 26 22:23:48 2002
+++ 1.292/sql/mysqld.cc	Sat Mar 30 12:36:04 2002
@@ -1911,45 +1911,6 @@
     using_update_log=1;
   }
  
-  init_slave();
-  
-  if (opt_bin_log && !server_id)
-  {
-    server_id= !master_host ? 1 : 2;
-    switch (server_id) {
-#ifdef EXTRA_DEBUG
-    case 1:
-      sql_print_error("\
-Warning: You have enabled the binary log, but you haven't set server-id:\n\
-Updates will be logged to the binary log, but connections to slaves will\n\
-not be accepted.");
-      break;
-#endif
-    case 2:
-      sql_print_error("\
-Warning: You should set server-id to a non-0 value if master_host is set.\n\
-The server will not act as a slave.");
-      break;
-    }
-  }
-  if (opt_bin_log)
-  {
-    if (!opt_bin_logname)
-    {
-      char tmp[FN_REFLEN];
-      /* TODO: The following should be using fn_format();  We just need to
-	 first change fn_format() to cut the file name if it's too long.
-      */
-      strmake(tmp,glob_hostname,FN_REFLEN-5);
-      strmov(strcend(tmp,'.'),"-bin");
-      opt_bin_logname=my_strdup(tmp,MYF(MY_WME));
-    }
-    mysql_bin_log.set_index_file_name(opt_binlog_index_name);
-    open_log(&mysql_bin_log, glob_hostname, opt_bin_logname, "-bin",
-	     LOG_BIN);
-    using_update_log=1;
-  }
-
   if (opt_slow_log)
     open_log(&mysql_slow_log, glob_hostname, opt_slow_logname, "-slow.log",
 	     LOG_NORMAL);
@@ -2020,6 +1981,46 @@
   if (!opt_noacl)
     udf_init();
 #endif
+  /* init_slave() must be called after the thread keys are created */
+  init_slave();
+  
+  if (opt_bin_log && !server_id)
+  {
+    server_id= !master_host ? 1 : 2;
+    switch (server_id) {
+#ifdef EXTRA_DEBUG
+    case 1:
+      sql_print_error("\
+Warning: You have enabled the binary log, but you haven't set server-id:\n\
+Updates will be logged to the binary log, but connections to slaves will\n\
+not be accepted.");
+      break;
+#endif
+    case 2:
+      sql_print_error("\
+Warning: You should set server-id to a non-0 value if master_host is set.\n\
+The server will not act as a slave.");
+      break;
+    }
+  }
+  if (opt_bin_log)
+  {
+    if (!opt_bin_logname)
+    {
+      char tmp[FN_REFLEN];
+      /* TODO: The following should be using fn_format();  We just need to
+	 first change fn_format() to cut the file name if it's too long.
+      */
+      strmake(tmp,glob_hostname,FN_REFLEN-5);
+      strmov(strcend(tmp,'.'),"-bin");
+      opt_bin_logname=my_strdup(tmp,MYF(MY_WME));
+    }
+    mysql_bin_log.set_index_file_name(opt_binlog_index_name);
+    open_log(&mysql_bin_log, glob_hostname, opt_bin_logname, "-bin",
+	     LOG_BIN);
+    using_update_log=1;
+  }
+
 
   if (opt_bootstrap)
   {

--- 1.19/sql/net_pkg.cc	Wed Jan  2 12:29:40 2002
+++ 1.20/sql/net_pkg.cc	Sat Mar 30 12:36:04 2002
@@ -108,7 +108,11 @@
     thd->query_error = 1;	// if we are here, something is wrong :-)
   query_cache_abort(net);	// Safety
   va_start(args,errcode);
-  format=ER(errcode);
+  // Sasha: this is needed to make net_printf() work with 0 argument for
+  // errorcode and use the argument after that as the format string. This
+  // is usefull for rare errors that are not worth the hassle to put in
+  // errmsg.sys, but at the same time, the message is not fixed text
+  format=errcode ? ER(errcode) : va_arg(args,char*);
   offset= net->return_errno ? 2 : 0;
   text_pos=(char*) net->buff+head_length+offset+1;
   (void) vsprintf(my_const_cast(char*) (text_pos),format,args);

--- 1.162/sql/slave.cc	Tue Mar 26 22:23:48 2002
+++ 1.163/sql/slave.cc	Sat Mar 30 12:36:04 2002
@@ -166,6 +166,7 @@
 		       ulonglong pos, bool need_data_lock,
 		       const char** errmsg)
 {
+  *errmsg=0;
   if (rli->log_pos_current)
     return 0;
   pthread_mutex_t *log_lock=rli->relay_log.get_log_lock();
@@ -348,6 +349,7 @@
   /* is is criticate to test if the slave is running. Otherwise, we might
      be referening freed memory trying to kick it
   */
+  THD_CHECK_SENTRY(thd);
   if (*slave_running)
   {
     KICK_SLAVE(thd);
@@ -966,6 +968,8 @@
   rli->cur_log_fd = -1;
   rli->slave_skip_counter=0;
   rli->log_pos_current=0;
+  rli->abort_pos_wait=0;
+  rli->skip_log_purge=0;
   // TODO: make this work with multi-master
   if (!opt_relay_logname)
   {
@@ -1296,9 +1300,16 @@
   bool pos_reached = 0;
   int event_count = 0;
   pthread_mutex_lock(&data_lock);
-  while (!thd->killed)
+  abort_pos_wait=0; // abort only if master info  changes during wait
+  while (!thd->killed || !abort_pos_wait)
   {
     int cmp_result;
+    if (abort_pos_wait)
+    {
+      abort_pos_wait=0;
+      pthread_mutex_unlock(&data_lock);
+      return -1;
+    }
     DBUG_ASSERT(*master_log_name || master_log_pos == 0);
     if (*master_log_name)
     {
@@ -1350,10 +1361,7 @@
   thd->thread_id = thread_id++;
   pthread_mutex_unlock(&LOCK_thread_count);
 
-  if (init_thr_lock() ||
-      my_pthread_setspecific_ptr(THR_THD,  thd) ||
-      my_pthread_setspecific_ptr(THR_MALLOC, &thd->mem_root) ||
-      my_pthread_setspecific_ptr(THR_NET,  &thd->net))
+  if (init_thr_lock() || thd->store_globals())
   {
     end_thread(thd,0);
     DBUG_RETURN(-1);
@@ -1367,7 +1375,6 @@
   VOID(pthread_sigmask(SIG_UNBLOCK,&set,&thd->block_signals));
 #endif
 
-  thd->mem_root.free=thd->mem_root.used=0;	// Probably not needed
   if (thd->max_join_size == (ulong) ~0L)
     thd->options |= OPTION_BIG_SELECTS;
 
@@ -1381,7 +1388,6 @@
   }
   thd->version=refresh_version;
   thd->set_time();
-
   DBUG_RETURN(0);
 }
 
@@ -1611,6 +1617,7 @@
   my_thread_init();
   thd = new THD; // note that contructor of THD uses DBUG_ !
   DBUG_ENTER("handle_slave_io");
+  THD_CHECK_SENTRY(thd);
 
   pthread_detach_this_thread();
   if (init_slave_thread(thd, SLAVE_THD_IO))
@@ -1808,11 +1815,12 @@
   DBUG_ASSERT(thd->net.buff != 0);
   net_end(&thd->net); // destructor will not free it, because net.vio is 0
   pthread_mutex_lock(&LOCK_thread_count);
+  THD_CHECK_SENTRY(thd);
   delete thd;
   pthread_mutex_unlock(&LOCK_thread_count);
+  my_thread_end(); // clean-up before broadcast
   pthread_cond_broadcast(&mi->stop_cond); // tell the world we are done
   pthread_mutex_unlock(&mi->run_lock);
-  my_thread_end();
 #ifndef DBUG_OFF
   if(abort_slave_event_count && !events_till_abort)
     goto slave_begin;
@@ -1848,7 +1856,8 @@
   my_thread_init();
   thd = new THD; // note that contructor of THD uses DBUG_ !
   DBUG_ENTER("handle_slave_sql");
-
+  THD_CHECK_SENTRY(thd);
+  
   pthread_detach_this_thread();
   if (init_slave_thread(thd, SLAVE_THD_SQL))
   {
@@ -1861,6 +1870,7 @@
     sql_print_error("Failed during slave thread initialization");
     goto err;
   }
+  THD_CHECK_SENTRY(thd);
   thd->thread_stack = (char*)&thd; // remember where our stack is
   thd->temporary_tables = rli->save_temporary_tables; // restore temp tables
   threads.append(thd);
@@ -1891,6 +1901,7 @@
   {
     thd->proc_info = "Processing master log event"; 
     DBUG_ASSERT(rli->sql_thd == thd);
+    THD_CHECK_SENTRY(thd);
     if (exec_relay_log_event(thd,rli))
     {
       // do not scare the user if SQL thread was simply killed or stopped
@@ -1926,14 +1937,16 @@
   DBUG_ASSERT(thd->net.buff != 0);
   net_end(&thd->net); // destructor will not free it, because we are weird
   DBUG_ASSERT(rli->sql_thd == thd);
+  THD_CHECK_SENTRY(thd);
   rli->sql_thd = 0;
   pthread_mutex_lock(&LOCK_thread_count);
+  THD_CHECK_SENTRY(thd);
   delete thd;
   pthread_mutex_unlock(&LOCK_thread_count);
+  my_thread_end(); // clean-up before broadcasting termination
   pthread_cond_broadcast(&rli->stop_cond);
   // tell the world we are done
   pthread_mutex_unlock(&rli->run_lock);
-  my_thread_end();
 #ifndef DBUG_OFF // TODO: reconsider the code below
   if (abort_slave_event_count && !rli->events_till_abort)
     goto slave_begin;
@@ -2429,13 +2442,35 @@
 	end_io_cache(cur_log);
 	DBUG_ASSERT(rli->cur_log_fd >= 0);
 	my_close(rli->cur_log_fd, MYF(MY_WME));
-	rli->cur_log_fd = -1; 
+	rli->cur_log_fd = -1;
 	
-	// purge_first_log will properly set up relay log coordinates in rli 
-	if (rli->relay_log.purge_first_log(rli))
+	// TODO: make skip_log_purge a start-up option. At this point this
+	// is not critical priority
+	if (!rli->skip_log_purge)
 	{
-	  errmsg = "Error purging processed log";
-	  goto err;
+	// purge_first_log will properly set up relay log coordinates in rli
+	  if (rli->relay_log.purge_first_log(rli))
+	  {
+	    errmsg = "Error purging processed log";
+	    goto err;
+	  }
+	}
+	else
+	{
+	  // TODO: verify that no lock is ok here. At this point, if we
+	  // get this wrong, this is actually no big deal - the only time
+	  // this code will ever be executed is if we are recovering from
+	  // a bug when a full reload of the slave is not feasible or
+	  // desirable. 
+	  if (rli->relay_log.find_next_log(&rli->linfo,0/*no lock*/))
+	  {
+	    errmsg = "error switching to the next log";
+	    goto err;
+	  }
+	  rli->relay_log_pos = 4;
+	  strnmov(rli->relay_log_name,rli->linfo.log_file_name,
+		  sizeof(rli->relay_log_name));
+	  flush_relay_log_info(rli);
 	}
 	
 	// next log is hot 

--- 1.73/sql/sql_class.cc	Fri Mar 22 13:55:05 2002
+++ 1.74/sql/sql_class.cc	Sat Mar 30 12:36:04 2002
@@ -104,6 +104,9 @@
   cond_count=0;
   convert_set=0;
   mysys_var=0;
+#ifndef DBUG_OFF
+  dbug_sentry=THD_SENTRY_MAGIC;
+#endif  
   net.vio=0;
   ull=0;
   system_thread=cleanup_done=0;
@@ -191,6 +194,7 @@
 
 THD::~THD()
 {
+  THD_CHECK_SENTRY(this);
   DBUG_ENTER("~THD()");
   /* Close connection */
   if (net.vio)
@@ -223,12 +227,16 @@
   mysys_var=0;					// Safety (shouldn't be needed)
 #ifdef SIGNAL_WITH_VIO_CLOSE
   pthread_mutex_destroy(&active_vio_lock);
+#endif
+#ifndef DBUG_OFF
+  dbug_sentry = THD_SENTRY_GONE;
 #endif  
   DBUG_VOID_RETURN;
 }
 
 void THD::awake(bool prepare_to_die)
 {
+  THD_CHECK_SENTRY(this);
   if (prepare_to_die)
     killed = 1;
   thr_alarm_kill(real_id);

--- 1.113/sql/sql_class.h	Sat Mar 16 01:38:34 2002
+++ 1.114/sql/sql_class.h	Sat Mar 30 12:36:04 2002
@@ -251,6 +251,11 @@
 
 class delayed_insert;
 
+#define THD_SENTRY_MAGIC 0xfeedd1ff
+#define THD_SENTRY_GONE  0xdeadbeef
+
+#define THD_CHECK_SENTRY(thd) DBUG_ASSERT(thd->dbug_sentry == THD_SENTRY_MAGIC)
+
 /* For each client connection we create a separate thread with THD serving as
    a thread/connection descriptor */
 
@@ -312,6 +317,9 @@
   // TODO: document the variables below
   MYSQL_LOCK *lock,*locked_tables;
   ULL	  *ull;
+#ifndef DBUG_OFF
+  uint dbug_sentry; // watch out for memory corruption
+#endif  
   struct st_my_thread_var *mysys_var;
   enum enum_server_command command;
   uint32 server_id;

--- 1.34/sql/slave.h	Sat Mar  2 22:00:36 2002
+++ 1.35/sql/slave.h	Sat Mar 30 12:36:04 2002
@@ -151,10 +151,13 @@
   char last_slave_error[MAX_SLAVE_ERRMSG];
   THD* sql_thd;
   bool log_pos_current;
+  bool abort_pos_wait;
+  bool skip_log_purge;
   
   st_relay_log_info():info_fd(-1),cur_log_fd(-1),inited(0),
 		      cur_log_init_count(0),
-		      log_pos_current(0)
+		      log_pos_current(0),abort_pos_wait(0),
+		      skip_log_purge(0)
     {
       relay_log_name[0] = master_log_name[0] = 0;
       bzero(&info_file,sizeof(info_file));

--- 1.73/sql/sql_repl.cc	Fri Mar 15 18:44:43 2002
+++ 1.74/sql/sql_repl.cc	Sat Mar 30 12:36:04 2002
@@ -714,7 +714,10 @@
     return 1;
   }
 
-  pthread_mutex_lock(&mi->data_lock);
+  /* data lock not needed since we have already stopped the running threads,
+     and we have the hold on the run locks which will keep all threads that
+     could possibly modify the data structures from running
+  */
   if ((lex_mi->host || lex_mi->port) && !lex_mi->log_file_name && !lex_mi->pos)
   {
     // if we change host or port, we must reset the postion
@@ -746,6 +749,7 @@
   if (lex_mi->relay_log_name)
   {
     need_relay_log_purge = 0;
+    mi->rli.skip_log_purge=1;
     strnmov(mi->rli.relay_log_name,lex_mi->relay_log_name,
 	    sizeof(mi->rli.relay_log_name));
   }
@@ -759,16 +763,14 @@
   flush_master_info(mi);
   if (need_relay_log_purge)
   {
-    pthread_mutex_unlock(&mi->data_lock);
+    mi->rli.skip_log_purge=0;
     thd->proc_info="purging old relay logs";
     if (purge_relay_logs(&mi->rli,0 /* not only reset, but also reinit*/,
 			 &errmsg))
     {
-      send_error(&thd->net, 0, "Failed purging old relay logs");
-      unlock_slave_threads(mi);
+      net_printf(&thd->net, 0, "Failed purging old relay logs: %s",errmsg);
       return 1;
     }
-    pthread_mutex_lock(&mi->rli.data_lock);
   }
   else
   {
@@ -778,6 +780,7 @@
 			   0 /*no data lock*/,
 			   &msg))
     {
+      //Sasha: note that I had to change net_printf() to make this work
       net_printf(&thd->net,0,"Failed initializing relay log position: %s",msg);
       unlock_slave_threads(mi);
       return 1;
@@ -789,7 +792,10 @@
 	  sizeof(mi->rli.master_log_name));
   if (!mi->rli.master_log_name[0]) // uninitialized case
     mi->rli.master_log_pos=0;
-  pthread_cond_broadcast(&mi->rli.data_cond);
+
+  pthread_mutex_lock(&mi->rli.data_lock);
+  mi->rli.abort_pos_wait = 1;
+  pthread_cond_broadcast(&mi->data_cond);
   pthread_mutex_unlock(&mi->rli.data_lock);
 
   thd->proc_info = "starting slave";
Thread
bk commit into 4.0 treesasha30 Mar