List: Internals    « Previous Message | Next Message »
From: Heikki Tuuri    Date: March 13 2005 10:49am
Subject: bk commit into 5.0 tree (heikki:1.1807)
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of heikki. When heikki does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.1807 05/03/13 12:49:39 heikki@stripped +8 -0
  set_var.cc, mysqld.cc, ha_innodb.cc, sql_class.h:
    Add a settable session variable innodb_support_xa; setting it to 0 can save up to 10 % of CPU time and 150 bytes of space in each undo log
  trx0trx.h, trx0undo.c, trx0trx.c, trx0roll.c:
    Enable XA if innodb_support_xa is not set to 0; make prepare do log fsyncs according to innodb_flush_log_at_trx_commit

  sql/set_var.cc
    1.95 05/03/13 12:48:43 heikki@stripped +4 -0
    Add a settable session variable innodb_support_xa; setting it to 0 can save up to 10 % of CPU time and 150 bytes of space in each undo log

  sql/mysqld.cc
    1.443 05/03/13 12:48:42 heikki@stripped +6 -0
    Add a settable session variable innodb_support_xa; setting it to 0 can save up to 10 % of CPU time and 150 bytes of space in each undo log

  sql/ha_innodb.cc
    1.174 05/03/13 12:48:40 heikki@stripped +15 -0
    Add a settable session variable innodb_support_xa; setting it to 0 can save up to 10 % of CPU time and 150 bytes of space in each undo log

  sql/sql_class.h
    1.224 05/03/13 12:48:35 heikki@stripped +1 -0
    Add a settable session variable innodb_support_xa; setting it to 0 can save up to 10 % of CPU time and 150 bytes of space in each undo log

  innobase/include/trx0trx.h
    1.43 05/03/13 12:47:34 heikki@stripped +5 -0
    Enable XA if innodb_support_xa is not set to 0; make prepare do log fsyncs according to innodb_flush_log_at_trx_commit

  innobase/trx/trx0undo.c
    1.22 05/03/13 12:47:21 heikki@stripped +21 -10
    Enable XA if innodb_support_xa is not set to 0; make prepare do log fsyncs according to innodb_flush_log_at_trx_commit

  innobase/trx/trx0trx.c
    1.52 05/03/13 12:47:21 heikki@stripped +72 -32
    Enable XA if innodb_support_xa is not set to 0; make prepare do log fsyncs according to innodb_flush_log_at_trx_commit

  innobase/trx/trx0roll.c
    1.26 05/03/13 12:47:21 heikki@stripped +3 -11
    Enable XA if innodb_support_xa is not set to 0; make prepare do log fsyncs according to innodb_flush_log_at_trx_commit

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	heikki
# Host:	hundin.mysql.fi
# Root:	/home/heikki/mysql-5.0

--- 1.442/sql/mysqld.cc	Wed Mar  9 23:26:10 2005
+++ 1.443/sql/mysqld.cc	Sun Mar 13 12:48:42 2005
@@ -4211,6 +4211,7 @@
   OPT_INNODB_STATUS_FILE,
   OPT_INNODB_MAX_DIRTY_PAGES_PCT,
   OPT_INNODB_TABLE_LOCKS,
+  OPT_INNODB_SUPPORT_XA,
   OPT_INNODB_OPEN_FILES,
   OPT_INNODB_AUTOEXTEND_INCREMENT,
   OPT_INNODB_SYNC_SPIN_LOOPS,
@@ -4513,6 +4514,11 @@
    "Enable InnoDB locking in LOCK TABLES",
    (gptr*) &global_system_variables.innodb_table_locks,
    (gptr*) &global_system_variables.innodb_table_locks,
+   0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
+  {"innodb_support_xa", OPT_INNODB_SUPPORT_XA,
+   "Enable InnoDB support for the XA two-phase commit",
+   (gptr*) &global_system_variables.innodb_support_xa,
+   (gptr*) &global_system_variables.innodb_support_xa,
    0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
 #endif /* End HAVE_INNOBASE_DB */
   {"isam", OPT_ISAM, "Enable ISAM (if this version of MySQL supports it). \

--- 1.223/sql/sql_class.h	Wed Mar  9 14:02:37 2005
+++ 1.224/sql/sql_class.h	Sun Mar 13 12:48:35 2005
@@ -578,6 +578,7 @@
 #endif /* HAVE_REPLICATION */
 #ifdef HAVE_INNOBASE_DB
   my_bool innodb_table_locks;
+  my_bool innodb_support_xa;
 #endif /* HAVE_INNOBASE_DB */
 #ifdef HAVE_NDBCLUSTER_DB
   ulong ndb_autoincrement_prefetch_sz;

--- 1.42/innobase/include/trx0trx.h	Fri Feb 25 22:54:12 2005
+++ 1.43/innobase/include/trx0trx.h	Sun Mar 13 12:47:34 2005
@@ -369,6 +369,11 @@
 	XID		xid;		/* X/Open XA transaction 
 					identification to identify a 
 					transaction branch */
+	ibool		support_xa;	/* normally we do the XA two-phase
+					commit steps, but by setting this to
+					FALSE, one can save CPU time and about
+					150 bytes in the undo log size as then
+					we skip XA steps */
 	dulint		no;		/* transaction serialization number ==
 					max trx id when the transaction is 
 					moved to COMMITTED_IN_MEMORY state */

--- 1.25/innobase/trx/trx0roll.c	Tue Mar  1 08:16:20 2005
+++ 1.26/innobase/trx/trx0roll.c	Sun Mar 13 12:47:21 2005
@@ -441,16 +441,8 @@
 			trx = UT_LIST_GET_NEXT(trx_list, trx);
 		} else if (trx->conc_state == TRX_PREPARED) {
 
-			/* Roll back all prepared transactions if
-			innobase_force_recovery > 0 in my.cnf */
-
-			if (srv_force_recovery > 0) {
-				trx->conc_state = TRX_ACTIVE;
-				break;
-			} else {
-				trx->sess = trx_dummy_sess;
-				trx = UT_LIST_GET_NEXT(trx_list, trx);
-			}
+			trx->sess = trx_dummy_sess;
+			trx = UT_LIST_GET_NEXT(trx_list, trx);
 		} else {
 			break;
 		}
@@ -461,7 +453,7 @@
 	if (trx == NULL) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr,
-		"  InnoDB: Rollback of uncommitted transactions completed\n");
+		"  InnoDB: Rollback of non-prepared transactions completed\n");
 
  		mem_heap_free(heap);
 

--- 1.51/innobase/trx/trx0trx.c	Wed Mar  9 14:37:14 2005
+++ 1.52/innobase/trx/trx0trx.c	Sun Mar 13 12:47:21 2005
@@ -93,6 +93,8 @@
 	trx->id = ut_dulint_zero;
 	trx->no = ut_dulint_max;
 
+	trx->support_xa = TRUE;
+
 	trx->check_foreigns = TRUE;
 	trx->check_unique_secondary = TRUE;
 
@@ -453,9 +455,15 @@
 					ut_dulint_get_high(trx->id),
 					ut_dulint_get_low(trx->id));
 
-					trx->conc_state = TRX_ACTIVE;
+					if (srv_force_recovery == 0) {
 
-					/* trx->conc_state = TRX_PREPARED;*/
+						trx->conc_state = TRX_PREPARED;
+					} else {
+ 						fprintf(stderr,
+"InnoDB: Since innodb_force_recovery > 0, we will rollback it anyway.\n");
+
+						trx->conc_state = TRX_ACTIVE;
+					}
 				} else {
 					trx->conc_state =
 						TRX_COMMITTED_IN_MEMORY;
@@ -511,15 +519,20 @@
 					commit or abort decision from MySQL */
 
 					if (undo->state == TRX_UNDO_PREPARED) {
- 						fprintf(stderr,
+ 					    fprintf(stderr,
 "InnoDB: Transaction %lu %lu was in the XA prepared state.\n",
-						ut_dulint_get_high(trx->id),
-						ut_dulint_get_low(trx->id));
+					    ut_dulint_get_high(trx->id),
+					    ut_dulint_get_low(trx->id));
 
-						trx->conc_state = TRX_ACTIVE;
+					    if (srv_force_recovery == 0) {
+
+						trx->conc_state = TRX_PREPARED;
+					    } else {
+ 						fprintf(stderr,
+"InnoDB: Since innodb_force_recovery > 0, we will rollback it anyway.\n");
 
-						/* trx->conc_state = 
-							TRX_PREPARED; */
+						trx->conc_state = TRX_ACTIVE;
+					    }
 					} else {
 						trx->conc_state =
 						  TRX_COMMITTED_IN_MEMORY;
@@ -823,9 +836,6 @@
 		trx->read_view = NULL;
 	}
 
-/*	fprintf(stderr, "Trx %lu commit finished\n",
-		ut_dulint_get_low(trx->id)); */
-
 	if (must_flush_log) {
 
 		mutex_exit(&kernel_mutex);
@@ -869,14 +879,15 @@
                         /* Do nothing */
                 } else if (srv_flush_log_at_trx_commit == 1) {
                         if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
-                               /* Write the log but do not flush it to disk */
+                             	/* Write the log but do not flush it to disk */
 
-                               log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
+                               	log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
+									FALSE);
                         } else {
-                               /* Write the log to the log files AND flush
-                               them to disk */
+                               	/* Write the log to the log files AND flush
+                               	them to disk */
 
-                               log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
+                               	log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
                         }
                 } else if (srv_flush_log_at_trx_commit == 2) {
 
@@ -1747,12 +1758,11 @@
 
 void
 trx_prepare_off_kernel(
-/*==================*/
+/*===================*/
 	trx_t*	trx)	/* in: transaction */
 {
 	page_t*		update_hdr_page;
 	trx_rseg_t*	rseg;
-	trx_undo_t*	undo;
 	ibool		must_flush_log	= FALSE;
 	dulint		lsn;
 	mtr_t		mtr;
@@ -1779,19 +1789,18 @@
 		mutex_enter(&(rseg->mutex));
 			
 		if (trx->insert_undo != NULL) {
-			trx_undo_set_state_at_prepare(trx, trx->insert_undo,
-							  		&mtr);
-		}
-
-		undo = trx->update_undo;
 
-		if (undo) {
 			/* It is not necessary to obtain trx->undo_mutex here
 			because only a single OS thread is allowed to do the
 			transaction prepare for this transaction. */
-					
+
+			trx_undo_set_state_at_prepare(trx, trx->insert_undo,
+							  		&mtr);
+		}
+
+		if (trx->update_undo) {
 			update_hdr_page = trx_undo_set_state_at_prepare(trx,
-								undo, &mtr);
+						trx->update_undo, &mtr);
 		}
 
 		mutex_exit(&(rseg->mutex));
@@ -1815,17 +1824,48 @@
 	/*--------------------------------------*/
 
 	if (must_flush_log) {
+                /* Depending on the my.cnf options, we may now write the log
+                buffer to the log files, making the prepared state of the
+		transaction durable if the OS does not crash. We may also
+		flush the log files to disk, making the prepared state of the
+		transaction durable also at an OS crash or a power outage.
+
+                The idea in InnoDB's group prepare is that a group of
+                transactions gather behind a trx doing a physical disk write
+                to log files, and when that physical write has been completed,
+                one of those transactions does a write which prepares the whole
+                group. Note that this group prepare will only bring benefit if
+                there are > 2 users in the database. Then at least 2 users can
+                gather behind one doing the physical log write to disk.
+
+		TODO: find out if MySQL holds some mutex when calling this.
+		That would spoil our group prepare algorithm. */
 
 		mutex_exit(&kernel_mutex);
-	
-		/* Write the log to the log files AND flush them to disk */
 
-		/*-------------------------------------*/
+                if (srv_flush_log_at_trx_commit == 0) {
+                        /* Do nothing */
+                } else if (srv_flush_log_at_trx_commit == 1) {
+                   	if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
+                        	/* Write the log but do not flush it to disk */
 
-		log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
+                        	log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
+								FALSE);
+                        } else {
+                               	/* Write the log to the log files AND flush
+                               	them to disk */
+
+                               	log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
+                        }
+                } else if (srv_flush_log_at_trx_commit == 2) {
+
+                        /* Write the log but do not flush it to disk */
+
+                        log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
+                } else {
+                        ut_error;
+                }
 
-		/*-------------------------------------*/
-	
 		mutex_enter(&kernel_mutex);
 	}
 }

--- 1.21/innobase/trx/trx0undo.c	Tue Mar  1 08:16:20 2005
+++ 1.22/innobase/trx/trx0undo.c	Sun Mar 13 12:47:21 2005
@@ -596,7 +596,7 @@
 }
 
 /*******************************************************************
-Adds the XA XID after an undo log old-style header. */
+Adds space for the XA XID after an undo log old-style header. */
 static
 void
 trx_undo_header_add_space_for_xid(
@@ -1488,6 +1488,7 @@
 /*============*/
 				/* out: undo log object, NULL if did not
 				succeed: out of space */
+	trx_t*		trx,	/* in: transaction */
 	trx_rseg_t*	rseg,	/* in: rollback segment memory copy */
 	ulint		type,	/* in: type of the log: TRX_UNDO_INSERT or
 				TRX_UNDO_UPDATE */
@@ -1530,7 +1531,10 @@
 	
 	offset = trx_undo_header_create(undo_page, trx_id, mtr);
 
-	trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr);
+	if (trx->support_xa) {
+		trx_undo_header_add_space_for_xid(undo_page,
+					undo_page + offset, mtr);
+	}
 
 	undo = trx_undo_mem_create(rseg, id, type, trx_id, xid,
 							page_no, offset);
@@ -1547,6 +1551,7 @@
 /*==================*/
 				/* out: the undo log memory object, NULL if
 				none cached */
+	trx_t*		trx,	/* in: transaction */
 	trx_rseg_t*	rseg,	/* in: rollback segment memory object */
 	ulint		type,	/* in: type of the log: TRX_UNDO_INSERT or
 				TRX_UNDO_UPDATE */
@@ -1597,16 +1602,22 @@
 
 	if (type == TRX_UNDO_INSERT) {
 		offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr);
-		trx_undo_header_add_space_for_xid(undo_page, undo_page + offset,
-									mtr);
+		
+		if (trx->support_xa) {
+			trx_undo_header_add_space_for_xid(undo_page,
+						undo_page + offset, mtr);
+		}
 	} else {
 		ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
 					+ TRX_UNDO_PAGE_TYPE)
 						== TRX_UNDO_UPDATE);
 
 		offset = trx_undo_header_create(undo_page, trx_id, mtr);
-		trx_undo_header_add_space_for_xid(undo_page, undo_page + offset,
-									mtr);
+
+		if (trx->support_xa) {
+			trx_undo_header_add_space_for_xid(undo_page,
+						undo_page + offset, mtr);
+		}
 	}
 	
 	trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset);
@@ -1674,11 +1685,11 @@
 #endif /* UNIV_SYNC_DEBUG */
 	mutex_enter(&(rseg->mutex));
 
-	undo = trx_undo_reuse_cached(rseg, type, trx->id, &trx->xid, &mtr);
-
+	undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid,
+									&mtr);
 	if (undo == NULL) {
-		undo = trx_undo_create(rseg, type, trx->id, &trx->xid, &mtr);
-
+		undo = trx_undo_create(trx, rseg, type, trx->id, &trx->xid,
+									&mtr);
 		if (undo == NULL) {
 			/* Did not succeed */
 

--- 1.173/sql/ha_innodb.cc	Sat Mar  5 16:12:29 2005
+++ 1.174/sql/ha_innodb.cc	Sun Mar 13 12:48:40 2005
@@ -690,6 +690,10 @@
 		trx->mysql_query_str = &(thd->query);
                 trx->active_trans = 0;
 
+		/* Update the info whether we should skip XA steps that eat
+		CPU time */
+		trx->support_xa = (ibool)(thd->variables.innodb_support_xa);
+
                 thd->ha_data[innobase_hton.slot] = trx;
 	} else {
 		if (trx->magic_n != TRX_MAGIC_N) {
@@ -1434,6 +1438,9 @@
 
 	trx = check_trx_exists(thd);
 
+	/* Update the info whether we should skip XA steps that eat CPU time */
+	trx->support_xa = (ibool)(thd->variables.innodb_support_xa);
+
 	/* Release a possible FIFO ticket and search latch. Since we will
 	reserve the kernel mutex, we have to release the search system latch
 	first to obey the latching order. */
@@ -1620,6 +1627,9 @@
 
 	trx = check_trx_exists(thd);
 
+	/* Update the info whether we should skip XA steps that eat CPU time */
+	trx->support_xa = (ibool)(thd->variables.innodb_support_xa);
+
 	/* Release a possible FIFO ticket and search latch. Since we will
 	reserve the kernel mutex, we have to release the search system latch
 	first to obey the latching order. */
@@ -6307,6 +6317,11 @@
 {
 	int error = 0;
         trx_t* trx;
+
+	if (!thd->variables.innodb_support_xa) {
+
+		return(0);
+	}
 
         trx = check_trx_exists(thd);
 

--- 1.94/sql/set_var.cc	Wed Mar  2 15:39:22 2005
+++ 1.95/sql/set_var.cc	Sun Mar 13 12:48:43 2005
@@ -392,6 +392,8 @@
 							&srv_max_purge_lag);
 sys_var_thd_bool	sys_innodb_table_locks("innodb_table_locks",
                                                &SV::innodb_table_locks);
+sys_var_thd_bool	sys_innodb_support_xa("innodb_support_xa",
+                                               &SV::innodb_support_xa);
 sys_var_long_ptr	sys_innodb_autoextend_increment("innodb_autoextend_increment",
 							&srv_auto_extend_increment);
 sys_var_long_ptr	sys_innodb_sync_spin_loops("innodb_sync_spin_loops",
@@ -689,6 +691,7 @@
   &sys_innodb_max_dirty_pages_pct,
   &sys_innodb_max_purge_lag,
   &sys_innodb_table_locks,
+  &sys_innodb_support_xa,
   &sys_innodb_max_purge_lag,
   &sys_innodb_autoextend_increment,
   &sys_innodb_sync_spin_loops,
@@ -810,6 +813,7 @@
   {"innodb_open_files", (char*) &innobase_open_files, SHOW_LONG },
   {sys_innodb_sync_spin_loops.name, (char*) &sys_innodb_sync_spin_loops, SHOW_SYS},
   {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS},
+  {sys_innodb_support_xa.name, (char*) &sys_innodb_support_xa, SHOW_SYS},
   {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS},
   {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS},
 #endif
Thread
bk commit into 5.0 tree (heikki:1.1807) - Heikki Tuuri, 13 Mar