List:Commits« Previous MessageNext Message »
From:Inaam Rana Date:July 19 2011 3:23pm
Subject:bzr push into mysql-trunk branch (inaam.rana:3256 to 3257)
View as plain text  
 3257 Inaam Rana	2011-07-19
      A port of random readahead fix which went in trunk with following revid.
      
        revno: 3278 [merge]
        revision-id: inaam.rana@stripped

    added:
      mysql-test/suite/sys_vars/r/innodb_random_read_ahead_basic.result
      mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test
    modified:
      storage/innobase/buf/buf0buf.c
      storage/innobase/buf/buf0rea.c
      storage/innobase/handler/ha_innodb.cc
      storage/innobase/include/buf0buf.h
      storage/innobase/include/buf0buf.ic
      storage/innobase/include/buf0lru.h
      storage/innobase/include/buf0rea.h
      storage/innobase/include/srv0srv.h
      storage/innobase/srv/srv0srv.c
 3256 kevin.lewis@stripped	2011-07-18
      WL#5756 -  InnoDB: Support Page Sizes 4k and 8k
      
      This patch allows one to change the page size of an entire InnoDB database
      at bootstrap.  
      
      A new parameter is introduced to specify the InnoDB universal page size.
      It must be put into the config or the command line when starting mysqld.
      UNIV_PAGE_SIZE and UNIV_PAGE_SHIFT_SIZE are now mapped to integer variables
      srv_page_size and srv_page_size_shift.  Since they are no longer constants,
      a number of code locations were changed to handle variables instead.
      
      The extent size of a filespace remains 1Mb.  So the number of pages needed
      to fill that extent changes with page size.
      
      Quite a few testcases are modified or added.  Individual tests that depend
      on the page size or assume a specific page size are moved to
         suite/innodb/t/innodb_16k.test
      and some are modified in innodb_8k.test and innodb_4k.test.
      Some tests and whole test files are isolated to 16k pages only if the
      purpose of the test can be fully accomplished with only 16k pages.

    added:
      mysql-test/include/have_innodb_16k.inc
      mysql-test/include/have_innodb_4k.inc
      mysql-test/include/have_innodb_8k.inc
      mysql-test/suite/innodb/r/innodb_16k.result
      mysql-test/suite/innodb/r/innodb_4k.result
      mysql-test/suite/innodb/r/innodb_8k.result
      mysql-test/suite/innodb/t/innodb_16k.test
      mysql-test/suite/innodb/t/innodb_4k.test
      mysql-test/suite/innodb/t/innodb_8k.test
      mysql-test/suite/sys_vars/r/innodb_large_prefix_basic.result
      mysql-test/suite/sys_vars/r/innodb_page_size_basic.result
      mysql-test/suite/sys_vars/t/innodb_large_prefix_basic.test
      mysql-test/suite/sys_vars/t/innodb_page_size_basic.test
    modified:
      mysql-test/suite/innodb/r/innodb-create-options.result*
      mysql-test/suite/innodb/r/innodb-index.result*
      mysql-test/suite/innodb/r/innodb-system-table-view.result*
      mysql-test/suite/innodb/r/innodb-zip.result*
      mysql-test/suite/innodb/r/innodb.result
      mysql-test/suite/innodb/r/innodb_buffer_pool_load.result*
      mysql-test/suite/innodb/r/innodb_bug53591.result*
      mysql-test/suite/innodb/r/innodb_index_large_prefix.result*
      mysql-test/suite/innodb/r/innodb_mysql.result*
      mysql-test/suite/innodb/t/innodb-create-options.test*
      mysql-test/suite/innodb/t/innodb-index.test*
      mysql-test/suite/innodb/t/innodb-system-table-view.test*
      mysql-test/suite/innodb/t/innodb-zip.test*
      mysql-test/suite/innodb/t/innodb.test*
      mysql-test/suite/innodb/t/innodb_buffer_pool_load.test*
      mysql-test/suite/innodb/t/innodb_bug36172.test*
      mysql-test/suite/innodb/t/innodb_bug53591.test*
      mysql-test/suite/innodb/t/innodb_bug60049.test*
      mysql-test/suite/innodb/t/innodb_index_large_prefix.test*
      mysql-test/suite/innodb/t/innodb_mysql.test*
      mysql-test/suite/innodb/t/innodb_prefix_index_liftedlimit.test*
      mysql-test/suite/innodb/t/innodb_prefix_index_restart_server.test*
      mysql-test/suite/innodb/t/innodb_trx_weight.test*
      mysql-test/suite/sys_vars/r/all_vars.result
      storage/innobase/buf/buf0buddy.c
      storage/innobase/fil/fil0fil.c
      storage/innobase/fsp/fsp0fsp.c
      storage/innobase/handler/ha_innodb.cc
      storage/innobase/handler/i_s.cc
      storage/innobase/include/buf0buf.h
      storage/innobase/include/buf0types.h
      storage/innobase/include/dict0mem.h
      storage/innobase/include/fsp0types.h
      storage/innobase/include/sync0rw.ic
      storage/innobase/include/trx0sys.h
      storage/innobase/include/univ.i
      storage/innobase/row/row0merge.c
      storage/innobase/srv/srv0srv.c
      storage/innobase/srv/srv0start.c
      storage/innobase/trx/trx0sys.c
=== added file 'mysql-test/suite/sys_vars/r/innodb_random_read_ahead_basic.result'
--- a/mysql-test/suite/sys_vars/r/innodb_random_read_ahead_basic.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/sys_vars/r/innodb_random_read_ahead_basic.result	revid:inaam.rana@stripped
@@ -0,0 +1,92 @@
+SET @start_global_value = @@global.innodb_random_read_ahead;
+SELECT @start_global_value;
+@start_global_value
+0
+Valid values are 'ON' and 'OFF' 
+select @@global.innodb_random_read_ahead in (0, 1);
+@@global.innodb_random_read_ahead in (0, 1)
+1
+select @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+0
+select @@session.innodb_random_read_ahead;
+ERROR HY000: Variable 'innodb_random_read_ahead' is a GLOBAL variable
+show global variables like 'innodb_random_read_ahead';
+Variable_name	Value
+innodb_random_read_ahead	OFF
+show session variables like 'innodb_random_read_ahead';
+Variable_name	Value
+innodb_random_read_ahead	OFF
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME	VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD	OFF
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME	VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD	OFF
+set global innodb_random_read_ahead='ON';
+select @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+1
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME	VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD	ON
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME	VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD	ON
+set @@global.innodb_random_read_ahead=0;
+select @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+0
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME	VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD	OFF
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME	VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD	OFF
+set global innodb_random_read_ahead=1;
+select @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+1
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME	VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD	ON
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME	VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD	ON
+set @@global.innodb_random_read_ahead='OFF';
+select @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+0
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME	VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD	OFF
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME	VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD	OFF
+set session innodb_random_read_ahead='OFF';
+ERROR HY000: Variable 'innodb_random_read_ahead' is a GLOBAL variable and should be set with SET GLOBAL
+set @@session.innodb_random_read_ahead='ON';
+ERROR HY000: Variable 'innodb_random_read_ahead' is a GLOBAL variable and should be set with SET GLOBAL
+set global innodb_random_read_ahead=1.1;
+ERROR 42000: Incorrect argument type to variable 'innodb_random_read_ahead'
+set global innodb_random_read_ahead=1e1;
+ERROR 42000: Incorrect argument type to variable 'innodb_random_read_ahead'
+set global innodb_random_read_ahead=2;
+ERROR 42000: Variable 'innodb_random_read_ahead' can't be set to the value of '2'
+NOTE: The following should fail with ER_WRONG_VALUE_FOR_VAR (BUG#50643)
+set global innodb_random_read_ahead=-3;
+select @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+1
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME	VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD	ON
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME	VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD	ON
+set global innodb_random_read_ahead='AUTO';
+ERROR 42000: Variable 'innodb_random_read_ahead' can't be set to the value of 'AUTO'
+SET @@global.innodb_random_read_ahead = @start_global_value;
+SELECT @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+0

=== added file 'mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test'
--- a/mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test	revid:inaam.rana@stripped
@@ -0,0 +1,70 @@
+
+
+# 2010-01-25 - Added
+#
+
+--source include/have_innodb.inc
+
+SET @start_global_value = @@global.innodb_random_read_ahead;
+SELECT @start_global_value;
+
+#
+# exists as global only
+#
+--echo Valid values are 'ON' and 'OFF' 
+select @@global.innodb_random_read_ahead in (0, 1);
+select @@global.innodb_random_read_ahead;
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+select @@session.innodb_random_read_ahead;
+show global variables like 'innodb_random_read_ahead';
+show session variables like 'innodb_random_read_ahead';
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+
+#
+# show that it's writable
+#
+set global innodb_random_read_ahead='ON';
+select @@global.innodb_random_read_ahead;
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+set @@global.innodb_random_read_ahead=0;
+select @@global.innodb_random_read_ahead;
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+set global innodb_random_read_ahead=1;
+select @@global.innodb_random_read_ahead;
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+set @@global.innodb_random_read_ahead='OFF';
+select @@global.innodb_random_read_ahead;
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+--error ER_GLOBAL_VARIABLE
+set session innodb_random_read_ahead='OFF';
+--error ER_GLOBAL_VARIABLE
+set @@session.innodb_random_read_ahead='ON';
+
+#
+# incorrect types
+#
+--error ER_WRONG_TYPE_FOR_VAR
+set global innodb_random_read_ahead=1.1;
+--error ER_WRONG_TYPE_FOR_VAR
+set global innodb_random_read_ahead=1e1;
+--error ER_WRONG_VALUE_FOR_VAR
+set global innodb_random_read_ahead=2;
+--echo NOTE: The following should fail with ER_WRONG_VALUE_FOR_VAR (BUG#50643)
+set global innodb_random_read_ahead=-3;
+select @@global.innodb_random_read_ahead;
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+--error ER_WRONG_VALUE_FOR_VAR
+set global innodb_random_read_ahead='AUTO';
+
+#
+# Cleanup
+#
+
+SET @@global.innodb_random_read_ahead = @start_global_value;
+SELECT @@global.innodb_random_read_ahead;

=== modified file 'storage/innobase/buf/buf0buf.c'
--- a/storage/innobase/buf/buf0buf.c	revid:kevin.lewis@stripped
+++ b/storage/innobase/buf/buf0buf.c	revid:inaam.rana@stripped
@@ -407,6 +407,7 @@ buf_get_total_stat(
 		tot_stat->n_pages_read += buf_stat->n_pages_read;
 		tot_stat->n_pages_written += buf_stat->n_pages_written;
 		tot_stat->n_pages_created += buf_stat->n_pages_created;
+		tot_stat->n_ra_pages_read_rnd += buf_stat->n_ra_pages_read_rnd;
 		tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
 		tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
 		tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;
@@ -2505,6 +2506,9 @@ loop:
 		}
 
 		if (buf_read_page(space, zip_size, offset)) {
+			buf_read_ahead_random(space, zip_size, offset,
+					      ibuf_inside(mtr));
+
 			retries = 0;
 		} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
 			++retries;
@@ -4717,6 +4721,7 @@ buf_stats_aggregate_pool_info(
 	total_info->n_pages_created += pool_info->n_pages_created;
 	total_info->n_pages_written += pool_info->n_pages_written;
 	total_info->n_page_gets += pool_info->n_page_gets;
+	total_info->n_ra_pages_read_rnd += pool_info->n_ra_pages_read_rnd;
 	total_info->n_ra_pages_read += pool_info->n_ra_pages_read;
 	total_info->n_ra_pages_evicted += pool_info->n_ra_pages_evicted;
 	total_info->page_made_young_rate += pool_info->page_made_young_rate;
@@ -4729,6 +4734,7 @@ buf_stats_aggregate_pool_info(
 	total_info->page_read_delta += pool_info->page_read_delta;
 	total_info->young_making_delta += pool_info->young_making_delta;
 	total_info->not_young_making_delta += pool_info->not_young_making_delta;
+	total_info->pages_readahead_rnd_rate += pool_info->pages_readahead_rnd_rate;
 	total_info->pages_readahead_rate += pool_info->pages_readahead_rate;
 	total_info->pages_evicted_rate += pool_info->pages_evicted_rate;
 	total_info->unzip_lru_len += pool_info->unzip_lru_len;
@@ -4803,6 +4809,7 @@ buf_stats_get_pool_info(
 
 	pool_info->n_page_gets = buf_pool->stat.n_page_gets;
 
+	pool_info->n_ra_pages_read_rnd = buf_pool->stat.n_ra_pages_read_rnd;
 	pool_info->n_ra_pages_read = buf_pool->stat.n_ra_pages_read;
 
 	pool_info->n_ra_pages_evicted = buf_pool->stat.n_ra_pages_evicted;
@@ -4842,6 +4849,10 @@ buf_stats_get_pool_info(
 			buf_pool->stat.n_pages_not_made_young
 			- buf_pool->old_stat.n_pages_not_made_young;
 	}
+	pool_info->pages_readahead_rnd_rate =
+		 (buf_pool->stat.n_ra_pages_read_rnd
+		  - buf_pool->old_stat.n_ra_pages_read_rnd) / time_elapsed;
+
 
 	pool_info->pages_readahead_rate =
 		 (buf_pool->stat.n_ra_pages_read
@@ -4926,9 +4937,12 @@ buf_print_io_instance(
 
 	/* Statistics about read ahead algorithm */
 	fprintf(file, "Pages read ahead %.2f/s,"
-		" evicted without access %.2f/s\n",
+		" evicted without access %.2f/s,"
+		" Random read ahead %.2f/s\n",
+
 		pool_info->pages_readahead_rate,
-		pool_info->pages_evicted_rate);
+		pool_info->pages_evicted_rate,
+		pool_info->pages_readahead_rnd_rate);
 
 	/* Print some values to help us with visualizing what is
 	happening with LRU eviction. */

=== modified file 'storage/innobase/buf/buf0rea.c'
--- a/storage/innobase/buf/buf0rea.c	revid:kevin.lewis@stripped
+++ b/storage/innobase/buf/buf0rea.c	revid:inaam.rana@stripped
@@ -40,8 +40,10 @@ Created 11/5/1995 Heikki Tuuri
 #include "mysql/plugin.h"
 #include "mysql/service_thd_wait.h"
 
-/** The linear read-ahead area size */
-#define	BUF_READ_AHEAD_LINEAR_AREA	BUF_READ_AHEAD_AREA
+/** There must be at least this many pages in buf_pool in the area to start
+a random read-ahead */
+#define BUF_READ_AHEAD_RANDOM_THRESHOLD(b)	\
+				(5 + BUF_READ_AHEAD_AREA(b) / 8)
 
 /** If there are buf_pool->curr_size per the number below pending reads, then
 read-ahead is not done: this is to prevent flooding the buffer pool with
@@ -175,6 +177,171 @@ buf_read_page_low(
 }
 
 /********************************************************************//**
+Applies a random read-ahead in buf_pool if there are at least a threshold
+value of accessed pages from the random read-ahead area. Does not read any
+page, not even the one at the position (space, offset), if the read-ahead
+mechanism is not activated. NOTE 1: the calling thread may own latches on
+pages: to avoid deadlocks this function must be written such that it cannot
+end up waiting for these latches! NOTE 2: the calling thread must want
+access to the page given: this rule is set to prevent unintended read-aheads
+performed by ibuf routines, a situation which could result in a deadlock if
+the OS does not support asynchronous i/o.
+@return number of page read requests issued; NOTE that if we read ibuf
+pages, it may happen that the page at the given page number does not
+get read even if we return a positive value!
+@return	number of page read requests issued */
+UNIV_INTERN
+ulint
+buf_read_ahead_random(
+/*==================*/
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes,
+				or 0 */
+	ulint	offset,		/*!< in: page number of a page which
+				the current thread wants to access */
+	ibool	inside_ibuf)	/*!< in: TRUE if we are inside ibuf
+				routine */
+{
+	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	ib_int64_t	tablespace_version;
+	ulint		recent_blocks	= 0;
+	ulint		ibuf_mode;
+	ulint		count;
+	ulint		low, high;
+	ulint		err;
+	ulint		i;
+	const ulint	buf_read_ahead_random_area
+				= BUF_READ_AHEAD_AREA(buf_pool);
+
+	if (!srv_random_read_ahead) {
+		/* Disabled by user */
+		return(0);
+	}
+
+	if (srv_startup_is_before_trx_rollback_phase) {
+		/* No read-ahead to avoid thread deadlocks */
+		return(0);
+	}
+
+	if (ibuf_bitmap_page(zip_size, offset)
+	    || trx_sys_hdr_page(space, offset)) {
+
+		/* If it is an ibuf bitmap page or trx sys hdr, we do
+		no read-ahead, as that could break the ibuf page access
+		order */
+
+		return(0);
+	}
+
+	/* Remember the tablespace version before we ask the tablespace size
+	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
+	do not try to read outside the bounds of the tablespace! */
+
+	tablespace_version = fil_space_get_version(space);
+
+	low  = (offset / buf_read_ahead_random_area)
+		* buf_read_ahead_random_area;
+	high = (offset / buf_read_ahead_random_area + 1)
+		* buf_read_ahead_random_area;
+	if (high > fil_space_get_size(space)) {
+
+		high = fil_space_get_size(space);
+	}
+
+	buf_pool_mutex_enter(buf_pool);
+
+	if (buf_pool->n_pend_reads
+	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+		buf_pool_mutex_exit(buf_pool);
+
+		return(0);
+	}
+
+	/* Count how many blocks in the area have been recently accessed,
+	that is, reside near the start of the LRU list. */
+
+	for (i = low; i < high; i++) {
+		const buf_page_t* bpage =
+			buf_page_hash_get(buf_pool, space, i);
+
+		if (bpage
+		    && buf_page_is_accessed(bpage)
+		    && buf_page_peek_if_young(bpage)) {
+
+			recent_blocks++;
+
+			if (recent_blocks
+			    >= BUF_READ_AHEAD_RANDOM_THRESHOLD(buf_pool)) {
+
+				buf_pool_mutex_exit(buf_pool);
+				goto read_ahead;
+			}
+		}
+	}
+
+	buf_pool_mutex_exit(buf_pool);
+	/* Do nothing */
+	return(0);
+
+read_ahead:
+	/* Read all the suitable blocks within the area */
+
+	if (inside_ibuf) {
+		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
+	} else {
+		ibuf_mode = BUF_READ_ANY_PAGE;
+	}
+
+	count = 0;
+
+	for (i = low; i < high; i++) {
+		/* It is only sensible to do read-ahead in the non-sync aio
+		mode: hence FALSE as the first parameter */
+
+		if (!ibuf_bitmap_page(zip_size, i)) {
+			count += buf_read_page_low(
+				&err, FALSE,
+				ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
+				space, zip_size, FALSE,
+				tablespace_version, i);
+			if (err == DB_TABLESPACE_DELETED) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+					"  InnoDB: Warning: in random"
+					" readahead trying to access\n"
+					"InnoDB: tablespace %lu page %lu,\n"
+					"InnoDB: but the tablespace does not"
+					" exist or is just being dropped.\n",
+					(ulong) space, (ulong) i);
+			}
+		}
+	}
+
+	/* In simulated aio we wake the aio handler threads only after
+	queuing all aio requests, in native aio the following call does
+	nothing: */
+
+	os_aio_simulated_wake_handler_threads();
+
+#ifdef UNIV_DEBUG
+	if (buf_debug_prints && (count > 0)) {
+		fprintf(stderr,
+			"Random read-ahead space %lu offset %lu pages %lu\n",
+			(ulong) space, (ulong) offset,
+			(ulong) count);
+	}
+#endif /* UNIV_DEBUG */
+
+	/* Read ahead is considered one I/O operation for the purpose of
+	LRU policy decision. */
+	buf_LRU_stat_inc_io();
+
+	buf_pool->stat.n_ra_pages_read_rnd += count;
+	srv_buf_pool_reads += count;
+	return(count);
+}
+
+/********************************************************************//**
 High-level function which reads a page asynchronously from a file to the
 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
@@ -317,7 +484,7 @@ buf_read_ahead_linear(
 	ulint		err;
 	ulint		i;
 	const ulint	buf_read_ahead_linear_area
-		= BUF_READ_AHEAD_LINEAR_AREA(buf_pool);
+		= BUF_READ_AHEAD_AREA(buf_pool);
 	ulint		threshold;
 
 	/* check if readahead is disabled */

=== modified file 'storage/innobase/handler/ha_innodb.cc'
--- a/storage/innobase/handler/ha_innodb.cc	revid:kevin.lewis@stripped
+++ b/storage/innobase/handler/ha_innodb.cc	revid:inaam.rana@stripped
@@ -410,6 +410,8 @@ static SHOW_VAR innodb_status_variables[
   (char*) &export_vars.innodb_buffer_pool_pages_misc,	  SHOW_LONG},
   {"buffer_pool_pages_total",
   (char*) &export_vars.innodb_buffer_pool_pages_total,	  SHOW_LONG},
+  {"buffer_pool_read_ahead_rnd",
+  (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
   {"buffer_pool_read_ahead",
   (char*) &export_vars.innodb_buffer_pool_read_ahead,	  SHOW_LONG},
   {"buffer_pool_read_ahead_evicted",
@@ -12591,6 +12593,11 @@ static MYSQL_SYSVAR_UINT(change_bufferin
   NULL, NULL, 0, 0, 1, 0);
 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
 
+static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
+  PLUGIN_VAR_NOCMDARG,
+  "Whether to use read ahead for random access within an extent.",
+  NULL, NULL, FALSE);
+
 static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
   PLUGIN_VAR_RQCMDARG,
   "Number of pages that must be accessed sequentially for InnoDB to "
@@ -12699,6 +12706,7 @@ static struct st_mysql_sys_var* innobase
 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
   MYSQL_SYSVAR(change_buffering_debug),
 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+  MYSQL_SYSVAR(random_read_ahead),
   MYSQL_SYSVAR(read_ahead_threshold),
   MYSQL_SYSVAR(io_capacity),
   MYSQL_SYSVAR(monitor_enable),

=== modified file 'storage/innobase/include/buf0buf.h'
--- a/storage/innobase/include/buf0buf.h	revid:kevin.lewis@stripped
+++ b/storage/innobase/include/buf0buf.h	revid:inaam.rana@stripped
@@ -153,6 +153,8 @@ struct buf_pool_info_struct{
 	ulint	n_pages_created;	/*!< buf_pool->n_pages_created */
 	ulint	n_pages_written;	/*!< buf_pool->n_pages_written */
 	ulint	n_page_gets;		/*!< buf_pool->n_page_gets */
+	ulint	n_ra_pages_read_rnd;	/*!< buf_pool->n_ra_pages_read_rnd,
+					number of random readahead pages */
 	ulint	n_ra_pages_read;	/*!< buf_pool->n_ra_pages_read, number
 					of pages readahead */
 	ulint	n_ra_pages_evicted;	/*!< buf_pool->n_ra_pages_evicted,
@@ -177,6 +179,8 @@ struct buf_pool_info_struct{
 					last printout */
 
 	/* Statistics about read ahead algorithm.  */
+	double	pages_readahead_rnd_rate;/*!< random readahead rate in pages per
+					second */
 	double	pages_readahead_rate;	/*!< readahead rate in pages per
 					second */
 	double	pages_evicted_rate;	/*!< rate of readahead page evicted
@@ -547,6 +551,18 @@ buf_block_get_freed_page_clock(
 	__attribute__((pure));
 
 /********************************************************************//**
+Tells if a block is still close enough to the MRU end of the LRU list
+meaning that it is not in danger of getting evicted and also implying
+that it has been accessed recently.
+Note that this is for heuristics only and does not reserve buffer pool
+mutex.
+@return	TRUE if block is close to MRU end of LRU */
+UNIV_INLINE
+ibool
+buf_page_peek_if_young(
+/*===================*/
+	const buf_page_t*	bpage);	/*!< in: block */
+/********************************************************************//**
 Recommends a move of a block to the start of the LRU list if there is danger
 of dropping from the buffer pool. NOTE: does not reserve the buffer pool
 mutex.
@@ -1695,6 +1711,8 @@ struct buf_pool_stat_struct{
 	ulint	n_pages_written;/*!< number write operations */
 	ulint	n_pages_created;/*!< number of pages created
 				in the pool with no read */
+	ulint	n_ra_pages_read_rnd;/*!< number of pages read in
+				as part of random read ahead */
 	ulint	n_ra_pages_read;/*!< number of pages read in
 				as part of read ahead */
 	ulint	n_ra_pages_evicted;/*!< number of read ahead
@@ -1841,7 +1859,7 @@ struct buf_pool_struct{
 	UT_LIST_BASE_NODE_T(buf_page_t) LRU;
 					/*!< base node of the LRU list */
 	buf_page_t*	LRU_old;	/*!< pointer to the about
-					buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
+					LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
 					oldest blocks in the LRU list;
 					NULL if LRU length less than
 					BUF_LRU_OLD_MIN_LEN;

=== modified file 'storage/innobase/include/buf0buf.ic'
--- a/storage/innobase/include/buf0buf.ic	revid:kevin.lewis@stripped
+++ b/storage/innobase/include/buf0buf.ic	revid:inaam.rana@stripped
@@ -135,6 +135,29 @@ buf_block_get_freed_page_clock(
 }
 
 /********************************************************************//**
+Tells if a block is still close enough to the MRU end of the LRU list
+meaning that it is not in danger of getting evicted and also implying
+that it has been accessed recently.
+Note that this is for heuristics only and does not reserve buffer pool
+mutex.
+@return	TRUE if block is close to MRU end of LRU */
+UNIV_INLINE
+ibool
+buf_page_peek_if_young(
+/*===================*/
+	const buf_page_t*	bpage)	/*!< in: block */
+{
+	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+
+	/* FIXME: bpage->freed_page_clock is 31 bits */
+	return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
+	       < ((ulint) bpage->freed_page_clock
+		  + (buf_pool->curr_size
+		     * (BUF_LRU_OLD_RATIO_DIV - buf_pool->LRU_old_ratio)
+		     / (BUF_LRU_OLD_RATIO_DIV * 4))));
+}
+
+/********************************************************************//**
 Recommends a move of a block to the start of the LRU list if there is danger
 of dropping from the buffer pool. NOTE: does not reserve the buffer pool
 mutex.
@@ -164,12 +187,7 @@ buf_page_peek_if_too_old(
 		buf_pool->stat.n_pages_not_made_young++;
 		return(FALSE);
 	} else {
-		/* FIXME: bpage->freed_page_clock is 31 bits */
-		return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
-		       > ((ulint) bpage->freed_page_clock
-			  + (buf_pool->curr_size
-			     * (BUF_LRU_OLD_RATIO_DIV - buf_pool->LRU_old_ratio)
-			     / (BUF_LRU_OLD_RATIO_DIV * 4))));
+		return(!buf_page_peek_if_young(bpage));
 	}
 }
 #endif /* !UNIV_HOTBACKUP */

=== modified file 'storage/innobase/include/buf0lru.h'
--- a/storage/innobase/include/buf0lru.h	revid:kevin.lewis@stripped
+++ b/storage/innobase/include/buf0lru.h	revid:inaam.rana@stripped
@@ -184,7 +184,7 @@ buf_LRU_make_block_old(
 /*===================*/
 	buf_page_t*	bpage);	/*!< in: control block */
 /**********************************************************************//**
-Updates buf_LRU_old_ratio.
+Updates buf_pool->LRU_old_ratio.
 @return	updated old_pct */
 UNIV_INTERN
 ulint
@@ -193,7 +193,7 @@ buf_LRU_old_ratio_update(
 	uint	old_pct,/*!< in: Reserve this percentage of
 			the buffer pool for "old" blocks. */
 	ibool	adjust);/*!< in: TRUE=adjust the LRU list;
-			FALSE=just assign buf_LRU_old_ratio
+			FALSE=just assign buf_pool->LRU_old_ratio
 			during the initialization of InnoDB */
 /********************************************************************//**
 Update the historical stats that we are collecting for LRU eviction
@@ -222,18 +222,15 @@ buf_LRU_print(void);
 #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
 
 /** @name Heuristics for detecting index scan @{ */
-/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for
-"old" blocks.  Protected by buf_pool->mutex. */
-extern uint	buf_LRU_old_ratio;
-/** The denominator of buf_LRU_old_ratio. */
+/** The denominator of buf_pool->LRU_old_ratio. */
 #define BUF_LRU_OLD_RATIO_DIV	1024
-/** Maximum value of buf_LRU_old_ratio.
+/** Maximum value of buf_pool->LRU_old_ratio.
 @see buf_LRU_old_adjust_len
-@see buf_LRU_old_ratio_update */
+@see buf_LRU_old_ratio_update */
 #define BUF_LRU_OLD_RATIO_MAX	BUF_LRU_OLD_RATIO_DIV
-/** Minimum value of buf_LRU_old_ratio.
+/** Minimum value of buf_pool->LRU_old_ratio.
 @see buf_LRU_old_adjust_len
-@see buf_LRU_old_ratio_update
+@see buf_LRU_old_ratio_update
 The minimum must exceed
 (BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */
 #define BUF_LRU_OLD_RATIO_MIN	51

=== modified file 'storage/innobase/include/buf0rea.h'
--- a/storage/innobase/include/buf0rea.h	revid:kevin.lewis@stripped
+++ b/storage/innobase/include/buf0rea.h	revid:inaam.rana@stripped
@@ -55,6 +55,31 @@ buf_read_page_async(
 	ulint	space,	/*!< in: space id */
 	ulint	offset);/*!< in: page number */
 /********************************************************************//**
+Applies a random read-ahead in buf_pool if there are at least a threshold
+value of accessed pages from the random read-ahead area. Does not read any
+page, not even the one at the position (space, offset), if the read-ahead
+mechanism is not activated. NOTE 1: the calling thread may own latches on
+pages: to avoid deadlocks this function must be written such that it cannot
+end up waiting for these latches! NOTE 2: the calling thread must want
+access to the page given: this rule is set to prevent unintended read-aheads
+performed by ibuf routines, a situation which could result in a deadlock if
+the OS does not support asynchronous i/o.
+@return number of page read requests issued; NOTE that if we read ibuf
+pages, it may happen that the page at the given page number does not
+get read even if we return a positive value!
+@return	number of page read requests issued */
+UNIV_INTERN
+ulint
+buf_read_ahead_random(
+/*==================*/
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes,
+				or 0 */
+	ulint	offset,		/*!< in: page number of a page which
+				the current thread wants to access */
+	ibool	inside_ibuf);	/*!< in: TRUE if we are inside ibuf
+				routine */
+/********************************************************************//**
 Applies linear read-ahead if in the buf_pool the page is a border page of
 a linear read-ahead area and all the pages in the area have been accessed.
 Does not read any page if the read-ahead mechanism is not activated. Note

=== modified file 'storage/innobase/include/srv0srv.h'
--- a/storage/innobase/include/srv0srv.h	revid:kevin.lewis@stripped
+++ b/storage/innobase/include/srv0srv.h	revid:inaam.rana@stripped
@@ -174,6 +174,7 @@ extern ulint	srv_mem_pool_size;
 extern ulint	srv_lock_table_size;
 
 extern ulint	srv_n_file_io_threads;
+extern my_bool	srv_random_read_ahead;
 extern ulong	srv_read_ahead_threshold;
 extern ulint	srv_n_read_io_threads;
 extern ulint	srv_n_write_io_threads;
@@ -721,6 +722,7 @@ struct export_var_struct{
 	ulint innodb_buffer_pool_wait_free;	/*!< srv_buf_pool_wait_free */
 	ulint innodb_buffer_pool_pages_flushed;	/*!< srv_buf_pool_flushed */
 	ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
+	ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */
 	ulint innodb_buffer_pool_read_ahead;	/*!< srv_read_ahead */
 	ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
 	ulint innodb_dblwr_pages_written;	/*!< srv_dblwr_pages_written */

=== modified file 'storage/innobase/srv/srv0srv.c'
--- a/storage/innobase/srv/srv0srv.c	revid:kevin.lewis@stripped
+++ b/storage/innobase/srv/srv0srv.c	revid:inaam.rana@stripped
@@ -210,6 +210,8 @@ UNIV_INTERN ulint	srv_n_file_io_threads
 UNIV_INTERN ulint	srv_n_read_io_threads	= ULINT_MAX;
 UNIV_INTERN ulint	srv_n_write_io_threads	= ULINT_MAX;
 
+/* Switch to enable random read ahead. */
+UNIV_INTERN my_bool	srv_random_read_ahead	= FALSE;
 /* User settable value of the number of pages that must be present
 in the buffer cache and accessed sequentially for InnoDB to trigger a
 readahead request. */
@@ -1295,6 +1297,8 @@ srv_export_innodb_status(void)
 	export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
 	export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
 	export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
+	export_vars.innodb_buffer_pool_read_ahead_rnd
+		= stat.n_ra_pages_read_rnd;
 	export_vars.innodb_buffer_pool_read_ahead
 		= stat.n_ra_pages_read;
 	export_vars.innodb_buffer_pool_read_ahead_evicted


Attachment: [text/bzr-bundle] bzr/inaam.rana@oracle.com-20110719152121-2813xt8s4w6n0kcb.bundle
Thread
bzr push into mysql-trunk branch (inaam.rana:3256 to 3257) Inaam Rana20 Jul