3257 Inaam Rana 2011-07-19
A port of random readahead fix which went in trunk with following revid.
revno: 3278 [merge]
revision-id: inaam.rana@stripped
added:
mysql-test/suite/sys_vars/r/innodb_random_read_ahead_basic.result
mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test
modified:
storage/innobase/buf/buf0buf.c
storage/innobase/buf/buf0rea.c
storage/innobase/handler/ha_innodb.cc
storage/innobase/include/buf0buf.h
storage/innobase/include/buf0buf.ic
storage/innobase/include/buf0lru.h
storage/innobase/include/buf0rea.h
storage/innobase/include/srv0srv.h
storage/innobase/srv/srv0srv.c
3256 kevin.lewis@stripped 2011-07-18
WL#5756 - InnoDB: Support Page Sizes 4k and 8k
This patch allows one to change the page size of an entire InnoDB database
at bootstrap.
A new parameter is introduced to specify the InnoDB universal page size.
It must be put into the config or the command line when starting mysqld.
UNIV_PAGE_SIZE and UNIV_PAGE_SHIFT_SIZE are now mapped to integer variables
srv_page_size and srv_page_size_shift. Since they are no longer constants,
a number of code locations were changed to handle variables instead.
The extent size of a filespace remains 1Mb. So the number of pages needed
to fill that extent changes with page size.
Quite a few testcases are modified or added. Individual tests that depend
on the page size or assume a specific page size are moved to
suite/innodb/t/innodb_16k.test
and some are modified in innodb_8k.test and innodb_4k.test.
Some tests and whole test files are isolated to 16k pages only if the
purpose of the test can be fully accomplished with only 16k pages.
added:
mysql-test/include/have_innodb_16k.inc
mysql-test/include/have_innodb_4k.inc
mysql-test/include/have_innodb_8k.inc
mysql-test/suite/innodb/r/innodb_16k.result
mysql-test/suite/innodb/r/innodb_4k.result
mysql-test/suite/innodb/r/innodb_8k.result
mysql-test/suite/innodb/t/innodb_16k.test
mysql-test/suite/innodb/t/innodb_4k.test
mysql-test/suite/innodb/t/innodb_8k.test
mysql-test/suite/sys_vars/r/innodb_large_prefix_basic.result
mysql-test/suite/sys_vars/r/innodb_page_size_basic.result
mysql-test/suite/sys_vars/t/innodb_large_prefix_basic.test
mysql-test/suite/sys_vars/t/innodb_page_size_basic.test
modified:
mysql-test/suite/innodb/r/innodb-create-options.result*
mysql-test/suite/innodb/r/innodb-index.result*
mysql-test/suite/innodb/r/innodb-system-table-view.result*
mysql-test/suite/innodb/r/innodb-zip.result*
mysql-test/suite/innodb/r/innodb.result
mysql-test/suite/innodb/r/innodb_buffer_pool_load.result*
mysql-test/suite/innodb/r/innodb_bug53591.result*
mysql-test/suite/innodb/r/innodb_index_large_prefix.result*
mysql-test/suite/innodb/r/innodb_mysql.result*
mysql-test/suite/innodb/t/innodb-create-options.test*
mysql-test/suite/innodb/t/innodb-index.test*
mysql-test/suite/innodb/t/innodb-system-table-view.test*
mysql-test/suite/innodb/t/innodb-zip.test*
mysql-test/suite/innodb/t/innodb.test*
mysql-test/suite/innodb/t/innodb_buffer_pool_load.test*
mysql-test/suite/innodb/t/innodb_bug36172.test*
mysql-test/suite/innodb/t/innodb_bug53591.test*
mysql-test/suite/innodb/t/innodb_bug60049.test*
mysql-test/suite/innodb/t/innodb_index_large_prefix.test*
mysql-test/suite/innodb/t/innodb_mysql.test*
mysql-test/suite/innodb/t/innodb_prefix_index_liftedlimit.test*
mysql-test/suite/innodb/t/innodb_prefix_index_restart_server.test*
mysql-test/suite/innodb/t/innodb_trx_weight.test*
mysql-test/suite/sys_vars/r/all_vars.result
storage/innobase/buf/buf0buddy.c
storage/innobase/fil/fil0fil.c
storage/innobase/fsp/fsp0fsp.c
storage/innobase/handler/ha_innodb.cc
storage/innobase/handler/i_s.cc
storage/innobase/include/buf0buf.h
storage/innobase/include/buf0types.h
storage/innobase/include/dict0mem.h
storage/innobase/include/fsp0types.h
storage/innobase/include/sync0rw.ic
storage/innobase/include/trx0sys.h
storage/innobase/include/univ.i
storage/innobase/row/row0merge.c
storage/innobase/srv/srv0srv.c
storage/innobase/srv/srv0start.c
storage/innobase/trx/trx0sys.c
=== added file 'mysql-test/suite/sys_vars/r/innodb_random_read_ahead_basic.result'
--- a/mysql-test/suite/sys_vars/r/innodb_random_read_ahead_basic.result 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/sys_vars/r/innodb_random_read_ahead_basic.result revid:inaam.rana@stripped
@@ -0,0 +1,92 @@
+SET @start_global_value = @@global.innodb_random_read_ahead;
+SELECT @start_global_value;
+@start_global_value
+0
+Valid values are 'ON' and 'OFF'
+select @@global.innodb_random_read_ahead in (0, 1);
+@@global.innodb_random_read_ahead in (0, 1)
+1
+select @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+0
+select @@session.innodb_random_read_ahead;
+ERROR HY000: Variable 'innodb_random_read_ahead' is a GLOBAL variable
+show global variables like 'innodb_random_read_ahead';
+Variable_name Value
+innodb_random_read_ahead OFF
+show session variables like 'innodb_random_read_ahead';
+Variable_name Value
+innodb_random_read_ahead OFF
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD OFF
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD OFF
+set global innodb_random_read_ahead='ON';
+select @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+1
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD ON
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD ON
+set @@global.innodb_random_read_ahead=0;
+select @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+0
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD OFF
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD OFF
+set global innodb_random_read_ahead=1;
+select @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+1
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD ON
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD ON
+set @@global.innodb_random_read_ahead='OFF';
+select @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+0
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD OFF
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD OFF
+set session innodb_random_read_ahead='OFF';
+ERROR HY000: Variable 'innodb_random_read_ahead' is a GLOBAL variable and should be set with SET GLOBAL
+set @@session.innodb_random_read_ahead='ON';
+ERROR HY000: Variable 'innodb_random_read_ahead' is a GLOBAL variable and should be set with SET GLOBAL
+set global innodb_random_read_ahead=1.1;
+ERROR 42000: Incorrect argument type to variable 'innodb_random_read_ahead'
+set global innodb_random_read_ahead=1e1;
+ERROR 42000: Incorrect argument type to variable 'innodb_random_read_ahead'
+set global innodb_random_read_ahead=2;
+ERROR 42000: Variable 'innodb_random_read_ahead' can't be set to the value of '2'
+NOTE: The following should fail with ER_WRONG_VALUE_FOR_VAR (BUG#50643)
+set global innodb_random_read_ahead=-3;
+select @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+1
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD ON
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+VARIABLE_NAME VARIABLE_VALUE
+INNODB_RANDOM_READ_AHEAD ON
+set global innodb_random_read_ahead='AUTO';
+ERROR 42000: Variable 'innodb_random_read_ahead' can't be set to the value of 'AUTO'
+SET @@global.innodb_random_read_ahead = @start_global_value;
+SELECT @@global.innodb_random_read_ahead;
+@@global.innodb_random_read_ahead
+0
=== added file 'mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test'
--- a/mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test revid:inaam.rana@stripped
@@ -0,0 +1,70 @@
+
+
+# 2010-01-25 - Added
+#
+
+--source include/have_innodb.inc
+
+SET @start_global_value = @@global.innodb_random_read_ahead;
+SELECT @start_global_value;
+
+#
+# exists as global only
+#
+--echo Valid values are 'ON' and 'OFF'
+select @@global.innodb_random_read_ahead in (0, 1);
+select @@global.innodb_random_read_ahead;
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+select @@session.innodb_random_read_ahead;
+show global variables like 'innodb_random_read_ahead';
+show session variables like 'innodb_random_read_ahead';
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+
+#
+# show that it's writable
+#
+set global innodb_random_read_ahead='ON';
+select @@global.innodb_random_read_ahead;
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+set @@global.innodb_random_read_ahead=0;
+select @@global.innodb_random_read_ahead;
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+set global innodb_random_read_ahead=1;
+select @@global.innodb_random_read_ahead;
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+set @@global.innodb_random_read_ahead='OFF';
+select @@global.innodb_random_read_ahead;
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+--error ER_GLOBAL_VARIABLE
+set session innodb_random_read_ahead='OFF';
+--error ER_GLOBAL_VARIABLE
+set @@session.innodb_random_read_ahead='ON';
+
+#
+# incorrect types
+#
+--error ER_WRONG_TYPE_FOR_VAR
+set global innodb_random_read_ahead=1.1;
+--error ER_WRONG_TYPE_FOR_VAR
+set global innodb_random_read_ahead=1e1;
+--error ER_WRONG_VALUE_FOR_VAR
+set global innodb_random_read_ahead=2;
+--echo NOTE: The following should fail with ER_WRONG_VALUE_FOR_VAR (BUG#50643)
+set global innodb_random_read_ahead=-3;
+select @@global.innodb_random_read_ahead;
+select * from information_schema.global_variables where variable_name='innodb_random_read_ahead';
+select * from information_schema.session_variables where variable_name='innodb_random_read_ahead';
+--error ER_WRONG_VALUE_FOR_VAR
+set global innodb_random_read_ahead='AUTO';
+
+#
+# Cleanup
+#
+
+SET @@global.innodb_random_read_ahead = @start_global_value;
+SELECT @@global.innodb_random_read_ahead;
=== modified file 'storage/innobase/buf/buf0buf.c'
--- a/storage/innobase/buf/buf0buf.c revid:kevin.lewis@stripped
+++ b/storage/innobase/buf/buf0buf.c revid:inaam.rana@stripped
@@ -407,6 +407,7 @@ buf_get_total_stat(
tot_stat->n_pages_read += buf_stat->n_pages_read;
tot_stat->n_pages_written += buf_stat->n_pages_written;
tot_stat->n_pages_created += buf_stat->n_pages_created;
+ tot_stat->n_ra_pages_read_rnd += buf_stat->n_ra_pages_read_rnd;
tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;
@@ -2505,6 +2506,9 @@ loop:
}
if (buf_read_page(space, zip_size, offset)) {
+ buf_read_ahead_random(space, zip_size, offset,
+ ibuf_inside(mtr));
+
retries = 0;
} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
++retries;
@@ -4717,6 +4721,7 @@ buf_stats_aggregate_pool_info(
total_info->n_pages_created += pool_info->n_pages_created;
total_info->n_pages_written += pool_info->n_pages_written;
total_info->n_page_gets += pool_info->n_page_gets;
+ total_info->n_ra_pages_read_rnd += pool_info->n_ra_pages_read_rnd;
total_info->n_ra_pages_read += pool_info->n_ra_pages_read;
total_info->n_ra_pages_evicted += pool_info->n_ra_pages_evicted;
total_info->page_made_young_rate += pool_info->page_made_young_rate;
@@ -4729,6 +4734,7 @@ buf_stats_aggregate_pool_info(
total_info->page_read_delta += pool_info->page_read_delta;
total_info->young_making_delta += pool_info->young_making_delta;
total_info->not_young_making_delta += pool_info->not_young_making_delta;
+ total_info->pages_readahead_rnd_rate += pool_info->pages_readahead_rnd_rate;
total_info->pages_readahead_rate += pool_info->pages_readahead_rate;
total_info->pages_evicted_rate += pool_info->pages_evicted_rate;
total_info->unzip_lru_len += pool_info->unzip_lru_len;
@@ -4803,6 +4809,7 @@ buf_stats_get_pool_info(
pool_info->n_page_gets = buf_pool->stat.n_page_gets;
+ pool_info->n_ra_pages_read_rnd = buf_pool->stat.n_ra_pages_read_rnd;
pool_info->n_ra_pages_read = buf_pool->stat.n_ra_pages_read;
pool_info->n_ra_pages_evicted = buf_pool->stat.n_ra_pages_evicted;
@@ -4842,6 +4849,10 @@ buf_stats_get_pool_info(
buf_pool->stat.n_pages_not_made_young
- buf_pool->old_stat.n_pages_not_made_young;
}
+ pool_info->pages_readahead_rnd_rate =
+ (buf_pool->stat.n_ra_pages_read_rnd
+ - buf_pool->old_stat.n_ra_pages_read_rnd) / time_elapsed;
+
pool_info->pages_readahead_rate =
(buf_pool->stat.n_ra_pages_read
@@ -4926,9 +4937,12 @@ buf_print_io_instance(
/* Statistics about read ahead algorithm */
fprintf(file, "Pages read ahead %.2f/s,"
- " evicted without access %.2f/s\n",
+ " evicted without access %.2f/s,"
+ " Random read ahead %.2f/s\n",
+
pool_info->pages_readahead_rate,
- pool_info->pages_evicted_rate);
+ pool_info->pages_evicted_rate,
+ pool_info->pages_readahead_rnd_rate);
/* Print some values to help us with visualizing what is
happening with LRU eviction. */
=== modified file 'storage/innobase/buf/buf0rea.c'
--- a/storage/innobase/buf/buf0rea.c revid:kevin.lewis@stripped
+++ b/storage/innobase/buf/buf0rea.c revid:inaam.rana@stripped
@@ -40,8 +40,10 @@ Created 11/5/1995 Heikki Tuuri
#include "mysql/plugin.h"
#include "mysql/service_thd_wait.h"
-/** The linear read-ahead area size */
-#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
+/** There must be at least this many pages in buf_pool in the area to start
+a random read-ahead */
+#define BUF_READ_AHEAD_RANDOM_THRESHOLD(b) \
+ (5 + BUF_READ_AHEAD_AREA(b) / 8)
/** If there are buf_pool->curr_size per the number below pending reads, then
read-ahead is not done: this is to prevent flooding the buffer pool with
@@ -175,6 +177,171 @@ buf_read_page_low(
}
/********************************************************************//**
+Applies a random read-ahead in buf_pool if there are at least a threshold
+value of accessed pages from the random read-ahead area. Does not read any
+page, not even the one at the position (space, offset), if the read-ahead
+mechanism is not activated. NOTE 1: the calling thread may own latches on
+pages: to avoid deadlocks this function must be written such that it cannot
+end up waiting for these latches! NOTE 2: the calling thread must want
+access to the page given: this rule is set to prevent unintended read-aheads
+performed by ibuf routines, a situation which could result in a deadlock if
+the OS does not support asynchronous i/o.
+@return number of page read requests issued; NOTE that if we read ibuf
+pages, it may happen that the page at the given page number does not
+get read even if we return a positive value!
+@return number of page read requests issued */
+UNIV_INTERN
+ulint
+buf_read_ahead_random(
+/*==================*/
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes,
+ or 0 */
+ ulint offset, /*!< in: page number of a page which
+ the current thread wants to access */
+ ibool inside_ibuf) /*!< in: TRUE if we are inside ibuf
+ routine */
+{
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ ib_int64_t tablespace_version;
+ ulint recent_blocks = 0;
+ ulint ibuf_mode;
+ ulint count;
+ ulint low, high;
+ ulint err;
+ ulint i;
+ const ulint buf_read_ahead_random_area
+ = BUF_READ_AHEAD_AREA(buf_pool);
+
+ if (!srv_random_read_ahead) {
+ /* Disabled by user */
+ return(0);
+ }
+
+ if (srv_startup_is_before_trx_rollback_phase) {
+ /* No read-ahead to avoid thread deadlocks */
+ return(0);
+ }
+
+ if (ibuf_bitmap_page(zip_size, offset)
+ || trx_sys_hdr_page(space, offset)) {
+
+ /* If it is an ibuf bitmap page or trx sys hdr, we do
+ no read-ahead, as that could break the ibuf page access
+ order */
+
+ return(0);
+ }
+
+ /* Remember the tablespace version before we ask te tablespace size
+ below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
+ do not try to read outside the bounds of the tablespace! */
+
+ tablespace_version = fil_space_get_version(space);
+
+ low = (offset / buf_read_ahead_random_area)
+ * buf_read_ahead_random_area;
+ high = (offset / buf_read_ahead_random_area + 1)
+ * buf_read_ahead_random_area;
+ if (high > fil_space_get_size(space)) {
+
+ high = fil_space_get_size(space);
+ }
+
+ buf_pool_mutex_enter(buf_pool);
+
+ if (buf_pool->n_pend_reads
+ > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+ buf_pool_mutex_exit(buf_pool);
+
+ return(0);
+ }
+
+ /* Count how many blocks in the area have been recently accessed,
+ that is, reside near the start of the LRU list. */
+
+ for (i = low; i < high; i++) {
+ const buf_page_t* bpage =
+ buf_page_hash_get(buf_pool, space, i);
+
+ if (bpage
+ && buf_page_is_accessed(bpage)
+ && buf_page_peek_if_young(bpage)) {
+
+ recent_blocks++;
+
+ if (recent_blocks
+ >= BUF_READ_AHEAD_RANDOM_THRESHOLD(buf_pool)) {
+
+ buf_pool_mutex_exit(buf_pool);
+ goto read_ahead;
+ }
+ }
+ }
+
+ buf_pool_mutex_exit(buf_pool);
+ /* Do nothing */
+ return(0);
+
+read_ahead:
+ /* Read all the suitable blocks within the area */
+
+ if (inside_ibuf) {
+ ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
+ } else {
+ ibuf_mode = BUF_READ_ANY_PAGE;
+ }
+
+ count = 0;
+
+ for (i = low; i < high; i++) {
+ /* It is only sensible to do read-ahead in the non-sync aio
+ mode: hence FALSE as the first parameter */
+
+ if (!ibuf_bitmap_page(zip_size, i)) {
+ count += buf_read_page_low(
+ &err, FALSE,
+ ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
+ space, zip_size, FALSE,
+ tablespace_version, i);
+ if (err == DB_TABLESPACE_DELETED) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Warning: in random"
+ " readahead trying to access\n"
+ "InnoDB: tablespace %lu page %lu,\n"
+ "InnoDB: but the tablespace does not"
+ " exist or is just being dropped.\n",
+ (ulong) space, (ulong) i);
+ }
+ }
+ }
+
+ /* In simulated aio we wake the aio handler threads only after
+ queuing all aio requests, in native aio the following call does
+ nothing: */
+
+ os_aio_simulated_wake_handler_threads();
+
+#ifdef UNIV_DEBUG
+ if (buf_debug_prints && (count > 0)) {
+ fprintf(stderr,
+ "Random read-ahead space %lu offset %lu pages %lu\n",
+ (ulong) space, (ulong) offset,
+ (ulong) count);
+ }
+#endif /* UNIV_DEBUG */
+
+ /* Read ahead is considered one I/O operation for the purpose of
+ LRU policy decision. */
+ buf_LRU_stat_inc_io();
+
+ buf_pool->stat.n_ra_pages_read_rnd += count;
+ srv_buf_pool_reads += count;
+ return(count);
+}
+
+/********************************************************************//**
High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
@@ -317,7 +484,7 @@ buf_read_ahead_linear(
ulint err;
ulint i;
const ulint buf_read_ahead_linear_area
- = BUF_READ_AHEAD_LINEAR_AREA(buf_pool);
+ = BUF_READ_AHEAD_AREA(buf_pool);
ulint threshold;
/* check if readahead is disabled */
=== modified file 'storage/innobase/handler/ha_innodb.cc'
--- a/storage/innobase/handler/ha_innodb.cc revid:kevin.lewis@stripped
+++ b/storage/innobase/handler/ha_innodb.cc revid:inaam.rana@stripped
@@ -410,6 +410,8 @@ static SHOW_VAR innodb_status_variables[
(char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG},
{"buffer_pool_pages_total",
(char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG},
+ {"buffer_pool_read_ahead_rnd",
+ (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
{"buffer_pool_read_ahead",
(char*) &export_vars.innodb_buffer_pool_read_ahead, SHOW_LONG},
{"buffer_pool_read_ahead_evicted",
@@ -12591,6 +12593,11 @@ static MYSQL_SYSVAR_UINT(change_bufferin
NULL, NULL, 0, 0, 1, 0);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
+ PLUGIN_VAR_NOCMDARG,
+ "Whether to use read ahead for random access within an extent.",
+ NULL, NULL, FALSE);
+
static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
PLUGIN_VAR_RQCMDARG,
"Number of pages that must be accessed sequentially for InnoDB to "
@@ -12699,6 +12706,7 @@ static struct st_mysql_sys_var* innobase
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
MYSQL_SYSVAR(change_buffering_debug),
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+ MYSQL_SYSVAR(random_read_ahead),
MYSQL_SYSVAR(read_ahead_threshold),
MYSQL_SYSVAR(io_capacity),
MYSQL_SYSVAR(monitor_enable),
=== modified file 'storage/innobase/include/buf0buf.h'
--- a/storage/innobase/include/buf0buf.h revid:kevin.lewis@stripped
+++ b/storage/innobase/include/buf0buf.h revid:inaam.rana@stripped
@@ -153,6 +153,8 @@ struct buf_pool_info_struct{
ulint n_pages_created; /*!< buf_pool->n_pages_created */
ulint n_pages_written; /*!< buf_pool->n_pages_written */
ulint n_page_gets; /*!< buf_pool->n_page_gets */
+ ulint n_ra_pages_read_rnd; /*!< buf_pool->n_ra_pages_read_rnd,
+ number of pages readahead */
ulint n_ra_pages_read; /*!< buf_pool->n_ra_pages_read, number
of pages readahead */
ulint n_ra_pages_evicted; /*!< buf_pool->n_ra_pages_evicted,
@@ -177,6 +179,8 @@ struct buf_pool_info_struct{
last printout */
/* Statistics about read ahead algorithm. */
+ double pages_readahead_rnd_rate;/*!< random readahead rate in pages per
+ second */
double pages_readahead_rate; /*!< readahead rate in pages per
second */
double pages_evicted_rate; /*!< rate of readahead page evicted
@@ -547,6 +551,18 @@ buf_block_get_freed_page_clock(
__attribute__((pure));
/********************************************************************//**
+Tells if a block is still close enough to the MRU end of the LRU list
+meaning that it is not in danger of getting evicted and also implying
+that it has been accessed recently.
+Note that this is for heuristics only and does not reserve buffer pool
+mutex.
+@return TRUE if block is close to MRU end of LRU */
+UNIV_INLINE
+ibool
+buf_page_peek_if_young(
+/*===================*/
+ const buf_page_t* bpage); /*!< in: block */
+/********************************************************************//**
Recommends a move of a block to the start of the LRU list if there is danger
of dropping from the buffer pool. NOTE: does not reserve the buffer pool
mutex.
@@ -1695,6 +1711,8 @@ struct buf_pool_stat_struct{
ulint n_pages_written;/*!< number write operations */
ulint n_pages_created;/*!< number of pages created
in the pool with no read */
+ ulint n_ra_pages_read_rnd;/*!< number of pages read in
+ as part of random read ahead */
ulint n_ra_pages_read;/*!< number of pages read in
as part of read ahead */
ulint n_ra_pages_evicted;/*!< number of read ahead
@@ -1841,7 +1859,7 @@ struct buf_pool_struct{
UT_LIST_BASE_NODE_T(buf_page_t) LRU;
/*!< base node of the LRU list */
buf_page_t* LRU_old; /*!< pointer to the about
- buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
+ LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
oldest blocks in the LRU list;
NULL if LRU length less than
BUF_LRU_OLD_MIN_LEN;
=== modified file 'storage/innobase/include/buf0buf.ic'
--- a/storage/innobase/include/buf0buf.ic revid:kevin.lewis@stripped
+++ b/storage/innobase/include/buf0buf.ic revid:inaam.rana@stripped
@@ -135,6 +135,29 @@ buf_block_get_freed_page_clock(
}
/********************************************************************//**
+Tells if a block is still close enough to the MRU end of the LRU list
+meaning that it is not in danger of getting evicted and also implying
+that it has been accessed recently.
+Note that this is for heuristics only and does not reserve buffer pool
+mutex.
+@return TRUE if block is close to MRU end of LRU */
+UNIV_INLINE
+ibool
+buf_page_peek_if_young(
+/*===================*/
+ const buf_page_t* bpage) /*!< in: block */
+{
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+ /* FIXME: bpage->freed_page_clock is 31 bits */
+ return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
+ < ((ulint) bpage->freed_page_clock
+ + (buf_pool->curr_size
+ * (BUF_LRU_OLD_RATIO_DIV - buf_pool->LRU_old_ratio)
+ / (BUF_LRU_OLD_RATIO_DIV * 4))));
+}
+
+/********************************************************************//**
Recommends a move of a block to the start of the LRU list if there is danger
of dropping from the buffer pool. NOTE: does not reserve the buffer pool
mutex.
@@ -164,12 +187,7 @@ buf_page_peek_if_too_old(
buf_pool->stat.n_pages_not_made_young++;
return(FALSE);
} else {
- /* FIXME: bpage->freed_page_clock is 31 bits */
- return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
- > ((ulint) bpage->freed_page_clock
- + (buf_pool->curr_size
- * (BUF_LRU_OLD_RATIO_DIV - buf_pool->LRU_old_ratio)
- / (BUF_LRU_OLD_RATIO_DIV * 4))));
+ return(!buf_page_peek_if_young(bpage));
}
}
#endif /* !UNIV_HOTBACKUP */
=== modified file 'storage/innobase/include/buf0lru.h'
--- a/storage/innobase/include/buf0lru.h revid:kevin.lewis@stripped
+++ b/storage/innobase/include/buf0lru.h revid:inaam.rana@stripped
@@ -184,7 +184,7 @@ buf_LRU_make_block_old(
/*===================*/
buf_page_t* bpage); /*!< in: control block */
/**********************************************************************//**
-Updates buf_LRU_old_ratio.
+Updates buf_pool->LRU_old_ratio.
@return updated old_pct */
UNIV_INTERN
ulint
@@ -193,7 +193,7 @@ buf_LRU_old_ratio_update(
uint old_pct,/*!< in: Reserve this percentage of
the buffer pool for "old" blocks. */
ibool adjust);/*!< in: TRUE=adjust the LRU list;
- FALSE=just assign buf_LRU_old_ratio
+ FALSE=just assign buf_pool->LRU_old_ratio
during the initialization of InnoDB */
/********************************************************************//**
Update the historical stats that we are collecting for LRU eviction
@@ -222,18 +222,15 @@ buf_LRU_print(void);
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
/** @name Heuristics for detecting index scan @{ */
-/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for
-"old" blocks. Protected by buf_pool->mutex. */
-extern uint buf_LRU_old_ratio;
-/** The denominator of buf_LRU_old_ratio. */
+/** The denominator of buf_pool->LRU_old_ratio. */
#define BUF_LRU_OLD_RATIO_DIV 1024
-/** Maximum value of buf_LRU_old_ratio.
+/** Maximum value of buf_pool->LRU_old_ratio.
@see buf_LRU_old_adjust_len
-@see buf_LRU_old_ratio_update */
+@see buf_pool->LRU_old_ratio_update */
#define BUF_LRU_OLD_RATIO_MAX BUF_LRU_OLD_RATIO_DIV
-/** Minimum value of buf_LRU_old_ratio.
+/** Minimum value of buf_pool->LRU_old_ratio.
@see buf_LRU_old_adjust_len
-@see buf_LRU_old_ratio_update
+@see buf_pool->LRU_old_ratio_update
The minimum must exceed
(BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */
#define BUF_LRU_OLD_RATIO_MIN 51
=== modified file 'storage/innobase/include/buf0rea.h'
--- a/storage/innobase/include/buf0rea.h revid:kevin.lewis@stripped
+++ b/storage/innobase/include/buf0rea.h revid:inaam.rana@stripped
@@ -55,6 +55,31 @@ buf_read_page_async(
ulint space, /*!< in: space id */
ulint offset);/*!< in: page number */
/********************************************************************//**
+Applies a random read-ahead in buf_pool if there are at least a threshold
+value of accessed pages from the random read-ahead area. Does not read any
+page, not even the one at the position (space, offset), if the read-ahead
+mechanism is not activated. NOTE 1: the calling thread may own latches on
+pages: to avoid deadlocks this function must be written such that it cannot
+end up waiting for these latches! NOTE 2: the calling thread must want
+access to the page given: this rule is set to prevent unintended read-aheads
+performed by ibuf routines, a situation which could result in a deadlock if
+the OS does not support asynchronous i/o.
+@return number of page read requests issued; NOTE that if we read ibuf
+pages, it may happen that the page at the given page number does not
+get read even if we return a positive value!
+@return number of page read requests issued */
+UNIV_INTERN
+ulint
+buf_read_ahead_random(
+/*==================*/
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes,
+ or 0 */
+ ulint offset, /*!< in: page number of a page which
+ the current thread wants to access */
+ ibool inside_ibuf); /*!< in: TRUE if we are inside ibuf
+ routine */
+/********************************************************************//**
Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
=== modified file 'storage/innobase/include/srv0srv.h'
--- a/storage/innobase/include/srv0srv.h revid:kevin.lewis@stripped
+++ b/storage/innobase/include/srv0srv.h revid:inaam.rana@stripped
@@ -174,6 +174,7 @@ extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;
extern ulint srv_n_file_io_threads;
+extern my_bool srv_random_read_ahead;
extern ulong srv_read_ahead_threshold;
extern ulint srv_n_read_io_threads;
extern ulint srv_n_write_io_threads;
@@ -721,6 +722,7 @@ struct export_var_struct{
ulint innodb_buffer_pool_wait_free; /*!< srv_buf_pool_wait_free */
ulint innodb_buffer_pool_pages_flushed; /*!< srv_buf_pool_flushed */
ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
+ ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */
ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */
ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */
=== modified file 'storage/innobase/srv/srv0srv.c'
--- a/storage/innobase/srv/srv0srv.c revid:kevin.lewis@stripped
+++ b/storage/innobase/srv/srv0srv.c revid:inaam.rana@stripped
@@ -210,6 +210,8 @@ UNIV_INTERN ulint srv_n_file_io_threads
UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
+/* Switch to enable random read ahead. */
+UNIV_INTERN my_bool srv_random_read_ahead = FALSE;
/* User settable value of the number of pages that must be present
in the buffer cache and accessed sequentially for InnoDB to trigger a
readahead request. */
@@ -1295,6 +1297,8 @@ srv_export_innodb_status(void)
export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
+ export_vars.innodb_buffer_pool_read_ahead_rnd
+ = stat.n_ra_pages_read_rnd;
export_vars.innodb_buffer_pool_read_ahead
= stat.n_ra_pages_read;
export_vars.innodb_buffer_pool_read_ahead_evicted
Attachment: [text/bzr-bundle] bzr/inaam.rana@oracle.com-20110719152121-2813xt8s4w6n0kcb.bundle
| Thread |
|---|
| • bzr push into mysql-trunk branch (inaam.rana:3256 to 3257) | Inaam Rana | 20 Jul |