Below is the list of changes that have just been committed into a local
maria repository of guilhem. When guilhem does a push these changes
will be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2008-02-27 18:19:20+01:00, guilhem@stripped +5 -0
Fix for BUG#34089 "Maria crash on LOAD INDEX after FLUSH TABLES".
mysql-test/r/maria-preload.result@stripped, 2008-02-27 18:19:18+01:00, guilhem@stripped +173 -0
result. The values of maria_pagecache_reads look too high, and the code
incrementing global_cache_read looks suspicious; I mailed
Sanja to ask him if it's normal.
mysql-test/r/maria-preload.result@stripped, 2008-02-27 18:19:18+01:00, guilhem@stripped +0 -0
mysql-test/t/maria-preload.test@stripped, 2008-02-27 18:19:18+01:00, guilhem@stripped +108 -0
Test of index preloading in Maria
mysql-test/t/maria-preload.test@stripped, 2008-02-27 18:19:18+01:00, guilhem@stripped +0 -0
storage/maria/ha_maria.cc@stripped, 2008-02-27 18:19:18+01:00, guilhem@stripped +0 -2
enable LOAD INDEX for Maria
storage/maria/ma_preload.c@stripped, 2008-02-27 18:19:18+01:00, guilhem@stripped +49 -38
Making index preloading work for Maria.
storage/maria/maria_def.h@stripped, 2008-02-27 18:19:18+01:00, guilhem@stripped +1 -1
We don't need this #define anymore: ma_test_recovery.pl uses
zerofill to do comparisons of physical files.
diff -Nrup a/mysql-test/r/maria-preload.result b/mysql-test/r/maria-preload.result
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/mysql-test/r/maria-preload.result 2008-02-27 18:19:18 +01:00
@@ -0,0 +1,173 @@
+drop table if exists t1, t2;
+create table t1 (
+a int not null auto_increment,
+b char(16) not null,
+primary key (a),
+key (b)
+) engine=maria row_format=dynamic;
+create table t2(
+a int not null auto_increment,
+b char(16) not null,
+primary key (a),
+key (b)
+) engine=maria row_format=dynamic;
+insert into t1(b) values
+('test0'),
+('test1'),
+('test2'),
+('test3'),
+('test4'),
+('test5'),
+('test6'),
+('test7');
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+select count(*) from t1;
+count(*)
+33448
+select count(*) from t2;
+count(*)
+20672
+flush tables;
+flush status;
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211388
+Maria_pagecache_reads 115
+select count(*) from t1 where b = 'test1';
+count(*)
+4181
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211414
+Maria_pagecache_reads 122
+select count(*) from t1 where b = 'test1';
+count(*)
+4181
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211440
+Maria_pagecache_reads 122
+flush tables;
+flush status;
+select @@preload_buffer_size;
+@@preload_buffer_size
+32768
+load index into cache t1;
+Table Op Msg_type Msg_text
+test.t1 preload_keys status OK
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211511
+Maria_pagecache_reads 193
+select count(*) from t1 where b = 'test1';
+count(*)
+4181
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211537
+Maria_pagecache_reads 193
+flush tables;
+flush status;
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211537
+Maria_pagecache_reads 193
+set session preload_buffer_size=256*1024;
+select @@preload_buffer_size;
+@@preload_buffer_size
+262144
+load index into cache t1 ignore leaves;
+Table Op Msg_type Msg_text
+test.t1 preload_keys status OK
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211608
+Maria_pagecache_reads 264
+select count(*) from t1 where b = 'test1';
+count(*)
+4181
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211634
+Maria_pagecache_reads 270
+flush tables;
+flush status;
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211634
+Maria_pagecache_reads 270
+set session preload_buffer_size=1*1024;
+select @@preload_buffer_size;
+@@preload_buffer_size
+1024
+load index into cache t1, t2 key (primary,b) ignore leaves;
+Table Op Msg_type Msg_text
+test.t1 preload_keys status OK
+test.t2 preload_keys status OK
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211748
+Maria_pagecache_reads 384
+select count(*) from t1 where b = 'test1';
+count(*)
+4181
+select count(*) from t2 where b = 'test1';
+count(*)
+2584
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211788
+Maria_pagecache_reads 387
+flush tables;
+flush status;
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211788
+Maria_pagecache_reads 387
+load index into cache t3, t2 key (primary,b) ;
+Table Op Msg_type Msg_text
+test.t3 preload_keys Error Table 'test.t3' doesn't exist
+test.t3 preload_keys error Corrupt
+test.t2 preload_keys status OK
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211831
+Maria_pagecache_reads 430
+flush tables;
+flush status;
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211831
+Maria_pagecache_reads 430
+load index into cache t3 key (b), t2 key (c) ;
+Table Op Msg_type Msg_text
+test.t3 preload_keys Error Table 'test.t3' doesn't exist
+test.t3 preload_keys error Corrupt
+test.t2 preload_keys Error Key 'c' doesn't exist in table 't2'
+test.t2 preload_keys status Operation failed
+show status like "maria_pagecache_read%";
+Variable_name Value
+Maria_pagecache_read_requests 211831
+Maria_pagecache_reads 430
+drop table t1, t2;
+show status like "key_read%";
+Variable_name Value
+Key_read_requests 0
+Key_reads 0
diff -Nrup a/mysql-test/t/maria-preload.test b/mysql-test/t/maria-preload.test
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/mysql-test/t/maria-preload.test 2008-02-27 18:19:18 +01:00
@@ -0,0 +1,108 @@
+#
+# Testing of PRELOAD
+#
+
+-- source include/have_maria.inc
+
+--disable_warnings
+drop table if exists t1, t2;
+--enable_warnings
+
+
+# we don't use block-format because we want page cache stats
+# about indices and not data pages.
+
+create table t1 (
+ a int not null auto_increment,
+ b char(16) not null,
+ primary key (a),
+ key (b)
+) engine=maria row_format=dynamic;
+
+create table t2(
+ a int not null auto_increment,
+ b char(16) not null,
+ primary key (a),
+ key (b)
+) engine=maria row_format=dynamic;
+
+insert into t1(b) values
+ ('test0'),
+ ('test1'),
+ ('test2'),
+ ('test3'),
+ ('test4'),
+ ('test5'),
+ ('test6'),
+ ('test7');
+
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+insert into t2(b) select b from t1;
+insert into t1(b) select b from t2;
+
+select count(*) from t1;
+select count(*) from t2;
+
+flush tables; flush status;
+show status like "maria_pagecache_read%";
+
+select count(*) from t1 where b = 'test1';
+show status like "maria_pagecache_read%";
+select count(*) from t1 where b = 'test1';
+show status like "maria_pagecache_read%";
+
+flush tables; flush status;
+select @@preload_buffer_size;
+load index into cache t1;
+show status like "maria_pagecache_read%";
+select count(*) from t1 where b = 'test1';
+show status like "maria_pagecache_read%";
+
+flush tables; flush status;
+show status like "maria_pagecache_read%";
+set session preload_buffer_size=256*1024;
+select @@preload_buffer_size;
+load index into cache t1 ignore leaves;
+show status like "maria_pagecache_read%";
+select count(*) from t1 where b = 'test1';
+show status like "maria_pagecache_read%";
+
+flush tables; flush status;
+show status like "maria_pagecache_read%";
+set session preload_buffer_size=1*1024;
+select @@preload_buffer_size;
+load index into cache t1, t2 key (primary,b) ignore leaves;
+show status like "maria_pagecache_read%";
+select count(*) from t1 where b = 'test1';
+select count(*) from t2 where b = 'test1';
+show status like "maria_pagecache_read%";
+
+flush tables; flush status;
+show status like "maria_pagecache_read%";
+load index into cache t3, t2 key (primary,b) ;
+show status like "maria_pagecache_read%";
+
+flush tables; flush status;
+show status like "maria_pagecache_read%";
+load index into cache t3 key (b), t2 key (c) ;
+show status like "maria_pagecache_read%";
+
+drop table t1, t2;
+
+# check that Maria didn't use key cache
+show status like "key_read%";
diff -Nrup a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
--- a/storage/maria/ha_maria.cc 2008-02-20 01:47:04 +01:00
+++ b/storage/maria/ha_maria.cc 2008-02-27 18:19:18 +01:00
@@ -1494,7 +1494,6 @@ int ha_maria::preload_keys(THD * thd, HA
maria_extra(file, HA_EXTRA_PRELOAD_BUFFER_SIZE,
(void*) &thd->variables.preload_buff_size);
-#ifdef NOT_YET
int error;
if ((error= maria_preload(file, map, table_list->ignore_leaves)))
@@ -1525,7 +1524,6 @@ int ha_maria::preload_keys(THD * thd, HA
_ma_check_print_error(&param, errmsg);
DBUG_RETURN(HA_ADMIN_FAILED);
}
-#endif
DBUG_RETURN(HA_ADMIN_OK);
}
diff -Nrup a/storage/maria/ma_preload.c b/storage/maria/ma_preload.c
--- a/storage/maria/ma_preload.c 2008-01-10 20:21:31 +01:00
+++ b/storage/maria/ma_preload.c 2008-02-27 18:19:18 +01:00
@@ -36,70 +36,81 @@
At present pages for all indexes are preloaded.
In future only pages for indexes specified in the key_map parameter
of the table will be preloaded.
+ We don't yet use preload_buff_size (we read page after page).
*/
int maria_preload(MARIA_HA *info, ulonglong key_map, my_bool ignore_leaves)
{
- ulong length, block_length= 0;
+ ulong block_length= 0;
uchar *buff= NULL;
MARIA_SHARE* share= info->s;
- uint keys= share->state.header.keys;
+ uint keynr;
my_off_t key_file_length= share->state.state.key_file_length;
- my_off_t pos= share->base.keystart;
+ pgcache_page_no_t page_no, page_no_max;
+ PAGECACHE_BLOCK_LINK *page_link;
DBUG_ENTER("maria_preload");
- if (!keys || !maria_is_any_key_active(key_map) || key_file_length == pos)
+ if (!share->state.header.keys || !maria_is_any_key_active(key_map) ||
+ (key_file_length == share->base.keystart))
DBUG_RETURN(0);
block_length= share->pagecache->block_size;
- length= info->preload_buff_size/block_length * block_length;
- set_if_bigger(length, block_length);
- if (!(buff= (uchar *) my_malloc(length, MYF(MY_WME))))
+ if (!(buff= (uchar *) my_malloc(block_length, MYF(MY_WME))))
DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM);
if (flush_pagecache_blocks(share->pagecache, &share->kfile, FLUSH_RELEASE))
goto err;
- do
+ /*
+ Currently when we come here all other open instances of the table have
+ been closed, and we flushed all pages of our own instance, so there
+ cannot be any page of this table in the cache. Thus my_pread() would be
+ safe. But in the future, we will allow more concurrency during
+ preloading, so we use pagecache_read() instead of my_pread() because we
+ observed that on some Linux, concurrent pread() and pwrite() (which
+ could be from a page eviction by another thread) to the same page can
+ make pread() see an half-written page.
+ In this future, we should find a way to read state.key_file_length
+ reliably, handle concurrent shrinks (delete_all_rows()) etc.
+ */
+ for ((page_no= share->base.keystart / block_length),
+ (page_no_max= key_file_length / block_length);
+ page_no < page_no_max; page_no++)
{
- uchar *end;
- /* Read the next block of index file into the preload buffer */
- if ((my_off_t) length > (key_file_length-pos))
- length= (ulong) (key_file_length-pos);
- if (my_pread(share->kfile.file, (uchar*) buff, length, pos,
- MYF(MY_FAE|MY_FNABP)))
+ /**
+ @todo instead of reading pages one by one we could have a call
+ pagecache_read_several_pages() which does a single my_pread() for many
+ consecutive pages (like the my_pread() in mi_preload()).
+ */
+ if (pagecache_read(share->pagecache, &share->kfile, page_no,
+ DFLT_INIT_HITS, (uchar*) buff, share->page_type,
+ PAGECACHE_LOCK_WRITE, &page_link) == NULL)
goto err;
-
- for (end= buff + length ; buff < end ; buff+= block_length)
+ keynr= _ma_get_keynr(share, buff);
+ if (((ignore_leaves && !_ma_test_if_nod(share, buff)) ||
+ keynr == MARIA_DELETE_KEY_NR ||
+ !(key_map & ((ulonglong) 1 << keynr))) &&
+ (pagecache_pagelevel(page_link) == DFLT_INIT_HITS))
{
- uint keynr= _ma_get_keynr(share, buff);
- if ((ignore_leaves && !_ma_test_if_nod(share, buff)) ||
- keynr == MARIA_DELETE_KEY_NR ||
- !(key_map & ((ulonglong) 1 << keynr)))
- {
- DBUG_ASSERT(share->pagecache->block_size == block_length);
- if (pagecache_write(share->pagecache,
- &share->kfile,
- (pgcache_page_no_t) (pos / block_length),
- DFLT_INIT_HITS,
- (uchar*) buff,
- PAGECACHE_PLAIN_PAGE,
- PAGECACHE_LOCK_LEFT_UNLOCKED,
- PAGECACHE_PIN_LEFT_UNPINNED,
- PAGECACHE_WRITE_DONE, 0,
- LSN_IMPOSSIBLE))
- goto err;
- }
- pos+= block_length;
+ /*
+ This page is not interesting, and (last condition above) we are the
+ ones who put it in the cache, so nobody else is interested in it.
+ */
+ if (pagecache_delete_by_link(share->pagecache, page_link,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, FALSE))
+ goto err;
}
+ else /* otherwise it stays in cache: */
+ pagecache_unlock_by_link(share->pagecache, page_link,
+ PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN,
+ LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, FALSE);
}
- while (pos != key_file_length);
- my_free((char*) buff, MYF(0));
+ my_free(buff, MYF(0));
DBUG_RETURN(0);
err:
- my_free((char*) buff, MYF(MY_ALLOW_ZERO_PTR));
+ my_free(buff, MYF(MY_ALLOW_ZERO_PTR));
DBUG_RETURN(my_errno= errno);
}
diff -Nrup a/storage/maria/maria_def.h b/storage/maria/maria_def.h
--- a/storage/maria/maria_def.h 2008-02-19 00:00:55 +01:00
+++ b/storage/maria/maria_def.h 2008-02-27 18:19:18 +01:00
@@ -29,7 +29,7 @@
#include "ma_control_file.h"
/* For testing recovery */
-#ifndef DBUG_OFF
+#ifdef TO_BE_REMOVED
#define IDENTICAL_PAGES_AFTER_RECOVERY 1
#endif
/* Do extra sanity checking */