#At file:///home/mikael/mysql_clones/mixed_buf_page_hash/
2847 Mikael Ronstrom 2009-05-19
Split buffer page hash mutex from buffer pool mutex, mix of ideas from Sun, Google and Percona
modified:
storage/innobase/buf/buf0buf.c
storage/innobase/buf/buf0flu.c
storage/innobase/buf/buf0lru.c
storage/innobase/buf/buf0rea.c
storage/innobase/include/buf0buf.h
storage/innobase/include/buf0buf.ic
storage/innobase/include/sync0sync.h
storage/innobase/sync/sync0sync.c
=== modified file 'storage/innobase/buf/buf0buf.c'
--- a/storage/innobase/buf/buf0buf.c 2008-10-15 18:54:18 +0000
+++ b/storage/innobase/buf/buf0buf.c 2009-05-19 11:15:22 +0000
@@ -81,8 +81,8 @@ maybe every 10 microseconds. We gave up
for each control block, for instance, because it seemed to be
complicated.
-A solution to reduce mutex contention of the buf_pool mutex is to
-create a separate mutex for the page hash table. On Pentium,
+To reduce mutex contention of the buf_pool mutex we have created
+a separate array of rw-latches for the page hash table. On Pentium,
accessing the hash table takes 2 microseconds, about half
of the total buf_pool mutex hold time.
@@ -595,6 +595,7 @@ buf_pool_init(
/* 1. Initialize general fields
---------------------------- */
mutex_create(&buf_pool->mutex, SYNC_BUF_POOL);
+ buf_page_hash_create_locks();
mutex_enter(&(buf_pool->mutex));
@@ -704,8 +705,7 @@ buf_pool_init(
}
}
- buf_pool->page_hash = hash_create(2 * max_size);
-
+ buf_page_hash_create(max_size);
buf_pool->n_pend_reads = 0;
buf_pool->last_printout_time = time(NULL);
@@ -998,7 +998,7 @@ buf_page_peek_block(
mutex_enter_fast(&(buf_pool->mutex));
- block = buf_page_hash_get(space, offset);
+ block = buf_page_hash_get(space, offset, FALSE);
mutex_exit(&(buf_pool->mutex));
@@ -1019,7 +1019,7 @@ buf_reset_check_index_page_at_flush(
mutex_enter_fast(&(buf_pool->mutex));
- block = buf_page_hash_get(space, offset);
+ block = buf_page_hash_get(space, offset, FALSE);
if (block) {
block->check_index_page_at_flush = FALSE;
@@ -1046,7 +1046,7 @@ buf_page_peek_if_search_hashed(
mutex_enter_fast(&(buf_pool->mutex));
- block = buf_page_hash_get(space, offset);
+ block = buf_page_hash_get(space, offset, FALSE);
if (!block) {
is_hashed = FALSE;
@@ -1098,7 +1098,7 @@ buf_page_set_file_page_was_freed(
mutex_enter_fast(&(buf_pool->mutex));
- block = buf_page_hash_get(space, offset);
+ block = buf_page_hash_get(space, offset, FALSE);
if (block) {
block->file_page_was_freed = TRUE;
@@ -1127,7 +1127,7 @@ buf_page_reset_file_page_was_freed(
mutex_enter_fast(&(buf_pool->mutex));
- block = buf_page_hash_get(space, offset);
+ block = buf_page_hash_get(space, offset, FALSE);
if (block) {
block->file_page_was_freed = FALSE;
@@ -1174,27 +1174,42 @@ buf_page_get_gen(
buf_pool->n_page_gets++;
loop:
block = NULL;
- mutex_enter_fast(&(buf_pool->mutex));
+ /* We have removed buf_pool->mutex here. I have verified it is safe
+ to access the following block members below with only block->mutex:
+ offset, space, state, io_fix, buf_fix_count. Other functions call
+ buf_block_align without protection, so that should be fine too. */
if (guess) {
block = buf_block_align(guess);
+ mutex_enter(&block->mutex);
if ((offset != block->offset) || (space != block->space)
|| (block->state != BUF_BLOCK_FILE_PAGE)) {
+ mutex_exit(&block->mutex);
block = NULL;
}
}
if (block == NULL) {
- block = buf_page_hash_get(space, offset);
+ block = buf_page_hash_get(space, offset, FALSE);
+ if(block) {
+ mutex_enter(&block->mutex);
+ /* Verify block contains the data we want. It may have
+ changed before acquiring block->mutex, because we don't
+ lock buf_pool->mutex before buf_page_hash_get. */
+ if (UNIV_UNLIKELY((offset != block->offset) ||
+ (space != block->space) ||
+ (block->state != BUF_BLOCK_FILE_PAGE))) {
+ mutex_exit(&block->mutex);
+ block = NULL;
+ }
+ }
}
if (block == NULL) {
/* Page not in buf_pool: needs to be read from file */
- mutex_exit(&(buf_pool->mutex));
-
if (mode == BUF_GET_IF_IN_POOL) {
return(NULL);
@@ -1212,7 +1227,7 @@ loop:
goto loop;
}
- mutex_enter(&block->mutex);
+ /* Now we know block is not null, and we hold block->mutex */
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
@@ -1224,7 +1239,6 @@ loop:
if (mode == BUF_GET_IF_IN_POOL) {
/* The page is only being read to buffer */
- mutex_exit(&buf_pool->mutex);
mutex_exit(&block->mutex);
return(NULL);
@@ -1237,11 +1251,18 @@ loop:
if (block->frame == NULL) {
ut_a(srv_use_awe);
+ /* TODO: Let buf_awe_map_page_to_frame do its own locking,
+ but this requires an overhaul to buf_flush_try_page */
+ mutex_exit(&(block->mutex));
+ mutex_enter(&(buf_pool->mutex));
+ mutex_enter(&(block->mutex));
+
/* We set second parameter TRUE because the block is in the
LRU list and we must put it to awe_LRU_free_mapped list once
mapped to a frame */
buf_awe_map_page_to_frame(block, TRUE);
+ mutex_exit(&(buf_pool->mutex));
}
#ifdef UNIV_SYNC_DEBUG
@@ -1249,7 +1270,6 @@ loop:
#else
buf_block_buf_fix_inc(block);
#endif
- mutex_exit(&buf_pool->mutex);
/* Check if this is the first access to the page */
@@ -1630,6 +1650,8 @@ buf_page_init(
in units of a page */
buf_block_t* block) /* in: block to init */
{
+ ulint fold;
+ ulint latch_number;
ut_ad(mutex_own(&(buf_pool->mutex)));
ut_ad(mutex_own(&(block->mutex)));
@@ -1658,7 +1680,7 @@ buf_page_init(
/* Insert into the hash table of file pages */
- if (buf_page_hash_get(space, offset)) {
+ if (buf_page_hash_get(space, offset, FALSE)) {
fprintf(stderr,
"InnoDB: Error: page %lu %lu already found"
" in the hash table\n",
@@ -1673,8 +1695,12 @@ buf_page_init(
ut_a(0);
}
- HASH_INSERT(buf_block_t, hash, buf_pool->page_hash,
- buf_page_address_fold(space, offset), block);
+ fold = buf_page_address_fold(space, offset);
+ latch_number = buf_page_hash_table(space, offset);
+ rw_lock_x_lock(&(buf_pool->hash_latches[latch_number]));
+ HASH_INSERT(buf_block_t, hash, buf_pool->page_hash[latch_number],
+ fold, block);
+ rw_lock_x_unlock(&(buf_pool->hash_latches[latch_number]));
block->freed_page_clock = 0;
@@ -1756,7 +1782,7 @@ buf_page_init_for_read(
}
if (*err == DB_TABLESPACE_DELETED
- || NULL != buf_page_hash_get(space, offset)) {
+ || NULL != buf_page_hash_get(space, offset, FALSE)) {
/* The page belongs to a space which has been
deleted or is being deleted, or the page is
@@ -1832,7 +1858,7 @@ buf_page_create(
mutex_enter(&(buf_pool->mutex));
- block = buf_page_hash_get(space, offset);
+ block = buf_page_hash_get(space, offset, FALSE);
if (block != NULL) {
#ifdef UNIV_IBUF_DEBUG
@@ -2151,12 +2177,11 @@ buf_validate(void)
block = buf_pool_get_nth_block(buf_pool, i);
- mutex_enter(&block->mutex);
-
if (block->state == BUF_BLOCK_FILE_PAGE) {
ut_a(buf_page_hash_get(block->space,
- block->offset) == block);
+ block->offset,
+ FALSE) == block); /* FALSE: the lookup takes the hash latch itself */
n_page++;
#ifdef UNIV_IBUF_DEBUG
@@ -2197,8 +2222,6 @@ buf_validate(void)
} else if (block->state == BUF_BLOCK_NOT_USED) {
n_free++;
}
-
- mutex_exit(&block->mutex);
}
if (n_lru + n_free > buf_pool->curr_size) {
@@ -2385,16 +2408,13 @@ buf_get_modified_ratio_pct(void)
{
ulint ratio;
- mutex_enter(&(buf_pool->mutex));
-
+ /* Unprotected reads of buf_pool variables should be okay here. */
ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
/ (1 + UT_LIST_GET_LEN(buf_pool->LRU)
+ UT_LIST_GET_LEN(buf_pool->free));
/* 1 + is there to avoid division by zero */
- mutex_exit(&(buf_pool->mutex));
-
return(ratio);
}
@@ -2426,6 +2446,7 @@ buf_print_io(
(ulong)
UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
}
+ if (file) {
fprintf(file,
"Buffer pool size %lu\n"
"Free buffers %lu\n"
@@ -2443,12 +2464,13 @@ buf_print_io(
(ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
+ buf_pool->init_flush[BUF_FLUSH_LIST],
(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
-
+ } // if (file)
current_time = time(NULL);
time_elapsed = 0.001 + difftime(current_time,
buf_pool->last_printout_time);
buf_pool->last_printout_time = current_time;
+ if (file) {
fprintf(file,
"Pages read %lu, created %lu, written %lu\n"
"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
@@ -2461,6 +2483,7 @@ buf_print_io(
/ time_elapsed,
(buf_pool->n_pages_written - buf_pool->n_pages_written_old)
/ time_elapsed);
+ } // if (file)
if (srv_use_awe) {
fprintf(file, "AWE: %.2f page remaps/s\n",
@@ -2470,15 +2493,18 @@ buf_print_io(
}
if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
- fprintf(file, "Buffer pool hit rate %lu / 1000\n",
- (ulong)
- (1000 - ((1000 * (buf_pool->n_pages_read
- - buf_pool->n_pages_read_old))
- / (buf_pool->n_page_gets
- - buf_pool->n_page_gets_old))));
+ ulong buf_pool_hit_per_k = (ulong) (1000 - ((1000 *
+ (buf_pool->n_pages_read - buf_pool->n_pages_read_old))
+ / (buf_pool->n_page_gets - buf_pool->n_page_gets_old)));
+ if (file) {
+ fprintf(file, "Buffer pool hit rate %lu / 1000\n",
+ buf_pool_hit_per_k);
+ } // if (file)
} else {
- fputs("No buffer pool page gets since the last printout\n",
- file);
+ if (file) {
+ fputs("No buffer pool page gets since the last printout\n",
+ file);
+ } // if (file)
}
buf_pool->n_page_gets_old = buf_pool->n_page_gets;
=== modified file 'storage/innobase/buf/buf0flu.c'
--- a/storage/innobase/buf/buf0flu.c 2008-02-01 10:55:39 +0000
+++ b/storage/innobase/buf/buf0flu.c 2009-05-19 11:15:22 +0000
@@ -546,22 +546,30 @@ buf_flush_try_page(
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
|| flush_type == BUF_FLUSH_SINGLE_PAGE);
- mutex_enter(&(buf_pool->mutex));
-
- block = buf_page_hash_get(space, offset);
-
- ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
+ block = buf_page_hash_get(space, offset, FALSE);
if (!block) {
- mutex_exit(&(buf_pool->mutex));
return(0);
}
+ mutex_enter(&(buf_pool->mutex));
+
mutex_enter(&block->mutex);
- if (flush_type == BUF_FLUSH_LIST
+ /* Verify block contains the data we want. It may have
+ changed before acquiring block->mutex, because we don't
+ lock buf_pool->mutex before buf_page_hash_get. */
+ if (UNIV_UNLIKELY((offset != block->offset) ||
+ (space != block->space) ||
+ (block->state != BUF_BLOCK_FILE_PAGE))) {
+ /* Block changed before we acquired block->mutex. Do not
+ try to flush. */
+
+ } else if (flush_type == BUF_FLUSH_LIST
&& buf_flush_ready_for_flush(block, flush_type)) {
+ ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
block->io_fix = BUF_IO_WRITE;
/* If AWE is enabled and the page is not mapped to a frame,
@@ -630,6 +638,8 @@ buf_flush_try_page(
the page not to be bufferfixed (in function
..._ready_for_flush). */
+ ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
block->io_fix = BUF_IO_WRITE;
/* If AWE is enabled and the page is not mapped to a frame,
@@ -670,6 +680,8 @@ buf_flush_try_page(
} else if (flush_type == BUF_FLUSH_SINGLE_PAGE
&& buf_flush_ready_for_flush(block, flush_type)) {
+ ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
block->io_fix = BUF_IO_WRITE;
/* If AWE is enabled and the page is not mapped to a frame,
@@ -759,7 +771,7 @@ buf_flush_try_neighbors(
for (i = low; i < high; i++) {
- block = buf_page_hash_get(space, i);
+ block = buf_page_hash_get(space, i, FALSE);
ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
if (!block) {
=== modified file 'storage/innobase/buf/buf0lru.c'
--- a/storage/innobase/buf/buf0lru.c 2008-12-14 20:47:17 +0000
+++ b/storage/innobase/buf/buf0lru.c 2009-05-19 11:15:22 +0000
@@ -1029,6 +1029,9 @@ buf_LRU_block_remove_hashed_page(
be in a state where it can be freed; there
may or may not be a hash index to the page */
{
+ ulint fold;
+ ulint latch_number;
+
ut_ad(mutex_own(&(buf_pool->mutex)));
ut_ad(mutex_own(&block->mutex));
ut_ad(block);
@@ -1046,22 +1049,22 @@ buf_LRU_block_remove_hashed_page(
buf_block_modify_clock_inc(block);
- if (block != buf_page_hash_get(block->space, block->offset)) {
+ if (block != buf_page_hash_get(block->space, block->offset, FALSE)) {
fprintf(stderr,
"InnoDB: Error: page %lu %lu not found"
" in the hash table\n",
(ulong) block->space,
(ulong) block->offset);
- if (buf_page_hash_get(block->space, block->offset)) {
+ if (buf_page_hash_get(block->space, block->offset, FALSE)) {
fprintf(stderr,
"InnoDB: In hash table we find block"
" %p of %lu %lu which is not %p\n",
(void*) buf_page_hash_get
- (block->space, block->offset),
+ (block->space, block->offset, FALSE),
(ulong) buf_page_hash_get
- (block->space, block->offset)->space,
+ (block->space, block->offset, FALSE)->space,
(ulong) buf_page_hash_get
- (block->space, block->offset)->offset,
+ (block->space, block->offset, FALSE)->offset,
(void*) block);
}
@@ -1073,10 +1076,12 @@ buf_LRU_block_remove_hashed_page(
#endif
ut_a(0);
}
-
- HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
- buf_page_address_fold(block->space, block->offset),
- block);
+ fold = buf_page_address_fold(block->space, block->offset);
+ latch_number = buf_page_hash_table(block->space, block->offset);
+ rw_lock_x_lock(&(buf_pool->hash_latches[latch_number]));
+ HASH_DELETE(buf_block_t, hash, buf_pool->page_hash[latch_number],
+ fold, block);
+ rw_lock_x_unlock(&(buf_pool->hash_latches[latch_number]));
UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
block->state = BUF_BLOCK_REMOVE_HASH;
@@ -1197,6 +1202,7 @@ buf_LRU_print(void)
while (block != NULL) {
+ mutex_enter(&block->mutex);
fprintf(stderr, "BLOCK %lu ", (ulong) block->offset);
if (block->old) {
@@ -1225,6 +1231,8 @@ buf_LRU_print(void)
(ulong) ut_dulint_get_low
(btr_page_get_index_id(frame)));
+ mutex_exit(&block->mutex);
+
block = UT_LIST_GET_NEXT(LRU, block);
if (++len == 10) {
len = 0;
=== modified file 'storage/innobase/buf/buf0rea.c'
--- a/storage/innobase/buf/buf0rea.c 2006-09-21 07:39:09 +0000
+++ b/storage/innobase/buf/buf0rea.c 2009-05-19 11:15:22 +0000
@@ -175,6 +175,7 @@ buf_read_ahead_random(
ulint low, high;
ulint err;
ulint i;
+ ulint latch_number;
if (srv_startup_is_before_trx_rollback_phase) {
/* No read-ahead to avoid thread deadlocks */
@@ -223,8 +224,10 @@ buf_read_ahead_random(
/* Count how many blocks in the area have been recently accessed,
that is, reside near the start of the LRU list. */
+ latch_number = buf_page_hash_table(space, low);
+ rw_lock_s_lock(&(buf_pool->hash_latches[latch_number]));
for (i = low; i < high; i++) {
- block = buf_page_hash_get(space, i);
+ block = buf_page_hash_get(space, i, TRUE);
if ((block)
&& (block->LRU_position > LRU_recent_limit)
@@ -233,6 +236,7 @@ buf_read_ahead_random(
recent_blocks++;
}
}
+ rw_lock_s_unlock(&(buf_pool->hash_latches[latch_number]));
mutex_exit(&(buf_pool->mutex));
@@ -385,6 +389,7 @@ buf_read_ahead_linear(
ulint low, high;
ulint err;
ulint i;
+ ulint latch_number;
if (srv_startup_is_before_trx_rollback_phase) {
/* No read-ahead to avoid thread deadlocks */
@@ -445,8 +450,10 @@ buf_read_ahead_linear(
fail_count = 0;
+ latch_number = buf_page_hash_table(space, low);
+ rw_lock_s_lock(&(buf_pool->hash_latches[latch_number]));
for (i = low; i < high; i++) {
- block = buf_page_hash_get(space, i);
+ block = buf_page_hash_get(space, i, TRUE);
if ((block == NULL) || !block->accessed) {
/* Not accessed */
@@ -462,6 +469,7 @@ buf_read_ahead_linear(
pred_block = block;
}
}
+ rw_lock_s_unlock(&(buf_pool->hash_latches[latch_number]));
if (fail_count > BUF_READ_AHEAD_LINEAR_AREA
- BUF_READ_AHEAD_LINEAR_THRESHOLD) {
@@ -475,7 +483,7 @@ buf_read_ahead_linear(
/* If we got this far, we know that enough pages in the area have
been accessed in the right order: linear read-ahead can be sensible */
- block = buf_page_hash_get(space, offset);
+ block = buf_page_hash_get(space, offset, FALSE);
if (block == NULL) {
mutex_exit(&(buf_pool->mutex));
=== modified file 'storage/innobase/include/buf0buf.h'
--- a/storage/innobase/include/buf0buf.h 2008-08-20 00:37:41 +0000
+++ b/storage/innobase/include/buf0buf.h 2009-05-19 11:15:22 +0000
@@ -690,7 +690,25 @@ buf_page_hash_get(
/*==============*/
/* out: block, NULL if not found */
ulint space, /* in: space id */
- ulint offset);/* in: offset of the page within space */
+ ulint offset, /* in: offset of the page within space */
+ ibool hold_lock);/* in: Do we hold RW-lock on buffer page hash */
+/***********************************************************************
+Creates the NUM_PAGE_HASH_LATCHES page hash tables of the buffer pool */
+UNIV_INLINE
+void
+buf_page_hash_create(ulint max_size); /* in: value the tables are sized from */
+/***********************************************************************
+Creates the NUM_PAGE_HASH_LATCHES RW-locks protecting the page hash tables */
+UNIV_INLINE
+void
+buf_page_hash_create_locks(void);
+/***********************************************************************
+Returns the index of the page hash table (and hash latch) used for a page */
+UNIV_INLINE
+ulint
+buf_page_hash_table(
+ ulint space, /* in: space id */
+ ulint offset);/* in: offset of the page within space */
/***********************************************************************
Increments the pool clock by one and returns its new value. Remember that
in the 32 bit version the clock wraps around at 4 billion! */
@@ -733,8 +751,10 @@ struct buf_block_struct{
UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE
(normally = 4) physical memory
pages; otherwise NULL */
- ulint space; /* space id of the page */
- ulint offset; /* page number within the space */
+ ulint space; /* space id of the page.
+ protected by block->mutex.*/
+ ulint offset; /* page number within the space.
+ protected by block->mutex.*/
ulint lock_hash_val; /* hashed value of the page address
in the record lock hash table */
mutex_t mutex; /* mutex protecting this block:
@@ -884,6 +904,11 @@ struct buf_block_struct{
#define BUF_BLOCK_MAGIC_N 41526563
+/* Number of page hash latches, i.e. how many rw-latches (and page hash
+ tables) the page hash is split into. Must be a power of two (2**n):
+ buf_page_hash_table() selects one with fold & (NUM_PAGE_HASH_LATCHES - 1) */
+#define NUM_PAGE_HASH_LATCHES 16
+
/* The buffer pool structure. NOTE! The definition appears here only for
other modules of this directory (buf) to see it. Do not use from outside! */
@@ -925,7 +950,11 @@ struct buf_pool_struct{
ulint curr_size; /* current pool size in pages;
currently always the same as
max_size */
- hash_table_t* page_hash; /* hash table of the file pages */
+ hash_table_t* page_hash[NUM_PAGE_HASH_LATCHES];
+ /* hash tables of the file pages */
+ rw_lock_t hash_latches[NUM_PAGE_HASH_LATCHES];
+ /* Read-write latch protecting the
+ parts of the page cache hash above */
ulint n_pend_reads; /* number of pending read operations */
=== modified file 'storage/innobase/include/buf0buf.ic'
--- a/storage/innobase/include/buf0buf.ic 2008-10-15 18:54:18 +0000
+++ b/storage/innobase/include/buf0buf.ic 2009-05-19 11:15:22 +0000
@@ -421,7 +421,7 @@ buf_frame_get_newest_modification(
block = buf_block_align(frame);
- mutex_enter(&(buf_pool->mutex));
+ mutex_enter(&block->mutex);
if (block->state == BUF_BLOCK_FILE_PAGE) {
lsn = block->newest_modification;
@@ -429,7 +429,7 @@ buf_frame_get_newest_modification(
lsn = ut_dulint_zero;
}
- mutex_exit(&(buf_pool->mutex));
+ mutex_exit(&block->mutex);
return(lsn);
}
@@ -533,6 +533,27 @@ buf_block_buf_fix_inc(
block->buf_fix_count++;
}
#endif /* UNIV_SYNC_DEBUG */
+/************************************************************************
+Returns the index of the page hash table (and hash latch) used for a page */
+#define LOG_BUF_READ_AHEAD_AREA 6
+UNIV_INLINE
+ulint
+buf_page_hash_table(
+ ulint space, /* in: space id */
+ ulint offset) /* in: offset of the page within space */
+{
+ ulint ignored_offset, fold, hash_table;
+ /* Pick the page hash table for this space and offset. The
+ LOG_BUF_READ_AHEAD_AREA (six) lowest bits of the offset are dropped
+ before folding, so all pages of an aligned 64-page area (1 MByte)
+ map to the same table and latch; flushing of neighbours and the
+ read-ahead algorithms then need to take only one latch. */
+ ignored_offset = offset >> LOG_BUF_READ_AHEAD_AREA;
+ fold = buf_page_address_fold(space, ignored_offset);
+ hash_table = fold & (NUM_PAGE_HASH_LATCHES - 1); /* needs a 2**n count */
+ return hash_table;
+}
+
/**********************************************************************
Returns the control block of a file page, NULL if not found. */
UNIV_INLINE
@@ -541,26 +562,60 @@ buf_page_hash_get(
/*==============*/
/* out: block, NULL if not found */
ulint space, /* in: space id */
- ulint offset) /* in: offset of the page within space */
+ ulint offset, /* in: offset of the page within space */
+ ibool hold_lock) /* in: TRUE if caller already holds the hash RW-lock */
{
buf_block_t* block;
ulint fold;
+ ulint latch_number;
ut_ad(buf_pool);
- ut_ad(mutex_own(&(buf_pool->mutex)));
/* Look for the page in the hash table */
fold = buf_page_address_fold(space, offset);
- HASH_SEARCH(hash, buf_pool->page_hash, fold, block,
+ latch_number = buf_page_hash_table(space, offset);
+ if (!hold_lock)
+ rw_lock_s_lock(&buf_pool->hash_latches[latch_number]);
+ HASH_SEARCH(hash, buf_pool->page_hash[latch_number], fold, block,
(block->space == space) && (block->offset == offset));
ut_a(block == NULL || block->state == BUF_BLOCK_FILE_PAGE);
+ if (!hold_lock) /* unlock only after the state check above */
+ rw_lock_s_unlock(&buf_pool->hash_latches[latch_number]);
return(block);
}
/************************************************************************
+Creates the NUM_PAGE_HASH_LATCHES hash tables in buf_pool->page_hash[] */
+UNIV_INLINE
+void
+buf_page_hash_create(ulint max_size) /* in: value the tables are sized from */
+{
+ ulint i;
+ for (i = 0; i < NUM_PAGE_HASH_LATCHES; i++)
+ {
+ buf_pool->page_hash[i] = hash_create((2 * max_size) /
+ NUM_PAGE_HASH_LATCHES); /* an equal share of 2 * max_size */
+ }
+}
+
+/************************************************************************
+Creates the RW-locks in buf_pool->hash_latches[], one per page hash table */
+UNIV_INLINE
+void
+buf_page_hash_create_locks(void)
+{
+ ulint i;
+ for (i = 0; i < NUM_PAGE_HASH_LATCHES; i++)
+ {
+ rw_lock_create(&(buf_pool->hash_latches[i]),
+ SYNC_NO_ORDER_CHECK); /* NOTE(review): SYNC_BUF_PAGE_HASH unused? */
+ }
+}
+
+/************************************************************************
Tries to get the page, but if file io is required, releases all latches
in mtr down to the given savepoint. If io is required, this function
retrieves the page to buffer buf_pool, but does not bufferfix it or latch
=== modified file 'storage/innobase/include/sync0sync.h'
--- a/storage/innobase/include/sync0sync.h 2008-12-04 10:57:56 +0000
+++ b/storage/innobase/include/sync0sync.h 2009-05-19 11:15:22 +0000
@@ -454,6 +454,7 @@ or row lock! */
the level is SYNC_MEM_HASH. */
#define SYNC_BUF_POOL 150
#define SYNC_BUF_BLOCK 149
+#define SYNC_BUF_PAGE_HASH 143
#define SYNC_DOUBLEWRITE 140
#define SYNC_ANY_LATCH 135
#define SYNC_THR_LOCAL 133
=== modified file 'storage/innobase/sync/sync0sync.c'
--- a/storage/innobase/sync/sync0sync.c 2008-10-30 09:23:36 +0000
+++ b/storage/innobase/sync/sync0sync.c 2009-05-19 11:15:22 +0000
@@ -1107,6 +1107,9 @@ sync_thread_add_level(
case SYNC_BUF_POOL:
ut_a(sync_thread_levels_g(array, SYNC_BUF_POOL));
break;
+ case SYNC_BUF_PAGE_HASH:
+ ut_a(sync_thread_levels_g(array, SYNC_BUF_PAGE_HASH));
+ break;
case SYNC_SEARCH_SYS:
ut_a(sync_thread_levels_g(array, SYNC_SEARCH_SYS));
break;
| Thread |
|---|
| • bzr commit into mysql-5.1 branch (mikael:2847) | Mikael Ronstrom | 19 May |