List: Commits « Previous Message | Next Message »
From:Mikael Ronstrom Date:May 19 2009 12:46pm
Subject:bzr commit into mysql-5.1 branch (mikael:2847)
View as plain text  
#At file:///home/mikael/mysql_clones/buf_page_hash_split_MR/

 2847 Mikael Ronstrom	2009-05-19
      My variant of buffer page hash split mutex implementation
      modified:
        storage/innobase/buf/buf0buf.c
        storage/innobase/buf/buf0flu.c
        storage/innobase/buf/buf0lru.c
        storage/innobase/buf/buf0rea.c
        storage/innobase/include/buf0buf.h
        storage/innobase/include/buf0buf.ic

=== modified file 'storage/innobase/buf/buf0buf.c'
--- a/storage/innobase/buf/buf0buf.c	2008-10-15 18:54:18 +0000
+++ b/storage/innobase/buf/buf0buf.c	2009-05-19 12:46:08 +0000
@@ -595,8 +595,10 @@ buf_pool_init(
 	/* 1. Initialize general fields
 	---------------------------- */
 	mutex_create(&buf_pool->mutex, SYNC_BUF_POOL);
+        buf_page_hash_create_locks();
 
 	mutex_enter(&(buf_pool->mutex));
+        buf_page_hash_lock_all();
 
 	if (srv_use_awe) {
 		/*----------------------------------------*/
@@ -704,7 +706,7 @@ buf_pool_init(
 		}
 	}
 
-	buf_pool->page_hash = hash_create(2 * max_size);
+        buf_page_hash_create(max_size);
 
 	buf_pool->n_pend_reads = 0;
 
@@ -774,6 +776,7 @@ buf_pool_init(
 	}
 
 	mutex_exit(&(buf_pool->mutex));
+        buf_page_hash_unlock_all();
 
 	if (srv_use_adaptive_hash_indexes) {
 		btr_search_sys_create(curr_size * UNIV_PAGE_SIZE
@@ -995,12 +998,13 @@ buf_page_peek_block(
 	ulint	offset)	/* in: page number */
 {
 	buf_block_t*	block;
+        ulint           hash_table = buf_page_hash_table(space, offset);
 
-	mutex_enter_fast(&(buf_pool->mutex));
+        rw_lock_s_lock(&(buf_pool->hash_latches[hash_table]));
 
 	block = buf_page_hash_get(space, offset);
 
-	mutex_exit(&(buf_pool->mutex));
+        rw_lock_s_unlock(&(buf_pool->hash_latches[hash_table]));
 
 	return(block);
 }
@@ -1016,8 +1020,9 @@ buf_reset_check_index_page_at_flush(
 	ulint	offset)	/* in: page number */
 {
 	buf_block_t*	block;
+        ulint           hash_table = buf_page_hash_table(space, offset);
 
-	mutex_enter_fast(&(buf_pool->mutex));
+        rw_lock_s_lock(&(buf_pool->hash_latches[hash_table]));
 
 	block = buf_page_hash_get(space, offset);
 
@@ -1025,7 +1030,7 @@ buf_reset_check_index_page_at_flush(
 		block->check_index_page_at_flush = FALSE;
 	}
 
-	mutex_exit(&(buf_pool->mutex));
+        rw_lock_s_unlock(&(buf_pool->hash_latches[hash_table]));
 }
 
 /************************************************************************
@@ -1043,8 +1048,9 @@ buf_page_peek_if_search_hashed(
 {
 	buf_block_t*	block;
 	ibool		is_hashed;
+        ulint           hash_table = buf_page_hash_table(space, offset);
 
-	mutex_enter_fast(&(buf_pool->mutex));
+        rw_lock_s_lock(&(buf_pool->hash_latches[hash_table]));
 
 	block = buf_page_hash_get(space, offset);
 
@@ -1054,7 +1060,7 @@ buf_page_peek_if_search_hashed(
 		is_hashed = block->is_hashed;
 	}
 
-	mutex_exit(&(buf_pool->mutex));
+        rw_lock_s_unlock(&(buf_pool->hash_latches[hash_table]));
 
 	return(is_hashed);
 }
@@ -1095,8 +1101,9 @@ buf_page_set_file_page_was_freed(
 	ulint	offset)	/* in: page number */
 {
 	buf_block_t*	block;
+        ulint           hash_table = buf_page_hash_table(space, offset);
 
-	mutex_enter_fast(&(buf_pool->mutex));
+        rw_lock_x_lock(&(buf_pool->hash_latches[hash_table]));
 
 	block = buf_page_hash_get(space, offset);
 
@@ -1104,7 +1111,7 @@ buf_page_set_file_page_was_freed(
 		block->file_page_was_freed = TRUE;
 	}
 
-	mutex_exit(&(buf_pool->mutex));
+        rw_lock_x_unlock(&(buf_pool->hash_latches[hash_table]));
 
 	return(block);
 }
@@ -1124,8 +1131,9 @@ buf_page_reset_file_page_was_freed(
 	ulint	offset)	/* in: page number */
 {
 	buf_block_t*	block;
+        ulint           hash_table = buf_page_hash_table(space, offset);
 
-	mutex_enter_fast(&(buf_pool->mutex));
+        rw_lock_x_lock(&(buf_pool->hash_latches[hash_table]));
 
 	block = buf_page_hash_get(space, offset);
 
@@ -1133,7 +1141,7 @@ buf_page_reset_file_page_was_freed(
 		block->file_page_was_freed = FALSE;
 	}
 
-	mutex_exit(&(buf_pool->mutex));
+        rw_lock_x_unlock(&(buf_pool->hash_latches[hash_table]));
 
 	return(block);
 }
@@ -1160,6 +1168,7 @@ buf_page_get_gen(
 	ulint		fix_type;
 	ibool		success;
 	ibool		must_read;
+        ulint           hash_table;
 
 	ut_ad(mtr);
 	ut_ad((rw_latch == RW_S_LATCH)
@@ -1174,27 +1183,31 @@ buf_page_get_gen(
 	buf_pool->n_page_gets++;
 loop:
 	block = NULL;
-	mutex_enter_fast(&(buf_pool->mutex));
 
 	if (guess) {
 		block = buf_block_align(guess);
+	        mutex_enter(&block->mutex);
 
 		if ((offset != block->offset) || (space != block->space)
 		    || (block->state != BUF_BLOCK_FILE_PAGE)) {
 
+	                mutex_exit(&block->mutex);
 			block = NULL;
 		}
 	}
 
 	if (block == NULL) {
+                hash_table = buf_page_hash_table(space, offset);
+                rw_lock_s_lock(&(buf_pool->hash_latches[hash_table]));
 		block = buf_page_hash_get(space, offset);
+                if (block)
+	                mutex_enter(&block->mutex);
+                rw_lock_s_unlock(&(buf_pool->hash_latches[hash_table]));
 	}
 
 	if (block == NULL) {
 		/* Page not in buf_pool: needs to be read from file */
 
-		mutex_exit(&(buf_pool->mutex));
-
 		if (mode == BUF_GET_IF_IN_POOL) {
 
 			return(NULL);
@@ -1212,8 +1225,6 @@ loop:
 		goto loop;
 	}
 
-	mutex_enter(&block->mutex);
-
 	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
 	must_read = FALSE;
@@ -1224,9 +1235,7 @@ loop:
 
 		if (mode == BUF_GET_IF_IN_POOL) {
 			/* The page is only being read to buffer */
-			mutex_exit(&buf_pool->mutex);
-			mutex_exit(&block->mutex);
-
+		        mutex_exit(&block->mutex);
 			return(NULL);
 		}
 	}
@@ -1241,7 +1250,9 @@ loop:
 		LRU list and we must put it to awe_LRU_free_mapped list once
 		mapped to a frame */
 
+		mutex_enter_fast(&buf_pool->mutex);
 		buf_awe_map_page_to_frame(block, TRUE);
+		mutex_exit(&buf_pool->mutex);
 	}
 
 #ifdef UNIV_SYNC_DEBUG
@@ -1249,8 +1260,6 @@ loop:
 #else
 	buf_block_buf_fix_inc(block);
 #endif
-	mutex_exit(&buf_pool->mutex);
-
 	/* Check if this is the first access to the page */
 
 	accessed = block->accessed;
@@ -1673,7 +1682,8 @@ buf_page_init(
 		ut_a(0);
 	}
 
-	HASH_INSERT(buf_block_t, hash, buf_pool->page_hash,
+	HASH_INSERT(buf_block_t, hash,
+                    buf_pool->page_hash[buf_page_hash_table(space, offset)],
 		    buf_page_address_fold(space, offset), block);
 
 	block->freed_page_clock = 0;
@@ -1720,6 +1730,7 @@ buf_page_init_for_read(
 {
 	buf_block_t*	block;
 	mtr_t		mtr;
+        ulint           hash_table = buf_page_hash_table(space, offset);
 
 	ut_ad(buf_pool);
 
@@ -1748,6 +1759,7 @@ buf_page_init_for_read(
 	ut_a(block);
 
 	mutex_enter(&(buf_pool->mutex));
+        rw_lock_x_lock(&(buf_pool->hash_latches[hash_table]));
 	mutex_enter(&block->mutex);
 
 	if (fil_tablespace_deleted_or_being_deleted_in_mem(
@@ -1762,8 +1774,9 @@ buf_page_init_for_read(
 		deleted or is being deleted, or the page is
 		already in buf_pool, return */
 
-		mutex_exit(&block->mutex);
 		mutex_exit(&(buf_pool->mutex));
+                rw_lock_x_unlock(&(buf_pool->hash_latches[hash_table]));
+		mutex_exit(&block->mutex);
 
 		buf_block_free(block);
 
@@ -1778,6 +1791,7 @@ buf_page_init_for_read(
 	ut_ad(block);
 
 	buf_page_init(space, offset, block);
+        rw_lock_x_unlock(&(buf_pool->hash_latches[hash_table]));
 
 	/* The block must be put to the LRU list, to the old blocks */
 
@@ -1796,8 +1810,8 @@ buf_page_init_for_read(
 
 	rw_lock_x_lock_gen(&(block->lock), BUF_IO_READ);
 
-	mutex_exit(&block->mutex);
 	mutex_exit(&(buf_pool->mutex));
+	mutex_exit(&block->mutex);
 
 	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
 
@@ -1825,12 +1839,14 @@ buf_page_create(
 	buf_frame_t*	frame;
 	buf_block_t*	block;
 	buf_block_t*	free_block	= NULL;
+        ulint           hash_table = buf_page_hash_table(space, offset);
 
 	ut_ad(mtr);
 
 	free_block = buf_LRU_get_free_block();
 
 	mutex_enter(&(buf_pool->mutex));
+        rw_lock_x_lock(&(buf_pool->hash_latches[hash_table]));
 
 	block = buf_page_hash_get(space, offset);
 
@@ -1842,6 +1858,7 @@ buf_page_create(
 
 		/* Page can be found in buf_pool */
 		mutex_exit(&(buf_pool->mutex));
+                rw_lock_x_unlock(&(buf_pool->hash_latches[hash_table]));
 
 		buf_block_free(free_block);
 
@@ -1864,6 +1881,7 @@ buf_page_create(
 	mutex_enter(&block->mutex);
 
 	buf_page_init(space, offset, block);
+        rw_lock_x_unlock(&(buf_pool->hash_latches[hash_table]));
 
 	/* The block must be put to the LRU list */
 	buf_LRU_add_block(block, FALSE);
@@ -2088,8 +2106,8 @@ buf_page_io_complete(
 #endif /* UNIV_DEBUG */
 	}
 
-	mutex_exit(&block->mutex);
 	mutex_exit(&(buf_pool->mutex));
+	mutex_exit(&block->mutex);
 
 #ifdef UNIV_DEBUG
 	if (buf_debug_prints) {
@@ -2146,6 +2164,7 @@ buf_validate(void)
 	ut_ad(buf_pool);
 
 	mutex_enter(&(buf_pool->mutex));
+        buf_page_hash_lock_all();
 
 	for (i = 0; i < buf_pool->curr_size; i++) {
 
@@ -2221,6 +2240,7 @@ buf_validate(void)
 	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
 
 	mutex_exit(&(buf_pool->mutex));
+        buf_page_hash_unlock_all();
 
 	ut_a(buf_LRU_validate());
 	ut_a(buf_flush_validate());

=== modified file 'storage/innobase/buf/buf0flu.c'
--- a/storage/innobase/buf/buf0flu.c	2008-02-01 10:55:39 +0000
+++ b/storage/innobase/buf/buf0flu.c	2009-05-19 12:46:08 +0000
@@ -542,11 +542,14 @@ buf_flush_try_page(
 {
 	buf_block_t*	block;
 	ibool		locked;
+        ulint           hash_table;
 
 	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
 	      || flush_type == BUF_FLUSH_SINGLE_PAGE);
 
+        hash_table = buf_page_hash_table(space, offset);
 	mutex_enter(&(buf_pool->mutex));
+        rw_lock_s_lock(&(buf_pool->hash_latches[hash_table]));
 
 	block = buf_page_hash_get(space, offset);
 
@@ -554,10 +557,12 @@ buf_flush_try_page(
 
 	if (!block) {
 		mutex_exit(&(buf_pool->mutex));
+                rw_lock_s_unlock(&(buf_pool->hash_latches[hash_table]));
 		return(0);
 	}
 
 	mutex_enter(&block->mutex);
+        rw_lock_s_unlock(&(buf_pool->hash_latches[hash_table]));
 
 	if (flush_type == BUF_FLUSH_LIST
 	    && buf_flush_ready_for_flush(block, flush_type)) {
@@ -598,8 +603,8 @@ buf_flush_try_page(
 			locked = TRUE;
 		}
 
-		mutex_exit(&block->mutex);
 		mutex_exit(&(buf_pool->mutex));
+		mutex_exit(&block->mutex);
 
 		if (!locked) {
 			buf_flush_buffered_writes();
@@ -660,8 +665,8 @@ buf_flush_try_page(
 		buf_pool mutex: this ensures that the latch is acquired
 		immediately. */
 
-		mutex_exit(&block->mutex);
 		mutex_exit(&(buf_pool->mutex));
+		mutex_exit(&block->mutex);
 
 		buf_flush_write_block_low(block);
 
@@ -694,8 +699,8 @@ buf_flush_try_page(
 
 		(buf_pool->n_flush[flush_type])++;
 
-		mutex_exit(&block->mutex);
 		mutex_exit(&(buf_pool->mutex));
+		mutex_exit(&block->mutex);
 
 		rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
 
@@ -714,8 +719,8 @@ buf_flush_try_page(
 		return(1);
 	}
 
-	mutex_exit(&block->mutex);
 	mutex_exit(&(buf_pool->mutex));
+	mutex_exit(&block->mutex);
 
 	return(0);
 }
@@ -735,6 +740,7 @@ buf_flush_try_neighbors(
 	ulint		low, high;
 	ulint		count		= 0;
 	ulint		i;
+        ulint           hash_table;
 
 	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
 
@@ -755,7 +761,9 @@ buf_flush_try_neighbors(
 		high = fil_space_get_size(space);
 	}
 
+        hash_table = buf_page_hash_table(space, low);
 	mutex_enter(&(buf_pool->mutex));
+        rw_lock_s_lock(&(buf_pool->hash_latches[hash_table]));
 
 	for (i = low; i < high; i++) {
 
@@ -787,9 +795,10 @@ buf_flush_try_neighbors(
 				flush the doublewrite buffer before we start
 				waiting. */
 
-				mutex_exit(&block->mutex);
-
 				mutex_exit(&(buf_pool->mutex));
+                                rw_lock_s_unlock(
+                                     &(buf_pool->hash_latches[hash_table]));
+				mutex_exit(&block->mutex);
 
 				/* Note: as we release the buf_pool mutex
 				above, in buf_flush_try_page we cannot be sure
@@ -801,6 +810,8 @@ buf_flush_try_neighbors(
 							    flush_type);
 
 				mutex_enter(&(buf_pool->mutex));
+                                rw_lock_s_lock(
+                                     &(buf_pool->hash_latches[hash_table]));
 			} else {
 				mutex_exit(&block->mutex);
 			}
@@ -808,6 +819,7 @@ buf_flush_try_neighbors(
 	}
 
 	mutex_exit(&(buf_pool->mutex));
+        rw_lock_s_unlock(&(buf_pool->hash_latches[hash_table]));
 
 	return(count);
 }
@@ -907,8 +919,8 @@ buf_flush_batch(
 				space = block->space;
 				offset = block->offset;
 
-				mutex_exit(&block->mutex);
 				mutex_exit(&(buf_pool->mutex));
+				mutex_exit(&block->mutex);
 
 				old_page_count = page_count;
 

=== modified file 'storage/innobase/buf/buf0lru.c'
--- a/storage/innobase/buf/buf0lru.c	2008-12-14 20:47:17 +0000
+++ b/storage/innobase/buf/buf0lru.c	2009-05-19 12:46:08 +0000
@@ -207,6 +207,7 @@ buf_LRU_invalidate_tablespace(
 
 scan_again:
 	mutex_enter(&(buf_pool->mutex));
+        buf_page_hash_lock_all();
 
 	all_freed = TRUE;
 
@@ -244,9 +245,9 @@ scan_again:
 			if (block->is_hashed) {
 				page_no = block->offset;
 
-				mutex_exit(&block->mutex);
-
 				mutex_exit(&(buf_pool->mutex));
+                                buf_page_hash_unlock_all();
+				mutex_exit(&block->mutex);
 
 				/* Note that the following call will acquire
 				an S-latch on the page */
@@ -277,6 +278,7 @@ next_page:
 	}
 
 	mutex_exit(&(buf_pool->mutex));
+        buf_page_hash_unlock_all();
 
 	if (!all_freed) {
 		os_thread_sleep(20000);
@@ -338,14 +340,22 @@ buf_LRU_search_and_free_block(
 	buf_block_t*	block;
 	ulint		distance = 0;
 	ibool		freed;
+        ulint           space;
+        ulint           offset;
+        ulint           hash_table;
 
 	mutex_enter(&(buf_pool->mutex));
-
 	freed = FALSE;
 	block = UT_LIST_GET_LAST(buf_pool->LRU);
 
 	while (block != NULL) {
 		ut_a(block->in_LRU_list);
+                /* Space and offset are defined and stable when in
+                LRU list */
+                space = block->space;
+                offset = block->offset;
+                hash_table = buf_page_hash_table(space, offset);
+                rw_lock_x_lock(&(buf_pool->hash_latches[hash_table]));
 
 		mutex_enter(&block->mutex);
 
@@ -364,6 +374,7 @@ buf_LRU_search_and_free_block(
 			buf_LRU_block_remove_hashed_page(block);
 
 			mutex_exit(&(buf_pool->mutex));
+                        rw_lock_x_unlock(&(buf_pool->hash_latches[hash_table]));
 			mutex_exit(&block->mutex);
 
 			/* Remove possible adaptive hash index built on the
@@ -394,6 +405,7 @@ buf_LRU_search_and_free_block(
 			break;
 		}
 
+                rw_lock_x_unlock(&(buf_pool->hash_latches[hash_table]));
 		mutex_exit(&block->mutex);
 
 		block = UT_LIST_GET_PREV(LRU, block);
@@ -586,9 +598,8 @@ loop:
 		block->state = BUF_BLOCK_READY_FOR_USE;
 		UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
 
-		mutex_exit(&block->mutex);
-
 		mutex_exit(&(buf_pool->mutex));
+		mutex_exit(&block->mutex);
 
 		if (started_monitor) {
 			srv_print_innodb_monitor = mon_value_was;
@@ -1074,7 +1085,9 @@ buf_LRU_block_remove_hashed_page(
 		ut_a(0);
 	}
 
-	HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
+	HASH_DELETE(buf_block_t, hash,
+                    buf_pool->page_hash[buf_page_hash_table(
+                             block->space, block->offset)],
 		    buf_page_address_fold(block->space, block->offset),
 		    block);
 

=== modified file 'storage/innobase/buf/buf0rea.c'
--- a/storage/innobase/buf/buf0rea.c	2006-09-21 07:39:09 +0000
+++ b/storage/innobase/buf/buf0rea.c	2009-05-19 12:46:08 +0000
@@ -175,6 +175,7 @@ buf_read_ahead_random(
 	ulint		low, high;
 	ulint		err;
 	ulint		i;
+        ulint           hash_table;
 
 	if (srv_startup_is_before_trx_rollback_phase) {
 		/* No read-ahead to avoid thread deadlocks */
@@ -211,21 +212,18 @@ buf_read_ahead_random(
 
 	LRU_recent_limit = buf_LRU_get_recent_limit();
 
-	mutex_enter(&(buf_pool->mutex));
-
 	if (buf_pool->n_pend_reads
 	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
-		mutex_exit(&(buf_pool->mutex));
 
 		return(0);
 	}
 
 	/* Count how many blocks in the area have been recently accessed,
 	that is, reside near the start of the LRU list. */
-
+        hash_table = buf_page_hash_table(space, low);
+        rw_lock_s_lock(&(buf_pool->hash_latches[hash_table]));
 	for (i = low; i < high; i++) {
 		block = buf_page_hash_get(space, i);
-
 		if ((block)
 		    && (block->LRU_position > LRU_recent_limit)
 		    && block->accessed) {
@@ -233,8 +231,7 @@ buf_read_ahead_random(
 			recent_blocks++;
 		}
 	}
-
-	mutex_exit(&(buf_pool->mutex));
+        rw_lock_s_unlock(&(buf_pool->hash_latches[hash_table]));
 
 	if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
 		/* Do nothing */
@@ -385,6 +382,7 @@ buf_read_ahead_linear(
 	ulint		low, high;
 	ulint		err;
 	ulint		i;
+        ulint           hash_table;
 
 	if (srv_startup_is_before_trx_rollback_phase) {
 		/* No read-ahead to avoid thread deadlocks */
@@ -417,10 +415,7 @@ buf_read_ahead_linear(
 
 	tablespace_version = fil_space_get_version(space);
 
-	mutex_enter(&(buf_pool->mutex));
-
 	if (high > fil_space_get_size(space)) {
-		mutex_exit(&(buf_pool->mutex));
 		/* The area is not whole, return */
 
 		return(0);
@@ -428,7 +423,6 @@ buf_read_ahead_linear(
 
 	if (buf_pool->n_pend_reads
 	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
-		mutex_exit(&(buf_pool->mutex));
 
 		return(0);
 	}
@@ -445,6 +439,8 @@ buf_read_ahead_linear(
 
 	fail_count = 0;
 
+        hash_table = buf_page_hash_table(space, low);
+        rw_lock_s_lock(&(buf_pool->hash_latches[hash_table]));
 	for (i = low; i < high; i++) {
 		block = buf_page_hash_get(space, i);
 
@@ -466,8 +462,7 @@ buf_read_ahead_linear(
 	if (fail_count > BUF_READ_AHEAD_LINEAR_AREA
 	    - BUF_READ_AHEAD_LINEAR_THRESHOLD) {
 		/* Too many failures: return */
-
-		mutex_exit(&(buf_pool->mutex));
+                rw_lock_s_unlock(&(buf_pool->hash_latches[hash_table]));
 
 		return(0);
 	}
@@ -478,7 +473,7 @@ buf_read_ahead_linear(
 	block = buf_page_hash_get(space, offset);
 
 	if (block == NULL) {
-		mutex_exit(&(buf_pool->mutex));
+                rw_lock_s_unlock(&(buf_pool->hash_latches[hash_table]));
 
 		return(0);
 	}
@@ -494,7 +489,7 @@ buf_read_ahead_linear(
 	pred_offset = fil_page_get_prev(frame);
 	succ_offset = fil_page_get_next(frame);
 
-	mutex_exit(&(buf_pool->mutex));
+        rw_lock_s_unlock(&(buf_pool->hash_latches[hash_table]));
 
 	if ((offset == low) && (succ_offset == offset + 1)) {
 

=== modified file 'storage/innobase/include/buf0buf.h'
--- a/storage/innobase/include/buf0buf.h	2008-08-20 00:37:41 +0000
+++ b/storage/innobase/include/buf0buf.h	2009-05-19 12:46:08 +0000
@@ -692,6 +692,34 @@ buf_page_hash_get(
 	ulint	space,	/* in: space id */
 	ulint	offset);/* in: offset of the page within space */
 /***********************************************************************
+Creates the NUM_PAGE_HASH_LATCHES page hash tables, sized from max_size */
+UNIV_INLINE
+void
+buf_page_hash_create(ulint max_size);
+/***********************************************************************
+Creates the rw-latches protecting the page hash tables */
+UNIV_INLINE
+void
+buf_page_hash_create_locks(void);
+/***********************************************************************
+Calculates which page hash table, and thus which hash latch, a page uses */
+UNIV_INLINE
+ulint
+buf_page_hash_table(
+	ulint	space,	/* in: space id */
+	ulint	offset);/* in: offset of the page within space */
+/***********************************************************************
+Takes an exclusive lock on every page hash latch */
+UNIV_INLINE
+void
+buf_page_hash_lock_all(void);
+/***********************************************************************
+Releases the exclusive lock on every page hash latch */
+UNIV_INLINE
+void
+buf_page_hash_unlock_all(void);
+
+/***********************************************************************
 Increments the pool clock by one and returns its new value. Remember that
 in the 32 bit version the clock wraps around at 4 billion! */
 UNIV_INLINE
@@ -884,6 +912,11 @@ struct buf_block_struct{
 
 #define BUF_BLOCK_MAGIC_N	41526563
 
+/* Number of Page Hash latches, that is, how many different latches
+   we use to protect the Page Cache Hash. Must be of the form 2**n,
+   because the latch is selected with a bitwise AND mask */
+#define NUM_PAGE_HASH_LATCHES 16
+
 /* The buffer pool structure. NOTE! The definition appears here only for
 other modules of this directory (buf) to see it. Do not use from outside! */
 
@@ -925,7 +958,11 @@ struct buf_pool_struct{
 	ulint		curr_size;	/* current pool size in pages;
 					currently always the same as
 					max_size */
-	hash_table_t*	page_hash;	/* hash table of the file pages */
+	hash_table_t*	page_hash[NUM_PAGE_HASH_LATCHES];
+                                        /* hash tables of the file pages */
+        rw_lock_t       hash_latches[NUM_PAGE_HASH_LATCHES];
+                                        /* Read-write latch protecting the
+                                         parts of the page cache hash above */
 
 	ulint		n_pend_reads;	/* number of pending read operations */
 

=== modified file 'storage/innobase/include/buf0buf.ic'
--- a/storage/innobase/include/buf0buf.ic	2008-10-15 18:54:18 +0000
+++ b/storage/innobase/include/buf0buf.ic	2009-05-19 12:46:08 +0000
@@ -545,15 +545,17 @@ buf_page_hash_get(
 {
 	buf_block_t*	block;
 	ulint		fold;
+        ulint           hash_table;
 
 	ut_ad(buf_pool);
-	ut_ad(mutex_own(&(buf_pool->mutex)));
 
 	/* Look for the page in the hash table */
 
 	fold = buf_page_address_fold(space, offset);
 
-	HASH_SEARCH(hash, buf_pool->page_hash, fold, block,
+	HASH_SEARCH(hash,
+                    buf_pool->page_hash[buf_page_hash_table(space, offset)],
+                    fold, block,
 		    (block->space == space) && (block->offset == offset));
 	ut_a(block == NULL || block->state == BUF_BLOCK_FILE_PAGE);
 
@@ -561,6 +563,75 @@ buf_page_hash_get(
 }
 
 /************************************************************************
+Creates every page hash table of the buffer pool: one table per page
+hash latch, each created with (2 * max_size) / NUM_PAGE_HASH_LATCHES. */
+UNIV_INLINE
+void
+buf_page_hash_create(ulint max_size)
+{
+        ulint   size_per_table = (2 * max_size) / NUM_PAGE_HASH_LATCHES;
+        ulint   n;
+        for (n = 0; n < NUM_PAGE_HASH_LATCHES; n++) {
+                buf_pool->page_hash[n] = hash_create(size_per_table);
+        }
+}
+/************************************************************************
+Creates the rw-latches that protect the page hash tables, one latch per
+table. NOTE(review): SYNC_NO_ORDER_CHECK is used as the latch level. */
+UNIV_INLINE
+void
+buf_page_hash_create_locks(void)
+{
+        ulint i;
+        for (i = 0; i < NUM_PAGE_HASH_LATCHES; i++) {
+                rw_lock_create(&(buf_pool->hash_latches[i]),
+                               SYNC_NO_ORDER_CHECK);
+        }
+}
+/************************************************************************
+Maps a page address to the index of the page hash table, and of the
+matching hash latch, that covers it. */
+#define LOG_BUF_READ_AHEAD_AREA 6
+UNIV_INLINE
+ulint
+buf_page_hash_table(
+	ulint	space,	/* in: space id */
+	ulint	offset) /* in: offset of the page within space */
+{
+        /* The LOG_BUF_READ_AHEAD_AREA lowest bits of the page number are
+           dropped before folding, so all pages of an aligned group of
+           2**LOG_BUF_READ_AHEAD_AREA (64) pages get the same index.
+           Flushing of neighbours and the read-ahead algorithms then
+           don't have to lock several latches for such a group. */
+        ulint   area_no   = offset >> LOG_BUF_READ_AHEAD_AREA;
+        ulint   area_fold = buf_page_address_fold(space, area_no);
+
+        return(area_fold & (NUM_PAGE_HASH_LATCHES - 1));
+}
+/************************************************************************
+Takes an exclusive lock on every page hash latch, in ascending order */
+UNIV_INLINE
+void
+buf_page_hash_lock_all(void)
+{
+        ulint i;
+        for (i = 0; i < NUM_PAGE_HASH_LATCHES; i++) {
+                rw_lock_x_lock(&(buf_pool->hash_latches[i]));
+        }
+}
+/************************************************************************
+Releases the exclusive lock on every page hash latch, in ascending order */
+UNIV_INLINE
+void
+buf_page_hash_unlock_all(void)
+{
+        ulint i;
+        for (i = 0; i < NUM_PAGE_HASH_LATCHES; i++) {
+                rw_lock_x_unlock(&(buf_pool->hash_latches[i]));
+        }
+}
+
+/************************************************************************
 Tries to get the page, but if file io is required, releases all latches
 in mtr down to the given savepoint. If io is required, this function
 retrieves the page to buffer buf_pool, but does not bufferfix it or latch

Thread
bzr commit into mysql-5.1 branch (mikael:2847) — Mikael Ronstrom, 19 May