List:Commits« Previous MessageNext Message »
From:Inaam Rana Date:April 29 2010 5:24pm
Subject:bzr commit into mysql-5.1-innodb branch (inaam.rana:3431)
View as plain text  
#At file:///home/inaam/w/5.1/ based on revid:vasil.dimov@stripped

 3431 Inaam Rana	2010-04-29
      page_hash mutex patch port

    modified:
      storage/innodb_plugin/btr/btr0cur.c
      storage/innodb_plugin/btr/btr0sea.c
      storage/innodb_plugin/buf/buf0buddy.c
      storage/innodb_plugin/buf/buf0buf.c
      storage/innodb_plugin/buf/buf0flu.c
      storage/innodb_plugin/buf/buf0lru.c
      storage/innodb_plugin/buf/buf0rea.c
      storage/innodb_plugin/ha/ha0ha.c
      storage/innodb_plugin/ha/hash0hash.c
      storage/innodb_plugin/include/buf0buf.h
      storage/innodb_plugin/include/buf0buf.ic
      storage/innodb_plugin/include/buf0lru.h
      storage/innodb_plugin/include/hash0hash.h
      storage/innodb_plugin/include/sync0sync.h
      storage/innodb_plugin/sync/sync0sync.c
=== modified file 'storage/innodb_plugin/btr/btr0cur.c'
--- a/storage/innodb_plugin/btr/btr0cur.c	2010-04-28 08:46:27 +0000
+++ b/storage/innodb_plugin/btr/btr0cur.c	2010-04-29 17:23:32 +0000
@@ -3760,7 +3760,6 @@ btr_blob_free(
 	mtr_commit(mtr);
 
 	buf_pool_mutex_enter();
-	mutex_enter(&block->mutex);
 
 	/* Only free the block if it is still allocated to
 	the same file page. */
@@ -3781,7 +3780,6 @@ btr_blob_free(
 	}
 
 	buf_pool_mutex_exit();
-	mutex_exit(&block->mutex);
 }
 
 /*******************************************************************//**

=== modified file 'storage/innodb_plugin/btr/btr0sea.c'
--- a/storage/innodb_plugin/btr/btr0sea.c	2009-11-02 09:42:56 +0000
+++ b/storage/innodb_plugin/btr/btr0sea.c	2010-04-29 17:23:32 +0000
@@ -173,6 +173,10 @@ btr_search_sys_create(
 	btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));
 
 	btr_search_sys->hash_index = ha_create(hash_size, 0, 0);
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	btr_search_sys->hash_index->adaptive = TRUE;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
 }
 
 /*****************************************************************//**
@@ -1769,6 +1773,7 @@ btr_search_validate(void)
 			const buf_block_t*	block
 				= buf_block_align(node->data);
 			const buf_block_t*	hash_block;
+			mutex_t*		hash_mutex;
 
 			if (UNIV_LIKELY(buf_block_get_state(block)
 					== BUF_BLOCK_FILE_PAGE)) {
@@ -1780,13 +1785,15 @@ btr_search_validate(void)
 				assertion and the comment below) */
 				hash_block = buf_block_hash_get(
 					buf_block_get_space(block),
-					buf_block_get_page_no(block));
+					buf_block_get_page_no(block),
+					&hash_mutex);
 			} else {
 				hash_block = NULL;
 			}
 
 			if (hash_block) {
 				ut_a(hash_block == block);
+				buf_page_hash_mutex_exit(hash_mutex);
 			} else {
 				/* When a block is being freed,
 				buf_LRU_search_and_free_block() first

=== modified file 'storage/innodb_plugin/buf/buf0buddy.c'
--- a/storage/innodb_plugin/buf/buf0buddy.c	2010-03-26 14:19:01 +0000
+++ b/storage/innodb_plugin/buf/buf0buddy.c	2010-04-29 17:23:32 +0000
@@ -352,6 +352,7 @@ buf_buddy_relocate_block(
 	buf_page_t*	dpage)	/*!< in: free block to relocate to */
 {
 	buf_page_t*	b;
+	mutex_t*	hash_mutex;
 
 	ut_ad(buf_pool_mutex_own());
 
@@ -371,9 +372,14 @@ buf_buddy_relocate_block(
 		break;
 	}
 
+	buf_page_hash_get(bpage->space,
+			  bpage->offset,
+			  &hash_mutex);
+
 	mutex_enter(&buf_pool_zip_mutex);
 
 	if (!buf_page_can_relocate(bpage)) {
+		buf_page_hash_mutex_exit(hash_mutex);
 		mutex_exit(&buf_pool_zip_mutex);
 		return(FALSE);
 	}
@@ -393,6 +399,7 @@ buf_buddy_relocate_block(
 
 	UNIV_MEM_INVALID(bpage, sizeof *bpage);
 
+	buf_page_hash_mutex_exit(hash_mutex);
 	mutex_exit(&buf_pool_zip_mutex);
 	return(TRUE);
 }
@@ -446,7 +453,7 @@ buf_buddy_relocate(
 			mach_read_from_4((const byte*) src
 					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID),
 			mach_read_from_4((const byte*) src
-					 + FIL_PAGE_OFFSET));
+					 + FIL_PAGE_OFFSET), NULL);
 
 		if (!bpage || bpage->zip.data != src) {
 			/* The block has probably been freshly

=== modified file 'storage/innodb_plugin/buf/buf0buf.c'
--- a/storage/innodb_plugin/buf/buf0buf.c	2010-03-23 16:20:36 +0000
+++ b/storage/innodb_plugin/buf/buf0buf.c	2010-04-29 17:23:32 +0000
@@ -975,7 +975,8 @@ buf_pool_init(void)
 	buf_pool->curr_size = chunk->size;
 	srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
 
-	buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
+	buf_pool->page_hash = ha_create(2 * buf_pool->curr_size,
+					256, SYNC_BUF_PAGE_HASH);
 	buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
 
 	buf_pool->last_printout_time = time(NULL);
@@ -1023,6 +1024,7 @@ buf_pool_free(void)
 	}
 
 	mem_free(buf_pool->chunks);
+	ha_clear(buf_pool->page_hash);
 	hash_table_free(buf_pool->page_hash);
 	hash_table_free(buf_pool->zip_hash);
 	mem_free(buf_pool);
@@ -1128,14 +1130,19 @@ buf_relocate(
 	buf_page_t*	b;
 	ulint		fold;
 
+	fold = buf_page_address_fold(bpage->space, bpage->offset);
+
 	ut_ad(buf_pool_mutex_own());
+	ut_ad(buf_page_hash_mutex_own(bpage));
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
 	ut_a(bpage->buf_fix_count == 0);
 	ut_ad(bpage->in_LRU_list);
 	ut_ad(!bpage->in_zip_hash);
 	ut_ad(bpage->in_page_hash);
-	ut_ad(bpage == buf_page_hash_get(bpage->space, bpage->offset));
+	ut_ad(bpage == buf_page_hash_get_low(bpage->space,
+					 bpage->offset,
+					 fold));
 #ifdef UNIV_DEBUG
 	switch (buf_page_get_state(bpage)) {
 	case BUF_BLOCK_ZIP_FREE:
@@ -1187,8 +1194,6 @@ buf_relocate(
 			      ut_ad(ut_list_node_313->in_LRU_list)));
 
 	/* relocate buf_pool->page_hash */
-	fold = buf_page_address_fold(bpage->space, bpage->offset);
-
 	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
 	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
 }
@@ -1282,13 +1287,16 @@ shrink_again:
 			if (!buf_flush_ready_for_replace(&block->page)) {
 
 				buf_LRU_make_block_old(&block->page);
+				mutex_exit(&block->mutex);
 				dirty++;
-			} else if (buf_LRU_free_block(&block->page, TRUE, NULL)
+			} else {
+				mutex_exit(&block->mutex);
+				if (buf_LRU_free_block(&block->page,
+						       TRUE, NULL)
 				   != BUF_LRU_FREED) {
-				nonfree++;
+					nonfree++;
+				}
 			}
-
-			mutex_exit(&block->mutex);
 		}
 
 		buf_pool_mutex_exit();
@@ -1359,10 +1367,21 @@ buf_pool_page_hash_rebuild(void)
 	buf_page_t*	b;
 
 	buf_pool_mutex_enter();
+	hash_mutex_enter_all(buf_pool->page_hash);
 
 	/* Free, create, and populate the hash table. */
+	ha_clear(buf_pool->page_hash);
+
+	/* FIXME: This is broken. When we free the hash_table we
+	free the mutex array as well. We either have to have a
+	mechanism where it is guaranteed that nobody will try to
+	acquire any of the page_hash mutexes or think of some other
+	way to implement this. It doesn't matter as of now because
+	buffer pool resize code is not used currently. */
 	hash_table_free(buf_pool->page_hash);
-	buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
+	buf_pool->page_hash = page_hash
+			    = ha_create(2 * buf_pool->curr_size,
+					64, SYNC_BUF_PAGE_HASH);
 	zip_hash = hash_create(2 * buf_pool->curr_size);
 
 	HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
@@ -1438,6 +1457,7 @@ buf_pool_page_hash_rebuild(void)
 		}
 	}
 
+	hash_mutex_exit_all(buf_pool->page_hash);
 	buf_pool_mutex_exit();
 }
 
@@ -1558,16 +1578,17 @@ buf_reset_check_index_page_at_flush(
 	ulint	offset)	/*!< in: page number */
 {
 	buf_block_t*	block;
+	mutex_t*	hash_mutex;
 
-	buf_pool_mutex_enter();
+	block = buf_block_hash_get(space, offset, &hash_mutex);
 
-	block = (buf_block_t*) buf_page_hash_get(space, offset);
+	if (block) {
+		if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
+			block->check_index_page_at_flush = FALSE;
+		}
 
-	if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
-		block->check_index_page_at_flush = FALSE;
+		buf_page_hash_mutex_exit(hash_mutex);
 	}
-
-	buf_pool_mutex_exit();
 }
 
 /********************************************************************//**
@@ -1584,10 +1605,9 @@ buf_page_peek_if_search_hashed(
 {
 	buf_block_t*	block;
 	ibool		is_hashed;
+	mutex_t*	hash_mutex;
 
-	buf_pool_mutex_enter();
-
-	block = (buf_block_t*) buf_page_hash_get(space, offset);
+	block = buf_block_hash_get(space, offset, &hash_mutex);
 
 	if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
 		is_hashed = FALSE;
@@ -1595,8 +1615,9 @@ buf_page_peek_if_search_hashed(
 		is_hashed = block->is_hashed;
 	}
 
-	buf_pool_mutex_exit();
-
+	if (block) {
+		buf_page_hash_mutex_exit(hash_mutex);
+	}
 	return(is_hashed);
 }
 
@@ -1615,17 +1636,15 @@ buf_page_set_file_page_was_freed(
 	ulint	offset)	/*!< in: page number */
 {
 	buf_page_t*	bpage;
+	mutex_t*	hash_mutex;
 
-	buf_pool_mutex_enter();
-
-	bpage = buf_page_hash_get(space, offset);
+	bpage = buf_page_hash_get(space, offset, &hash_mutex);
 
 	if (bpage) {
 		bpage->file_page_was_freed = TRUE;
+		buf_page_hash_mutex_exit(hash_mutex);
 	}
 
-	buf_pool_mutex_exit();
-
 	return(bpage);
 }
 
@@ -1643,22 +1662,50 @@ buf_page_reset_file_page_was_freed(
 	ulint	offset)	/*!< in: page number */
 {
 	buf_page_t*	bpage;
+	mutex_t*	hash_mutex;
 
-	buf_pool_mutex_enter();
-
-	bpage = buf_page_hash_get(space, offset);
+	bpage = buf_page_hash_get(space, offset, &hash_mutex);
 
 	if (bpage) {
 		bpage->file_page_was_freed = FALSE;
+		buf_page_hash_mutex_exit(hash_mutex);
 	}
 
-	buf_pool_mutex_exit();
-
 	return(bpage);
 }
 #endif /* UNIV_DEBUG_FILE_ACCESSES */
 
 /********************************************************************//**
+Attempts to discard the uncompressed frame of a compressed page. The
+caller should not be holding any mutexes when this function is called.
+No value is returned; the result of buf_LRU_free_block() is ignored. */
+static
+void
+buf_block_try_discard_uncompressed(
+/*===============================*/
+	ulint		space,	/*!< in: space id */
+	ulint		offset)	/*!< in: page number */
+{
+	buf_page_t*	bpage;
+
+	/* We need to acquire the buf_pool mutex to discard the
+	uncompressed frame, and the page_hash mutex resides below
+	the buf_pool mutex in the sync ordering, so the page_hash
+	mutex must be released first. This means that the block in
+	question can move out of page_hash, so we need to check
+	again whether the block is still in page_hash. */
+	buf_pool_mutex_enter();
+
+	bpage = buf_page_hash_get(space, offset, NULL);
+
+	if (bpage) {
+		buf_LRU_free_block(bpage, FALSE, NULL);
+	}
+
+	buf_pool_mutex_exit();
+}
+
+/********************************************************************//**
 Get read access to a compressed page (usually of type
 FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
 The page must be released with buf_page_release_zip().
@@ -1677,6 +1724,8 @@ buf_page_get_zip(
 {
 	buf_page_t*	bpage;
 	mutex_t*	block_mutex;
+	mutex_t*	hash_mutex;
+	ibool		discard_attempted = FALSE;
 	ibool		must_read;
 	unsigned	access_time;
 
@@ -1686,16 +1735,17 @@ buf_page_get_zip(
 	buf_pool->stat.n_page_gets++;
 
 	for (;;) {
-		buf_pool_mutex_enter();
 lookup:
-		bpage = buf_page_hash_get(space, offset);
+
+		/* The following call will also grab the page_hash
+		mutex if the page is found. */
+		bpage = buf_page_hash_get(space, offset, &hash_mutex);
 		if (bpage) {
 			break;
 		}
 
 		/* Page not in buf_pool: needs to be read from file */
-
-		buf_pool_mutex_exit();
+		ut_ad(!hash_mutex);
 
 		buf_read_page(space, zip_size, offset);
 
@@ -1704,10 +1754,12 @@ lookup:
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 	}
 
+	ut_ad(buf_page_hash_mutex_own(bpage));
+
 	if (UNIV_UNLIKELY(!bpage->zip.data)) {
 		/* There is no compressed page. */
 err_exit:
-		buf_pool_mutex_exit();
+		buf_page_hash_mutex_exit(hash_mutex);
 		return(NULL);
 	}
 
@@ -1725,17 +1777,17 @@ err_exit:
 		bpage->buf_fix_count++;
 		goto got_block;
 	case BUF_BLOCK_FILE_PAGE:
-		block_mutex = &((buf_block_t*) bpage)->mutex;
-		mutex_enter(block_mutex);
-
 		/* Discard the uncompressed page frame if possible. */
-		if (buf_LRU_free_block(bpage, FALSE, NULL)
-		    == BUF_LRU_FREED) {
-
-			mutex_exit(block_mutex);
+		if (!discard_attempted) {
+			buf_page_hash_mutex_exit(hash_mutex);
+			buf_block_try_discard_uncompressed(space,
+							   offset);
+			discard_attempted = TRUE;
 			goto lookup;
 		}
 
+		block_mutex = &((buf_block_t*) bpage)->mutex;
+		mutex_enter(block_mutex);
 		buf_block_buf_fix_inc((buf_block_t*) bpage,
 				      __FILE__, __LINE__);
 		goto got_block;
@@ -1748,8 +1800,7 @@ got_block:
 	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
 	access_time = buf_page_is_accessed(bpage);
 
-	buf_pool_mutex_exit();
-
+	buf_page_hash_mutex_exit(hash_mutex);
 	mutex_exit(block_mutex);
 
 	buf_page_set_accessed_make_young(bpage, access_time);
@@ -2003,8 +2054,6 @@ buf_block_is_uncompressed(
 	const buf_block_t*	block)	/*!< in: pointer to block,
 					not dereferenced */
 {
-	ut_ad(buf_pool_mutex_own());
-
 	if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
 		/* The pointer should be aligned. */
 		return(FALSE);
@@ -2036,6 +2085,10 @@ buf_page_get_gen(
 	unsigned	access_time;
 	ulint		fix_type;
 	ibool		must_read;
+	mutex_t*	hash_mutex;
+	mutex_t*	block_mutex;
+	buf_page_t*	hash_bpage;
+	ulint		fold;
 	ulint		retries = 0;
 
 	ut_ad(mtr);
@@ -2052,10 +2105,13 @@ buf_page_get_gen(
 	ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
 #endif
 	buf_pool->stat.n_page_gets++;
+
+	fold = buf_page_address_fold(space, offset);
+	hash_mutex = buf_page_hash_mutex_get(fold);
 loop:
 	block = guess;
-	buf_pool_mutex_enter();
 
+	mutex_enter(hash_mutex);
 	if (block) {
 		/* If the guess is a compressed page descriptor that
 		has been allocated by buf_buddy_alloc(), it may have
@@ -2070,6 +2126,8 @@ loop:
 		    || space != block->page.space
 		    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
 
+			/* Our guess was bogus or things have changed
+			since. */
 			block = guess = NULL;
 		} else {
 			ut_ad(!block->page.in_zip_hash);
@@ -2078,14 +2136,16 @@ loop:
 	}
 
 	if (block == NULL) {
-		block = (buf_block_t*) buf_page_hash_get(space, offset);
+		block = (buf_block_t*) buf_page_hash_get_low(space,
+							     offset,
+							     fold);
 	}
 
 loop2:
 	if (block == NULL) {
 		/* Page not in buf_pool: needs to be read from file */
 
-		buf_pool_mutex_exit();
+		mutex_exit(hash_mutex);
 
 		if (mode == BUF_GET_IF_IN_POOL) {
 
@@ -2124,12 +2184,22 @@ loop2:
 
 	ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
 
+	/* We can release hash_mutex after we acquire block_mutex to
+	make sure that no state change takes place. */
+	block_mutex = buf_page_get_mutex(&block->page);
+	mutex_enter(block_mutex);
+
+	/* Now safe to release page_hash mutex */
+	buf_page_hash_mutex_exit(hash_mutex);
+
 	must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
 
 	if (must_read && mode == BUF_GET_IF_IN_POOL) {
-		/* The page is only being read to buffer */
-		buf_pool_mutex_exit();
 
+		/* The page is being read to buffer pool,
+		but we cannot wait around for the read to
+		complete. */
+		mutex_exit(block_mutex);
 		return(NULL);
 	}
 
@@ -2143,61 +2213,70 @@ loop2:
 	case BUF_BLOCK_ZIP_PAGE:
 	case BUF_BLOCK_ZIP_DIRTY:
 		bpage = &block->page;
-		/* Protect bpage->buf_fix_count. */
-		mutex_enter(&buf_pool_zip_mutex);
 
 		if (bpage->buf_fix_count
 		    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
 			/* This condition often occurs when the buffer
 			is not buffer-fixed, but I/O-fixed by
 			buf_page_init_for_read(). */
-			mutex_exit(&buf_pool_zip_mutex);
+			mutex_exit(block_mutex);
 wait_until_unfixed:
 			/* The block is buffer-fixed or I/O-fixed.
 			Try again later. */
-			buf_pool_mutex_exit();
 			os_thread_sleep(WAIT_FOR_READ);
 
 			goto loop;
 		}
 
 		/* Allocate an uncompressed page. */
-		buf_pool_mutex_exit();
-		mutex_exit(&buf_pool_zip_mutex);
+		mutex_exit(block_mutex);
 
 		block = buf_LRU_get_free_block(0);
 		ut_a(block);
 
 		buf_pool_mutex_enter();
+
+		/* As we have released the page_hash mutex and the
+		block_mutex to allocate an uncompressed page it is
+		possible that page_hash might have changed. We do
+		another lookup here while holding the buf_pool mutex
+		to verify that bpage is indeed still a part of
+		page_hash. */
+		mutex_enter(hash_mutex);
+		hash_bpage = buf_page_hash_get_low(space, offset, fold);
+
+
 		mutex_enter(&block->mutex);
+		if (UNIV_UNLIKELY(bpage != hash_bpage)) {
+			/* The buf_pool->page_hash was modified
+			while buf_pool_mutex was released.
+			Free the block that was allocated. */
 
-		{
-			buf_page_t*	hash_bpage
-				= buf_page_hash_get(space, offset);
-
-			if (UNIV_UNLIKELY(bpage != hash_bpage)) {
-				/* The buf_pool->page_hash was modified
-				while buf_pool_mutex was released.
-				Free the block that was allocated. */
+			buf_LRU_block_free_non_file_page(block);
+			buf_pool_mutex_exit();
+			mutex_exit(&block->mutex);
 
-				buf_LRU_block_free_non_file_page(block);
-				mutex_exit(&block->mutex);
+			block = (buf_block_t*) hash_bpage;
 
-				block = (buf_block_t*) hash_bpage;
-				goto loop2;
-			}
+			/* Note that we are still holding the
+			hash_mutex which is fine as this is what
+			we expect when we move to loop2 above. */
+			goto loop2;
 		}
 
 		if (UNIV_UNLIKELY
 		    (bpage->buf_fix_count
 		     || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
 
+			buf_page_hash_mutex_exit(hash_mutex);
+
 			/* The block was buffer-fixed or I/O-fixed
 			while buf_pool_mutex was not held by this thread.
 			Free the block that was allocated and try again.
 			This should be extremely unlikely. */
 
 			buf_LRU_block_free_non_file_page(block);
+			buf_pool_mutex_exit();
 			mutex_exit(&block->mutex);
 
 			goto wait_until_unfixed;
@@ -2240,8 +2319,10 @@ wait_until_unfixed:
 
 		UNIV_MEM_INVALID(bpage, sizeof *bpage);
 
-		mutex_exit(&block->mutex);
+		buf_page_hash_mutex_exit(hash_mutex);
 		mutex_exit(&buf_pool_zip_mutex);
+		mutex_exit(&block->mutex);
+
 		buf_pool->n_pend_unzip++;
 
 		buf_buddy_free(bpage, sizeof *bpage);
@@ -2262,13 +2343,12 @@ wait_until_unfixed:
 		mutex_enter(&block->mutex);
 		block->page.buf_fix_count--;
 		buf_block_set_io_fix(block, BUF_IO_NONE);
-		mutex_exit(&block->mutex);
 		buf_pool->n_pend_unzip--;
+		buf_pool_mutex_exit();
 		rw_lock_x_unlock(&block->lock);
 
 		if (UNIV_UNLIKELY(!success)) {
-
-			buf_pool_mutex_exit();
+			mutex_exit(&block->mutex);
 			return(NULL);
 		}
 
@@ -2283,9 +2363,9 @@ wait_until_unfixed:
 		break;
 	}
 
+	ut_ad(!mutex_own(hash_mutex));
 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
-	mutex_enter(&block->mutex);
 	UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
 
 	buf_block_buf_fix_inc(block, file, line);
@@ -2296,8 +2376,6 @@ wait_until_unfixed:
 
 	access_time = buf_page_is_accessed(&block->page);
 
-	buf_pool_mutex_exit();
-
 	buf_page_set_accessed_make_young(&block->page, access_time);
 
 #ifdef UNIV_DEBUG_FILE_ACCESSES
@@ -2596,22 +2674,21 @@ buf_page_try_get_func(
 	mtr_t*		mtr)	/*!< in: mini-transaction */
 {
 	buf_block_t*	block;
+	mutex_t*	hash_mutex;
 	ibool		success;
 	ulint		fix_type;
 
 	ut_ad(mtr);
 	ut_ad(mtr->state == MTR_ACTIVE);
 
-	buf_pool_mutex_enter();
-	block = buf_block_hash_get(space_id, page_no);
+	block = buf_block_hash_get(space_id, page_no, &hash_mutex);
 
 	if (!block) {
-		buf_pool_mutex_exit();
 		return(NULL);
 	}
 
 	mutex_enter(&block->mutex);
-	buf_pool_mutex_exit();
+	buf_page_hash_mutex_exit(hash_mutex);
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
@@ -2694,11 +2771,13 @@ buf_page_init(
 	ulint		space,	/*!< in: space id */
 	ulint		offset,	/*!< in: offset of the page within space
 				in units of a page */
+	ulint		fold,	/*!< in: buf_page_address_fold(space,offset) */
 	buf_block_t*	block)	/*!< in: block to init */
 {
 	buf_page_t*	hash_page;
 
 	ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(buf_page_hash_mutex_get(fold)));
 	ut_ad(mutex_own(&(block->mutex)));
 	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
 
@@ -2718,9 +2797,11 @@ buf_page_init(
 
 	block->lock_hash_val	= lock_rec_hash(space, offset);
 
+	buf_page_init_low(&block->page);
+
 	/* Insert into the hash table of file pages */
 
-	hash_page = buf_page_hash_get(space, offset);
+	hash_page = buf_page_hash_get_low(space, offset, fold);
 
 	if (UNIV_LIKELY_NULL(hash_page)) {
 		fprintf(stderr,
@@ -2740,13 +2821,11 @@ buf_page_init(
 		ut_error;
 	}
 
-	buf_page_init_low(&block->page);
-
 	ut_ad(!block->page.in_zip_hash);
 	ut_ad(!block->page.in_page_hash);
 	ut_d(block->page.in_page_hash = TRUE);
 	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
-		    buf_page_address_fold(space, offset), &block->page);
+		    fold, &block->page);
 }
 
 /********************************************************************//**
@@ -2760,7 +2839,7 @@ on the buffer frame. The io-handler must
 and the lock released later.
 @return	pointer to the block or NULL */
 UNIV_INTERN
-buf_page_t*
+buf_page_t* 
 buf_page_init_for_read(
 /*===================*/
 	ulint*		err,	/*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
@@ -2775,6 +2854,8 @@ buf_page_init_for_read(
 {
 	buf_block_t*	block;
 	buf_page_t*	bpage;
+	mutex_t*	hash_mutex;
+	ulint		fold;
 	mtr_t		mtr;
 	ibool		lru	= FALSE;
 	void*		data;
@@ -2810,9 +2891,14 @@ buf_page_init_for_read(
 		ut_ad(block);
 	}
 
+	fold = buf_page_address_fold(space, offset);
+	hash_mutex = buf_page_hash_mutex_get(fold);
+
 	buf_pool_mutex_enter();
 
-	if (buf_page_hash_get(space, offset)) {
+	mutex_enter(hash_mutex);
+	if (buf_page_hash_get_low(space, offset, fold)) {
+		mutex_exit(hash_mutex);
 		/* The page is already in the buffer pool. */
 err_exit:
 		if (block) {
@@ -2837,7 +2923,9 @@ err_exit:
 	if (block) {
 		bpage = &block->page;
 		mutex_enter(&block->mutex);
-		buf_page_init(space, offset, block);
+
+		buf_page_init(space, offset, fold, block);
+		mutex_exit(hash_mutex);
 
 		/* The block must be put to the LRU list, to the old blocks */
 		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
@@ -2881,6 +2969,8 @@ err_exit:
 
 		mutex_exit(&block->mutex);
 	} else {
+		mutex_exit(hash_mutex);
+
 		/* Defer buf_buddy_alloc() until after the block has
 		been found not to exist.  The buf_buddy_alloc() and
 		buf_buddy_free() calls may be expensive because of
@@ -2893,12 +2983,15 @@ err_exit:
 		data = buf_buddy_alloc(zip_size, &lru);
 		bpage = buf_buddy_alloc(sizeof *bpage, &lru);
 
+		mutex_enter(hash_mutex);
+
 		/* If buf_buddy_alloc() allocated storage from the LRU list,
 		it released and reacquired buf_pool_mutex.  Thus, we must
 		check the page_hash again, as it may have been modified. */
 		if (UNIV_UNLIKELY(lru)
-		    && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
+		    && UNIV_LIKELY_NULL(buf_page_hash_get_low(space, offset, fold))) {
 
+			mutex_exit(hash_mutex);
 			/* The block was added by some other thread. */
 			buf_buddy_free(bpage, sizeof *bpage);
 			buf_buddy_free(data, zip_size);
@@ -2929,8 +3022,9 @@ err_exit:
 
 		ut_d(bpage->in_page_hash = TRUE);
 		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
-			    buf_page_address_fold(space, offset), bpage);
+			    fold, bpage);
 
+		mutex_exit(hash_mutex);
 		/* The block must be put to the LRU list, to the old blocks */
 		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
 		buf_LRU_insert_zip_clean(bpage);
@@ -2949,6 +3043,7 @@ func_exit:
 		mtr_commit(&mtr);
 	}
 
+	ut_ad(!mutex_own(hash_mutex));
 	ut_ad(!bpage || buf_page_in_file(bpage));
 	return(bpage);
 }
@@ -2971,6 +3066,8 @@ buf_page_create(
 {
 	buf_frame_t*	frame;
 	buf_block_t*	block;
+	mutex_t*	hash_mutex;
+	ulint		fold;
 	buf_block_t*	free_block	= NULL;
 	ulint		time_ms		= ut_time_ms();
 
@@ -2980,9 +3077,13 @@ buf_page_create(
 
 	free_block = buf_LRU_get_free_block(0);
 
+	fold = buf_page_address_fold(space, offset);
+	hash_mutex = buf_page_hash_mutex_get(fold);
+
 	buf_pool_mutex_enter();
+	mutex_enter(hash_mutex);
 
-	block = (buf_block_t*) buf_page_hash_get(space, offset);
+	block = (buf_block_t*) buf_page_hash_get_low(space, offset, fold);
 
 	if (block && buf_page_in_file(&block->page)) {
 #ifdef UNIV_IBUF_COUNT_DEBUG
@@ -2994,6 +3095,7 @@ buf_page_create(
 
 		/* Page can be found in buf_pool */
 		buf_pool_mutex_exit();
+		mutex_exit(hash_mutex);
 
 		buf_block_free(free_block);
 
@@ -3014,7 +3116,9 @@ buf_page_create(
 
 	mutex_enter(&block->mutex);
 
-	buf_page_init(space, offset, block);
+	buf_page_init(space, offset, fold, block);
+
+	mutex_exit(hash_mutex);
 
 	/* The block must be put to the LRU list */
 	buf_LRU_add_block(&block->page, FALSE);
@@ -3388,10 +3492,14 @@ buf_validate(void)
 	ulint		n_flush		= 0;
 	ulint		n_free		= 0;
 	ulint		n_zip		= 0;
+	ulint		fold		= 0;
+	ulint		space		= 0;
+	ulint		offset		= 0;
 
 	ut_ad(buf_pool);
 
 	buf_pool_mutex_enter();
+	hash_mutex_enter_all(buf_pool->page_hash);
 
 	chunk = buf_pool->chunks;
 
@@ -3416,10 +3524,12 @@ buf_validate(void)
 				break;
 
 			case BUF_BLOCK_FILE_PAGE:
-				ut_a(buf_page_hash_get(buf_block_get_space(
-							       block),
-						       buf_block_get_page_no(
-							       block))
+				space = buf_block_get_space(block);
+				offset = buf_block_get_page_no(block);
+				fold = buf_page_address_fold(space, offset);
+				ut_a(buf_page_hash_get_low(space,
+							   offset,
+							   fold)
 				     == &block->page);
 
 #ifdef UNIV_IBUF_COUNT_DEBUG
@@ -3507,7 +3617,9 @@ buf_validate(void)
 			break;
 		}
 		ut_a(!b->oldest_modification);
-		ut_a(buf_page_hash_get(b->space, b->offset) == b);
+		fold = buf_page_address_fold(b->space, b->offset);
+		ut_a(buf_page_hash_get_low(b->space, b->offset,
+					   fold) == b);
 
 		n_lru++;
 		n_zip++;
@@ -3559,10 +3671,13 @@ buf_validate(void)
 			ut_error;
 			break;
 		}
-		ut_a(buf_page_hash_get(b->space, b->offset) == b);
+		fold = buf_page_address_fold(b->space, b->offset);
+		ut_a(buf_page_hash_get_low(b->space, b->offset,
+				           fold) == b);
 	}
 
 	mutex_exit(&buf_pool_zip_mutex);
+	hash_mutex_exit_all(buf_pool->page_hash);
 
 	if (n_lru + n_free > buf_pool->curr_size + n_zip) {
 		fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",

=== modified file 'storage/innodb_plugin/buf/buf0flu.c'
--- a/storage/innodb_plugin/buf/buf0flu.c	2010-04-07 06:21:26 +0000
+++ b/storage/innodb_plugin/buf/buf0flu.c	2010-04-29 17:23:32 +0000
@@ -1168,7 +1168,7 @@ buf_flush_try_neighbors(
 
 	for (i = low; i < high; i++) {
 
-		bpage = buf_page_hash_get(space, i);
+		bpage = buf_page_hash_get(space, i, NULL);
 
 		if (!bpage) {
 

=== modified file 'storage/innodb_plugin/buf/buf0lru.c'
--- a/storage/innodb_plugin/buf/buf0lru.c	2010-03-23 16:20:36 +0000
+++ b/storage/innodb_plugin/buf/buf0lru.c	2010-04-29 17:23:32 +0000
@@ -123,7 +123,11 @@ UNIV_INTERN uint	buf_LRU_old_threshold_m
 /******************************************************************//**
 Takes a block out of the LRU list and page hash table.
 If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
-the object will be freed and buf_pool_zip_mutex will be released.
+the object will be freed.
+
+The caller must hold buf_pool_mutex, the buf_page_get_mutex() mutex
+and the appropriate hash_mutex. This function will release the
+buf_page_get_mutex() and the hash_mutex.
 
 If a compressed page or a compressed-only block descriptor is freed,
 other compressed pages or compressed-only block descriptors may be
@@ -371,7 +375,13 @@ scan_again:
 
 			all_freed = FALSE;
 		} else {
+			ulint fold = buf_page_address_fold(bpage->space,
+							   bpage->offset);
+			mutex_t* hash_mutex = buf_page_hash_mutex_get(fold);
+
 			mutex_t* block_mutex = buf_page_get_mutex(bpage);
+
+			mutex_enter(hash_mutex);
 			mutex_enter(block_mutex);
 
 			if (bpage->buf_fix_count > 0) {
@@ -382,7 +392,8 @@ scan_again:
 				the modifications to the file */
 
 				all_freed = FALSE;
-
+				mutex_exit(hash_mutex);
+				mutex_exit(block_mutex);
 				goto next_page;
 			}
 
@@ -436,6 +447,7 @@ scan_again:
 				zip_size = buf_page_get_zip_size(bpage);
 				page_no = buf_page_get_page_no(bpage);
 
+				mutex_exit(hash_mutex);
 				mutex_exit(block_mutex);
 
 				/* Note that the following call will acquire
@@ -459,8 +471,7 @@ scan_again:
 							       bpage);
 			} else {
 				/* The block_mutex should have been
-				released by buf_LRU_block_remove_hashed_page()
-				when it returns BUF_BLOCK_ZIP_FREE. */
+				released by buf_LRU_block_remove_hashed_page() */
 				ut_ad(block_mutex == &buf_pool_zip_mutex);
 				ut_ad(!mutex_own(block_mutex));
 
@@ -478,14 +489,14 @@ scan_again:
 					mutex_exit(block_mutex);
 				}
 
-				goto next_page_no_mutex;
 			}
-next_page:
-			mutex_exit(block_mutex);
-		}
 
-next_page_no_mutex:
+			ut_ad(!mutex_own(hash_mutex));
+			ut_ad(!mutex_own(block_mutex));
+		}
+next_page:
 		bpage = prev_bpage;
+
 	}
 
 	buf_pool_mutex_exit();
@@ -574,10 +585,7 @@ buf_LRU_free_from_unzip_LRU_list(
 		ut_ad(block->in_unzip_LRU_list);
 		ut_ad(block->page.in_LRU_list);
 
-		mutex_enter(&block->mutex);
 		freed = buf_LRU_free_block(&block->page, FALSE, NULL);
-		mutex_exit(&block->mutex);
-
 		switch (freed) {
 		case BUF_LRU_FREED:
 			return(TRUE);
@@ -628,17 +636,12 @@ buf_LRU_free_from_common_LRU_list(
 
 		enum buf_lru_free_block_status	freed;
 		unsigned			accessed;
-		mutex_t*			block_mutex
-			= buf_page_get_mutex(bpage);
 
 		ut_ad(buf_page_in_file(bpage));
 		ut_ad(bpage->in_LRU_list);
 
-		mutex_enter(block_mutex);
 		accessed = buf_page_is_accessed(bpage);
 		freed = buf_LRU_free_block(bpage, TRUE, NULL);
-		mutex_exit(block_mutex);
-
 		switch (freed) {
 		case BUF_LRU_FREED:
 			/* Keep track of pages that are evicted without
@@ -1368,9 +1371,8 @@ NOTE: If this function returns BUF_LRU_F
 release buf_pool_mutex.  Furthermore, the page frame will no longer be
 accessible via bpage.
 
-The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
-release these two mutexes after the call.  No other
-buf_page_get_mutex() may be held when calling this function.
+The caller must hold buf_pool_mutex and must not hold any
+buf_page_get_mutex() when calling this function.
 @return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
 BUF_LRU_NOT_FREED otherwise. */
 UNIV_INTERN
@@ -1385,20 +1387,25 @@ buf_LRU_free_block(
 				be assigned TRUE if buf_pool_mutex
 				was temporarily released, or NULL */
 {
+	enum buf_lru_free_block_status	ret;
 	buf_page_t*	b = NULL;
+	const ulint	fold = buf_page_address_fold(bpage->space,
+						     bpage->offset);
+	mutex_t*	hash_mutex = buf_page_hash_mutex_get(fold);
 	mutex_t*	block_mutex = buf_page_get_mutex(bpage);
 
 	ut_ad(buf_pool_mutex_own());
-	ut_ad(mutex_own(block_mutex));
 	ut_ad(buf_page_in_file(bpage));
 	ut_ad(bpage->in_LRU_list);
+
+	mutex_enter(hash_mutex);
+	mutex_enter(block_mutex);
+
 	ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
 	UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
 
 	if (!buf_page_can_relocate(bpage)) {
-
-		/* Do not free buffer-fixed or I/O-fixed blocks. */
-		return(BUF_LRU_NOT_FREED);
+		goto no_free_exit;
 	}
 
 #ifdef UNIV_IBUF_COUNT_DEBUG
@@ -1410,29 +1417,47 @@ buf_LRU_free_block(
 		/* Do not completely free dirty blocks. */
 
 		if (bpage->oldest_modification) {
-			return(BUF_LRU_NOT_FREED);
+			goto no_free_exit;
 		}
-	} else if (bpage->oldest_modification) {
-		/* Do not completely free dirty blocks. */
+	} else if ((bpage->oldest_modification)
+		   && (buf_page_get_state(bpage)
+		       != BUF_BLOCK_FILE_PAGE)) {
 
-		if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
-			ut_ad(buf_page_get_state(bpage)
-			      == BUF_BLOCK_ZIP_DIRTY);
-			return(BUF_LRU_NOT_FREED);
-		}
+		ut_ad(buf_page_get_state(bpage)
+		      == BUF_BLOCK_ZIP_DIRTY);
+
+		goto no_free_exit;
 
-		goto alloc;
 	} else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
+
+		mutex_exit(block_mutex);
 		/* Allocate the control block for the compressed page.
 		If it cannot be allocated (without freeing a block
 		from the LRU list), refuse to free bpage. */
-alloc:
 		buf_pool_mutex_exit_forbid();
 		b = buf_buddy_alloc(sizeof *b, NULL);
 		buf_pool_mutex_exit_allow();
 
+		mutex_enter(block_mutex);
+
+		/* The block may have been buffer-fixed while the block
+		mutex was released. In that case free the newly
+		allocated descriptor and return. */
+		if (!buf_page_can_relocate(bpage)) {
+			if (b) {
+				buf_buddy_free(b, sizeof(*b));
+			}
+no_free_exit:
+			ret = BUF_LRU_NOT_FREED;
+func_exit:
+			mutex_exit(hash_mutex);
+			mutex_exit(block_mutex);
+			return(ret);
+		}
+
 		if (UNIV_UNLIKELY(!b)) {
-			return(BUF_LRU_CANNOT_RELOCATE);
+			ret = BUF_LRU_CANNOT_RELOCATE;
+			goto func_exit;
 		}
 
 		memcpy(b, bpage, sizeof *b);
@@ -1446,16 +1471,28 @@ alloc:
 	}
 #endif /* UNIV_DEBUG */
 
+	ut_ad(mutex_own(hash_mutex));
+	ut_ad(buf_page_can_relocate(bpage));
+
 	if (buf_LRU_block_remove_hashed_page(bpage, zip)
 	    != BUF_BLOCK_ZIP_FREE) {
-		ut_a(bpage->buf_fix_count == 0);
+
+		/* We have just freed a BUF_BLOCK_FILE_PAGE. If b != NULL
+		then it was a compressed page with an uncompressed frame and
+		we are interested in freeing only the uncompressed frame.
+		Therefore we have to reinsert the compressed page descriptor
+		into the LRU and page_hash (and possibly flush_list).
+		If b == NULL then it was a regular page that has been freed. */
 
 		if (b) {
 			buf_page_t*	prev_b	= UT_LIST_GET_PREV(LRU, b);
-			const ulint	fold	= buf_page_address_fold(
-				bpage->space, bpage->offset);
 
-			ut_a(!buf_page_hash_get(bpage->space, bpage->offset));
+			mutex_enter(hash_mutex);
+			mutex_enter(block_mutex);
+
+			ut_a(!buf_page_hash_get_low(bpage->space,
+						    bpage->offset,
+						    fold));
 
 			b->state = b->oldest_modification
 				? BUF_BLOCK_ZIP_DIRTY
@@ -1542,6 +1579,9 @@ alloc:
 			buf_pool_mutex and block_mutex. */
 			b->buf_fix_count++;
 			b->io_fix = BUF_IO_READ;
+
+			mutex_exit(hash_mutex);
+			mutex_exit(block_mutex);
 		}
 
 		if (buf_pool_mutex_released) {
@@ -1549,7 +1589,6 @@ alloc:
 		}
 
 		buf_pool_mutex_exit();
-		mutex_exit(block_mutex);
 
 		/* Remove possible adaptive hash index on the page.
 		The page was declared uninitialized by
@@ -1581,7 +1620,6 @@ alloc:
 		}
 
 		buf_pool_mutex_enter();
-		mutex_enter(block_mutex);
 
 		if (b) {
 			mutex_enter(&buf_pool_zip_mutex);
@@ -1591,12 +1629,6 @@ alloc:
 		}
 
 		buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
-	} else {
-		/* The block_mutex should have been released by
-		buf_LRU_block_remove_hashed_page() when it returns
-		BUF_BLOCK_ZIP_FREE. */
-		ut_ad(block_mutex == &buf_pool_zip_mutex);
-		mutex_enter(block_mutex);
 	}
 
 	return(BUF_LRU_FREED);
@@ -1663,7 +1695,11 @@ buf_LRU_block_free_non_file_page(
 /******************************************************************//**
 Takes a block out of the LRU list and page hash table.
 If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
-the object will be freed and buf_pool_zip_mutex will be released.
+the object will be freed.
+
+The caller must hold buf_pool_mutex, the buf_page_get_mutex() mutex
+and the appropriate hash_mutex. This function will release the
+buf_page_get_mutex() and the hash_mutex.
 
 If a compressed page or a compressed-only block descriptor is freed,
 other compressed pages or compressed-only block descriptors may be
@@ -1681,10 +1717,17 @@ buf_LRU_block_remove_hashed_page(
 				compressed page of an uncompressed page */
 {
 	const buf_page_t*	hashed_bpage;
+	mutex_t*		hash_mutex;
+	ulint			fold;
+
 	ut_ad(bpage);
 	ut_ad(buf_pool_mutex_own());
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 
+	fold = buf_page_address_fold(bpage->space, bpage->offset);
+	hash_mutex = buf_page_hash_mutex_get(fold);
+	ut_ad(mutex_own(hash_mutex));
+
 	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
 	ut_a(bpage->buf_fix_count == 0);
 
@@ -1763,7 +1806,9 @@ buf_LRU_block_remove_hashed_page(
 		break;
 	}
 
-	hashed_bpage = buf_page_hash_get(bpage->space, bpage->offset);
+	hashed_bpage = buf_page_hash_get_low(bpage->space,
+					 bpage->offset,
+					 fold);
 
 	if (UNIV_UNLIKELY(bpage != hashed_bpage)) {
 		fprintf(stderr,
@@ -1782,7 +1827,7 @@ buf_LRU_block_remove_hashed_page(
 		}
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-		mutex_exit(buf_page_get_mutex(bpage));
+		mutex_exit(hash_mutex);
 		buf_pool_mutex_exit();
 		buf_print();
 		buf_LRU_print();
@@ -1808,7 +1853,11 @@ buf_LRU_block_remove_hashed_page(
 
 		UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
 
+		ut_ad(buf_page_get_mutex(bpage) == &buf_pool_zip_mutex);
+
+		buf_page_hash_mutex_exit(hash_mutex);
 		mutex_exit(&buf_pool_zip_mutex);
+
 		buf_pool_mutex_exit_forbid();
 		buf_buddy_free(bpage->zip.data,
 			       page_zip_get_size(&bpage->zip));
@@ -1826,6 +1875,28 @@ buf_LRU_block_remove_hashed_page(
 				 UNIV_PAGE_SIZE);
 		buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
 
+		/* Question: If we release bpage and hash mutex here
+		then what protects us against:
+		1) Some other thread buffer fixing this page
+		2) Some other thread trying to read this page and
+		not finding it in buffer pool attempting to read it
+		from the disk.
+		Answer:
+		1) Cannot happen because the page is no longer in the
+		page_hash. The only other possibility is that, while
+		invalidating a tablespace, we buffer fix the prev_page
+		in LRU to avoid relocation during the scan. But that is
+		not possible because we are holding buf_pool mutex.
+
+		2) Not possible because buf_page_init_for_read()
+		does its page_hash lookup while holding buf_pool
+		mutex. We are holding buf_pool mutex here, and by
+		the time the caller releases it we will have
+		inserted the compressed-only descriptor into the
+		page_hash. */
+		buf_page_hash_mutex_exit(hash_mutex);
+		mutex_exit(&((buf_block_t*) bpage)->mutex);
+
 		if (zip && bpage->zip.data) {
 			/* Free the compressed page. */
 			void*	data = bpage->zip.data;
@@ -1834,11 +1905,9 @@ buf_LRU_block_remove_hashed_page(
 			ut_ad(!bpage->in_free_list);
 			ut_ad(!bpage->in_flush_list);
 			ut_ad(!bpage->in_LRU_list);
-			mutex_exit(&((buf_block_t*) bpage)->mutex);
 			buf_pool_mutex_exit_forbid();
 			buf_buddy_free(data, page_zip_get_size(&bpage->zip));
 			buf_pool_mutex_exit_allow();
-			mutex_enter(&((buf_block_t*) bpage)->mutex);
 			page_zip_set_size(&bpage->zip, 0);
 		}
 
@@ -1867,11 +1936,14 @@ buf_LRU_block_free_hashed_page(
 				be in a state where it can be freed */
 {
 	ut_ad(buf_pool_mutex_own());
-	ut_ad(mutex_own(&block->mutex));
+
+	mutex_enter(&block->mutex);
 
 	buf_block_set_state(block, BUF_BLOCK_MEMORY);
 
 	buf_LRU_block_free_non_file_page(block);
+
+	mutex_exit(&block->mutex);
 }
 
 /**********************************************************************//**

=== modified file 'storage/innodb_plugin/buf/buf0rea.c'
--- a/storage/innodb_plugin/buf/buf0rea.c	2010-03-26 14:19:01 +0000
+++ b/storage/innodb_plugin/buf/buf0rea.c	2010-04-29 17:23:32 +0000
@@ -320,7 +320,7 @@ buf_read_ahead_linear(
 	fail_count = 0;
 
 	for (i = low; i < high; i++) {
-		bpage = buf_page_hash_get(space, i);
+		bpage = buf_page_hash_get(space, i, NULL);
 
 		if ((bpage == NULL) || !buf_page_is_accessed(bpage)) {
 			/* Not accessed */
@@ -358,7 +358,7 @@ buf_read_ahead_linear(
 	/* If we got this far, we know that enough pages in the area have
 	been accessed in the right order: linear read-ahead can be sensible */
 
-	bpage = buf_page_hash_get(space, offset);
+	bpage = buf_page_hash_get(space, offset, NULL);
 
 	if (bpage == NULL) {
 		buf_pool_mutex_exit();

=== modified file 'storage/innodb_plugin/ha/ha0ha.c'
--- a/storage/innodb_plugin/ha/ha0ha.c	2010-04-07 18:24:55 +0000
+++ b/storage/innodb_plugin/ha/ha0ha.c	2010-04-29 17:23:32 +0000
@@ -60,11 +60,6 @@ ha_create_func(
 	ut_ad(ut_is_2pow(n_mutexes));
 	table = hash_create(n);
 
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
-	table->adaptive = TRUE;
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
 	/* Creating MEM_HEAP_BTR_SEARCH type heaps can potentially fail,
 	but in practise it never should in this case, hence the asserts. */
 
@@ -104,7 +99,8 @@ ha_clear(
 	ut_ad(table);
 	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 #ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
+	ut_ad(!table->adaptive
+	       || rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
 #endif /* UNIV_SYNC_DEBUG */
 
 #ifndef UNIV_HOTBACKUP

=== modified file 'storage/innodb_plugin/ha/hash0hash.c'
--- a/storage/innodb_plugin/ha/hash0hash.c	2010-03-29 09:54:57 +0000
+++ b/storage/innodb_plugin/ha/hash0hash.c	2010-04-29 17:23:32 +0000
@@ -86,6 +86,28 @@ hash_mutex_exit_all(
 		mutex_exit(table->mutexes + i);
 	}
 }
+
+/************************************************************//**
+Releases all mutexes of a hash table except the one passed in. */
+UNIV_INTERN
+void
+hash_mutex_exit_all_but(
+/*====================*/
+	hash_table_t*	table,		/*!< in: hash table */
+	mutex_t*	keep_mutex)	/*!< in: mutex to keep */
+{
+	ulint	i;
+
+	for (i = 0; i < table->n_mutexes; i++) {
+
+		mutex_t* mutex = table->mutexes + i;
+		if (UNIV_LIKELY(keep_mutex != mutex)) {
+			mutex_exit(mutex);
+		}
+	}
+
+	ut_ad(mutex_own(keep_mutex));
+}
 #endif /* !UNIV_HOTBACKUP */
 
 /*************************************************************//**
@@ -137,9 +159,6 @@ hash_table_free(
 {
 	ut_ad(table);
 	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-#ifndef UNIV_HOTBACKUP
-	ut_a(table->mutexes == NULL);
-#endif /* !UNIV_HOTBACKUP */
 
 	ut_free(table->array);
 	mem_free(table);

=== modified file 'storage/innodb_plugin/include/buf0buf.h'
--- a/storage/innodb_plugin/include/buf0buf.h	2010-03-23 16:20:36 +0000
+++ b/storage/innodb_plugin/include/buf0buf.h	2010-04-29 17:23:32 +0000
@@ -990,20 +990,54 @@ Returns the control block of a file page
 @return	block, NULL if not found */
 UNIV_INLINE
 buf_page_t*
+buf_page_hash_get_low(
+/*==================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset,	/*!< in: offset of the page within space */
+	ulint	fold);	/*!< in: buf_page_address_fold(space, offset) */
+/******************************************************************//**
+Returns the control block of a file page, NULL if not found.
+If the block is found and mutex is not NULL then the appropriate
+page_hash mutex is acquired. It is up to the caller to release the
+mutex. If the block is found and the mutex is NULL then the page_hash
+mutex is released by this function.
+@return	block, NULL if not found */
+UNIV_INLINE
+buf_page_t*
 buf_page_hash_get(
 /*==============*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset);/*!< in: offset of the page within space */
+					/*!< out: pointer to the bpage,
+					or NULL; if NULL, hash_mutex
+					is also NULL. */
+	ulint		space,		/*!< in: space id */
+	ulint		offset,		/*!< in: page number */
+	mutex_t**	mutex);		/*!< in/out: mutex of the page
+					hash acquired if bpage is
+					found. NULL otherwise. If NULL
+					is passed then the hash_mutex
+					is released by this function */
 /******************************************************************//**
-Returns the control block of a file page, NULL if not found
-or an uncompressed page frame does not exist.
+Returns the control block of a file page, NULL if not found or an
+uncompressed page frame does not exist.
+If the block is found and mutex is not NULL then the appropriate
+page_hash mutex is acquired. It is up to the caller to release the
+mutex. If the block is found and the mutex is NULL then the page_hash
+mutex is released by this function.
 @return	block, NULL if not found */
 UNIV_INLINE
 buf_block_t*
 buf_block_hash_get(
 /*===============*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset);/*!< in: offset of the page within space */
+					/*!< out: pointer to the bpage,
+					or NULL; if NULL, hash_mutex
+					is also NULL. */
+	ulint		space,		/*!< in: space id */
+	ulint		offset,		/*!< in: page number */
+	mutex_t**	mutex);		/*!< in/out: mutex of the page
+					hash acquired if bpage is
+					found. NULL otherwise. If NULL
+					is passed then the hash_mutex
+					is released by this function */
 /*********************************************************************//**
 Gets the current length of the free list of buffer blocks.
 @return	length of the free list */
@@ -1037,7 +1071,16 @@ struct buf_page_struct{
 					BUF_BLOCK_READY_FOR_USE to
 					BUF_BLOCK_MEMORY need not be
 					protected by buf_page_get_mutex().
-					@see enum buf_page_state */
+					@see enum buf_page_state.
+					State changes that are relevant
+					to page_hash are additionally
+					protected by the appropriate
+					page_hash mutex i.e.: if a page
+					is in page_hash or is being
+					added to/removed from page_hash
+					then the corresponding changes
+					must also be protected by
+					page_hash mutex. */
 #ifndef UNIV_HOTBACKUP
 	unsigned	flush_type:2;	/*!< if this block is currently being
 					flushed to disk, this tells the
@@ -1327,7 +1370,14 @@ struct buf_pool_struct{
 	hash_table_t*	page_hash;	/*!< hash table of buf_page_t or
 					buf_block_t file pages,
 					buf_page_in_file() == TRUE,
-					indexed by (space_id, offset) */
+					indexed by (space_id, offset).
+					page_hash is protected by an
+					array of mutexes.
+					Changes in page_hash are protected
+					by buf_pool_mutex and the relevant
+					page_hash mutex. Lookups can happen
+					while holding the buf_pool_mutex or
+					the relevant page_hash mutex. */
 	hash_table_t*	zip_hash;	/*!< hash table of buf_block_t blocks
 					whose frames are allocated to the
 					zip buddy system,
@@ -1453,6 +1503,22 @@ Use these instead of accessing buf_pool_
 	mutex_enter(&buf_pool_mutex);		\
 } while (0)
 
+/** Get appropriate page_hash_mutex. */
+#define buf_page_hash_mutex_get(f)		\
+	hash_get_mutex(buf_pool->page_hash, f)
+/** Exit page_hash_mutex. */
+#define	buf_page_hash_mutex_exit(m) mutex_exit(m)
+
+/** Test if page_hash mutex is owned. */
+#define buf_page_hash_mutex_own(b)			\
+	mutex_own(buf_page_hash_mutex_get(		\
+		  buf_page_address_fold(b->space,	\
+					b->offset)))
+#define buf_block_hash_mutex_own(b)			\
+	mutex_own(buf_page_hash_mutex_get(		\
+		  buf_page_address_fold(b->page.space,	\
+					b->page.offset)))
+
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /** Flag to forbid the release of the buffer pool mutex.
 Protected by buf_pool_mutex. */

=== modified file 'storage/innodb_plugin/include/buf0buf.ic'
--- a/storage/innodb_plugin/include/buf0buf.ic	2010-03-26 14:19:01 +0000
+++ b/storage/innodb_plugin/include/buf0buf.ic	2010-04-29 17:23:32 +0000
@@ -908,20 +908,28 @@ Returns the control block of a file page
 @return	block, NULL if not found */
 UNIV_INLINE
 buf_page_t*
-buf_page_hash_get(
-/*==============*/
+buf_page_hash_get_low(
+/*==================*/
 	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: offset of the page within space */
+	ulint	offset,	/*!< in: offset of the page within space */
+	ulint	fold)	/*!< in: buf_page_address_fold(space, offset) */
 {
 	buf_page_t*	bpage;
-	ulint		fold;
+
+#ifdef UNIV_DEBUG
+	ulint		hash_fold;
+	mutex_t*	hash_mutex;
 
 	ut_ad(buf_pool);
-	ut_ad(buf_pool_mutex_own());
 
-	/* Look for the page in the hash table */
+	hash_fold = buf_page_address_fold(space, offset);
+	ut_ad(hash_fold == fold);
 
-	fold = buf_page_address_fold(space, offset);
+	hash_mutex = hash_get_mutex(buf_pool->page_hash, fold);
+	ut_ad(mutex_own(hash_mutex));
+#endif /* UNIV_DEBUG */
+
+	/* Look for the page in the hash table */
 
 	HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage,
 		    ut_ad(bpage->in_page_hash && !bpage->in_zip_hash
@@ -938,17 +946,105 @@ buf_page_hash_get(
 }
 
 /******************************************************************//**
-Returns the control block of a file page, NULL if not found
-or an uncompressed page frame does not exist.
+Returns the control block of a file page, NULL if not found.
+If the block is found and mutex is not NULL then the appropriate
+page_hash mutex is acquired. It is up to the caller to release the
+mutex. If the block is found and the mutex is NULL then the page_hash
+mutex is released by this function.
+@return	block, NULL if not found */
+UNIV_INLINE
+buf_page_t*
+buf_page_hash_get(
+/*==============*/
+					/*!< out: pointer to the bpage,
+					or NULL; if NULL, hash_mutex
+					is also NULL. */
+	ulint		space,		/*!< in: space id */
+	ulint		offset,		/*!< in: page number */
+	mutex_t**	mutex)		/*!< in/out: mutex of the page
+					hash acquired if bpage is
+					found. NULL otherwise. If NULL
+					is passed then the hash_mutex
+					is released by this function */
+{
+	buf_page_t*	bpage;
+	ulint		fold;
+	mutex_t*	hash_mutex;
+
+	if (mutex != NULL) {
+		*mutex = NULL;
+	}
+
+	fold = buf_page_address_fold(space, offset);
+	hash_mutex = hash_get_mutex(buf_pool->page_hash, fold);
+
+	mutex_enter(hash_mutex);
+	bpage = buf_page_hash_get_low(space, offset, fold);
+
+	if (!bpage) {
+		mutex_exit(hash_mutex);
+		return(NULL);
+	}
+
+	ut_ad(buf_page_in_file(bpage));
+	ut_ad(offset == bpage->offset);
+	ut_ad(space == bpage->space);
+
+	if (mutex == NULL) {
+		/* The caller wants us to release the page_hash mutex */
+		mutex_exit(hash_mutex);
+	} else {
+		/* To be released by the caller */
+		*mutex = hash_mutex;
+	}
+
+	return(bpage);
+}
+
+/******************************************************************//**
+Returns the control block of a file page, NULL if not found or an
+uncompressed page frame does not exist.
+If the block is found and mutex is not NULL then the appropriate
+page_hash mutex is acquired. It is up to the caller to release the
+mutex. If the block is found and the mutex is NULL then the page_hash
+mutex is released by this function.
 @return	block, NULL if not found */
 UNIV_INLINE
 buf_block_t*
 buf_block_hash_get(
 /*===============*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: offset of the page within space */
-{
-	return(buf_page_get_block(buf_page_hash_get(space, offset)));
+					/*!< out: pointer to the bpage,
+					or NULL; if NULL, hash_mutex
+					is also NULL. */
+	ulint		space,		/*!< in: space id */
+	ulint		offset,		/*!< in: page number */
+	mutex_t**	mutex)		/*!< in/out: mutex of the page
+					hash acquired if bpage is
+					found. NULL otherwise. If NULL
+					is passed then the hash_mutex
+					is released by this function */
+{
+	buf_page_t*	bpage = buf_page_hash_get(space, offset, mutex);
+	buf_block_t*	block = buf_page_get_block(bpage);
+
+	if (block) {
+		ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+		ut_ad(!mutex || mutex_own(*mutex));
+		return(block);
+	} else if (bpage) {
+		/* Found, but no uncompressed frame: report as not found */
+		ut_ad(buf_page_in_file(bpage));
+
+		if (mutex) {
+			mutex_exit(*mutex);
+			*mutex = NULL;
+		}
+		return(NULL);
+	}
+
+	ut_ad(!bpage);
+	ut_ad(mutex == NULL || *mutex == NULL);
+	return(NULL);
 }
 
 /********************************************************************//**
@@ -965,15 +1061,7 @@ buf_page_peek(
 	ulint	space,	/*!< in: space id */
 	ulint	offset)	/*!< in: page number */
 {
-	const buf_page_t*	bpage;
-
-	buf_pool_mutex_enter();
-
-	bpage = buf_page_hash_get(space, offset);
-
-	buf_pool_mutex_exit();
-
-	return(bpage != NULL);
+	return(buf_page_hash_get(space, offset, NULL) != NULL);
 }
 
 /********************************************************************//**

=== modified file 'storage/innodb_plugin/include/buf0lru.h'
--- a/storage/innodb_plugin/include/buf0lru.h	2009-08-27 06:25:00 +0000
+++ b/storage/innodb_plugin/include/buf0lru.h	2010-04-29 17:23:32 +0000
@@ -100,9 +100,8 @@ NOTE: If this function returns BUF_LRU_F
 release buf_pool_mutex.  Furthermore, the page frame will no longer be
 accessible via bpage.
 
-The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
-release these two mutexes after the call.  No other
-buf_page_get_mutex() may be held when calling this function.
+The caller must hold buf_pool_mutex and must not hold any
+buf_page_get_mutex() when calling this function.
 @return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
 BUF_LRU_NOT_FREED otherwise. */
 UNIV_INTERN

=== modified file 'storage/innodb_plugin/include/hash0hash.h'
--- a/storage/innodb_plugin/include/hash0hash.h	2010-03-29 09:54:57 +0000
+++ b/storage/innodb_plugin/include/hash0hash.h	2010-04-29 17:23:32 +0000
@@ -403,6 +403,14 @@ void
 hash_mutex_exit_all(
 /*================*/
 	hash_table_t*	table);	/*!< in: hash table */
+/************************************************************//**
+Releases all mutexes of a hash table except the one passed in. */
+UNIV_INTERN
+void
+hash_mutex_exit_all_but(
+/*====================*/
+	hash_table_t*	table,		/*!< in: hash table */
+	mutex_t*	keep_mutex);	/*!< in: mutex to keep */
 #else /* !UNIV_HOTBACKUP */
 # define hash_get_heap(table, fold)	((table)->heap)
 # define hash_mutex_enter(table, fold)	((void) 0)

=== modified file 'storage/innodb_plugin/include/sync0sync.h'
--- a/storage/innodb_plugin/include/sync0sync.h	2010-03-22 11:35:29 +0000
+++ b/storage/innodb_plugin/include/sync0sync.h	2010-04-29 17:23:32 +0000
@@ -488,7 +488,8 @@ or row lock! */
 					can call routines there! Otherwise
 					the level is SYNC_MEM_HASH. */
 #define	SYNC_BUF_POOL		150
-#define	SYNC_BUF_BLOCK		149
+#define	SYNC_BUF_PAGE_HASH	149	/* buf_pool->page_hash mutex */
+#define	SYNC_BUF_BLOCK		147	/* Block mutex */
 #define SYNC_DOUBLEWRITE	140
 #define	SYNC_ANY_LATCH		135
 #define SYNC_THR_LOCAL		133

=== modified file 'storage/innodb_plugin/sync/sync0sync.c'
--- a/storage/innodb_plugin/sync/sync0sync.c	2010-02-20 16:45:41 +0000
+++ b/storage/innodb_plugin/sync/sync0sync.c	2010-04-29 17:23:32 +0000
@@ -1175,6 +1175,11 @@ sync_thread_add_level(
 			ut_error;
 		}
 		break;
+
+	case SYNC_BUF_PAGE_HASH:
+		/* Multiple page_hash mutexes are only allowed during
+		buf_validate, where the buf_pool mutex is already held.
+		Fall through to the SYNC_BUF_BLOCK check. */
 	case SYNC_BUF_BLOCK:
 		/* Either the thread must own the buffer pool mutex
 		(buf_pool_mutex), or it is allowed to latch only ONE


Attachment: [text/bzr-bundle] bzr/inaam.rana@oracle.com-20100429172332-z7ptfb1334jn9058.bundle
Thread
bzr commit into mysql-5.1-innodb branch (inaam.rana:3431) Inaam Rana29 Apr