List:Commits« Previous MessageNext Message »
From:Inaam Rana Date:June 29 2010 2:55pm
Subject:bzr commit into mysql-next-mr-innodb branch (inaam.rana:3238)
View as plain text  
#At file:///home/inaam/w/page_hash/ based on revid:marko.makela@stripped

 3238 Inaam Rana	2010-06-29
      Split access control to buf_pool->page_hash to an array of mutexes.
      This is a port of page_hash patch that was originally written for
      5.1 and is now ported to work with multiple buffer pools.

    modified:
      storage/innobase/btr/btr0cur.c
      storage/innobase/btr/btr0sea.c
      storage/innobase/buf/buf0buddy.c
      storage/innobase/buf/buf0buf.c
      storage/innobase/buf/buf0flu.c
      storage/innobase/buf/buf0lru.c
      storage/innobase/buf/buf0rea.c
      storage/innobase/ha/ha0ha.c
      storage/innobase/ha/hash0hash.c
      storage/innobase/ibuf/ibuf0ibuf.c
      storage/innobase/include/buf0buf.h
      storage/innobase/include/buf0buf.ic
      storage/innobase/include/buf0lru.h
      storage/innobase/include/hash0hash.h
      storage/innobase/include/sync0sync.h
      storage/innobase/sync/sync0sync.c
=== modified file 'storage/innobase/btr/btr0cur.c'
--- a/storage/innobase/btr/btr0cur.c	revid:marko.makela@stripped
+++ b/storage/innobase/btr/btr0cur.c	revid:inaam.rana@stripped
@@ -3880,7 +3880,6 @@ btr_blob_free(
 	mtr_commit(mtr);
 
 	buf_pool_mutex_enter(buf_pool);
-	mutex_enter(&block->mutex);
 
 	/* Only free the block if it is still allocated to
 	the same file page. */
@@ -3901,7 +3900,6 @@ btr_blob_free(
 	}
 
 	buf_pool_mutex_exit(buf_pool);
-	mutex_exit(&block->mutex);
 }
 
 /*******************************************************************//**

=== modified file 'storage/innobase/btr/btr0sea.c'
--- a/storage/innobase/btr/btr0sea.c	revid:marko.makela@stripped
+++ b/storage/innobase/btr/btr0sea.c	revid:inaam.rana@stripped
@@ -185,6 +185,10 @@ btr_search_sys_create(
 	btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));
 
 	btr_search_sys->hash_index = ha_create(hash_size, 0, 0);
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	btr_search_sys->hash_index->adaptive = TRUE;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
 }
 
 /*****************************************************************//**
@@ -1798,7 +1802,8 @@ btr_search_validate(void)
 				hash_block = buf_block_hash_get(
 					buf_pool,
 					buf_block_get_space(block),
-					buf_block_get_page_no(block));
+					buf_block_get_page_no(block),
+					NULL);
 			} else {
 				hash_block = NULL;
 			}

=== modified file 'storage/innobase/buf/buf0buddy.c'
--- a/storage/innobase/buf/buf0buddy.c	revid:marko.makela@stripped
+++ b/storage/innobase/buf/buf0buddy.c	revid:inaam.rana@stripped
@@ -354,6 +354,9 @@ buf_buddy_relocate_block(
 {
 	buf_page_t*	b;
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+	ulint		fold = buf_page_address_fold(bpage->space,
+						     bpage->offset);
+	mutex_t*	hash_mutex = buf_page_hash_mutex_get(buf_pool, fold);
 
 	ut_ad(buf_pool_mutex_own(buf_pool));
 
@@ -373,9 +376,11 @@ buf_buddy_relocate_block(
 		break;
 	}
 
+	mutex_enter(hash_mutex);
 	mutex_enter(&buf_pool->zip_mutex);
 
 	if (!buf_page_can_relocate(bpage)) {
+		mutex_exit(hash_mutex);
 		mutex_exit(&buf_pool->zip_mutex);
 		return(FALSE);
 	}
@@ -395,6 +400,7 @@ buf_buddy_relocate_block(
 
 	UNIV_MEM_INVALID(bpage, sizeof *bpage);
 
+	mutex_exit(hash_mutex);
 	mutex_exit(&buf_pool->zip_mutex);
 	return(TRUE);
 }
@@ -454,7 +460,7 @@ buf_buddy_relocate(
 		on uninitialized value. */
 		UNIV_MEM_VALID(&space, sizeof space);
 		UNIV_MEM_VALID(&page_no, sizeof page_no);
-		bpage = buf_page_hash_get(buf_pool, space, page_no);
+		bpage = buf_page_hash_get(buf_pool, space, page_no, NULL);
 
 		if (!bpage || bpage->zip.data != src) {
 			/* The block has probably been freshly
@@ -465,8 +471,6 @@ buf_buddy_relocate(
 			return(FALSE);
 		}
 
-		ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
-
 		if (page_zip_get_size(&bpage->zip) != size) {
 			/* The block is of different size.  We would
 			have to relocate all blocks covered by src.

=== modified file 'storage/innobase/buf/buf0buf.c'
--- a/storage/innobase/buf/buf0buf.c	revid:marko.makela@stripped
+++ b/storage/innobase/buf/buf0buf.c	revid:inaam.rana@stripped
@@ -1231,7 +1231,9 @@ buf_pool_init_instance(
 		buf_pool->curr_size = chunk->size;
 		buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
 
-		buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
+		buf_pool->page_hash = ha_create(2 * buf_pool->curr_size,
+						256, SYNC_BUF_PAGE_HASH);
+
 		buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
 		
 		buf_pool->last_printout_time = ut_time();
@@ -1278,6 +1280,7 @@ buf_pool_free_instance(
 	}
 
 	mem_free(buf_pool->chunks);
+	ha_clear(buf_pool->page_hash);
 	hash_table_free(buf_pool->page_hash);
 	hash_table_free(buf_pool->zip_hash);
 	mem_free(buf_pool);
@@ -1467,15 +1470,21 @@ buf_relocate(
 	ulint		fold;
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
 
+	fold = buf_page_address_fold(bpage->space, bpage->offset);
+
 	ut_ad(buf_pool_mutex_own(buf_pool));
+	ut_ad(buf_page_hash_mutex_own(buf_pool, bpage));
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
 	ut_a(bpage->buf_fix_count == 0);
 	ut_ad(bpage->in_LRU_list);
 	ut_ad(!bpage->in_zip_hash);
 	ut_ad(bpage->in_page_hash);
-	ut_ad(bpage == buf_page_hash_get(buf_pool,
-			       		 bpage->space, bpage->offset));
+	ut_ad(bpage == buf_page_hash_get_low(buf_pool,
+					     bpage->space,
+					     bpage->offset,
+					     fold));
+
 	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
 #ifdef UNIV_DEBUG
 	switch (buf_page_get_state(bpage)) {
@@ -1528,8 +1537,6 @@ buf_relocate(
 			      ut_ad(ut_list_node_313->in_LRU_list)));
 
 	/* relocate buf_pool->page_hash */
-	fold = buf_page_address_fold(bpage->space, bpage->offset);
-
 	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
 	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
 }
@@ -1624,13 +1631,16 @@ shrink_again:
 			if (!buf_flush_ready_for_replace(&block->page)) {
 
 				buf_LRU_make_block_old(&block->page);
+				mutex_exit(&block->mutex);
 				dirty++;
-			} else if (buf_LRU_free_block(&block->page, TRUE, NULL)
+			} else {
+				mutex_exit(&block->mutex);
+				if (buf_LRU_free_block(&block->page,
+						       TRUE, NULL)
 				   != BUF_LRU_FREED) {
-				nonfree++;
+					nonfree++;
+				}
 			}
-
-			mutex_exit(&block->mutex);
 		}
 
 		buf_pool_mutex_exit(buf_pool);
@@ -1726,9 +1736,22 @@ buf_pool_page_hash_rebuild_instance(
 
 	buf_pool_mutex_enter(buf_pool);
 
+	hash_mutex_enter_all(buf_pool->page_hash);
+
 	/* Free, create, and populate the hash table. */
+	ha_clear(buf_pool->page_hash);
+
+	/*FIXME: This is broken. When we free the hash_table we
+	free the mutex array as well. We either have to have a
+	mechanism where it is guaranteed that nobody will try to
+	acquire any of the page_hash mutexes or think of some other
+	way to implement this. It doesn't matter as of now because
+	buffer pool resize code is not used currently. */
 	hash_table_free(buf_pool->page_hash);
-	buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
+	buf_pool->page_hash = page_hash
+			    = ha_create(2 * buf_pool->curr_size,
+					256, SYNC_BUF_PAGE_HASH);
+
 	zip_hash = hash_create(2 * buf_pool->curr_size);
 
 	HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
@@ -1805,6 +1828,7 @@ buf_pool_page_hash_rebuild_instance(
 		}
 	}
 
+	hash_mutex_exit_all(buf_pool->page_hash);
 	buf_flush_list_mutex_exit(buf_pool);
 	buf_pool_mutex_exit(buf_pool);
 }
@@ -1819,6 +1843,13 @@ buf_pool_watch_is_sentinel(
 	buf_pool_t*		buf_pool,	/*!< buffer pool instance */
 	const buf_page_t*	bpage)		/*!< in: block */
 {
+#ifdef UNIV_DEBUG
+	/* We must also own the appropriate hash_bucket mutex. */
+	ulint	 fold = buf_page_address_fold(bpage->space,
+					      bpage->offset);
+	mutex_t* hash_mutex = buf_page_hash_mutex_get(buf_pool, fold);
+	ut_ad(mutex_own(hash_mutex));
+#endif
 	ut_ad(buf_page_in_file(bpage));
 
 	if (bpage < &buf_pool->watch[0]
@@ -1839,8 +1870,9 @@ buf_pool_watch_is_sentinel(
 }
 
 /****************************************************************//**
-Add watch for the given page to be read in. Caller must have the buffer pool
-mutex reserved.
+Add watch for the given page to be read in. Caller must have
+appropriate hash_mutex for the bpage. This function may release the
+hash_mutex and reacquire it.
 @return NULL if watch set, block if the page is in the buffer pool */
 UNIV_INTERN
 buf_page_t*
@@ -1853,12 +1885,16 @@ buf_pool_watch_set(
 	buf_page_t*	bpage;
 	ulint		i;
 	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	mutex_t*	hash_mutex;
 
-	ut_ad(buf_pool_mutex_own(buf_pool));
+	hash_mutex = buf_page_hash_mutex_get(buf_pool, fold);
+  
+	ut_ad(mutex_own(hash_mutex));
 
 	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
 
 	if (UNIV_LIKELY_NULL(bpage)) {
+page_found:
 		if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
 			/* The page was loaded meanwhile. */
 			return(bpage);
@@ -1868,6 +1904,34 @@ buf_pool_watch_set(
 		return(NULL);
 	}
 
+	/* From this point this function becomes fairly heavy in terms
+	of latching. We acquire the buf_pool mutex as well as all the
+	hash_mutexes. buf_pool mutex is needed because any changes to
+	the page_hash must be covered by it and hash_mutexes are needed
+	because we don't want to read any stale information in
+	buf_pool_watch[]. However, it is not in the critical code path
+	as this function will be called only by the purge thread. */
+
+
+	/* To obey latching order first release the hash_mutex. */
+	mutex_exit(hash_mutex);
+
+	buf_pool_mutex_enter(buf_pool);
+	hash_mutex_enter_all(buf_pool->page_hash);
+
+	/* We have to recheck that the page
+	was not loaded or a watch set by some other
+	purge thread. This is because of the small
+	time window between releasing the hash_mutex
+	and acquiring the buf_pool mutex above. */
+
+	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+	if (UNIV_LIKELY_NULL(bpage)) {
+		buf_pool_mutex_exit(buf_pool);
+		hash_mutex_exit_all_but(buf_pool->page_hash, hash_mutex);
+		goto page_found;
+	}
+
 	for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
 		bpage = &buf_pool->watch[i];
 
@@ -1895,6 +1959,14 @@ buf_pool_watch_set(
 			ut_d(bpage->in_page_hash = TRUE);
 			HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
 				    fold, bpage);
+
+			buf_pool_mutex_exit(buf_pool);
+			/* Once the sentinel is in the page_hash we can
+			safely release all mutexes except just the
+			relevant hash_mutex */
+			hash_mutex_exit_all_but(buf_pool->page_hash,
+						hash_mutex);
+
 			return(NULL);
 		case BUF_BLOCK_ZIP_PAGE:
 			ut_ad(bpage->in_page_hash);
@@ -2046,6 +2118,12 @@ buf_pool_watch_remove(
 					space, offset) */
 	buf_page_t*	watch)		/*!< in/out: sentinel for watch */
 {
+#ifdef UNIV_DEBUG
+	/* We must also own the appropriate hash_bucket mutex. */
+	mutex_t* hash_mutex = buf_page_hash_mutex_get(buf_pool, fold);
+	ut_ad(mutex_own(hash_mutex));
+#endif
+
 	ut_ad(buf_pool_mutex_own(buf_pool));
 
 	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
@@ -2067,8 +2145,18 @@ buf_pool_watch_unset(
 	buf_page_t*	bpage;
 	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
 	ulint		fold = buf_page_address_fold(space, offset);
-
+	mutex_t*	hash_mutex = buf_page_hash_mutex_get(buf_pool,
+							     fold);
+  
+	/* We only need to have buf_pool mutex in case where we end
+	up calling buf_pool_watch_remove but to obey latching order
+	we acquire it here before acquiring hash_mutex. This should
+	not cause too much grief as this function is only ever
+	called from the purge thread. */
 	buf_pool_mutex_enter(buf_pool);
+
+	mutex_enter(hash_mutex);
+
 	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
 	/* The page must exist because buf_pool_watch_set()
 	increments buf_fix_count. */
@@ -2090,6 +2178,7 @@ buf_pool_watch_unset(
 	}
 
 	buf_pool_mutex_exit(buf_pool);
+	mutex_exit(hash_mutex);
 }
 
 /****************************************************************//**
@@ -2108,15 +2197,17 @@ buf_pool_watch_occurred(
 	buf_page_t*	bpage;
 	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
 	ulint		fold	= buf_page_address_fold(space, offset);
-
-	buf_pool_mutex_enter(buf_pool);
-
+	mutex_t*	hash_mutex = buf_page_hash_mutex_get(buf_pool,
+							     fold);
+  
+	mutex_enter(hash_mutex);
+  
 	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
 	/* The page must exist because buf_pool_watch_set()
 	increments buf_fix_count. */
 	ut_a(bpage);
 	ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
-	buf_pool_mutex_exit(buf_pool);
+	mutex_exit(hash_mutex);
 
 	return(ret);
 }
@@ -2186,17 +2277,19 @@ buf_reset_check_index_page_at_flush(
 {
 	buf_block_t*	block;
 	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	mutex_t*	hash_mutex;
+  
+	block = buf_block_hash_get(buf_pool, space, offset, &hash_mutex);
 
-	buf_pool_mutex_enter(buf_pool);
-
-	block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
+	if (block) {
+		if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
+			ut_ad(!buf_pool_watch_is_sentinel(buf_pool,
+							  &block->page));
+			block->check_index_page_at_flush = FALSE;
+		}
 
-	if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
-		ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
-		block->check_index_page_at_flush = FALSE;
+		mutex_exit(hash_mutex);
 	}
-
-	buf_pool_mutex_exit(buf_pool);
 }
 
 /********************************************************************//**
@@ -2214,11 +2307,10 @@ buf_page_peek_if_search_hashed(
 	buf_block_t*	block;
 	ibool		is_hashed;
 	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	mutex_t*	hash_mutex;
 
-	buf_pool_mutex_enter(buf_pool);
-
-	block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
-
+	block = buf_block_hash_get(buf_pool, space, offset, &hash_mutex);
+ 
 	if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
 		is_hashed = FALSE;
 	} else {
@@ -2226,7 +2318,9 @@ buf_page_peek_if_search_hashed(
 		is_hashed = block->is_hashed;
 	}
 
-	buf_pool_mutex_exit(buf_pool);
+	if (block) {
+		mutex_exit(hash_mutex);
+	}
 
 	return(is_hashed);
 }
@@ -2247,18 +2341,16 @@ buf_page_set_file_page_was_freed(
 {
 	buf_page_t*	bpage;
 	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	mutex_t*	hash_mutex;
 
-	buf_pool_mutex_enter(buf_pool);
-
-	bpage = buf_page_hash_get(buf_pool, space, offset);
+	bpage = buf_page_hash_get(buf_pool, space, offset, &hash_mutex);
 
 	if (bpage) {
 		ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
 		bpage->file_page_was_freed = TRUE;
+		mutex_exit(hash_mutex);
 	}
 
-	buf_pool_mutex_exit(buf_pool);
-
 	return(bpage);
 }
 
@@ -2277,23 +2369,52 @@ buf_page_reset_file_page_was_freed(
 {
 	buf_page_t*	bpage;
 	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	mutex_t*	hash_mutex;
 
-	buf_pool_mutex_enter(buf_pool);
-
-	bpage = buf_page_hash_get(buf_pool, space, offset);
+	bpage = buf_page_hash_get(buf_pool, space, offset, &hash_mutex);
 
 	if (bpage) {
 		ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
 		bpage->file_page_was_freed = FALSE;
+		mutex_exit(hash_mutex);
 	}
 
-	buf_pool_mutex_exit(buf_pool);
-
 	return(bpage);
 }
 #endif /* UNIV_DEBUG_FILE_ACCESSES */
 
 /********************************************************************//**
+Attempts to discard the uncompressed frame of a compressed page. The
+caller should not be holding any mutexes when this function is called.
+This function returns nothing; the result of the attempt is not reported. */
+static
+void
+buf_block_try_discard_uncompressed(
+/*===============================*/
+	ulint		space,	/*!< in: space id */
+	ulint		offset)	/*!< in: page number */
+{
+	buf_page_t*	bpage;
+	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+
+	/* Discarding the uncompressed frame requires the buf_pool
+	mutex, which precedes the page_hash mutexes in the latching
+	order, so the caller must release its page_hash mutex before
+	calling this function. While no mutex is held the block may
+	be removed from page_hash, so we look it up again here under
+	the buf_pool mutex. */
+	buf_pool_mutex_enter(buf_pool);
+
+	bpage = buf_page_hash_get(buf_pool, space, offset, NULL);
+
+	if (bpage) {
+		buf_LRU_free_block(bpage, FALSE, NULL);
+	}
+
+	buf_pool_mutex_exit(buf_pool);
+}
+
+/********************************************************************//**
 Get read access to a compressed page (usually of type
 FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
 The page must be released with buf_page_release_zip().
@@ -2312,6 +2433,8 @@ buf_page_get_zip(
 {
 	buf_page_t*	bpage;
 	mutex_t*	block_mutex;
+	mutex_t*	hash_mutex;
+	ibool		discard_attempted = FALSE;
 	ibool		must_read;
 	unsigned	access_time;
 	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
@@ -2322,9 +2445,12 @@ buf_page_get_zip(
 	buf_pool->stat.n_page_gets++;
 
 	for (;;) {
-		buf_pool_mutex_enter(buf_pool);
 lookup:
-		bpage = buf_page_hash_get(buf_pool, space, offset);
+
+		/* The following call will also grab the page_hash
+		mutex if the page is found. */
+		bpage = buf_page_hash_get(buf_pool, space, offset,
+					  &hash_mutex);
 		if (bpage) {
 			ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
 			break;
@@ -2332,8 +2458,7 @@ lookup:
 
 		/* Page not in buf_pool: needs to be read from file */
 
-		buf_pool_mutex_exit(buf_pool);
-
+		ut_ad(!hash_mutex);
 		buf_read_page(space, zip_size, offset);
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@@ -2341,10 +2466,12 @@ lookup:
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 	}
 
+	ut_ad(buf_page_hash_mutex_own(buf_pool, bpage));
+
 	if (UNIV_UNLIKELY(!bpage->zip.data)) {
 		/* There is no compressed page. */
 err_exit:
-		buf_pool_mutex_exit(buf_pool);
+		mutex_exit(hash_mutex);
 		return(NULL);
 	}
 
@@ -2364,17 +2491,17 @@ err_exit:
 		bpage->buf_fix_count++;
 		goto got_block;
 	case BUF_BLOCK_FILE_PAGE:
-		block_mutex = &((buf_block_t*) bpage)->mutex;
-		mutex_enter(block_mutex);
-
 		/* Discard the uncompressed page frame if possible. */
-		if (buf_LRU_free_block(bpage, FALSE, NULL)
-		    == BUF_LRU_FREED) {
-
-			mutex_exit(block_mutex);
+		if (!discard_attempted) {
+			mutex_exit(hash_mutex);
+			buf_block_try_discard_uncompressed(space,
+							   offset);
+			discard_attempted = TRUE;
 			goto lookup;
 		}
 
+		block_mutex = &((buf_block_t*) bpage)->mutex;
+		mutex_enter(block_mutex);
 		buf_block_buf_fix_inc((buf_block_t*) bpage,
 				      __FILE__, __LINE__);
 		goto got_block;
@@ -2387,8 +2514,7 @@ got_block:
 	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
 	access_time = buf_page_is_accessed(bpage);
 
-	buf_pool_mutex_exit(buf_pool);
-
+	mutex_exit(hash_mutex);
 	mutex_exit(block_mutex);
 
 	buf_page_set_accessed_make_young(bpage, access_time);
@@ -2696,8 +2822,6 @@ buf_block_is_uncompressed(
 	const buf_block_t*	block)		/*!< in: pointer to block,
 						not dereferenced */
 {
-	ut_ad(buf_pool_mutex_own(buf_pool));
-
 	if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
 		/* The pointer should be aligned. */
 		return(FALSE);
@@ -2731,6 +2855,9 @@ buf_page_get_gen(
 	unsigned	access_time;
 	ulint		fix_type;
 	ibool		must_read;
+	mutex_t*	hash_mutex;
+	mutex_t*	block_mutex;
+	buf_page_t*	hash_bpage;
 	ulint		retries = 0;
 	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
 
@@ -2751,10 +2878,11 @@ buf_page_get_gen(
 #endif
 	buf_pool->stat.n_page_gets++;
 	fold = buf_page_address_fold(space, offset);
+	hash_mutex = buf_page_hash_mutex_get(buf_pool, fold);
 loop:
 	block = guess;
-	buf_pool_mutex_enter(buf_pool);
 
+	mutex_enter(hash_mutex);
 	if (block) {
 		/* If the guess is a compressed page descriptor that
 		has been allocated by buf_buddy_alloc(), it may have
@@ -2769,6 +2897,8 @@ loop:
 		    || space != block->page.space
 		    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
 
+			/* Our guess was bogus or things have changed
+			since. */
 			block = guess = NULL;
 		} else {
 			ut_ad(!block->page.in_zip_hash);
@@ -2782,7 +2912,8 @@ loop:
 	}
 
 loop2:
-	if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
+	if (!block || buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
+		mutex_exit(hash_mutex);
 		block = NULL;
 	}
 
@@ -2790,20 +2921,22 @@ loop2:
 		/* Page not in buf_pool: needs to be read from file */
 
 		if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
+			mutex_enter(hash_mutex);
 			block = (buf_block_t*) buf_pool_watch_set(
 				space, offset, fold);
 
 			if (UNIV_LIKELY_NULL(block)) {
 
+				ut_ad(mutex_own(hash_mutex));
 				goto got_block;
 			}
+			mutex_exit(hash_mutex);
 		}
 
-		buf_pool_mutex_exit(buf_pool);
-
 		if (mode == BUF_GET_IF_IN_POOL
 		    || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
 
+			ut_ad(!mutex_own(hash_mutex));
 			return(NULL);
 		}
 
@@ -2840,6 +2973,14 @@ loop2:
 got_block:
 	ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
 
+	/* We can release hash_mutex after we acquire block_mutex to
+	make sure that no state change takes place. */
+	block_mutex = buf_page_get_mutex(&block->page);
+	mutex_enter(block_mutex);
+
+	/* Now safe to release page_hash mutex */
+	mutex_exit(hash_mutex);
+
 	must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
 
 	if (must_read && mode == BUF_GET_IF_IN_POOL) {
@@ -2847,7 +2988,7 @@ got_block:
 		/* The page is being read to buffer pool,
 		but we cannot wait around for the read to
 		complete. */
-		buf_pool_mutex_exit(buf_pool);
+		mutex_exit(block_mutex);
 
 		return(NULL);
 	}
@@ -2862,63 +3003,68 @@ got_block:
 	case BUF_BLOCK_ZIP_PAGE:
 	case BUF_BLOCK_ZIP_DIRTY:
 		bpage = &block->page;
-		/* Protect bpage->buf_fix_count. */
-		mutex_enter(&buf_pool->zip_mutex);
 
 		if (bpage->buf_fix_count
 		    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
 			/* This condition often occurs when the buffer
 			is not buffer-fixed, but I/O-fixed by
 			buf_page_init_for_read(). */
-			mutex_exit(&buf_pool->zip_mutex);
+			mutex_exit(block_mutex);
 wait_until_unfixed:
 			/* The block is buffer-fixed or I/O-fixed.
 			Try again later. */
-			buf_pool_mutex_exit(buf_pool);
 			os_thread_sleep(WAIT_FOR_READ);
   
 			goto loop;
 		}
 
 		/* Allocate an uncompressed page. */
-		buf_pool_mutex_exit(buf_pool);
-		mutex_exit(&buf_pool->zip_mutex);
-
+		mutex_exit(block_mutex);
 		block = buf_LRU_get_free_block(buf_pool, 0);
 		ut_a(block);
 
 		buf_pool_mutex_enter(buf_pool);
-		mutex_enter(&block->mutex);
 
-		{
-			buf_page_t*	hash_bpage;
+		/* As we have released the page_hash mutex and the
+		block_mutex to allocate an uncompressed page it is
+		possible that page_hash might have changed. We do
+		another lookup here while holding the buf_pool mutex
+		to verify that bpage is indeed still a part of
+		page_hash. */
+		mutex_enter(hash_mutex);
+		hash_bpage = buf_page_hash_get_low(buf_pool, space,
+						   offset, fold);
 
-			hash_bpage = buf_page_hash_get_low(
-				buf_pool, space, offset, fold);
+		mutex_enter(&block->mutex);
+		if (UNIV_UNLIKELY(bpage != hash_bpage)) {
+			/* The buf_pool->page_hash was modified
+			while buf_pool_mutex was released.
+			Free the block that was allocated. */
 
-			if (UNIV_UNLIKELY(bpage != hash_bpage)) {
-				/* The buf_pool->page_hash was modified
-				while buf_pool->mutex was released.
-				Free the block that was allocated. */
+			buf_LRU_block_free_non_file_page(block);
+			buf_pool_mutex_exit(buf_pool);
+			mutex_exit(&block->mutex);
 
-				buf_LRU_block_free_non_file_page(block);
-				mutex_exit(&block->mutex);
+			block = (buf_block_t*) hash_bpage;
 
-				block = (buf_block_t*) hash_bpage;
-				goto loop2;
-			}
+			/* Note that we are still holding the
+			hash_mutex which is fine as this is what
+			we expect when we move to loop2 above. */
+			goto loop2;
 		}
 
 		if (UNIV_UNLIKELY
 		    (bpage->buf_fix_count
 		     || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
 
+			mutex_exit(hash_mutex);
 			/* The block was buffer-fixed or I/O-fixed
 			while buf_pool->mutex was not held by this thread.
 			Free the block that was allocated and try again.
 			This should be extremely unlikely. */
 
 			buf_LRU_block_free_non_file_page(block);
+			buf_pool_mutex_exit(buf_pool);
 			mutex_exit(&block->mutex);
 
 			goto wait_until_unfixed;
@@ -2961,6 +3107,7 @@ wait_until_unfixed:
 
 		UNIV_MEM_INVALID(bpage, sizeof *bpage);
 
+		mutex_exit(hash_mutex);
 		mutex_exit(&block->mutex);
 		mutex_exit(&buf_pool->zip_mutex);
 		buf_pool->n_pend_unzip++;
@@ -2984,8 +3131,8 @@ wait_until_unfixed:
 		mutex_enter(&block->mutex);
 		block->page.buf_fix_count--;
 		buf_block_set_io_fix(block, BUF_IO_NONE);
-		mutex_exit(&block->mutex);
 		buf_pool->n_pend_unzip--;
+		buf_pool_mutex_exit(buf_pool);
 		rw_lock_x_unlock(&block->lock);
 
 		break;
@@ -2999,9 +3146,9 @@ wait_until_unfixed:
 		break;
 	}
 
+	ut_ad(!mutex_own(hash_mutex));
 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
-	mutex_enter(&block->mutex);
 #if UNIV_WORD_SIZE == 4
 	/* On 32-bit systems, there is no padding in buf_page_t.  On
 	other systems, Valgrind could complain about uninitialized pad
@@ -3017,8 +3164,6 @@ wait_until_unfixed:
 
 	access_time = buf_page_is_accessed(&block->page);
 
-	buf_pool_mutex_exit(buf_pool);
-
 	buf_page_set_accessed_make_young(&block->page, access_time);
 
 #ifdef UNIV_DEBUG_FILE_ACCESSES
@@ -3083,6 +3228,7 @@ wait_until_unfixed:
 	ut_a(ibuf_count_get(buf_block_get_space(block),
 			    buf_block_get_page_no(block)) == 0);
 #endif
+	ut_ad(!mutex_own(hash_mutex));
 	return(block);
 }
 
@@ -3325,22 +3471,24 @@ buf_page_try_get_func(
 	ibool		success;
 	ulint		fix_type;
 	buf_pool_t*	buf_pool = buf_pool_get(space_id, page_no);
+	mutex_t*	hash_mutex;
 
 	ut_ad(mtr);
 	ut_ad(mtr->state == MTR_ACTIVE);
 
-	buf_pool_mutex_enter(buf_pool);
-	block = buf_block_hash_get(buf_pool, space_id, page_no);
+	block = buf_block_hash_get(buf_pool, space_id, page_no, &hash_mutex);
 
 	if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
-		buf_pool_mutex_exit(buf_pool);
+		if (block) {
+			mutex_exit(hash_mutex);
+		}
 		return(NULL);
 	}
 
 	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
 
 	mutex_enter(&block->mutex);
-	buf_pool_mutex_exit(buf_pool);
+	mutex_exit(hash_mutex);
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
@@ -3430,6 +3578,7 @@ buf_page_init(
 	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
 
 	ut_ad(buf_pool_mutex_own(buf_pool));
+	ut_ad(mutex_own(buf_page_hash_mutex_get(buf_pool, fold)));
 	ut_ad(mutex_own(&(block->mutex)));
 	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
 
@@ -3516,6 +3665,7 @@ buf_page_init_for_read(
 	buf_block_t*	block;
 	buf_page_t*	bpage	= NULL;
 	buf_page_t*	watch_page;
+	mutex_t*	hash_mutex;
 	mtr_t		mtr;
 	ulint		fold;
 	ibool		lru	= FALSE;
@@ -3555,14 +3705,17 @@ buf_page_init_for_read(
 	}
 
 	fold = buf_page_address_fold(space, offset);
+	hash_mutex = buf_page_hash_mutex_get(buf_pool, fold);
 
 	buf_pool_mutex_enter(buf_pool);
+	mutex_enter(hash_mutex);
 
 	watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
 	if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
 		/* The page is already in the buffer pool. */
 		watch_page = NULL;
 err_exit:
+		mutex_exit(hash_mutex);
 		if (block) {
 			mutex_enter(&block->mutex);
 			buf_LRU_block_free_non_file_page(block);
@@ -3584,11 +3737,13 @@ err_exit:
 
 	if (block) {
 		bpage = &block->page;
+
 		mutex_enter(&block->mutex);
 
 		ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
 
 		buf_page_init(space, offset, fold, block);
+		mutex_exit(hash_mutex);
 
 		/* The block must be put to the LRU list, to the old blocks */
 		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
@@ -3632,6 +3787,7 @@ err_exit:
 
 		mutex_exit(&block->mutex);
 	} else {
+		mutex_exit(hash_mutex);
 		/* Defer buf_buddy_alloc() until after the block has
 		been found not to exist.  The buf_buddy_alloc() and
 		buf_buddy_free() calls may be expensive because of
@@ -3647,6 +3803,8 @@ err_exit:
 		/* Initialize the buf_pool pointer. */
 		bpage->buf_pool = buf_pool;
 
+		mutex_enter(hash_mutex);
+
 		/* If buf_buddy_alloc() allocated storage from the LRU list,
 		it released and reacquired buf_pool->mutex.  Thus, we must
 		check the page_hash again, as it may have been modified. */
@@ -3655,11 +3813,12 @@ err_exit:
 			watch_page = buf_page_hash_get_low(
 				buf_pool, space, offset, fold);
 
-			if (watch_page
+			if (UNIV_UNLIKELY(watch_page
 			    && !buf_pool_watch_is_sentinel(buf_pool,
-				   			   watch_page)) {
+				   			   watch_page))) {
 
 				/* The block was added by some other thread. */
+				mutex_exit(hash_mutex);
 				watch_page = NULL;
 				buf_buddy_free(buf_pool, bpage, sizeof *bpage);
 				buf_buddy_free(buf_pool, data, zip_size);
@@ -3695,6 +3854,7 @@ err_exit:
 		ut_d(bpage->in_page_hash = TRUE);
 
 		if (UNIV_LIKELY_NULL(watch_page)) {
+
 			/* Preserve the reference count. */
 			ulint	buf_fix_count = watch_page->buf_fix_count;
 			ut_a(buf_fix_count > 0);
@@ -3706,6 +3866,8 @@ err_exit:
 		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
 			    bpage);
 
+		mutex_exit(hash_mutex);
+
 		/* The block must be put to the LRU list, to the old blocks */
 		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
 		buf_LRU_insert_zip_clean(bpage);
@@ -3724,6 +3886,7 @@ func_exit:
 		mtr_commit(&mtr);
 	}
 
+	ut_ad(!mutex_own(hash_mutex));
 	ut_ad(!bpage || buf_page_in_file(bpage));
 	return(bpage);
 }
@@ -3750,6 +3913,7 @@ buf_page_create(
 	buf_block_t*	free_block	= NULL;
 	ulint		time_ms		= ut_time_ms();
 	buf_pool_t*	buf_pool 	= buf_pool_get(space, offset);
+	mutex_t*	hash_mutex;
 
 	ut_ad(mtr);
 	ut_ad(mtr->state == MTR_ACTIVE);
@@ -3758,8 +3922,10 @@ buf_page_create(
 	free_block = buf_LRU_get_free_block(buf_pool, 0);
 
 	fold = buf_page_address_fold(space, offset);
+	hash_mutex = buf_page_hash_mutex_get(buf_pool, fold);
 
 	buf_pool_mutex_enter(buf_pool);
+	mutex_enter(hash_mutex);
 
 	block = (buf_block_t*) buf_page_hash_get_low(
 		buf_pool, space, offset, fold);
@@ -3776,6 +3942,7 @@ buf_page_create(
 
 		/* Page can be found in buf_pool */
 		buf_pool_mutex_exit(buf_pool);
+		mutex_exit(hash_mutex);
 
 		buf_block_free(free_block);
 
@@ -3798,6 +3965,8 @@ buf_page_create(
 
 	buf_page_init(space, offset, fold, block);
 
+	mutex_exit(hash_mutex);
+
 	/* The block must be put to the LRU list */
 	buf_LRU_add_block(&block->page, FALSE);
 
@@ -4223,10 +4392,14 @@ buf_pool_validate_instance(
 	ulint		n_flush		= 0;
 	ulint		n_free		= 0;
 	ulint		n_zip		= 0;
-
+	ulint		fold		= 0;
+	ulint		space		= 0;
+	ulint		offset		= 0;
+  
 	ut_ad(buf_pool);
 
 	buf_pool_mutex_enter(buf_pool);
+	hash_mutex_enter_all(buf_pool->page_hash);
 
 	chunk = buf_pool->chunks;
 
@@ -4251,11 +4424,13 @@ buf_pool_validate_instance(
 				break;
 
 			case BUF_BLOCK_FILE_PAGE:
-				ut_a(buf_page_hash_get(buf_pool,
-						       buf_block_get_space(
-							       block),
-						       buf_block_get_page_no(
-							       block))
+				space = buf_block_get_space(block);
+				offset = buf_block_get_page_no(block);
+				fold = buf_page_address_fold(space, offset);
+				ut_a(buf_page_hash_get_low(buf_pool,
+							   space,
+							   offset,
+							   fold)
 				     == &block->page);
 
 #ifdef UNIV_IBUF_COUNT_DEBUG
@@ -4342,8 +4517,9 @@ buf_pool_validate_instance(
 		we have acquired buf_pool->zip_mutex above which acts
 		as the 'block->mutex' for these bpages. */
 		ut_a(!b->oldest_modification);
-		ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
-
+		fold = buf_page_address_fold(b->space, b->offset);
+		ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset,
+					   fold) == b);
 		n_lru++;
 		n_zip++;
 	}
@@ -4394,11 +4570,14 @@ buf_pool_validate_instance(
 			ut_error;
 			break;
 		}
-		ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
+		fold = buf_page_address_fold(b->space, b->offset);
+		ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset,
+					   fold) == b);
 	}
 
 	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
 
+	hash_mutex_exit_all(buf_pool->page_hash);
 	buf_flush_list_mutex_exit(buf_pool);
 
 	mutex_exit(&buf_pool->zip_mutex);

=== modified file 'storage/innobase/buf/buf0flu.c'
--- a/storage/innobase/buf/buf0flu.c	revid:marko.makela@stripped
+++ b/storage/innobase/buf/buf0flu.c	revid:inaam.rana@stripped
@@ -1300,7 +1300,7 @@ buf_flush_try_neighbors(
 		buf_pool_mutex_enter(buf_pool);
 
 		/* We only want to flush pages from this buffer pool. */
-		bpage = buf_page_hash_get(buf_pool, space, i);
+		bpage = buf_page_hash_get(buf_pool, space, i, NULL);
 
 		if (!bpage) {
 

=== modified file 'storage/innobase/buf/buf0lru.c'
--- a/storage/innobase/buf/buf0lru.c	revid:marko.makela@stripped
+++ b/storage/innobase/buf/buf0lru.c	revid:inaam.rana@stripped
@@ -121,7 +121,11 @@ UNIV_INTERN uint	buf_LRU_old_threshold_m
 /******************************************************************//**
 Takes a block out of the LRU list and page hash table.
 If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
-the object will be freed and buf_pool->zip_mutex will be released.
+the object will be freed.
+
+The caller must hold buf_pool_mutex, the buf_page_get_mutex() mutex
+and the appropriate hash_mutex. This function will release the
+buf_page_get_mutex() and the hash_mutex.
 
 If a compressed page or a compressed-only block descriptor is freed,
 other compressed pages or compressed-only block descriptors may be
@@ -369,7 +373,14 @@ scan_again:
 
 			all_freed = FALSE;
 		} else {
+			ulint fold = buf_page_address_fold(bpage->space,
+							   bpage->offset);
+			mutex_t* hash_mutex = buf_page_hash_mutex_get(buf_pool,
+								      fold);
+
 			mutex_t* block_mutex = buf_page_get_mutex(bpage);
+
+			mutex_enter(hash_mutex);
 			mutex_enter(block_mutex);
 
 			if (bpage->buf_fix_count > 0) {
@@ -380,7 +391,8 @@ scan_again:
 				the modifications to the file */
 
 				all_freed = FALSE;
-
+				mutex_exit(hash_mutex);
+				mutex_exit(block_mutex);
 				goto next_page;
 			}
 
@@ -434,6 +446,7 @@ scan_again:
 				zip_size = buf_page_get_zip_size(bpage);
 				page_no = buf_page_get_page_no(bpage);
 
+				mutex_exit(hash_mutex);
 				mutex_exit(block_mutex);
 
 				/* Note that the following call will acquire
@@ -457,8 +470,7 @@ scan_again:
 							       bpage);
 			} else {
 				/* The block_mutex should have been
-				released by buf_LRU_block_remove_hashed_page()
-				when it returns BUF_BLOCK_ZIP_FREE. */
+				released by buf_LRU_block_remove_hashed_page() */
 				ut_ad(block_mutex == &buf_pool->zip_mutex);
 				ut_ad(!mutex_own(block_mutex));
 
@@ -476,14 +488,14 @@ scan_again:
 					mutex_exit(block_mutex);
 				}
 
-				goto next_page_no_mutex;
 			}
-next_page:
-			mutex_exit(block_mutex);
-		}
 
-next_page_no_mutex:
+			ut_ad(!mutex_own(hash_mutex));
+			ut_ad(!mutex_own(block_mutex));
+		}
+next_page:
 		bpage = prev_bpage;
+
 	}
 
 	buf_pool_mutex_exit(buf_pool);
@@ -602,10 +614,7 @@ buf_LRU_free_from_unzip_LRU_list(
 		ut_ad(block->in_unzip_LRU_list);
 		ut_ad(block->page.in_LRU_list);
 
-		mutex_enter(&block->mutex);
 		freed = buf_LRU_free_block(&block->page, FALSE, NULL);
-		mutex_exit(&block->mutex);
-
 		switch (freed) {
 		case BUF_LRU_FREED:
 			return(TRUE);
@@ -658,17 +667,12 @@ buf_LRU_free_from_common_LRU_list(
 
 		enum buf_lru_free_block_status	freed;
 		unsigned			accessed;
-		mutex_t*			block_mutex
-			= buf_page_get_mutex(bpage);
 
 		ut_ad(buf_page_in_file(bpage));
 		ut_ad(bpage->in_LRU_list);
 
-		mutex_enter(block_mutex);
 		accessed = buf_page_is_accessed(bpage);
 		freed = buf_LRU_free_block(bpage, TRUE, NULL);
-		mutex_exit(block_mutex);
-
 		switch (freed) {
 		case BUF_LRU_FREED:
 			/* Keep track of pages that are evicted without
@@ -1446,9 +1450,8 @@ NOTE: If this function returns BUF_LRU_F
 release buf_pool_mutex.  Furthermore, the page frame will no longer be
 accessible via bpage.
 
-The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
-release these two mutexes after the call.  No other
-buf_page_get_mutex() may be held when calling this function.
+The caller must hold buf_pool_mutex and must not hold any
+buf_page_get_mutex() when calling this function.
 @return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
 BUF_LRU_NOT_FREED otherwise. */
 UNIV_INTERN
@@ -1465,13 +1468,20 @@ buf_LRU_free_block(
 {
 	buf_page_t*	b = NULL;
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+	enum buf_lru_free_block_status	ret;
+	const ulint	fold = buf_page_address_fold(bpage->space,
+						     bpage->offset);
+	mutex_t*	hash_mutex = buf_page_hash_mutex_get(buf_pool, fold);
+
 	mutex_t*	block_mutex = buf_page_get_mutex(bpage);
 
 	ut_ad(buf_pool_mutex_own(buf_pool));
-	ut_ad(mutex_own(block_mutex));
 	ut_ad(buf_page_in_file(bpage));
 	ut_ad(bpage->in_LRU_list);
-	ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
+
+	mutex_enter(hash_mutex);
+	mutex_enter(block_mutex);
+
 #if UNIV_WORD_SIZE == 4
 	/* On 32-bit systems, there is no padding in buf_page_t.  On
 	other systems, Valgrind could complain about uninitialized pad
@@ -1482,7 +1492,7 @@ buf_LRU_free_block(
 	if (!buf_page_can_relocate(bpage)) {
 
 		/* Do not free buffer-fixed or I/O-fixed blocks. */
-		return(BUF_LRU_NOT_FREED);
+		goto no_free_exit;
 	}
 
 #ifdef UNIV_IBUF_COUNT_DEBUG
@@ -1494,33 +1504,56 @@ buf_LRU_free_block(
 		/* Do not completely free dirty blocks. */
 
 		if (bpage->oldest_modification) {
-			return(BUF_LRU_NOT_FREED);
+			goto no_free_exit;
 		}
-	} else if (bpage->oldest_modification) {
-		/* Do not completely free dirty blocks. */
+	} else if ((bpage->oldest_modification)
+		   && (buf_page_get_state(bpage)
+		       != BUF_BLOCK_FILE_PAGE)) {
 
-		if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
-			ut_ad(buf_page_get_state(bpage)
-			      == BUF_BLOCK_ZIP_DIRTY);
-			return(BUF_LRU_NOT_FREED);
-		}
+		ut_ad(buf_page_get_state(bpage)
+		      == BUF_BLOCK_ZIP_DIRTY);
+
+		goto no_free_exit;
 
-		goto alloc;
 	} else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
+
+		mutex_exit(block_mutex);
 		/* Allocate the control block for the compressed page.
 		If it cannot be allocated (without freeing a block
 		from the LRU list), refuse to free bpage. */
-alloc:
 		buf_pool_mutex_exit_forbid(buf_pool);
 		b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
 		buf_pool_mutex_exit_allow(buf_pool);
 
+		mutex_enter(block_mutex);
+
+		/* The block may get buffer fixed while we released
+		the block mutex. In that case we free the newly
+		allocated descriptor and return */
+		if (!buf_page_can_relocate(bpage)) {
+			if (b) {
+				buf_buddy_free(buf_pool, b, sizeof(*b));
+			}
+no_free_exit:
+			ret = BUF_LRU_NOT_FREED;
+func_exit:
+			mutex_exit(hash_mutex);
+			mutex_exit(block_mutex);
+			return(ret);
+		}
+
 		if (UNIV_UNLIKELY(!b)) {
-			return(BUF_LRU_CANNOT_RELOCATE);
+			ret = BUF_LRU_CANNOT_RELOCATE;
+			goto func_exit;
 		}
 
 		memcpy(b, bpage, sizeof *b);
 	}
+	ut_ad(buf_pool_mutex_own(buf_pool));
+	ut_ad(buf_page_in_file(bpage));
+	ut_ad(bpage->in_LRU_list);
+	ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
+	UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
 
 #ifdef UNIV_DEBUG
 	if (buf_debug_prints) {
@@ -1530,21 +1563,29 @@ alloc:
 	}
 #endif /* UNIV_DEBUG */
 
+	ut_ad(mutex_own(hash_mutex));
+	ut_ad(buf_page_can_relocate(bpage));
+
 	if (buf_LRU_block_remove_hashed_page(bpage, zip)
 	    != BUF_BLOCK_ZIP_FREE) {
-		ut_a(bpage->buf_fix_count == 0);
+
+		/* We have just freed a BUF_BLOCK_FILE_PAGE. If b != NULL
+		then it was a compressed page with an uncompressed frame and
+		we are interested in freeing only the uncompressed frame.
+		Therefore we have to reinsert the compressed page descriptor
+		into the LRU and page_hash (and possibly flush_list).
+		If b == NULL then it was a regular page that has been freed. */
 
 		if (b) {
-			buf_page_t*	hash_b;
 			buf_page_t*	prev_b	= UT_LIST_GET_PREV(LRU, b);
 
-			const ulint	fold = buf_page_address_fold(
-				bpage->space, bpage->offset);
-
-			hash_b	= buf_page_hash_get_low(
-				buf_pool, bpage->space, bpage->offset, fold);
+			mutex_enter(hash_mutex);
+			mutex_enter(block_mutex);
 
-			ut_a(!hash_b);
+			ut_a(!buf_page_hash_get_low(buf_pool,
+						    bpage->space,
+						    bpage->offset,
+						    fold));
 
 			b->state = b->oldest_modification
 				? BUF_BLOCK_ZIP_DIRTY
@@ -1636,6 +1677,9 @@ alloc:
 			buf_pool->mutex and block_mutex. */
 			b->buf_fix_count++;
 			b->io_fix = BUF_IO_READ;
+
+			mutex_exit(hash_mutex);
+			mutex_exit(block_mutex);
 		}
 
 		if (buf_pool_mutex_released) {
@@ -1643,7 +1687,6 @@ alloc:
 		}
 
 		buf_pool_mutex_exit(buf_pool);
-		mutex_exit(block_mutex);
 
 		/* Remove possible adaptive hash index on the page.
 		The page was declared uninitialized by
@@ -1675,7 +1718,6 @@ alloc:
 		}
 
 		buf_pool_mutex_enter(buf_pool);
-		mutex_enter(block_mutex);
 
 		if (b) {
 			mutex_enter(&buf_pool->zip_mutex);
@@ -1685,12 +1727,6 @@ alloc:
 		}
 
 		buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
-	} else {
-		/* The block_mutex should have been released by
-		buf_LRU_block_remove_hashed_page() when it returns
-		BUF_BLOCK_ZIP_FREE. */
-		ut_ad(block_mutex == &buf_pool->zip_mutex);
-		mutex_enter(block_mutex);
 	}
 
 	return(BUF_LRU_FREED);
@@ -1761,7 +1797,11 @@ buf_LRU_block_free_non_file_page(
 /******************************************************************//**
 Takes a block out of the LRU list and page hash table.
 If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
-the object will be freed and buf_pool->zip_mutex will be released.
+the object will be freed.
+
+The caller must hold buf_pool->mutex, the buf_page_get_mutex() mutex
+and the appropriate hash_mutex. This function will release the
+buf_page_get_mutex() and the hash_mutex.
 
 If a compressed page or a compressed-only block descriptor is freed,
 other compressed pages or compressed-only block descriptors may be
@@ -1781,10 +1821,15 @@ buf_LRU_block_remove_hashed_page(
 	ulint			fold;
 	const buf_page_t*	hashed_bpage;
 	buf_pool_t*		buf_pool = buf_pool_from_bpage(bpage);
+	mutex_t*		hash_mutex;
 
-	ut_ad(bpage);
-	ut_ad(buf_pool_mutex_own(buf_pool));
-	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+  	ut_ad(bpage);
+  	ut_ad(buf_pool_mutex_own(buf_pool));
+  	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+	fold = buf_page_address_fold(bpage->space, bpage->offset);
+	hash_mutex = buf_page_hash_mutex_get(buf_pool, fold);
+	ut_ad(mutex_own(hash_mutex));
 
 	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
 	ut_a(bpage->buf_fix_count == 0);
@@ -1869,9 +1914,8 @@ buf_LRU_block_remove_hashed_page(
 		break;
 	}
 
-	fold = buf_page_address_fold(bpage->space, bpage->offset);
-	hashed_bpage = buf_page_hash_get_low(
-		buf_pool, bpage->space, bpage->offset, fold);
+	hashed_bpage = buf_page_hash_get_low(buf_pool, bpage->space,
+					     bpage->offset, fold);
 
 	if (UNIV_UNLIKELY(bpage != hashed_bpage)) {
 		fprintf(stderr,
@@ -1891,6 +1935,7 @@ buf_LRU_block_remove_hashed_page(
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 		mutex_exit(buf_page_get_mutex(bpage));
+		mutex_exit(hash_mutex);
 		buf_pool_mutex_exit(buf_pool);
 		buf_print();
 		buf_LRU_print();
@@ -1915,6 +1960,7 @@ buf_LRU_block_remove_hashed_page(
 		UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
 
 		mutex_exit(&buf_pool->zip_mutex);
+		mutex_exit(hash_mutex);
 		buf_pool_mutex_exit_forbid(buf_pool);
 
 		buf_buddy_free(
@@ -1936,6 +1982,28 @@ buf_LRU_block_remove_hashed_page(
 				 UNIV_PAGE_SIZE);
 		buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
 
+		/* Question: If we release bpage and hash mutex here
+		then what protects us against:
+		1) Some other thread buffer fixing this page
+		2) Some other thread trying to read this page and
+		not finding it in buffer pool attempting to read it
+		from the disk.
+		Answer:
+		1) Cannot happen because the page is no longer in the
+		page_hash. The only exception would be tablespace
+		invalidation, which buffer-fixes the prev_page in LRU
+		to avoid relocation during the scan. But that is not
+		possible because we are holding buf_pool mutex.
+
+		2) Not possible because in buf_page_init_for_read()
+		we do a look up of page_hash while holding buf_pool
+		mutex and since we are holding buf_pool mutex here
+		and by the time we'll release it in the caller we'd
+		have inserted the compressed only descriptor in the
+		page_hash. */
+		mutex_exit(hash_mutex);
+		mutex_exit(&((buf_block_t*) bpage)->mutex);
+
 		if (zip && bpage->zip.data) {
 			/* Free the compressed page. */
 			void*	data = bpage->zip.data;
@@ -1944,7 +2012,6 @@ buf_LRU_block_remove_hashed_page(
 			ut_ad(!bpage->in_free_list);
 			ut_ad(!bpage->in_flush_list);
 			ut_ad(!bpage->in_LRU_list);
-			mutex_exit(&((buf_block_t*) bpage)->mutex);
 			buf_pool_mutex_exit_forbid(buf_pool);
 
 			buf_buddy_free(
@@ -1952,7 +2019,6 @@ buf_LRU_block_remove_hashed_page(
 				page_zip_get_size(&bpage->zip));
 
 			buf_pool_mutex_exit_allow(buf_pool);
-			mutex_enter(&((buf_block_t*) bpage)->mutex);
 			page_zip_set_size(&bpage->zip, 0);
 		}
 
@@ -1984,11 +2050,12 @@ buf_LRU_block_free_hashed_page(
 	buf_pool_t*	buf_pool = buf_pool_from_block(block);
 	ut_ad(buf_pool_mutex_own(buf_pool));
 #endif
-	ut_ad(mutex_own(&block->mutex));
 
+	mutex_enter(&block->mutex);
 	buf_block_set_state(block, BUF_BLOCK_MEMORY);
 
 	buf_LRU_block_free_non_file_page(block);
+	mutex_exit(&block->mutex);
 }
 
 /**********************************************************************//**

=== modified file 'storage/innobase/buf/buf0rea.c'
--- a/storage/innobase/buf/buf0rea.c	revid:marko.makela@stripped
+++ b/storage/innobase/buf/buf0rea.c	revid:inaam.rana@stripped
@@ -322,7 +322,7 @@ buf_read_ahead_linear(
 	fail_count = 0;
 
 	for (i = low; i < high; i++) {
-		bpage = buf_page_hash_get(buf_pool, space, i);
+		bpage = buf_page_hash_get(buf_pool, space, i, NULL);
 
 		if (bpage == NULL || !buf_page_is_accessed(bpage)) {
 			/* Not accessed */
@@ -360,7 +360,7 @@ buf_read_ahead_linear(
 	/* If we got this far, we know that enough pages in the area have
 	been accessed in the right order: linear read-ahead can be sensible */
 
-	bpage = buf_page_hash_get(buf_pool, space, offset);
+	bpage = buf_page_hash_get(buf_pool, space, offset, NULL);
 
 	if (bpage == NULL) {
 		buf_pool_mutex_exit(buf_pool);

=== modified file 'storage/innobase/ha/ha0ha.c'
--- a/storage/innobase/ha/ha0ha.c	revid:marko.makela@stripped
+++ b/storage/innobase/ha/ha0ha.c	revid:inaam.rana@stripped
@@ -60,11 +60,6 @@ ha_create_func(
 	ut_ad(ut_is_2pow(n_mutexes));
 	table = hash_create(n);
 
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
-	table->adaptive = TRUE;
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
 	/* Creating MEM_HEAP_BTR_SEARCH type heaps can potentially fail,
 	but in practise it never should in this case, hence the asserts. */
 
@@ -104,7 +99,8 @@ ha_clear(
 	ut_ad(table);
 	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 #ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
+	ut_ad(!table->adaptive
+	       || rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
 #endif /* UNIV_SYNC_DEBUG */
 
 #ifndef UNIV_HOTBACKUP

=== modified file 'storage/innobase/ha/hash0hash.c'
--- a/storage/innobase/ha/hash0hash.c	revid:marko.makela@stripped
+++ b/storage/innobase/ha/hash0hash.c	revid:inaam.rana@stripped
@@ -91,6 +91,28 @@ hash_mutex_exit_all(
 		mutex_exit(table->mutexes + i);
 	}
 }
+
+/************************************************************//**
+Releases every mutex of the hash table except keep_mutex, which stays held. */
+UNIV_INTERN
+void
+hash_mutex_exit_all_but(
+/*====================*/
+	hash_table_t*	table,		/*!< in: hash table */
+	mutex_t*	keep_mutex)	/*!< in: mutex to keep held */
+{
+	ulint	i;
+
+	for (i = 0; i < table->n_mutexes; i++) {
+
+		mutex_t* mutex = table->mutexes + i;
+		if (UNIV_LIKELY(keep_mutex != mutex)) {
+			mutex_exit(mutex);
+		}
+	}
+
+	ut_ad(mutex_own(keep_mutex));
+}
 #endif /* !UNIV_HOTBACKUP */
 
 /*************************************************************//**
@@ -142,9 +164,6 @@ hash_table_free(
 {
 	ut_ad(table);
 	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-#ifndef UNIV_HOTBACKUP
-	ut_a(table->mutexes == NULL);
-#endif /* !UNIV_HOTBACKUP */
 
 	ut_free(table->array);
 	mem_free(table);

=== modified file 'storage/innobase/ibuf/ibuf0ibuf.c'
--- a/storage/innobase/ibuf/ibuf0ibuf.c	revid:marko.makela@stripped
+++ b/storage/innobase/ibuf/ibuf0ibuf.c	revid:inaam.rana@stripped
@@ -3683,10 +3683,11 @@ check_watch:
 		buf_page_t*	bpage;
 		ulint		fold = buf_page_address_fold(space, page_no);
 		buf_pool_t*	buf_pool = buf_pool_get(space, page_no);
+		mutex_t*	hash_mutex = buf_page_hash_mutex_get(buf_pool, fold);
 
-		buf_pool_mutex_enter(buf_pool);
+		mutex_enter(hash_mutex);
 		bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
-		buf_pool_mutex_exit(buf_pool);
+		mutex_exit(hash_mutex);
 
 		if (UNIV_LIKELY_NULL(bpage)) {
 			/* A buffer pool watch has been set or the

=== modified file 'storage/innobase/include/buf0buf.h'
--- a/storage/innobase/include/buf0buf.h	revid:marko.makela@stripped
+++ b/storage/innobase/include/buf0buf.h	revid:inaam.rana@stripped
@@ -1073,35 +1073,55 @@ UNIV_INLINE
 buf_page_t*
 buf_page_hash_get_low(
 /*==================*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	ulint		space,		/*!< in: space id */
-	ulint		offset,		/*!< in: offset of the page
-					within space */
-	ulint		fold);		/*!< in: buf_page_address_fold(
-					space, offset) */
+	buf_pool_t*	buf_pool,/*!< buffer pool instance */
+	ulint		space,	/*!< in: space id */
+	ulint		offset,	/*!< in: offset of the page within space */
+	ulint		fold);	/*!< in: buf_page_address_fold(space, offset) */
 /******************************************************************//**
 Returns the control block of a file page, NULL if not found.
-@return	block, NULL if not found or not a real control block */
+If the block is found and mutex is not NULL then the appropriate
+page_hash mutex is acquired. It is up to the caller to release the
+mutex. If the block is found and the mutex is NULL then the page_hash
+mutex is released by this function.
+@return	block, NULL if not found or if it is a watch sentinel */
 UNIV_INLINE
 buf_page_t*
 buf_page_hash_get(
 /*==============*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+					/*!< out: pointer to the bpage,
+					or NULL; if NULL, hash_mutex
+					is also NULL. */
+	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
 	ulint		space,		/*!< in: space id */
-	ulint		offset);	/*!< in: offset of the page
-					within space */
+	ulint		offset,		/*!< in: page number */
+	mutex_t**	mutex);		/*!< in/out: mutex of the page
+					hash acquired if bpage is
+					found. NULL otherwise. If NULL
+					is passed then the hash_mutex
+					is released by this function */
 /******************************************************************//**
-Returns the control block of a file page, NULL if not found
-or an uncompressed page frame does not exist.
+Returns the control block of a file page, NULL if not found or an
+uncompressed page frame does not exist.
+If the block is found and mutex is not NULL then the appropriate
+page_hash mutex is acquired. It is up to the caller to release the
+mutex. If the block is found and the mutex is NULL then the page_hash
+mutex is released by this function.
 @return	block, NULL if not found */
 UNIV_INLINE
 buf_block_t*
 buf_block_hash_get(
 /*===============*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+					/*!< out: pointer to the bpage,
+					or NULL; if NULL, hash_mutex
+					is also NULL. */
+	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
 	ulint		space,		/*!< in: space id */
-	ulint		offset);	/*!< in: offset of the page
-					within space */
+	ulint		offset,		/*!< in: page number */
+	mutex_t**	mutex);		/*!< in/out: mutex of the page
+					hash acquired if bpage is
+					found. NULL otherwise. If NULL
+					is passed then the hash_mutex
+					is released by this function */
 /*********************************************************************//**
 Gets the current length of the free list of buffer blocks.
 @return	length of the free list */
@@ -1194,7 +1214,16 @@ struct buf_page_struct{
 					BUF_BLOCK_READY_FOR_USE to
 					BUF_BLOCK_MEMORY need not be
 					protected by buf_page_get_mutex().
-					@see enum buf_page_state */
+					@see enum buf_page_state.
+					State changes that are relevant
+					to page_hash are additionally
+					protected by the appropriate
+					page_hash mutex i.e.: if a page
+					is in page_hash or is being
+					added to/removed from page_hash
+					then the corresponding changes
+					must also be protected by
+					page_hash mutex. */
 #ifndef UNIV_HOTBACKUP
 	unsigned	flush_type:2;	/*!< if this block is currently being
 					flushed to disk, this tells the
@@ -1535,7 +1564,14 @@ struct buf_pool_struct{
 	hash_table_t*	page_hash;	/*!< hash table of buf_page_t or
 					buf_block_t file pages,
 					buf_page_in_file() == TRUE,
-					indexed by (space_id, offset) */
+					indexed by (space_id, offset).
+					page_hash is protected by an
+					array of mutexes.
+					Changes in page_hash are protected
+					by buf_pool_mutex and the relevant
+					page_hash mutex. Lookups can happen
+					while holding the buf_pool_mutex or
+					the relevant page_hash mutex. */
 	hash_table_t*	zip_hash;	/*!< hash table of buf_block_t blocks
 					whose frames are allocated to the
 					zip buddy system,
@@ -1693,6 +1729,20 @@ Use these instead of accessing buf_pool_
 
 
 
+/** Get the page_hash mutex for fold f in buffer pool b. */
+#define buf_page_hash_mutex_get(b, f)		\
+	hash_get_mutex((b)->page_hash, (f))
+
+/** Test if the page_hash mutex covering page p of buffer pool b is owned. */
+#define buf_page_hash_mutex_own(b, p)			\
+	mutex_own(buf_page_hash_mutex_get(b,		\
+		  buf_page_address_fold((p)->space,	\
+					(p)->offset)))
+#define buf_block_hash_mutex_own(b, p)			\
+	mutex_own(buf_page_hash_mutex_get(b,		\
+		  buf_page_address_fold((p)->page.space,	\
+					(p)->page.offset)))
+
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /** Forbid the release of the buffer pool mutex. */
 # define buf_pool_mutex_exit_forbid(b) do {	\

=== modified file 'storage/innobase/include/buf0buf.ic'
--- a/storage/innobase/include/buf0buf.ic	revid:marko.makela@stripped
+++ b/storage/innobase/include/buf0buf.ic	revid:inaam.rana@stripped
@@ -952,18 +952,25 @@ UNIV_INLINE
 buf_page_t*
 buf_page_hash_get_low(
 /*==================*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	ulint		space,		/*!< in: space id */
-	ulint		offset,		/*!< in: offset of the page
-					within space */
-	ulint		fold)		/*!< in: buf_page_address_fold(
-					space, offset) */
+	buf_pool_t*	buf_pool,/*!< buffer pool instance */
+	ulint		space,	/*!< in: space id */
+	ulint		offset,	/*!< in: offset of the page within space */
+	ulint		fold)	/*!< in: buf_page_address_fold(space, offset) */
 {
 	buf_page_t*	bpage;
 
+#ifdef UNIV_DEBUG
+	ulint		hash_fold;
+	mutex_t*	hash_mutex;
+
 	ut_ad(buf_pool);
-	ut_ad(buf_pool_mutex_own(buf_pool));
-	ut_ad(fold == buf_page_address_fold(space, offset));
+
+	hash_fold = buf_page_address_fold(space, offset);
+	ut_ad(hash_fold == fold);
+
+	hash_mutex = hash_get_mutex(buf_pool->page_hash, fold);
+	ut_ad(mutex_own(hash_mutex));
+#endif /* UNIV_DEBUG */
 
 	/* Look for the page in the hash table */
 
@@ -988,46 +995,106 @@ buf_page_hash_get_low(
 
 /******************************************************************//**
 Returns the control block of a file page, NULL if not found.
-@return	block, NULL if not found or not a real control block */
+If the block is found and mutex is not NULL then the appropriate
+page_hash mutex is acquired. It is up to the caller to release the
+mutex. If the block is found and the mutex is NULL then the page_hash
+mutex is released by this function.
+@return	block, NULL if not found or if it is a watch sentinel */
 UNIV_INLINE
 buf_page_t*
 buf_page_hash_get(
 /*==============*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+					/*!< out: pointer to the bpage,
+					or NULL; if NULL, hash_mutex
+					is also NULL. */
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
 	ulint		space,		/*!< in: space id */
-	ulint		offset)		/*!< in: offset of the page
-					within space */
+	ulint		offset,		/*!< in: page number */
+	mutex_t**	mutex)		/*!< in/out: mutex of the page
+					hash acquired if bpage is
+					found. NULL otherwise. If NULL
+					is passed then the hash_mutex
+					is released by this function */
 {
 	buf_page_t*	bpage;
-	ulint		fold	= buf_page_address_fold(space, offset);
+	ulint		fold;
+	mutex_t*	hash_mutex;
+
+	if (mutex != NULL) {
+		*mutex = NULL;
+	}
 
-	bpage	= buf_page_hash_get_low(buf_pool, space, offset, fold);
+	fold = buf_page_address_fold(space, offset);
+	hash_mutex = hash_get_mutex(buf_pool->page_hash, fold);
 
-	if (bpage && buf_pool_watch_is_sentinel(buf_pool, bpage)) {
-		bpage = NULL;
+	mutex_enter(hash_mutex);
+	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+
+	if (!bpage || UNIV_UNLIKELY(buf_pool_watch_is_sentinel(buf_pool, bpage))) {
+		mutex_exit(hash_mutex);
+		return(NULL);
+	}
+
+	ut_ad(buf_page_in_file(bpage));
+	ut_ad(offset == bpage->offset);
+	ut_ad(space == bpage->space);
+
+	if (mutex == NULL) {
+		/* The caller wants us to release the page_hash mutex */
+		mutex_exit(hash_mutex);
+	} else {
+		/* To be released by the caller */
+		*mutex = hash_mutex;
 	}
 
 	return(bpage);
 }
 
 /******************************************************************//**
-Returns the control block of a file page, NULL if not found
-or an uncompressed page frame does not exist.
+Returns the control block of a file page, NULL if not found or an
+uncompressed page frame does not exist.
+If the block is found and mutex is not NULL then the appropriate
+page_hash mutex is acquired. It is up to the caller to release the
+mutex. If the block is found and the mutex is NULL then the page_hash
+mutex is released by this function.
 @return	block, NULL if not found */
 UNIV_INLINE
 buf_block_t*
 buf_block_hash_get(
 /*===============*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+					/*!< out: pointer to the block,
+					or NULL; if NULL, hash_mutex
+					is also NULL. */
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
 	ulint		space,		/*!< in: space id */
-	ulint		offset)		/*!< in: offset of the page
-					within space */
+	ulint		offset,		/*!< in: page number */
+	mutex_t**	mutex)		/*!< in/out: mutex of the page
+					hash acquired if block is
+					found. NULL otherwise. If NULL
+					is passed then the hash_mutex
+					is released by this function */
 {
-	buf_block_t*	block;
+	buf_page_t*	bpage = buf_page_hash_get(buf_pool, space, offset, mutex);
+	buf_block_t*	block = buf_page_get_block(bpage);
 
-	block = buf_page_get_block(buf_page_hash_get(buf_pool, space, offset));
+	if (block) {
+		ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+		ut_ad(!mutex || mutex_own(*mutex));
+		return(block);
+	} else if (bpage) {
+		/* Not a block, just a bpage: drop the hash mutex if held */
+		ut_ad(buf_page_in_file(bpage));
+
+		if (mutex) {
+			mutex_exit(*mutex);
+			*mutex = NULL;
+		}
+		return(NULL);
+	}
 
-	return(block);
+	ut_ad(!bpage);
+	ut_ad(mutex == NULL || *mutex == NULL);
+	return(NULL);
 }
 
 /********************************************************************//**
@@ -1044,16 +1111,9 @@ buf_page_peek(
 	ulint	space,	/*!< in: space id */
 	ulint	offset)	/*!< in: page number */
 {
-	const buf_page_t*	bpage;
 	buf_pool_t*		buf_pool = buf_pool_get(space, offset);
 
-	buf_pool_mutex_enter(buf_pool);
-
-	bpage = buf_page_hash_get(buf_pool, space, offset);
-
-	buf_pool_mutex_exit(buf_pool);
-
-	return(bpage != NULL);
+	return(buf_page_hash_get(buf_pool, space, offset, NULL) != NULL);
 }
 
 /********************************************************************//**

=== modified file 'storage/innobase/include/buf0lru.h'
--- a/storage/innobase/include/buf0lru.h	revid:marko.makela@stripped
+++ b/storage/innobase/include/buf0lru.h	revid:inaam.rana@stripped
@@ -101,9 +101,8 @@ NOTE: If this function returns BUF_LRU_F
 release buf_pool->mutex.  Furthermore, the page frame will no longer be
 accessible via bpage.
 
-The caller must hold buf_pool->mutex and buf_page_get_mutex(bpage) and
-release these two mutexes after the call.  No other
-buf_page_get_mutex() may be held when calling this function.
+The caller must hold buf_pool->mutex and must not hold any
+buf_page_get_mutex() when calling this function.
 @return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
 BUF_LRU_NOT_FREED otherwise. */
 UNIV_INTERN

=== modified file 'storage/innobase/include/hash0hash.h'
--- a/storage/innobase/include/hash0hash.h	revid:marko.makela@stripped
+++ b/storage/innobase/include/hash0hash.h	revid:inaam.rana@stripped
@@ -403,6 +403,14 @@ void
 hash_mutex_exit_all(
 /*================*/
 	hash_table_t*	table);	/*!< in: hash table */
+/************************************************************//**
+Releases all but the passed in mutex of a hash table. */
+UNIV_INTERN
+void
+hash_mutex_exit_all_but(
+/*====================*/
+	hash_table_t*	table,		/*!< in: hash table */
+	mutex_t*	keep_mutex);	/*!< in: mutex to keep */
 #else /* !UNIV_HOTBACKUP */
 # define hash_get_heap(table, fold)	((table)->heap)
 # define hash_mutex_enter(table, fold)	((void) 0)

=== modified file 'storage/innobase/include/sync0sync.h'
--- a/storage/innobase/include/sync0sync.h	revid:marko.makela@stripped
+++ b/storage/innobase/include/sync0sync.h	revid:inaam.rana@stripped
@@ -672,6 +672,7 @@ or row lock! */
 					can call routines there! Otherwise
 					the level is SYNC_MEM_HASH. */
 #define	SYNC_BUF_POOL		150	/* Buffer pool mutex */
+#define	SYNC_BUF_PAGE_HASH	149	/* buf_pool->page_hash mutex */
 #define	SYNC_BUF_BLOCK		146	/* Block mutex */
 #define	SYNC_BUF_FLUSH_LIST	145	/* Buffer flush list mutex */
 #define SYNC_DOUBLEWRITE	140

=== modified file 'storage/innobase/sync/sync0sync.c'
--- a/storage/innobase/sync/sync0sync.c	revid:marko.makela@stripped
+++ b/storage/innobase/sync/sync0sync.c	revid:inaam.rana@stripped
@@ -1196,6 +1196,13 @@ sync_thread_add_level(
 		}
 		break;
 
+
+	case SYNC_BUF_PAGE_HASH:
+		/* Multiple page_hash mutexes are only allowed during
+		buf_validate and that is where buf_pool mutex is already
+		held. */
+		/* Fall through */
+
 	case SYNC_BUF_BLOCK:
 		/* Either the thread must own the buffer pool mutex
 		(buf_pool_mutex), or it is allowed to latch only ONE


Attachment: [text/bzr-bundle] bzr/inaam.rana@oracle.com-20100629145343-2ztwh23ih1lt5g9b.bundle
Thread
bzr commit into mysql-next-mr-innodb branch (inaam.rana:3238) Inaam Rana29 Jun