List: Commits « Previous Message | Next Message »
From: ingo Date: July 21 2006 10:22am
Subject: bk commit into 4.0 tree (ingo:1.2184) BUG#20719
View as plain text  
Below is the list of changes that have just been committed into a local
4.0 repository of mydev. When mydev does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2006-07-21 12:22:41+02:00, ingo@stripped +1 -0
  Bug#20719 - Reading dynamic records with write buffer could fail
  
  Fixed a possible problem with reading of dynamic records
  when a write cache is active. The cache must be flushed
  whenever a part of the file in the write cache is to be 
  read.
  
  Added a read optimization to _mi_read_dynamic_record().
  
  No test case. The defect exists in the code, but no failure caused by it has been observed in practice.

  myisam/mi_dynrec.c@stripped, 2006-07-21 12:22:39+02:00, ingo@stripped +129 -28
    Bug#20719 - Reading dynamic records with write buffer could fail
    
    Fixed a possible problem with reading of dynamic records
    when a write cache is active. The cache must be flushed
    whenever a part of the file in the write cache is to be 
    read. This must be done before the read of the header
    and before the read of the rest block.
    
    Renamed the 'flag' and 'skipp_deleted_blocks' variables.
    
    Added a read optimization to _mi_read_dynamic_record()
    that was present in _mi_read_rnd_dynamic_record() already.
    After _mi_get_block_info() we have some bytes of the record
    in the header buffer already. No need to read them again.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	ingo
# Host:	chilla.local
# Root:	/home/mydev/mysql-4.0-bug20719

--- 1.34/myisam/mi_dynrec.c	2006-07-21 12:22:43 +02:00
+++ 1.35/myisam/mi_dynrec.c	2006-07-21 12:22:43 +02:00
@@ -1095,12 +1095,41 @@ void _my_store_blob_length(byte *pos,uin
 }
 
 
-	/* Read record from datafile */
-	/* Returns 0 if ok, -1 if error */
+/*
+  Read record from datafile.
+
+  SYNOPSIS
+    _mi_read_dynamic_record()
+      info                      MI_INFO pointer to table.
+      filepos                   From where to read the record.
+      buf                       Destination for record.
+
+  NOTE
+
+    If a write buffer is active, it needs to be flushed if its contents
+    intersects with the record to read. We always check if the position
+    of the first byte of the write buffer is lower than the position
+    past the last byte to read. In theory this is also true if the write
+    buffer is completely below the read segment. That is, if there is no
+    intersection. But this case is unusual. We flush anyway. Only if the
+    first byte in the write buffer is above the last byte to read, we do
+    not flush.
+
+    A dynamic record may need several reads. So this check must be done
+    before every read. Reading a dynamic record starts with reading the
+    block header. If the record does not fit into the free space of the
+    header, the block may be longer than the header. In this case a
+    second read is necessary. These one or two reads repeat for every
+    part of the record.
+
+  RETURN
+    0           OK
+    -1          Error
+*/
 
 int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, byte *buf)
 {
-  int flag;
+  int block_of_record;
   uint b_type,left_length;
   byte *to;
   MI_BLOCK_INFO block_info;
@@ -1112,17 +1141,16 @@ int _mi_read_dynamic_record(MI_INFO *inf
     LINT_INIT(to);
     LINT_INIT(left_length);
     file=info->dfile;
-    block_info.next_filepos=filepos;	/* for easyer loop */
-    flag=block_info.second_read=0;
+    block_of_record= 0;   /* First block of record is numbered as zero. */
+    block_info.second_read= 0;
     do
     {
       if (info->opt_flag & WRITE_CACHE_USED &&
-	  info->rec_cache.pos_in_file <= block_info.next_filepos &&
+	  info->rec_cache.pos_in_file < filepos + MI_BLOCK_INFO_HEADER_LENGTH &&
 	  flush_io_cache(&info->rec_cache))
 	goto err;
       info->rec_cache.seek_not_done=1;
-      if ((b_type=_mi_get_block_info(&block_info,file,
-				     block_info.next_filepos))
+      if ((b_type= _mi_get_block_info(&block_info, file, filepos))
 	  & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
 	     BLOCK_FATAL_ERROR))
       {
@@ -1130,9 +1158,8 @@ int _mi_read_dynamic_record(MI_INFO *inf
 	  my_errno=HA_ERR_RECORD_DELETED;
 	goto err;
       }
-      if (flag == 0)			/* First block */
+      if (block_of_record++ == 0)			/* First block */
       {
-	flag=1;
 	if (block_info.rec_len > (uint) info->s->base.max_pack_length)
 	  goto panic;
 	if (info->s->base.blobs)
@@ -1147,11 +1174,35 @@ int _mi_read_dynamic_record(MI_INFO *inf
       }
       if (left_length < block_info.data_len || ! block_info.data_len)
 	goto panic;			/* Wrong linked record */
-      if (my_pread(file,(byte*) to,block_info.data_len,block_info.filepos,
-		   MYF(MY_NABP)))
-	goto panic;
-      left_length-=block_info.data_len;
-      to+=block_info.data_len;
+      /* copy information that is already read */
+      {
+        uint offset= (uint) (block_info.filepos - filepos);
+        uint prefetch_len= (sizeof(block_info.header) - offset);
+        filepos+= sizeof(block_info.header);
+
+        if (prefetch_len > block_info.data_len)
+          prefetch_len= block_info.data_len;
+        if (prefetch_len)
+        {
+          memcpy((byte*) to, block_info.header + offset, prefetch_len);
+          block_info.data_len-= prefetch_len;
+          left_length-= prefetch_len;
+          to+= prefetch_len;
+        }
+      }
+      /* read rest of record from file */
+      if (block_info.data_len)
+      {
+        if (info->opt_flag & WRITE_CACHE_USED &&
+            info->rec_cache.pos_in_file < filepos + block_info.data_len &&
+            flush_io_cache(&info->rec_cache))
+          goto err;
+        if (my_read(file, (byte*) to, block_info.data_len, MYF(MY_NABP)))
+          goto panic;
+        left_length-=block_info.data_len;
+        to+=block_info.data_len;
+      }
+      filepos= block_info.next_filepos;
     } while (left_length);
 
     info->update|= HA_STATE_AKTIV;	/* We have a aktive record */
@@ -1308,11 +1359,45 @@ err:
 }
 
 
+/*
+  Read record from datafile.
+
+  SYNOPSIS
+    _mi_read_rnd_dynamic_record()
+      info                      MI_INFO pointer to table.
+      buf                       Destination for record.
+      filepos                   From where to read the record.
+      skip_deleted_blocks       If to repeat reading until a non-deleted
+                                record is found.
+
+  NOTE
+
+    If a write buffer is active, it needs to be flushed if its contents
+    intersects with the record to read. We always check if the position
+    of the first byte of the write buffer is lower than the position
+    past the last byte to read. In theory this is also true if the write
+    buffer is completely below the read segment. That is, if there is no
+    intersection. But this case is unusual. We flush anyway. Only if the
+    first byte in the write buffer is above the last byte to read, we do
+    not flush.
+
+    A dynamic record may need several reads. So this check must be done
+    before every read. Reading a dynamic record starts with reading the
+    block header. If the record does not fit into the free space of the
+    header, the block may be longer than the header. In this case a
+    second read is necessary. These one or two reads repeat for every
+    part of the record.
+
+  RETURN
+    0           OK
+    != 0        Error
+*/
+
 int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf,
 				register my_off_t filepos,
-				my_bool skipp_deleted_blocks)
+				my_bool skip_deleted_blocks)
 {
-  int flag,info_read,save_errno;
+  int block_of_record, info_read, save_errno;
   uint left_len,b_type;
   byte *to;
   MI_BLOCK_INFO block_info;
@@ -1338,7 +1423,8 @@ int _mi_read_rnd_dynamic_record(MI_INFO 
   else
     info_read=1;				/* memory-keyinfoblock is ok */
 
-  flag=block_info.second_read=0;
+  block_of_record= 0;   /* First block of record is numbered as zero. */
+  block_info.second_read= 0;
   left_len=1;
   do
   {
@@ -1361,15 +1447,15 @@ int _mi_read_rnd_dynamic_record(MI_INFO 
     {
       if (_mi_read_cache(&info->rec_cache,(byte*) block_info.header,filepos,
 			 sizeof(block_info.header),
-			 (!flag && skipp_deleted_blocks ? READING_NEXT : 0) |
-			 READING_HEADER))
+			 (!block_of_record && skip_deleted_blocks ?
+                          READING_NEXT : 0) | READING_HEADER))
 	goto panic;
       b_type=_mi_get_block_info(&block_info,-1,filepos);
     }
     else
     {
       if (info->opt_flag & WRITE_CACHE_USED &&
-	  info->rec_cache.pos_in_file <= filepos &&
+	  info->rec_cache.pos_in_file < filepos + MI_BLOCK_INFO_HEADER_LENGTH &&
 	  flush_io_cache(&info->rec_cache))
 	DBUG_RETURN(my_errno);
       info->rec_cache.seek_not_done=1;
@@ -1380,7 +1466,7 @@ int _mi_read_rnd_dynamic_record(MI_INFO 
 		  BLOCK_FATAL_ERROR))
     {
       if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
-	  && skipp_deleted_blocks)
+	  && skip_deleted_blocks)
       {
 	filepos=block_info.filepos+block_info.block_len;
 	block_info.second_read=0;
@@ -1394,7 +1480,7 @@ int _mi_read_rnd_dynamic_record(MI_INFO 
       }
       goto err;
     }
-    if (flag == 0)				/* First block */
+    if (block_of_record == 0)				/* First block */
     {
       if (block_info.rec_len > (uint) share->base.max_pack_length)
 	goto panic;
@@ -1427,7 +1513,7 @@ int _mi_read_rnd_dynamic_record(MI_INFO 
 	left_len-=tmp_length;
 	to+=tmp_length;
 	filepos+=tmp_length;
-     }
+      }
     }
     /* read rest of record from file */
     if (block_info.data_len)
@@ -1436,11 +1522,17 @@ int _mi_read_rnd_dynamic_record(MI_INFO 
       {
 	if (_mi_read_cache(&info->rec_cache,(byte*) to,filepos,
 			   block_info.data_len,
-			   (!flag && skipp_deleted_blocks) ? READING_NEXT :0))
+			   (!block_of_record && skip_deleted_blocks) ?
+                           READING_NEXT : 0))
 	  goto panic;
       }
       else
       {
+        if (info->opt_flag & WRITE_CACHE_USED &&
+            info->rec_cache.pos_in_file <
+            block_info.filepos + block_info.data_len &&
+            flush_io_cache(&info->rec_cache))
+          goto err;
 	/* VOID(my_seek(info->dfile,filepos,MY_SEEK_SET,MYF(0))); */
 	if (my_read(info->dfile,(byte*) to,block_info.data_len,MYF(MY_NABP)))
 	{
@@ -1450,10 +1542,14 @@ int _mi_read_rnd_dynamic_record(MI_INFO 
 	}
       }
     }
-    if (flag++ == 0)
+    /*
+      Increment block-of-record counter. If it was the first block,
+      remember the position behind the block for the next call.
+    */
+    if (block_of_record++ == 0)
     {
-      info->nextpos=block_info.filepos+block_info.block_len;
-      skipp_deleted_blocks=0;
+      info->nextpos= block_info.filepos + block_info.block_len;
+      skip_deleted_blocks= 0;
     }
     left_len-=block_info.data_len;
     to+=block_info.data_len;
@@ -1485,6 +1581,11 @@ uint _mi_get_block_info(MI_BLOCK_INFO *i
 
   if (file >= 0)
   {
+    /*
+      We do not use my_pread() here because we want to have the file
+      pointer set to the end of the header after this function.
+      my_pread() may leave the file pointer untouched.
+    */
     VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0)));
     if (my_read(file,(char*) header,sizeof(info->header),MYF(0)) !=
 	sizeof(info->header))
Thread
bk commit into 4.0 tree (ingo:1.2184) BUG#20719 | ingo | 21 Jul