List:Commits« Previous MessageNext Message »
From:Rafal Somla Date:February 9 2009 9:05am
Subject:bzr commit into mysql-6.0-backup branch (Rafal.Somla:2762) Bug#40975
View as plain text  
#At file:///ext/mysql/bzr/backup/bug40975/

 2762 Rafal Somla	2009-02-09
      Bug #40975 - Database restored through named pipes brings up a corrupt database. 
      
      The problem was buried deep inside the backup stream library code. Inside 
      bstream_read_part() function (in backup/stream_v1_transpor.c) there is a branch 
      which reads bytes directly from an underlying stream to the provided input 
      buffer (the old code):
      
        else
        {
          /* read directly into data area */
          ASSERT(s->buf.header > s->buf.pos);
      
          howmuch= data->end - data->begin;
          if ((s->buf.pos + howmuch) > s->buf.header)
            howmuch= s->buf.header - s->buf.pos;
      
          saved= *data;
          data->end= data->begin + howmuch;
      
          ret= as_read(&s->stream, data, buf);
          if (ret == BSTREAM_ERROR)
            return SERROR(s);
          if (ret == BSTREAM_EOS)
            s->state= EOS;
      
          s->buf.begin += data->begin - saved.begin;
          s->buf.pos= s->buf.begin;
      
          data->begin= data->end;
          data->end= saved.end;
        }
      
      After determining how much data can be read (howmuch variable) the data blob 
      describes the memory area where the data should be stored. as_read() function 
      reads bytes from the underlying stream to the area described by data and updates 
      data blob to describe the space which was *not* filled with bytes:
      
           |- the original data blob |
           |                         |
        ---[***************[---------]------
           |               |         |
           |- bytes read --|- data --|
              from stream     blob after the operation
      
      In the case depicted above, when not all the available area was filled with 
      bytes from the stream and data blob is non-empty, the assignment 
      
       data->begin= data->end
      
      incorrectly modifies data->begin pointer which in this situation is different 
      from data->end. This causes further confusion in the internal buffering code.
      
      The assignment is completely wrong and serves no purpose here - the patch 
      removes it. This problem was not detected before due to the fact that in the 
      usual situation the whole available area is filled with data from the stream so 
      that after as_read() data->begin == data->end. In this situation the assignment 
      has no effect. However, when reading from a pipe, not always enough bytes is 
      available in the stream to fill the buffer and above situation can happen.
      
      In the test we trigger the errornous situation using error injection code.
added:
  mysql-test/suite/backup/r/backup_stream_pipe.result
  mysql-test/suite/backup/t/backup_stream_pipe.test
modified:
  sql/backup/stream.cc
  sql/backup/stream_v1_transport.c

per-file messages:
  mysql-test/suite/backup/t/backup_stream_pipe.test
    A test repeating test scenario described in bug report.
  sql/backup/stream.cc
    Add error injection code which simulates reading from a pipe.
  sql/backup/stream_v1_transport.c
    - Improve comments in bstream_read_part() function.
    - Remove errornous code.
=== added file 'mysql-test/suite/backup/r/backup_stream_pipe.result'
--- a/mysql-test/suite/backup/r/backup_stream_pipe.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/backup/r/backup_stream_pipe.result	2009-02-09 09:05:20 +0000
@@ -0,0 +1,38 @@
+# Initial clean-up.
+DROP DATABASE IF EXISTS db1;
+# Prepare objects to backup.
+CREATE DATABASE db1;
+USE db1;
+CREATE TABLE t1(a int, b blob);
+# Fill t1 with approx 80k of data.
+SELECT a, length(b) FROM t1;
+a	length(b)
+10	8192
+9	8192
+8	8192
+7	8192
+6	8192
+5	8192
+4	8192
+3	8192
+2	8192
+1	8192
+CHECK TABLE t1;
+Table	Op	Msg_type	Msg_text
+db1.t1	check	status	OK
+# Backup the table.
+BACKUP DATABASE db1 TO 'db1.bkp';
+backup_id
+#
+SET DEBUG='d,backup_read_simulate_pipe';
+# Restore the table.
+RESTORE FROM 'db1.bkp' OVERWRITE;
+backup_id
+#
+SET DEBUG='d,-';
+# Check for errors.
+CHECK TABLE db1.t1;
+Table	Op	Msg_type	Msg_text
+db1.t1	check	status	OK
+# Final clean-up.
+DROP DATABASE db1;

=== added file 'mysql-test/suite/backup/t/backup_stream_pipe.test'
--- a/mysql-test/suite/backup/t/backup_stream_pipe.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/backup/t/backup_stream_pipe.test	2009-02-09 09:05:20 +0000
@@ -0,0 +1,81 @@
+#
+# Test for problem reported in BUG#40975. 
+#
+# Due to errors in stream library code, RESTORE was confused when reading 
+# image from a pipe in a situation when read call returns less bytes than 
+# requested. This manifested in getting corrupted table after restore. In 
+# this test we simulate this situation using error injection code. We check 
+# that table is no longer corrupted.
+#
+# To have a similar setup as the one presented in the bug report (the word.bak 
+# database) we create a table with enough data to span several blocks in the 
+# backup image.
+# 
+--source include/not_embedded.inc
+--source include/have_debug.inc
+
+let $bdir=`SELECT @@backupdir`;
+
+--echo # Initial clean-up.
+
+--disable_warnings
+DROP DATABASE IF EXISTS db1;
+--error 0,1
+--remove_file $bdir/db1.bkp
+--enable_warnings
+
+--echo # Prepare objects to backup.
+
+CREATE DATABASE db1;
+USE db1;
+
+CREATE TABLE t1(a int, b blob);
+
+# Single block in backup stream is 16k by default.
+# We need table data to span several such blocks.
+# We will fill t1 with 10 rows, each approx 8k long.
+
+# Create $data string of length 8k
+
+let $data=12345678;
+let $n=10;
+while ($n)
+{
+  let $data=$data$data;
+  dec $n;
+}
+
+--echo # Fill t1 with approx 80k of data.
+
+--disable_query_log
+let $n=10;
+while ($n)
+{
+  eval INSERT INTO t1 VALUES ($n,'$data');
+  dec $n;
+}
+--enable_query_log
+SELECT a, length(b) FROM t1;
+CHECK TABLE t1;
+
+--echo # Backup the table.
+--replace_column 1 #
+BACKUP DATABASE db1 TO 'db1.bkp';
+
+# Enable simulation of reading from a pipe.
+SET DEBUG='d,backup_read_simulate_pipe';
+
+--echo # Restore the table.
+--replace_column 1 #
+RESTORE FROM 'db1.bkp' OVERWRITE;
+
+SET DEBUG='d,-';
+
+--echo # Check for errors.
+
+CHECK TABLE db1.t1;
+
+--echo # Final clean-up.
+
+DROP DATABASE db1;
+--remove_file $bdir/db1.bkp

=== modified file 'sql/backup/stream.cc'
--- a/sql/backup/stream.cc	2009-01-29 21:17:59 +0000
+++ b/sql/backup/stream.cc	2009-02-09 09:05:20 +0000
@@ -165,6 +165,13 @@ extern "C" int stream_read(void *instanc
   else
 #endif
   {
+    DBUG_EXECUTE_IF("backup_read_simulate_pipe",{
+      /*
+        Simulate reading from a pipe, when less bytes is read than actually
+        requested.
+      */
+      if (howmuch > 1024) howmuch= 1024;
+    });
     howmuch= my_read(fd, buf->begin, howmuch, MYF(0));
   }
 

=== modified file 'sql/backup/stream_v1_transport.c'
--- a/sql/backup/stream_v1_transport.c	2009-01-20 11:36:32 +0000
+++ b/sql/backup/stream_v1_transport.c	2009-02-09 09:05:20 +0000
@@ -1503,7 +1503,17 @@ int bstream_read_part(backup_stream *s, 
 
   IBUF_INV(s->buf);
 
-  /* fill input buffer if we reached end of fragment or of the input block */
+  /*
+    We fill the input buffer if one of these conditions holds:
+
+    a) all bytes from the current fragment have been read 
+       (buf.begin == buf.header), or
+
+    b) all bytes in the current input block have been read
+       (buf.begin == buf.end).
+    
+    (See INPUT BUFFER description above) 
+  */
   if ((s->buf.begin == s->buf.end) || (s->buf.begin == s->buf.header))
   {
     ret= load_buffer(s);
@@ -1516,8 +1526,8 @@ int bstream_read_part(backup_stream *s, 
   ASSERT(s->buf.header >= s->buf.begin);
 
   /*
-    If we finished reading a fragment, we should load next one
-    or signal EOC if it was the last fragment of a chunk.
+    If we finished reading a fragment, we should load next one into the
+    input buffer. We signal EOC if this was the last fragment of a chunk.
   */
   if (s->buf.header == s->buf.begin)
   {
@@ -1532,8 +1542,18 @@ int bstream_read_part(backup_stream *s, 
   }
 
   /*
-    Determine length of the fragment remainder stored in the input
-    buffer.
+    Determine how much bytes of the current fragment are there in the input
+    buffer (the amount could be 0). There are 2 cases:
+
+    a) the input buffer contains the header of next fragment 
+       (buf.header <= buf.pos) - in this case all the remaining bytes of the
+       current fragment are in the buffer between buf.begin and buf.header,
+
+    b) the header of the next framgment was not yet loaded into the input buffer
+       (buf.pos < buf.header) - inis case all bytes in the buffer belong to the
+       current fragment.
+
+    (See INPUT BUFFER description above)
   */
   if (s->buf.header <= s->buf.pos)
     howmuch= s->buf.header - s->buf.begin;
@@ -1541,11 +1561,15 @@ int bstream_read_part(backup_stream *s, 
     howmuch= s->buf.pos - s->buf.begin;
 
   /*
-    If there is some data in the buffer we copy it to the data blob,
-    otherwise we fill data from the stream.
-   */
+    If there is some data in the buffer we copy it to the data blob and return
+    from call. Otherwise we have more work to do.
+  */
   if (howmuch > 0)
   {
+    /*
+      Adjust howmuch if there is more data in the input bufer that would fit
+      into the blob. Copy only as much bytes as fits.
+    */ 
     if ((data->begin + howmuch) > data->end)
       howmuch= data->end - data->begin;
 
@@ -1555,29 +1579,100 @@ int bstream_read_part(backup_stream *s, 
   }
   else
   {
-    /* read directly into data area */
+    /*
+      At this point it is not possible that the header of the next fragment is
+      in the buffer. This is because if it were the case, then we would have
+      buf.begin == buf.header (as howmuch == 0). But if buf.begin == buf.header 
+      then load_next_fragment() would be called above and then there should be
+      some bytes of the next fragment in the buffer (i.e. howmuch > 0).
+
+      Thus we know that buf.pos < buf.header. Moreover, as the buffer is empty 
+      (howmuch == 0), we also must have buf.begin == buf.pos. We check these 
+      claims with assertions.
+    */ 
     ASSERT(s->buf.header > s->buf.pos);
+    ASSERT(s->buf.begin == s->buf.pos);
 
+    /*
+      Determine how much data to read from the stream. We don't want to read 
+      more bytes than there is in the current fragment (buf.header - buf.pos).
+      At the same time there is no point in reading more bytes than would fit 
+      into the data blob.
+    */ 
     howmuch= data->end - data->begin;
     if ((s->buf.pos + howmuch) > s->buf.header)
       howmuch= s->buf.header - s->buf.pos;
 
-    saved= *data;
+    /*
+      We are about to read howmuch bytes from the input stream. Modify data
+      blob to describe the area where these bytes should be stored.
+
+       ________________ original data blob _______________
+      |                                                   |
+
+      [-----------------------------]---------------------]
+
+      |                             |
+       area where bytes will be read
+
+      We save the original data blob to construct correct reply later.
+    */ 
+    saved= *data;       /* note: only begin/end pointers are copied here */
     data->end= data->begin + howmuch;
 
+    /*
+      Now we call as_read() to read bytes from the underlying input stream.
+      The function will move data->begin pointer indicating how much data have
+      been read.
+    */ 
     ret= as_read(&s->stream, data, buf);
     if (ret == BSTREAM_ERROR)
       return SERROR(s);
     if (ret == BSTREAM_EOS)
       s->state= EOS;
 
-    s->buf.begin += data->begin - saved.begin;
-    s->buf.pos= s->buf.begin;
+    /*
+      After a successful call to as_read(), data blob describes the area which
+      was *not* filled with data from the stream.
+
+       __ original area described by data blob __
+      |                                          |
+      
+      [======================]-------------------]
+    
+      |                      |                   |
+       bytes read from stream                    |
+      |                      |                   data->end
+      saved.begin            data->begin
+ 
+      Thus now data->begin points one byte after the last byte read from the
+      stream, which is as expected. But data->end should point at the end of 
+      the memory area passed by the caller - this can be restored from saved 
+      blob.
+    */ 
 
-    data->begin= data->end;
     data->end= saved.end;
+
+    /*
+      We have read (data->begin - saved.begin) bytes from the input stream. To
+      preserve input buffer invariants, we must move buf.begin pointer by that
+      amount (so that buf.end - buf.begin is the number of bytes left in the
+      current input block.
+
+      The input buffer is still empty (all bytes were read to the memory area
+      provided by caller). Therefore we set buf.pos = buf.begin.
+    */ 
+
+    s->buf.begin += data->begin - saved.begin;
+    s->buf.pos= s->buf.begin;
   }
 
+  /*
+    Now some data has been copied to the data memory blob, either from input
+    buffer or directly from stream. We check input buffer invariants and return,
+    possibly signalling end of chunk or stream.
+  */ 
+
   IBUF_INV(s->buf);
 
   return check_end(s);

Thread
bzr commit into mysql-6.0-backup branch (Rafal.Somla:2762) Bug#40975Rafal Somla9 Feb