Hi Andrei!
Comments below.
Just my few cents,
Mats Kindahl
Andrei Elkin wrote:
> Mats, hej.
>
> I have several notes.
>
>
>> Hi Sven!
>>
>> Here's the second patch with changes according to you're review.
>>
>> Just my few cents,
>> Mats Kindahl
>>
>> --
>> Mats Kindahl
>> Lead Software Developer
>> Replication Team
>> MySQL AB, www.mysql.com
>>
>> # This is a BitKeeper generated diff -Nru style patch.
>> #
>> # ChangeSet
>> # 2007/11/08 16:03:19+01:00 mats@stripped
>> # BUG#31793 (log event corruption causes crash):
>> #
>> # When running mysqlbinlog on a 64-bit machine with a corrupt relay log,
>> # it causes mysqlbinlog to crash. In this case, the crash is caused
>> # because a request for 18446744073709534806U bytes is issued, which
>> # apparantly can be served on a 64-bit machine (speculatively, I assume)
>> # but this causes the memcpy() issued later to copy the data to segfault.
>> #
>> # The request for the number of bytes is caused by a computation
>> # of data_len - server_vars_len where server_vars_len is corrupt in such
>> # a sense that it is > data_len. This causes a wrap-around, with the
>> # the data_len given above.
>> #
>> # This patch adds a check that if server_vars_len is greater than
>> # data_len before the substraction, and aborts reading the event in
>> # that case marking the event as invalid. It also adds checks to see
>> # that reading the server variables does not go outside the bounds
>> # of the available space, giving a limited amount of integrity check.
>> #
>> # mysql-test/r/mysqlbinlog.result
>> # 2007/11/08 16:03:17+01:00 mats@stripped +1 -0
>> # Result change.
>> #
>> # mysql-test/std_data/corrupt-relay-bin.000624
>> # 2007/11/08 16:03:17+01:00 mats@stripped +2033 -0
>> # BitKeeper file
> /home/mats/devel/b31793-mysql-5.0-rpl/mysql-test/std_data/corrupt-relay-bin.000624
>> #
>> # mysql-test/std_data/corrupt-relay-bin.000624
>> # 2007/11/08 16:03:17+01:00 mats@stripped +0 -0
>> #
>> # mysql-test/t/mysqlbinlog.test
>> # 2007/11/08 16:03:17+01:00 mats@stripped +4 -0
>> # Adding test that it fails gracefully for a corrupt relay log.
>> #
>> # sql/log_event.cc
>> # 2007/11/08 16:03:17+01:00 mats@stripped +81 -11
>> # Adding check that status var length does not cause wrap-around
>> # when performing subtraction. Extending get_str_len_and_pointer() to
>> # check that the string can actually be read without reading outside
>> # bounds. Adding checks when reading server variables from the Query-
>> # log_event so that the variable can really be read. Abort reading
>> # and mark the event as invalid otherwise.
>> #
>> diff -Nru a/mysql-test/r/mysqlbinlog.result b/mysql-test/r/mysqlbinlog.result
>> --- a/mysql-test/r/mysqlbinlog.result 2007-11-08 16:05:24 +01:00
>> +++ b/mysql-test/r/mysqlbinlog.result 2007-11-08 16:05:24 +01:00
>> @@ -325,4 +325,5 @@
>> drop table t1;
>> 1
>> drop table t1;
>> +shell> mysqlbinlog std_data/corrupt-relay-bin.000624 >
> var/tmp/bug31793.sql
>> End of 5.0 tests
>> Binary files a/mysql-test/std_data/corrupt-relay-bin.000624 and
> b/mysql-test/std_data/corrupt-relay-bin.000624 differ
>> diff -Nru a/mysql-test/t/mysqlbinlog.test b/mysql-test/t/mysqlbinlog.test
>> --- a/mysql-test/t/mysqlbinlog.test 2007-11-08 16:05:24 +01:00
>> +++ b/mysql-test/t/mysqlbinlog.test 2007-11-08 16:05:24 +01:00
>> @@ -237,4 +237,8 @@
>> --echo $c
>> drop table t1;
>>
>> +echo shell> mysqlbinlog std_data/corrupt-relay-bin.000624 >
> var/tmp/bug31793.sql;
>> +error 1;
>> +exec $MYSQL_BINLOG $MYSQL_TEST_DIR/std_data/corrupt-relay-bin.000624 >
> $MYSQLTEST_VARDIR/tmp/bug31793.sql;
>> +
>> --echo End of 5.0 tests
>> diff -Nru a/sql/log_event.cc b/sql/log_event.cc
>> --- a/sql/log_event.cc 2007-11-08 16:05:24 +01:00
>> +++ b/sql/log_event.cc 2007-11-08 16:05:24 +01:00
>> @@ -1400,17 +1400,43 @@
>>
>> /* 2 utility functions for the next method */
>>
>> -/*
>> - Get the pointer for a string (src) that contains the length in
>> - the first byte. Set the output string (dst) to the string value
>> - and place the length of the string in the byte after the string.
>> +/**
>> + Read a string with length from memory.
>> +
>> + Get the pointer for a string (src) that contains the length in
>> + the first byte. Set the output string (dst) to the string value
>> + and place the length of the string in the byte after the string.
>> +
>> + @param src Pointer to variable holding a pointer to the memory to
>> + read the string from.
>> + @param dst Pointer to variable holding a pointer where the string
>> + actual string starts. Starting from this position, the
>> + string can be copied using @c memcpy().
>> + @param len Pointer to variable where the length will be stored.
>> + @param end One-past-the-end of the memory where the string is
>> + stored.
>> +
>> + @return Zero if the entire string can be copied successfully, @c
>> + UINT_MAX if the length could not be read from memory
>> + (that is, if *src >= end), the number of bytes that
>> + are missing if we hit the end of the memory.
>> */
>> -static void get_str_len_and_pointer(const Log_event::Byte **src,
>> - const char **dst,
>> - uint *len)
>> +static int
>> +get_str_len_and_pointer(const Log_event::Byte **src,
>> + const char **dst,
>> + uint *len,
>> + const Log_event::Byte *end)
>> {
>> - if ((*len= **src))
>> - *dst= (char *)*src + 1; // Will be copied later
>> + if (*src >= end)
>> + return -1; // Will be UINT_MAX in two-complement arithmetics
>>
>
>
>> + uint length= **src;
>> + if (length > 0)
>>
>
> Having length az zero might not be okay. There should be the minimum
> for both Q_CATALOG_NZ_CODE and Q_TIME_ZONE_CODE.
>
I'm just adding checks to an existing function. I see no point in doing
a larger than necessary refactoring even when the code is not "perfect"
or does not look "nice".
>
>> + {
>> + if (*src + length >= end)
>> + return *src + length - end; // Number of bytes missing
>> + *dst= (char *)*src + 1; // Will be copied later
>> + }
>> + *len= length;
>> (*src)+= *len + 1;
>> }
>>
>
> This function does not look to me very necessary. In fact, I find it
> of dubious benefit, as it rather obsures the code having just two
> operations and being used twice all within the same caller.
>
> Particularly, one of two (*src)+= *len + 1; increments incompatibly
> with the rest of that the caller is doing.
> I'd rather to remover the
> function and see incrementing on the same switch..case lines as well
> as commented further emergency leave from the function if length is
> more than the max or the min (above).
>
See above.
>
>
>>
>> @@ -1424,6 +1450,23 @@
>> *(*dst)++= 0;
>> }
>>
>> +
>> +/**
>> + Macro to check that there is enough space to read from memory.
>> +
>> + @param PTR Pointer to memory
>> + @param END End of memory
>> + @param CNT Number of bytes that should be read.
>> + */
>> +#define CHECK_SPACE(PTR,END,CNT) \
>> + do { \
>> + DBUG_ASSERT((PTR) + (CNT) <= (END)); \
>> + if ((PTR) + (CNT) > (END)) { \
>> + query= 0; \
>> + DBUG_VOID_RETURN; \
>> + } \
>> + } while (0)
>> +
>> /*
>> Query_log_event::Query_log_event()
>> This is used by the SQL slave thread to prepare the event before execution.
>> @@ -1475,6 +1518,17 @@
>> if (tmp)
>> {
>> status_vars_len= uint2korr(buf + Q_STATUS_VARS_LEN_OFFSET);
>> + /*
>> + Check if status variable length is corrupt and will lead to very
>> + wrong data. We could be even more strict and require data_len to
>> + be even bigger, but this will suffice to catch most corruption
>> + errors that can lead to a crash.
>> + */
>>
>
> I think you should compare with MAX_SIZE_LOG_EVENT_STATUS instead.
>
Good point, but suppose that data_len + 1 < MAX_SIZE_LOG_EVENT_STATUS
for some reason, then the code will read beyond the end, potentially
causing a segmentation fault. We should compare with the min() of the two.
>
>> + if (status_vars_len >= data_len + 1)
>>
>
>
>
>> + {
>> + query= 0;
>> + DBUG_VOID_RETURN;
>> + }
>>
>
>
>> data_len-= status_vars_len;
>> DBUG_PRINT("info", ("Query_log_event has status_vars_len: %u",
>> (uint) status_vars_len));
>> @@ -1494,6 +1548,7 @@
>> {
>> switch (*pos++) {
>> case Q_FLAGS2_CODE:
>> + CHECK_SPACE(pos, end, 4);
>>
>
> this checks are redundant and should be moved outside of the loop.
> Obviously the new macro is not needed as it's enough to verify if
> pos is still below end with pos < end.
>
No, it is not redundant. The uint4korr() function reads memory at pos,
pos+1, ..., pos+3, so if there are only M bytes available (for some M <
3), we will read beyond the allotted memory, potentially causing a
segmentation fault.
>
>> flags2_inited= 1;
>> flags2= uint4korr(pos);
>> DBUG_PRINT("info",("In Query_log_event, read flags2: %lu", (ulong)
> flags2));
>> @@ -1504,6 +1559,7 @@
>> #ifndef DBUG_OFF
>> char buff[22];
>> #endif
>> + CHECK_SPACE(pos, end, 8);
>> sql_mode_inited= 1;
>> sql_mode= (ulong) uint8korr(pos); // QQ: Fix when sql_mode is ulonglong
>> DBUG_PRINT("info",("In Query_log_event, read sql_mode: %s",
>> @@ -1512,15 +1568,21 @@
>> break;
>> }
>> case Q_CATALOG_NZ_CODE:
>>
>
>
>> - get_str_len_and_pointer(&pos, &catalog, &catalog_len);
>>
>
> Similar to time_zone_len comments, please find them below.
>
>
>> + if (get_str_len_and_pointer(&pos, &catalog, &catalog_len,
> end))
>> + {
>> + query= 0;
>> + DBUG_VOID_RETURN;
>> + }
>> break;
>> case Q_AUTO_INCREMENT:
>> + CHECK_SPACE(pos, end, 4);
>> auto_increment_increment= uint2korr(pos);
>> auto_increment_offset= uint2korr(pos+2);
>> pos+= 4;
>> break;
>> case Q_CHARSET_CODE:
>> {
>> + CHECK_SPACE(pos, end, 6);
>> charset_inited= 1;
>> memcpy(charset, pos, 6);
>> pos+= 6;
>> @@ -1528,20 +1590,28 @@
>> }
>> case Q_TIME_ZONE_CODE:
>> {
>>
>
>
>> - get_str_len_and_pointer(&pos, &time_zone_str,
> &time_zone_len);
>>
>
> I suggest to leave the signature of the function and compare
> time_zone_len with the existing MAX_TIME_ZONE_NAME_LENGTH.
>
Why? There is not reason to perform a refactoring just because the
existing code does not look "nice". Also, in the case that end - pos <
MAX_TIME_ZONE_NAME_LENGTH, we might end up with a segmentation fault for
the same reasons as I gave above.
>
>> + if (get_str_len_and_pointer(&pos, &time_zone_str,
> &time_zone_len, end))
>> + {
>> + query= 0;
>> + DBUG_VOID_RETURN;
>> + }
>> break;
>> }
>> case Q_CATALOG_CODE: /* for 5.0.x where 0<=x<=3 masters */
>> + CHECK_SPACE(pos, end, 1);
>> if ((catalog_len= *pos))
>> catalog= (char*) pos+1; // Will be copied
> later
>> + CHECK_SPACE(pos, end, catalog_len + 2);
>> pos+= catalog_len+2; // leap over end 0
>> catalog_nz= 0; // catalog has end 0 in event
>> break;
>> case Q_LC_TIME_NAMES_CODE:
>> + CHECK_SPACE(pos, end, 2);
>> lc_time_names_number= uint2korr(pos);
>> pos+= 2;
>> break;
>> case Q_CHARSET_DATABASE_CODE:
>> + CHECK_SPACE(pos, end, 2);
>> charset_database_number= uint2korr(pos);
>> pos+= 2;
>> break;
>>
>>
>>
>
> cheers,
>
> /andrei
>
>
--
Mats Kindahl
Lead Software Developer
Replication Team
MySQL AB, www.mysql.com