Mats, hej.
I have several notes.
> Hi Sven!
>
> Here's the second patch with changes according to you're review.
>
> Just my few cents,
> Mats Kindahl
>
> --
> Mats Kindahl
> Lead Software Developer
> Replication Team
> MySQL AB, www.mysql.com
>
> # This is a BitKeeper generated diff -Nru style patch.
> #
> # ChangeSet
> # 2007/11/08 16:03:19+01:00 mats@stripped
> # BUG#31793 (log event corruption causes crash):
> #
> # When running mysqlbinlog on a 64-bit machine with a corrupt relay log,
> # it causes mysqlbinlog to crash. In this case, the crash is caused
> # because a request for 18446744073709534806U bytes is issued, which
> # apparantly can be served on a 64-bit machine (speculatively, I assume)
> # but this causes the memcpy() issued later to copy the data to segfault.
> #
> # The request for the number of bytes is caused by a computation
> # of data_len - server_vars_len where server_vars_len is corrupt in such
> # a sense that it is > data_len. This causes a wrap-around, with the
> # the data_len given above.
> #
> # This patch adds a check that if server_vars_len is greater than
> # data_len before the substraction, and aborts reading the event in
> # that case marking the event as invalid. It also adds checks to see
> # that reading the server variables does not go outside the bounds
> # of the available space, giving a limited amount of integrity check.
> #
> # mysql-test/r/mysqlbinlog.result
> # 2007/11/08 16:03:17+01:00 mats@stripped +1 -0
> # Result change.
> #
> # mysql-test/std_data/corrupt-relay-bin.000624
> # 2007/11/08 16:03:17+01:00 mats@stripped +2033 -0
> # BitKeeper file
> /home/mats/devel/b31793-mysql-5.0-rpl/mysql-test/std_data/corrupt-relay-bin.000624
> #
> # mysql-test/std_data/corrupt-relay-bin.000624
> # 2007/11/08 16:03:17+01:00 mats@stripped +0 -0
> #
> # mysql-test/t/mysqlbinlog.test
> # 2007/11/08 16:03:17+01:00 mats@stripped +4 -0
> # Adding test that it fails gracefully for a corrupt relay log.
> #
> # sql/log_event.cc
> # 2007/11/08 16:03:17+01:00 mats@stripped +81 -11
> # Adding check that status var length does not cause wrap-around
> # when performing subtraction. Extending get_str_len_and_pointer() to
> # check that the string can actually be read without reading outside
> # bounds. Adding checks when reading server variables from the Query-
> # log_event so that the variable can really be read. Abort reading
> # and mark the event as invalid otherwise.
> #
> diff -Nru a/mysql-test/r/mysqlbinlog.result b/mysql-test/r/mysqlbinlog.result
> --- a/mysql-test/r/mysqlbinlog.result 2007-11-08 16:05:24 +01:00
> +++ b/mysql-test/r/mysqlbinlog.result 2007-11-08 16:05:24 +01:00
> @@ -325,4 +325,5 @@
> drop table t1;
> 1
> drop table t1;
> +shell> mysqlbinlog std_data/corrupt-relay-bin.000624 > var/tmp/bug31793.sql
> End of 5.0 tests
> Binary files a/mysql-test/std_data/corrupt-relay-bin.000624 and
> b/mysql-test/std_data/corrupt-relay-bin.000624 differ
> diff -Nru a/mysql-test/t/mysqlbinlog.test b/mysql-test/t/mysqlbinlog.test
> --- a/mysql-test/t/mysqlbinlog.test 2007-11-08 16:05:24 +01:00
> +++ b/mysql-test/t/mysqlbinlog.test 2007-11-08 16:05:24 +01:00
> @@ -237,4 +237,8 @@
> --echo $c
> drop table t1;
>
> +echo shell> mysqlbinlog std_data/corrupt-relay-bin.000624 >
> var/tmp/bug31793.sql;
> +error 1;
> +exec $MYSQL_BINLOG $MYSQL_TEST_DIR/std_data/corrupt-relay-bin.000624 >
> $MYSQLTEST_VARDIR/tmp/bug31793.sql;
> +
> --echo End of 5.0 tests
> diff -Nru a/sql/log_event.cc b/sql/log_event.cc
> --- a/sql/log_event.cc 2007-11-08 16:05:24 +01:00
> +++ b/sql/log_event.cc 2007-11-08 16:05:24 +01:00
> @@ -1400,17 +1400,43 @@
>
> /* 2 utility functions for the next method */
>
> -/*
> - Get the pointer for a string (src) that contains the length in
> - the first byte. Set the output string (dst) to the string value
> - and place the length of the string in the byte after the string.
> +/**
> + Read a string with length from memory.
> +
> + Get the pointer for a string (src) that contains the length in
> + the first byte. Set the output string (dst) to the string value
> + and place the length of the string in the byte after the string.
> +
> + @param src Pointer to variable holding a pointer to the memory to
> + read the string from.
> + @param dst Pointer to variable holding a pointer where the string
> + actual string starts. Starting from this position, the
> + string can be copied using @c memcpy().
> + @param len Pointer to variable where the length will be stored.
> + @param end One-past-the-end of the memory where the string is
> + stored.
> +
> + @return Zero if the entire string can be copied successfully, @c
> + UINT_MAX if the length could not be read from memory
> + (that is, if *src >= end), the number of bytes that
> + are missing if we hit the end of the memory.
> */
> -static void get_str_len_and_pointer(const Log_event::Byte **src,
> - const char **dst,
> - uint *len)
> +static int
> +get_str_len_and_pointer(const Log_event::Byte **src,
> + const char **dst,
> + uint *len,
> + const Log_event::Byte *end)
> {
> - if ((*len= **src))
> - *dst= (char *)*src + 1; // Will be copied later
> + if (*src >= end)
> + return -1; // Will be UINT_MAX in two-complement arithmetics
> + uint length= **src;
> + if (length > 0)
Having length az zero might not be okay. There should be the minimum
for both Q_CATALOG_NZ_CODE and Q_TIME_ZONE_CODE.
> + {
> + if (*src + length >= end)
> + return *src + length - end; // Number of bytes missing
> + *dst= (char *)*src + 1; // Will be copied later
> + }
> + *len= length;
> (*src)+= *len + 1;
> }
This function does not look to me very necessary. In fact, I find it
of dubious benefit, as it rather obsures the code having just two
operations and being used twice all within the same caller.
Particularly, one of two (*src)+= *len + 1; increments incompatibly
with the rest of that the caller is doing.
I'd rather to remover the
function and see incrementing on the same switch..case lines as well
as commented further emergency leave from the function if length is
more than the max or the min (above).
>
> @@ -1424,6 +1450,23 @@
> *(*dst)++= 0;
> }
>
> +
> +/**
> + Macro to check that there is enough space to read from memory.
> +
> + @param PTR Pointer to memory
> + @param END End of memory
> + @param CNT Number of bytes that should be read.
> + */
> +#define CHECK_SPACE(PTR,END,CNT) \
> + do { \
> + DBUG_ASSERT((PTR) + (CNT) <= (END)); \
> + if ((PTR) + (CNT) > (END)) { \
> + query= 0; \
> + DBUG_VOID_RETURN; \
> + } \
> + } while (0)
> +
> /*
> Query_log_event::Query_log_event()
> This is used by the SQL slave thread to prepare the event before execution.
> @@ -1475,6 +1518,17 @@
> if (tmp)
> {
> status_vars_len= uint2korr(buf + Q_STATUS_VARS_LEN_OFFSET);
> + /*
> + Check if status variable length is corrupt and will lead to very
> + wrong data. We could be even more strict and require data_len to
> + be even bigger, but this will suffice to catch most corruption
> + errors that can lead to a crash.
> + */
I think you should compare with MAX_SIZE_LOG_EVENT_STATUS instead.
> + if (status_vars_len >= data_len + 1)
> + {
> + query= 0;
> + DBUG_VOID_RETURN;
> + }
> data_len-= status_vars_len;
> DBUG_PRINT("info", ("Query_log_event has status_vars_len: %u",
> (uint) status_vars_len));
> @@ -1494,6 +1548,7 @@
> {
> switch (*pos++) {
> case Q_FLAGS2_CODE:
> + CHECK_SPACE(pos, end, 4);
this checks are redundant and should be moved outside of the loop.
Obviously the new macro is not needed as it's enough to verify if
pos is still below end with pos < end.
> flags2_inited= 1;
> flags2= uint4korr(pos);
> DBUG_PRINT("info",("In Query_log_event, read flags2: %lu", (ulong) flags2));
> @@ -1504,6 +1559,7 @@
> #ifndef DBUG_OFF
> char buff[22];
> #endif
> + CHECK_SPACE(pos, end, 8);
> sql_mode_inited= 1;
> sql_mode= (ulong) uint8korr(pos); // QQ: Fix when sql_mode is ulonglong
> DBUG_PRINT("info",("In Query_log_event, read sql_mode: %s",
> @@ -1512,15 +1568,21 @@
> break;
> }
> case Q_CATALOG_NZ_CODE:
> - get_str_len_and_pointer(&pos, &catalog, &catalog_len);
Similar to time_zone_len comments, please find them below.
> + if (get_str_len_and_pointer(&pos, &catalog, &catalog_len, end))
> + {
> + query= 0;
> + DBUG_VOID_RETURN;
> + }
> break;
> case Q_AUTO_INCREMENT:
> + CHECK_SPACE(pos, end, 4);
> auto_increment_increment= uint2korr(pos);
> auto_increment_offset= uint2korr(pos+2);
> pos+= 4;
> break;
> case Q_CHARSET_CODE:
> {
> + CHECK_SPACE(pos, end, 6);
> charset_inited= 1;
> memcpy(charset, pos, 6);
> pos+= 6;
> @@ -1528,20 +1590,28 @@
> }
> case Q_TIME_ZONE_CODE:
> {
> - get_str_len_and_pointer(&pos, &time_zone_str, &time_zone_len);
I suggest to leave the signature of the function and compare
time_zone_len with the existing MAX_TIME_ZONE_NAME_LENGTH.
> + if (get_str_len_and_pointer(&pos, &time_zone_str, &time_zone_len,
> end))
> + {
> + query= 0;
> + DBUG_VOID_RETURN;
> + }
> break;
> }
> case Q_CATALOG_CODE: /* for 5.0.x where 0<=x<=3 masters */
> + CHECK_SPACE(pos, end, 1);
> if ((catalog_len= *pos))
> catalog= (char*) pos+1; // Will be copied later
> + CHECK_SPACE(pos, end, catalog_len + 2);
> pos+= catalog_len+2; // leap over end 0
> catalog_nz= 0; // catalog has end 0 in event
> break;
> case Q_LC_TIME_NAMES_CODE:
> + CHECK_SPACE(pos, end, 2);
> lc_time_names_number= uint2korr(pos);
> pos+= 2;
> break;
> case Q_CHARSET_DATABASE_CODE:
> + CHECK_SPACE(pos, end, 2);
> charset_database_number= uint2korr(pos);
> pos+= 2;
> break;
>
>
cheers,
/andrei