Hi Sergey,
Sergey Vojtovich wrote:
> Hi Alexander,
>
> ok to push. Though there are a few comments inline. I'm leaving
> them up to you to decide.
Thanks for your review! Please find my answers inline.
The new version is here:
http://lists.mysql.com/commits/125018
>
> On Thu, Nov 18, 2010 at 07:00:40AM +0000, Alexander Barkov wrote:
>> #At file:///home/bar/mysql-bzr/mysql-next-mr.w5510/ based on
> revid:alexander.nozdrin@stripped
>>
>> 3201 Alexander Barkov 2010-11-18
>> WL#5510 Functions to_base64 and from_base64
> ...skip...
>
>> === modified file 'mysys/base64.c'
>> --- a/mysys/base64.c 2009-03-20 14:27:53 +0000
>> +++ b/mysys/base64.c 2010-11-18 06:58:12 +0000
> ...skip...
>
>> @@ -100,24 +137,176 @@ base64_encode(const void *src, size_t sr
>> }
>>
>>
>> +/*
>> + Helper table for base64_decode
>> + -2 means "space character"
>> + -1 means "bad character"
>> + Non-negative values mean valid base64 encoding character.
>> +*/
>> +static int8
>> +from_base64_table[]=
>> +{
>> +/*00*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-2,-2,-2,-2,-2,-1,-1,
>> +/*10*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
>> +/*20*/ -2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, /* !"#$%&'()*+,-./
> */
>> +/*30*/ 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, /*
> 0123456789:;<=>? */
>> +/*40*/ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, /* @ABCDEFGHIJKLMNO */
>> +/*50*/ 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, /* PQRSTUVWXYZ[\]^_ */
>> +/*60*/ -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, /* `abcdefghijklmno */
>> +/*70*/ 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, /* pqrstuvwxyz{|}~ */
>> +/*80*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
>> +/*90*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
>> +/*A0*/ -2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
>> +/*B0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
>> +/*C0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
>> +/*D0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
>> +/*E0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
>> +/*F0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
>> +};
> You have ending spaces in the above lines and a lot more
> around code. Coding style says: "Avoid trailing whitespace,
> in code and comments."
Thanks. Fixed.
This command does not report any lines with spaces any more:
bzr diff |egrep "(. $|===)"
>
>> +
>> +
>> +/*
>> + Convert base64 encoding character to bits
>> +
>> + SYNOPSIS
>> + my_base64_pos
>> +
>> + c - input character
>> + error - pointer to error
>> +
>> + DESCRIPTION
>> + Convert input character to a number in the range [0..63].
>> + If input character is not valid base64 encoding character,
>> + then error is set to 1.
>> +
>> + RETURN VALUE
>> + -2 - c is a space character
>> + -1 - c is an unknow character
>> + non-negative number - c is a valid base64 encoding character
>> +*/
> This type of function comment is not modern. We use doxygen these days.
> Coding style says: "As of 2007, MySQL uses Doxygen comments for file,
> section, class, structure, function and method comments."
Fixed.
>
> Also you mess up "function status" and "return value". And return 0 on
> failure. Coding style says: "All functions that can report an error
> (usually an allocation error), should return 0/FALSE/false on success,
> 1/TRUE/true on failure. Other return values should go in an output argument."
I rewrote it in this style.
These three functions now use this notation:
my_base64_decoder_skip_spaces()
my_base64_add() (former my_base64_add)
my_base64_decoder_getch()
I found it easier to pass the entire "decoder" to these
functions instead of individual members.
>
>> static inline uint
>> -pos(unsigned char c)
>> +my_base64_pos(unsigned char c, int *error)
>> +{
>> + int res= from_base64_table[c];
>> + if (res < 0)
>> + {
>> + *error= 1;
>> + return 0;
>> + }
>> + return (uint) res;
>> +}
>> +
>> +
>> +typedef struct my_base64_decoder_t
>> {
>> - return (uint) (strchr(base64_table, c) - base64_table);
>> + const char *src; /* Pointer to the current input position */
>> + const char *end; /* Pointer to the end of input buffer */
>> + unsigned int c; /* Collect bits into this number */
>> + int error; /* Error code */
>> + uchar state; /* Character number in the current group of 4 */
>> + uchar mark; /* Number of padding marks in the current group */
>> +} MY_BASE64_DECODER;
>> +
>> +
>> +/*
>> + Skip leading spaces in a base64 encoded string
>> +
>> + SYNOPSIS
>> + my_base64_decoder_skip_spaces
>> +
>> + decoder Pointer to MY_BASE64_DECODER
>> +
>> + DESCRIPTION
>> + Skip leading spaces and stop on the first non-space character.
>> + decoder->p will point to the first non-space character, or
>> + to the end of the input string.
>> +
>> + RETURN VALUE
>> + 0 - end-of-input found
>> + 1 - if there are some more non-space characters
>> +*/
>> +static inline int
>> +my_base64_decoder_skip_spaces(MY_BASE64_DECODER *decoder)
>> +{
>> + for ( ; decoder->src < decoder->end; decoder->src++)
>> + {
>> + if (from_base64_table[(uchar) *decoder->src] != -2)
>> + return 1;
>> + }
>> + return 0;
>> }
>>
>>
>> -#define SKIP_SPACE(src, i, size) \
>> -{ \
>> - while (i < size && my_isspace(&my_charset_latin1, * src))
> \
>> - { \
>> - i++; \
>> - src++; \
>> - } \
>> - if (i == size) \
>> - { \
>> - break; \
>> - } \
>> +/*
>> + Get the next character from a base64 encoded string
>> +
>> + SYNOPSIS
>> + my_base64_decoder_getch
>> +
>> + decoder Pointer to MY_BASE64_DECODER
>> +
>> + DESCRIPTION
>> + Skip spaces, then scan the next base64 character or a pad character
>> + and collect bits into decoder->c.
>> +
>> + RETURN VALUE
>> + 0 - error (unexpected character or unexpected end-of-input found)
>> + 1 - success (valid base64 encoding character found)
>> +*/
>> +static inline int
>> +my_base64_decoder_getch(MY_BASE64_DECODER *decoder)
>> +{
>> + if (!my_base64_decoder_skip_spaces(decoder))
>> + {
>> + if (decoder->state > 0)
>> + decoder->error= 1; /* Unexpected end-of-input found */
>> + return 0;
>> + }
>> +
>> + decoder->c <<= 6;
>> + decoder->c+= my_base64_pos(*decoder->src++, &decoder->error);
>> +
>> + if (!decoder->error) /* Valid base64 character found */
>> + {
>> + if (decoder->mark)
>> + {
>> + /* If we have scanned '=' already, then only '=' is valid */
>> + DBUG_ASSERT(decoder->state == 3);
>> + decoder->error= 1;
>> + decoder->src--;
>> + return 0; /* '=' expected */
>> + }
>> + decoder->state++;
>> + return 1;
>> + }
>> +
>> + switch (decoder->state) {
> Coding style says:
> Indent switch like this:
>
> switch (condition)
> {
> case XXX:
> statements;
> case YYY:
> {
> statements;
> }
> }
>
>> + case 0:
>> + case 1:
>> + decoder->src--;
>> + return 0; /* base64 character expected */
>> + break;
>> +
>> + case 2:
>> + case 3:
>> + if (decoder->src[-1] == '=')
> Afair there is some document, which says that negative index is
> insecure and suggests not to use them. Unfortunately I didn't read
> this document and lost it's link. So I don't know details.
It is insecure in exactly the same extent with too-big positive index.
As far as you stay in the valid input data range, both are fine.
Case values '2' and '3 guarantee that we are at least 2 bytes
from the beginning, so src[-1] is *within* the valid data range,
i.e. not before the beginning.
>
>> + {
>> + decoder->error= 0;
>> + decoder->mark++;
>> + }
>> + else
>> + {
>> + decoder->src--;
>> + return 0; /* base64 character or '=' expected */
>> + }
>> + break;
>> +
>> + default:
>> + DBUG_ASSERT(0);
>> + }
>> +
>> + decoder->state++;
>> + return 1;
>> }
>>
>>
>> @@ -155,75 +344,40 @@ int
>> base64_decode(const char *src_base, size_t len,
>> void *dst, const char **end_ptr)
>> {
>> - char b[3];
>> - size_t i= 0;
>> - char *dst_base= (char *)dst;
>> - char const *src= src_base;
>> - char *d= dst_base;
>> - size_t j;
>> -
>> - while (i < len)
>> + char *d= (char*) dst;
>> + MY_BASE64_DECODER decoder;
>> +
>> + decoder.src= src_base;
>> + decoder.end= src_base + len;
>> + decoder.error= 0;
>> + decoder.mark= 0;
>> +
>> + for ( ; ; )
>> {
>> - unsigned c= 0;
>> - size_t mark= 0;
>> -
>> - SKIP_SPACE(src, i, len);
>> -
>> - c += pos(*src++);
>> - c <<= 6;
>> - i++;
>> -
>> - SKIP_SPACE(src, i, len);
>> -
>> - c += pos(*src++);
>> - c <<= 6;
>> - i++;
>> -
>> - SKIP_SPACE(src, i, len);
>> -
>> - if (*src != '=')
>> - c += pos(*src++);
>> - else
>> - {
>> - src += 2; /* There should be two bytes padding */
>> - i= len;
>> - mark= 2;
>> - c <<= 6;
>> - goto end;
>> - }
>> - c <<= 6;
>> - i++;
>> -
>> - SKIP_SPACE(src, i, len);
>> -
>> - if (*src != '=')
>> - c += pos(*src++);
>> - else
>> + decoder.c= 0;
>> + decoder.state= 0;
>> +
>> + if (!my_base64_decoder_getch(&decoder) ||
>> + !my_base64_decoder_getch(&decoder) ||
>> + !my_base64_decoder_getch(&decoder) ||
>> + !my_base64_decoder_getch(&decoder))
>> + break;
>> +
>> + *d++= (decoder.c >> 16) & 0xff;
>> + *d++= (decoder.c >> 8) & 0xff;
>> + *d++= (decoder.c >> 0) & 0xff;
>> +
>> + if (decoder.mark)
>> {
>> - src += 1; /* There should be one byte padding */
>> - i= len;
>> - mark= 1;
>> - goto end;
>> + d-= decoder.mark;
>> + break;
>> }
>> - i++;
>> -
>> - end:
>> - b[0]= (c >> 16) & 0xff;
>> - b[1]= (c >> 8) & 0xff;
>> - b[2]= (c >> 0) & 0xff;
>> -
>> - for (j=0; j<3-mark; j++)
>> - *d++= b[j];
>> }
>> -
>> +
>> if (end_ptr != NULL)
>> - *end_ptr= src;
>> -
>> - /*
>> - The variable 'i' is set to 'len' when padding has been read, so it
>> - does not actually reflect the number of bytes read from 'src'.
>> - */
>> - return i != len ? -1 : (int) (d - dst_base);
>> + *end_ptr= decoder.src;
>> +
>> + return decoder.error ? -1 : (int) (d - (char*) dst);
>> }
> In your reply to my first review you say: "... for the sake of better
> code readability".
>
> I did a small subjective comparison:
> I had no questions about old implementation after 7 minutes of study.
> I had no questions about new implementation after 137 minutes of study.
I'd expect that, because the old code was much simpler.
I did not do any control what is coming in the input
and accepted garbage.
With input control added, if I didn't use the functions
I'd have to copy a lot of duplicate code,
which would be harder to read.
>
> But new code does what it supposed to do. It doesn't break existing use
> cases. So I'm ok with new implementation.
Thanks :)
>
> Regards,
> Sergey