From: ingo Date: March 23 2007 8:54am Subject: bk commit into 4.1 tree (istruewing:1.2609) BUG#24985 List-Archive: http://lists.mysql.com/commits/22734 X-Bug: 24985 Message-Id: Below is the list of changes that have just been committed into a local 4.1 repository of istruewing. When istruewing does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet@stripped, 2007-03-23 09:54:49+01:00, istruewing@stripped +5 -0 Bug#24985 - UTF8 ENUM primary key on MEMORY using BTREE causes incorrect duplicate entries Keys for a BTREE index on an ENUM column of a MEMORY table with character set UTF8 were computed incorrectly. Many different column values got the same key value. Apart from possible performance problems, this made unique indexes of this type unusable because many different values were rejected as duplicates. The problem was that multibyte character detection was attempted on the internal numeric column value. Many values were not identified as characters. Their key values became blank-filled. The solution is modelled after the equivalent behavior of MyISAM. A character set is defined for key segments of text type only. Binary segments get a pseudo binary charset attached. All other types do not get a charset attached. heap/hp_hash.c@stripped, 2007-03-23 09:54:46+01:00, istruewing@stripped +11 -4 Bug#24985 - UTF8 ENUM primary key on MEMORY using BTREE causes incorrect duplicate entries Added tests for unset HA_KEYSEG::charset. Added DBUG_ASSERT for HA_KEYSEG::charset to be set in key segments of text type. Fixed a compiler warning. heap/hp_write.c@stripped, 2007-03-23 09:54:47+01:00, istruewing@stripped +4 -4 Bug#24985 - UTF8 ENUM primary key on MEMORY using BTREE causes incorrect duplicate entries Fixed two compiler warnings. 
mysql-test/r/heap_btree.result@stripped, 2007-03-23 09:54:47+01:00, istruewing@stripped +6 -0 Bug#24985 - UTF8 ENUM primary key on MEMORY using BTREE causes incorrect duplicate entries Added test result. mysql-test/t/heap_btree.test@stripped, 2007-03-23 09:54:47+01:00, istruewing@stripped +11 -0 Bug#24985 - UTF8 ENUM primary key on MEMORY using BTREE causes incorrect duplicate entries Added test. sql/ha_heap.cc@stripped, 2007-03-23 09:54:47+01:00, istruewing@stripped +12 -1 Bug#24985 - UTF8 ENUM primary key on MEMORY using BTREE causes incorrect duplicate entries Added code to set up the character set for key segments. This is modelled after the equivalent code snippet in mi_open(). # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. # User: istruewing # Host: chilla.local # Root: /home/mydev/mysql-4.1-bug24985 --- 1.36/heap/hp_hash.c 2007-03-23 09:54:52 +01:00 +++ 1.37/heap/hp_hash.c 2007-03-23 09:54:52 +01:00 @@ -120,7 +120,7 @@ byte *hp_search(HP_INFO *info, HP_KEYDEF { switch (nextflag) { case 0: /* Search after key */ - DBUG_PRINT("exit",("found key at %d",pos->ptr_to_rec)); + DBUG_PRINT("exit",("found key at 0x%lx", (long) pos->ptr_to_rec)); info->current_hash_ptr=pos; DBUG_RETURN(info->current_ptr= pos->ptr_to_rec); case 1: /* Search next */ @@ -263,6 +263,7 @@ ulong hp_hashnr(register HP_KEYDEF *keyd { CHARSET_INFO *cs= seg->charset; uint char_length= (uint) ((uchar*) key - pos); + DBUG_ASSERT(cs); if (cs->mbmaxlen > 1) { uint length= char_length; @@ -306,6 +307,7 @@ ulong hp_rec_hashnr(register HP_KEYDEF * { CHARSET_INFO *cs= seg->charset; uint char_length= seg->length; + DBUG_ASSERT(cs); if (cs->mbmaxlen > 1) { char_length= my_charpos(cs, pos, pos + char_length, @@ -438,6 +440,7 @@ int hp_rec_key_cmp(HP_KEYDEF *keydef, co uint char_length2; uchar *pos1= (uchar*)rec1 + seg->start; uchar *pos2= (uchar*)rec2 + 
seg->start; + DBUG_ASSERT(cs); if (cs->mbmaxlen > 1) { uint char_length= seg->length / cs->mbmaxlen; @@ -488,6 +491,7 @@ int hp_key_cmp(HP_KEYDEF *keydef, const uint char_length_key; uint char_length_rec; uchar *pos= (uchar*) rec + seg->start; + DBUG_ASSERT(cs); if (cs->mbmaxlen > 1) { uint char_length= seg->length / cs->mbmaxlen; @@ -530,7 +534,8 @@ void hp_make_key(HP_KEYDEF *keydef, byte uchar *pos= (uchar*) rec + seg->start; if (seg->null_bit) *key++= test(rec[seg->null_pos] & seg->null_bit); - if (cs->mbmaxlen > 1) + /* Charset is defined for text and binary key segments only. */ + if (cs && (cs->mbmaxlen > 1)) { char_length= my_charpos(cs, pos, pos + seg->length, char_length / cs->mbmaxlen); @@ -594,7 +599,8 @@ uint hp_rb_make_key(HP_KEYDEF *keydef, b continue; } char_length= seg->length; - if (seg->charset->mbmaxlen > 1) + /* Charset is defined for text and binary key segments only. */ + if (seg->charset && (seg->charset->mbmaxlen > 1)) { char_length= my_charpos(seg->charset, rec + seg->start, rec + seg->start + char_length, @@ -644,7 +650,8 @@ uint hp_rb_pack_key(HP_KEYDEF *keydef, u continue; } char_length= seg->length; - if (seg->charset->mbmaxlen > 1) + /* Charset is defined for text and binary key segments only. 
*/ + if (seg->charset && (seg->charset->mbmaxlen > 1)) { char_length= my_charpos(seg->charset, old, old+char_length, char_length / seg->charset->mbmaxlen); --- 1.21/heap/hp_write.c 2007-03-23 09:54:52 +01:00 +++ 1.22/heap/hp_write.c 2007-03-23 09:54:52 +01:00 @@ -144,7 +144,7 @@ static byte *next_free_record_pos(HP_SHA pos=info->del_link; info->del_link= *((byte**) pos); info->deleted--; - DBUG_PRINT("exit",("Used old position: %lx",pos)); + DBUG_PRINT("exit",("Used old position: 0x%lx", (long) pos)); DBUG_RETURN(pos); } if (!(block_pos=(info->records % info->block.records_in_block))) @@ -159,9 +159,9 @@ static byte *next_free_record_pos(HP_SHA DBUG_RETURN(NULL); info->data_length+=length; } - DBUG_PRINT("exit",("Used new position: %lx", - (byte*) info->block.level_info[0].last_blocks+block_pos* - info->block.recbuffer)); + DBUG_PRINT("exit",("Used new position: 0x%lx", + (long) info->block.level_info[0].last_blocks + + block_pos * info->block.recbuffer)); DBUG_RETURN((byte*) info->block.level_info[0].last_blocks+ block_pos*info->block.recbuffer); } --- 1.61/sql/ha_heap.cc 2007-03-23 09:54:52 +01:00 +++ 1.62/sql/ha_heap.cc 2007-03-23 09:54:52 +01:00 @@ -549,7 +549,18 @@ int ha_heap::create(const char *name, TA seg->start= (uint) key_part->offset; seg->length= (uint) key_part->length; seg->flag = 0; - seg->charset= field->charset(); + /* + Set the character set for the key segment. Most segment types do + not have/need a charset. For text segments the field has a valid + charset. For binary segments we can use a pseudo charset. 
+ */ + seg->charset= NULL; + if (seg->type == HA_KEYTYPE_TEXT || seg->type == HA_KEYTYPE_VARTEXT) + { + seg->charset= field->charset(); + } + else if (seg->type == HA_KEYTYPE_BINARY) + seg->charset= &my_charset_bin; if (field->null_ptr) { seg->null_bit= field->null_bit; --- 1.19/mysql-test/r/heap_btree.result 2007-03-23 09:54:53 +01:00 +++ 1.20/mysql-test/r/heap_btree.result 2007-03-23 09:54:53 +01:00 @@ -280,4 +280,10 @@ a 1 1 drop table t1; +CREATE TABLE t1 ( +c1 ENUM('1', '2'), +UNIQUE USING BTREE(c1) +) ENGINE= MEMORY DEFAULT CHARSET= utf8; +INSERT INTO t1 VALUES('1'), ('2'); +DROP TABLE t1; End of 4.1 tests --- 1.15/mysql-test/t/heap_btree.test 2007-03-23 09:54:53 +01:00 +++ 1.16/mysql-test/t/heap_btree.test 2007-03-23 09:54:53 +01:00 @@ -182,4 +182,15 @@ delete from t1 where a >= 2; select a from t1 order by a; drop table t1; +# +# Bug#24985 - UTF8 ENUM primary key on MEMORY using BTREE +# causes incorrect duplicate entries +# +CREATE TABLE t1 ( + c1 ENUM('1', '2'), + UNIQUE USING BTREE(c1) +) ENGINE= MEMORY DEFAULT CHARSET= utf8; +INSERT INTO t1 VALUES('1'), ('2'); +DROP TABLE t1; + --echo End of 4.1 tests