From: Date: December 9 2005 1:38pm Subject: bk commit into 4.1 tree (bar:1.2488) BUG#15377 List-Archive: http://lists.mysql.com/commits/54 X-Bug: 15377 Message-Id: <200512091238.jB9Cc4nn056092@bar.intranet.mysql.r18.ru> Below is the list of changes that have just been committed into a local 4.1 repository of bar. When bar does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet 1.2488 05/12/09 16:37:58 bar@stripped +10 -0 Bug#15377 Valid multibyte sequences are truncated on INSERT ctype-euc_kr.c: ctype-gb2312.c: Adding specific well_formed_length functions for gb2312 and euckr, to allow storing characters which are correct according to the character set specifications but just don't have Unicode mapping. Previously only those which have Unicode mapping could be stored, while unassigned characters lead to data truncation. Many files: new file strings/ctype-euc_kr.c 1.61 05/12/09 16:35:57 bar@stripped +36 -1 Adding specific well_formed_length functions for gb2312 and euckr, to allow storing characters which are correct according to the character set. Previously only those which have Unicode mapping could be stored. strings/ctype-gb2312.c 1.58 05/12/09 16:34:08 bar@stripped +36 -1 Bug#15377 Valid multibyte sequences are truncated on INSERT Adding specific well_formed_length functions for gb2312 and euckr, to allow storing characters which are correct according to the character set. Previously only those which have Unicode mapping could be stored. 
mysql-test/t/ctype_gb2312.test 1.1 05/12/09 16:33:43 bar@stripped +33 -0 mysql-test/t/ctype_euckr.test 1.1 05/12/09 16:33:43 bar@stripped +33 -0 mysql-test/t/ctype_gb2312.test 1.0 05/12/09 16:33:43 bar@stripped +0 -0 BitKeeper file /usr/home/bar/mysql-4.1.b15377/mysql-test/t/ctype_gb2312.test mysql-test/t/ctype_euckr.test 1.0 05/12/09 16:33:43 bar@stripped +0 -0 BitKeeper file /usr/home/bar/mysql-4.1.b15377/mysql-test/t/ctype_euckr.test mysql-test/include/have_gb2312.inc 1.1 05/12/09 16:33:31 bar@stripped +4 -0 mysql-test/include/have_euckr.inc 1.1 05/12/09 16:33:31 bar@stripped +4 -0 mysql-test/include/have_gb2312.inc 1.0 05/12/09 16:33:31 bar@stripped +0 -0 BitKeeper file /usr/home/bar/mysql-4.1.b15377/mysql-test/include/have_gb2312.inc mysql-test/include/have_euckr.inc 1.0 05/12/09 16:33:31 bar@stripped +0 -0 BitKeeper file /usr/home/bar/mysql-4.1.b15377/mysql-test/include/have_euckr.inc mysql-test/r/have_gb2312.require 1.1 05/12/09 16:33:14 bar@stripped +2 -0 mysql-test/r/have_euckr.require 1.1 05/12/09 16:33:14 bar@stripped +2 -0 mysql-test/r/ctype_gb2312.result 1.1 05/12/09 16:33:14 bar@stripped +167 -0 mysql-test/r/have_gb2312.require 1.0 05/12/09 16:33:14 bar@stripped +0 -0 BitKeeper file /usr/home/bar/mysql-4.1.b15377/mysql-test/r/have_gb2312.require mysql-test/r/have_euckr.require 1.0 05/12/09 16:33:14 bar@stripped +0 -0 BitKeeper file /usr/home/bar/mysql-4.1.b15377/mysql-test/r/have_euckr.require mysql-test/r/ctype_gb2312.result 1.0 05/12/09 16:33:14 bar@stripped +0 -0 BitKeeper file /usr/home/bar/mysql-4.1.b15377/mysql-test/r/ctype_gb2312.result mysql-test/r/ctype_euckr.result 1.1 05/12/09 16:33:13 bar@stripped +167 -0 mysql-test/r/ctype_euckr.result 1.0 05/12/09 16:33:13 bar@stripped +0 -0 BitKeeper file /usr/home/bar/mysql-4.1.b15377/mysql-test/r/ctype_euckr.result # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. 
The rest of the patch, the part # that BitKeeper cares about, is below these diffs. # User: bar # Host: bar.intranet.mysql.r18.ru # Root: /usr/home/bar/mysql-4.1.b15377 --- 1.60/strings/ctype-euc_kr.c 2005-08-17 13:26:28 +05:00 +++ 1.61/strings/ctype-euc_kr.c 2005-12-09 16:35:57 +04:00 @@ -8635,6 +8635,41 @@ } +/* + Returns well formed length of a EUC-KR string. +*/ +static uint +my_well_formed_len_euckr(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e, + uint pos, int *error) +{ + const char *b0= b; + const char *emb= e - 1; /* Last possible end of an MB character */ + + *error= 0; + while (pos-- && b < e) + { + if ((uchar) b[0] < 128) + { + /* Single byte ascii character */ + b++; + } + else if (b < emb && iseuc_kr(*b) && iseuc_kr(b[1])) + { + /* Double byte character */ + b+= 2; + } + else + { + /* Wrong byte sequence */ + *error= 1; + break; + } + } + return (uint) (b - b0); +} + + static MY_COLLATION_HANDLER my_collation_ci_handler = { NULL, /* init */ @@ -8655,7 +8690,7 @@ mbcharlen_euc_kr, my_numchars_mb, my_charpos_mb, - my_well_formed_len_mb, + my_well_formed_len_euckr, my_lengthsp_8bit, my_numcells_8bit, my_mb_wc_euc_kr, /* mb_wc */ --- 1.57/strings/ctype-gb2312.c 2005-08-17 13:26:28 +05:00 +++ 1.58/strings/ctype-gb2312.c 2005-12-09 16:34:08 +04:00 @@ -5686,6 +5686,41 @@ } +/* + Returns well formed length of a GB2312 string. 
+*/ +static uint +my_well_formed_len_gb2312(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e, + uint pos, int *error) +{ + const char *b0= b; + const char *emb= e - 1; /* Last possible end of an MB character */ + + *error= 0; + while (pos-- && b < e) + { + if ((uchar) b[0] < 128) + { + /* Single byte ascii character */ + b++; + } + else if (b < emb && isgb2312head(*b) && isgb2312tail(b[1])) + { + /* Double byte character */ + b+= 2; + } + else + { + /* Wrong byte sequence */ + *error= 1; + break; + } + } + return (uint) (b - b0); +} + + static MY_COLLATION_HANDLER my_collation_ci_handler = { NULL, /* init */ @@ -5706,7 +5741,7 @@ mbcharlen_gb2312, my_numchars_mb, my_charpos_mb, - my_well_formed_len_mb, + my_well_formed_len_gb2312, my_lengthsp_8bit, my_numcells_8bit, my_mb_wc_gb2312, /* mb_wc */ --- New file --- +++ mysql-test/include/have_euckr.inc 05/12/09 16:33:31 -- require r/have_euckr.require disable_query_log; show collation like "euckr_korean_ci"; enable_query_log; --- New file --- +++ mysql-test/include/have_gb2312.inc 05/12/09 16:33:31 -- require r/have_gb2312.require disable_query_log; show collation like "gb2312_chinese_ci"; enable_query_log; --- New file --- +++ mysql-test/r/ctype_euckr.result 05/12/09 16:33:13 drop table if exists t1; SET @test_character_set= 'euckr'; SET @test_collation= 'euckr_korean_ci'; SET @safe_character_set_server= @@character_set_server; SET @safe_collation_server= @@collation_server; SET character_set_server= @test_character_set; SET collation_server= @test_collation; CREATE DATABASE d1; USE d1; CREATE TABLE t1 (c CHAR(10), KEY(c)); SHOW FULL COLUMNS FROM t1; Field Type Collation Null Key Default Extra Privileges Comment c char(10) euckr_korean_ci YES MUL NULL INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa'); SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%'; want3results aaa aaaa aaaaa DROP TABLE t1; CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2))); SHOW FULL COLUMNS FROM t1; Field Type Collation 
Null Key Default Extra Privileges Comment c1 varchar(15) euckr_korean_ci YES MUL NULL INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab'); SELECT c1 as want3results from t1 where c1 like 'l%'; want3results location loberge lotre SELECT c1 as want3results from t1 where c1 like 'lo%'; want3results location loberge lotre SELECT c1 as want1result from t1 where c1 like 'loc%'; want1result location SELECT c1 as want1result from t1 where c1 like 'loca%'; want1result location SELECT c1 as want1result from t1 where c1 like 'locat%'; want1result location SELECT c1 as want1result from t1 where c1 like 'locati%'; want1result location SELECT c1 as want1result from t1 where c1 like 'locatio%'; want1result location SELECT c1 as want1result from t1 where c1 like 'location%'; want1result location DROP TABLE t1; DROP DATABASE d1; USE test; SET character_set_server= @safe_character_set_server; SET collation_server= @safe_collation_server; SET NAMES euckr; SET collation_connection='euckr_korean_ci'; create table t1 select repeat('a',4000) a; delete from t1; insert into t1 values ('a'), ('a '), ('a\t'); select collation(a),hex(a) from t1 order by a; collation(a) hex(a) euckr_korean_ci 6109 euckr_korean_ci 61 euckr_korean_ci 6120 drop table t1; create table t1 engine=innodb select repeat('a',50) as c1; alter table t1 add index(c1(5)); insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111'); select collation(c1) from t1 limit 1; collation(c1) euckr_korean_ci select c1 from t1 where c1 like 'abcdef%' order by c1; c1 abcdefg select c1 from t1 where c1 like 'abcde1%' order by c1; c1 abcde100 abcde110 abcde111 select c1 from t1 where c1 like 'abcde11%' order by c1; c1 abcde110 abcde111 select c1 from t1 where c1 like 'abcde111%' order by c1; c1 abcde111 drop table t1; select @@collation_connection; @@collation_connection euckr_korean_ci create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; insert into t1 values('abcdef'); insert into t1 
values('_bcdef'); insert into t1 values('a_cdef'); insert into t1 values('ab_def'); insert into t1 values('abc_ef'); insert into t1 values('abcd_f'); insert into t1 values('abcde_'); select c1 as c1u from t1 where c1 like 'ab\_def'; c1u ab_def select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; c2h ab_def drop table t1; SET collation_connection='euckr_bin'; create table t1 select repeat('a',4000) a; delete from t1; insert into t1 values ('a'), ('a '), ('a\t'); select collation(a),hex(a) from t1 order by a; collation(a) hex(a) euckr_bin 6109 euckr_bin 61 euckr_bin 6120 drop table t1; create table t1 engine=innodb select repeat('a',50) as c1; alter table t1 add index(c1(5)); insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111'); select collation(c1) from t1 limit 1; collation(c1) euckr_bin select c1 from t1 where c1 like 'abcdef%' order by c1; c1 abcdefg select c1 from t1 where c1 like 'abcde1%' order by c1; c1 abcde100 abcde110 abcde111 select c1 from t1 where c1 like 'abcde11%' order by c1; c1 abcde110 abcde111 select c1 from t1 where c1 like 'abcde111%' order by c1; c1 abcde111 drop table t1; select @@collation_connection; @@collation_connection euckr_bin create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; insert into t1 values('abcdef'); insert into t1 values('_bcdef'); insert into t1 values('a_cdef'); insert into t1 values('ab_def'); insert into t1 values('abc_ef'); insert into t1 values('abcd_f'); insert into t1 values('abcde_'); select c1 as c1u from t1 where c1 like 'ab\_def'; c1u ab_def select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; c2h ab_def drop table t1; SET NAMES euckr; CREATE TABLE t1 (a text) character set euckr; INSERT INTO t1 VALUES (0xA2E6),(0xFEF7); SELECT hex(a) FROM t1 ORDER BY a; hex(a) A2E6 FEF7 DROP TABLE t1; --- New file --- +++ mysql-test/r/ctype_gb2312.result 05/12/09 16:33:14 drop table if exists t1; SET @test_character_set= 'gb2312'; SET @test_collation= 'gb2312_chinese_ci'; SET 
@safe_character_set_server= @@character_set_server; SET @safe_collation_server= @@collation_server; SET character_set_server= @test_character_set; SET collation_server= @test_collation; CREATE DATABASE d1; USE d1; CREATE TABLE t1 (c CHAR(10), KEY(c)); SHOW FULL COLUMNS FROM t1; Field Type Collation Null Key Default Extra Privileges Comment c char(10) gb2312_chinese_ci YES MUL NULL INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa'); SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%'; want3results aaa aaaa aaaaa DROP TABLE t1; CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2))); SHOW FULL COLUMNS FROM t1; Field Type Collation Null Key Default Extra Privileges Comment c1 varchar(15) gb2312_chinese_ci YES MUL NULL INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab'); SELECT c1 as want3results from t1 where c1 like 'l%'; want3results location loberge lotre SELECT c1 as want3results from t1 where c1 like 'lo%'; want3results location loberge lotre SELECT c1 as want1result from t1 where c1 like 'loc%'; want1result location SELECT c1 as want1result from t1 where c1 like 'loca%'; want1result location SELECT c1 as want1result from t1 where c1 like 'locat%'; want1result location SELECT c1 as want1result from t1 where c1 like 'locati%'; want1result location SELECT c1 as want1result from t1 where c1 like 'locatio%'; want1result location SELECT c1 as want1result from t1 where c1 like 'location%'; want1result location DROP TABLE t1; DROP DATABASE d1; USE test; SET character_set_server= @safe_character_set_server; SET collation_server= @safe_collation_server; SET NAMES gb2312; SET collation_connection='gb2312_chinese_ci'; create table t1 select repeat('a',4000) a; delete from t1; insert into t1 values ('a'), ('a '), ('a\t'); select collation(a),hex(a) from t1 order by a; collation(a) hex(a) gb2312_chinese_ci 6109 gb2312_chinese_ci 61 gb2312_chinese_ci 6120 drop table t1; create table t1 engine=innodb select repeat('a',50) as c1; alter table t1 add index(c1(5)); insert 
into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111'); select collation(c1) from t1 limit 1; collation(c1) gb2312_chinese_ci select c1 from t1 where c1 like 'abcdef%' order by c1; c1 abcdefg select c1 from t1 where c1 like 'abcde1%' order by c1; c1 abcde100 abcde110 abcde111 select c1 from t1 where c1 like 'abcde11%' order by c1; c1 abcde110 abcde111 select c1 from t1 where c1 like 'abcde111%' order by c1; c1 abcde111 drop table t1; select @@collation_connection; @@collation_connection gb2312_chinese_ci create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; insert into t1 values('abcdef'); insert into t1 values('_bcdef'); insert into t1 values('a_cdef'); insert into t1 values('ab_def'); insert into t1 values('abc_ef'); insert into t1 values('abcd_f'); insert into t1 values('abcde_'); select c1 as c1u from t1 where c1 like 'ab\_def'; c1u ab_def select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; c2h ab_def drop table t1; SET collation_connection='gb2312_bin'; create table t1 select repeat('a',4000) a; delete from t1; insert into t1 values ('a'), ('a '), ('a\t'); select collation(a),hex(a) from t1 order by a; collation(a) hex(a) gb2312_bin 6109 gb2312_bin 61 gb2312_bin 6120 drop table t1; create table t1 engine=innodb select repeat('a',50) as c1; alter table t1 add index(c1(5)); insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111'); select collation(c1) from t1 limit 1; collation(c1) gb2312_bin select c1 from t1 where c1 like 'abcdef%' order by c1; c1 abcdefg select c1 from t1 where c1 like 'abcde1%' order by c1; c1 abcde100 abcde110 abcde111 select c1 from t1 where c1 like 'abcde11%' order by c1; c1 abcde110 abcde111 select c1 from t1 where c1 like 'abcde111%' order by c1; c1 abcde111 drop table t1; select @@collation_connection; @@collation_connection gb2312_bin create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ; insert into t1 values('abcdef'); insert into t1 values('_bcdef'); insert into t1 
values('a_cdef'); insert into t1 values('ab_def'); insert into t1 values('abc_ef'); insert into t1 values('abcd_f'); insert into t1 values('abcde_'); select c1 as c1u from t1 where c1 like 'ab\_def'; c1u ab_def select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; c2h ab_def drop table t1; SET NAMES gb2312; CREATE TABLE t1 (a text) character set gb2312; INSERT INTO t1 VALUES (0xA2A1),(0xD7FE); SELECT hex(a) FROM t1 ORDER BY a; hex(a) A2A1 D7FE DROP TABLE t1; --- New file --- +++ mysql-test/r/have_euckr.require 05/12/09 16:33:14 Collation Charset Id Default Compiled Sortlen euckr_korean_ci euckr 19 Yes Yes 1 --- New file --- +++ mysql-test/r/have_gb2312.require 05/12/09 16:33:14 Collation Charset Id Default Compiled Sortlen gb2312_chinese_ci gb2312 24 Yes Yes 1 --- New file --- +++ mysql-test/t/ctype_euckr.test 05/12/09 16:33:43 -- source include/have_euckr.inc # # Tests with the euckr character set # --disable_warnings drop table if exists t1; --enable_warnings SET @test_character_set= 'euckr'; SET @test_collation= 'euckr_korean_ci'; -- source include/ctype_common.inc SET NAMES euckr; SET collation_connection='euckr_korean_ci'; -- source include/ctype_filesort.inc -- source include/ctype_innodb_like.inc -- source include/ctype_like_escape.inc SET collation_connection='euckr_bin'; -- source include/ctype_filesort.inc -- source include/ctype_innodb_like.inc -- source include/ctype_like_escape.inc # # Bug#15377 Valid multibyte sequences are truncated on INSERT # SET NAMES euckr; CREATE TABLE t1 (a text) character set euckr; INSERT INTO t1 VALUES (0xA2E6),(0xFEF7); SELECT hex(a) FROM t1 ORDER BY a; DROP TABLE t1; # End of 4.1 tests --- New file --- +++ mysql-test/t/ctype_gb2312.test 05/12/09 16:33:43 -- source include/have_gb2312.inc # # Tests with the gb2312 character set # --disable_warnings drop table if exists t1; --enable_warnings SET @test_character_set= 'gb2312'; SET @test_collation= 'gb2312_chinese_ci'; -- source include/ctype_common.inc SET NAMES 
gb2312; SET collation_connection='gb2312_chinese_ci'; -- source include/ctype_filesort.inc -- source include/ctype_innodb_like.inc -- source include/ctype_like_escape.inc SET collation_connection='gb2312_bin'; -- source include/ctype_filesort.inc -- source include/ctype_innodb_like.inc -- source include/ctype_like_escape.inc # # Bug#15377 Valid multibyte sequences are truncated on INSERT # SET NAMES gb2312; CREATE TABLE t1 (a text) character set gb2312; INSERT INTO t1 VALUES (0xA2A1),(0xD7FE); SELECT hex(a) FROM t1 ORDER BY a; DROP TABLE t1; # End of 4.1 tests