#At file:///home/bar/mysql-bzr/mysql-6.0.wl4164/
2657 Alexander Barkov 2008-06-24
WL#4164: Two-byte collation IDs
modified:
include/my_handler.h
include/my_sys.h
include/myisam.h
mysql-test/r/ctype_ldml.result
mysql-test/std_data/Index.xml
mysql-test/t/ctype_ldml.test
mysys/charset.c
sql/sql_show.cc
sql/table.cc
sql/unireg.cc
storage/myisam/mi_open.c
strings/conf_to_src.c
per-file comments:
include/my_handler.h
Extending data types
include/my_sys.h
Changing character set array size - 512 should be fine for the near future.
include/myisam.h
Changing data types.
mysql-test/r/ctype_ldml.result
Adding tests.
mysql-test/std_data/Index.xml
Using 2-byte collation IDs for tests
mysql-test/t/ctype_ldml.test
Adding tests.
mysys/charset.c
Using a constant instead of an explicit number.
sql/sql_show.cc
Using array_elements() instead of explicit size.
sql/table.cc
Storing high byte of table default collation ID in a vacant slot in FRM file.
sql/unireg.cc
Storing high byte of a field collation ID into a vacant slot.
storage/myisam/mi_open.c
Storing high byte of a collation ID into a vacant slot.
strings/conf_to_src.c
- using array_elements() instead of explicit number
- changing array size
=== modified file 'include/my_handler.h'
--- a/include/my_handler.h 2008-04-03 13:38:03 +0000
+++ b/include/my_handler.h 2008-06-24 10:02:11 +0000
@@ -53,8 +53,8 @@
uint16 bit_pos; /* Position to bit part */
uint16 flag;
uint16 length; /* Keylength */
+ uint16 language;
uint8 type; /* Type of key (for sort) */
- uint8 language;
uint8 null_bit; /* bitmask to test for NULL */
uint8 bit_start,bit_end; /* if bit field */
uint8 bit_length; /* Length of bit part */
=== modified file 'include/my_sys.h'
--- a/include/my_sys.h 2008-04-01 13:19:15 +0000
+++ b/include/my_sys.h 2008-06-24 10:02:11 +0000
@@ -221,8 +221,9 @@
#endif
/* charsets */
+#define MY_ALL_CHARSETS_SIZE 512
extern CHARSET_INFO *default_charset_info;
-extern CHARSET_INFO *all_charsets[256];
+extern CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE];
extern CHARSET_INFO compiled_charsets[];
/* statistics */
=== modified file 'include/myisam.h'
--- a/include/myisam.h 2008-04-03 13:38:03 +0000
+++ b/include/myisam.h 2008-06-24 10:02:11 +0000
@@ -165,7 +165,7 @@
ulonglong data_file_length;
ulonglong key_file_length;
uint old_options;
- uint8 language;
+ uint16 language;
my_bool with_auto_increment;
} MI_CREATE_INFO;
@@ -408,7 +408,7 @@
uint out_flag,warning_printed,error_printed,verbose;
uint opt_sort_key,total_files,max_level;
uint testflag, key_cache_block_size;
- uint8 language;
+ uint16 language;
my_bool using_global_keycache, opt_lock_memory, opt_follow_links;
my_bool retry_repair, force_sort;
char temp_filename[FN_REFLEN],*isam_file_name;
=== modified file 'mysql-test/r/ctype_ldml.result'
--- a/mysql-test/r/ctype_ldml.result 2007-10-22 11:43:32 +0000
+++ b/mysql-test/r/ctype_ldml.result 2008-06-24 10:02:11 +0000
@@ -8,7 +8,7 @@
character_sets_dir MYSQL_TEST_DIR/std_data/
show collation like 'utf8_test_ci';
Collation Charset Id Default Compiled Sortlen
-utf8_test_ci utf8 253 8
+utf8_test_ci utf8 353 8
create table t1 (c1 char(1) character set utf8 collate utf8_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
@@ -17,7 +17,7 @@
drop table t1;
show collation like 'ucs2_test_ci';
Collation Charset Id Default Compiled Sortlen
-ucs2_test_ci ucs2 158 8
+ucs2_test_ci ucs2 358 8
create table t1 (c1 char(1) character set ucs2 collate ucs2_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
@@ -26,7 +26,7 @@
drop table t1;
show collation like 'utf16_test_ci';
Collation Charset Id Default Compiled Sortlen
-utf16_test_ci utf16 127 8
+utf16_test_ci utf16 327 8
create table t1 (c1 char(1) character set utf16 collate utf16_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
@@ -35,7 +35,7 @@
drop table t1;
show collation like 'utf32_test_ci';
Collation Charset Id Default Compiled Sortlen
-utf32_test_ci utf32 191 8
+utf32_test_ci utf32 391 8
create table t1 (c1 char(1) character set utf32 collate utf32_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
@@ -45,7 +45,7 @@
Vietnamese experimental collation
show collation like 'ucs2_vn_ci';
Collation Charset Id Default Compiled Sortlen
-ucs2_vn_ci ucs2 159 8
+ucs2_vn_ci ucs2 359 8
create table t1 (c1 char(1) character set ucs2 collate ucs2_vn_ci);
insert into t1 values (0x0061),(0x0041),(0x00E0),(0x00C0),(0x1EA3),(0x1EA2),
(0x00E3),(0x00C3),(0x00E1),(0x00C1),(0x1EA1),(0x1EA0);
@@ -316,3 +316,29 @@
Xx
YyÝýỲỳỴỵỶỷỸỹ
drop table t1;
+The following tests check that two-byte collation IDs work
+select * from information_schema.collations where id>256 order by id;
+COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
+utf16_test_ci utf16 327 8
+utf8_test_ci utf8 353 8
+ucs2_test_ci ucs2 358 8
+ucs2_vn_ci ucs2 359 8
+utf32_test_ci utf32 391 8
+show collation like '%test%';
+Collation Charset Id Default Compiled Sortlen
+ucs2_test_ci ucs2 358 8
+utf8_test_ci utf8 353 8
+utf16_test_ci utf16 327 8
+utf32_test_ci utf32 391 8
+show collation like 'ucs2_vn_ci';
+Collation Charset Id Default Compiled Sortlen
+ucs2_vn_ci ucs2 359 8
+create table t1 (c1 char(1) character set ucs2 collate ucs2_vn_ci);
+insert into t1 values (0x0061);
+set @@character_set_results=NULL;
+select * from t1;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def test t1 t1 c1 c1 254 2 2 Y 0 0 359
+c1
+ a
+drop table t1;
=== modified file 'mysql-test/std_data/Index.xml'
--- a/mysql-test/std_data/Index.xml 2007-10-22 11:43:32 +0000
+++ b/mysql-test/std_data/Index.xml 2008-06-24 10:02:11 +0000
@@ -1,7 +1,7 @@
<charsets>
<charset name="utf8">
- <collation name="utf8_test_ci" id="253">
+ <collation name="utf8_test_ci" id="353">
<rules>
<reset>a</reset>
<s>b</s>
@@ -11,7 +11,7 @@
</charset>
<charset name="utf16">
- <collation name="utf16_test_ci" id="127">
+ <collation name="utf16_test_ci" id="327">
<rules>
<reset>a</reset>
<s>b</s>
@@ -20,7 +20,7 @@
</charset>
<charset name="utf32">
- <collation name="utf32_test_ci" id="191">
+ <collation name="utf32_test_ci" id="391">
<rules>
<reset>a</reset>
<s>b</s>
@@ -31,13 +31,13 @@
<charset name="ucs2">
- <collation name="ucs2_test_ci" id="158">
+ <collation name="ucs2_test_ci" id="358">
<rules>
<reset>a</reset>
<s>b</s>
</rules>
</collation>
- <collation name="ucs2_vn_ci" id="159">
+ <collation name="ucs2_vn_ci" id="359">
<!-- Vietnamese experimental collation -->
<rules>
<reset>A</reset>
=== modified file 'mysql-test/t/ctype_ldml.test'
--- a/mysql-test/t/ctype_ldml.test 2007-10-22 11:43:32 +0000
+++ b/mysql-test/t/ctype_ldml.test 2008-06-24 10:02:11 +0000
@@ -77,3 +77,21 @@
select group_concat(hex(c1) order by hex(c1)) from t1 group by c1;
select group_concat(c1 order by hex(c1) SEPARATOR '') from t1 group by c1;
drop table t1;
+
+
+-- echo The following tests check that two-byte collation IDs work
+-- The file ../std_data/Index.xml has a number of collations with high IDs.
+
+# Test that the "ID" column in I_S and SHOW queries can handle two bytes
+select * from information_schema.collations where id>256 order by id;
+show collation like '%test%';
+
+# Test that a two-byte collation ID is correctly transferred to the client side.
+show collation like 'ucs2_vn_ci';
+create table t1 (c1 char(1) character set ucs2 collate ucs2_vn_ci);
+insert into t1 values (0x0061);
+--enable_metadata
+set @@character_set_results=NULL;
+select * from t1;
+--disable_metadata
+drop table t1;
=== modified file 'mysys/charset.c'
--- a/mysys/charset.c 2008-02-19 21:53:33 +0000
+++ b/mysys/charset.c 2008-06-24 10:02:11 +0000
@@ -410,7 +410,7 @@
DBUG_RETURN(res);
}
-CHARSET_INFO *all_charsets[256];
+CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE];
CHARSET_INFO *default_charset_info = &my_charset_latin1;
void add_compiled_collation(CHARSET_INFO *cs)
=== modified file 'sql/sql_show.cc'
--- a/sql/sql_show.cc 2008-06-07 09:21:15 +0000
+++ b/sql/sql_show.cc 2008-06-24 10:02:11 +0000
@@ -4017,7 +4017,9 @@
TABLE *table= tables->table;
CHARSET_INFO *scs= system_charset_info;
- for (cs= all_charsets ; cs < all_charsets+255 ; cs++)
+ for (cs= all_charsets ;
+ cs < all_charsets + array_elements(all_charsets) ;
+ cs++)
{
CHARSET_INFO *tmp_cs= cs[0];
if (tmp_cs && (tmp_cs->state & MY_CS_PRIMARY) &&
@@ -4097,7 +4099,9 @@
const char *wild= thd->lex->wild ? thd->lex->wild->ptr() : NullS;
TABLE *table= tables->table;
CHARSET_INFO *scs= system_charset_info;
- for (cs= all_charsets ; cs < all_charsets+255 ; cs++ )
+ for (cs= all_charsets ;
+ cs < all_charsets + array_elements(all_charsets) ;
+ cs++ )
{
CHARSET_INFO **cl;
CHARSET_INFO *tmp_cs= cs[0];
@@ -4105,7 +4109,9 @@
(tmp_cs->state & MY_CS_HIDDEN) ||
!(tmp_cs->state & MY_CS_PRIMARY))
continue;
- for (cl= all_charsets; cl < all_charsets+255 ;cl ++)
+ for (cl= all_charsets;
+ cl < all_charsets + array_elements(all_charsets) ;
+ cl ++)
{
CHARSET_INFO *tmp_cl= cl[0];
if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) ||
@@ -4138,14 +4144,18 @@
CHARSET_INFO **cs;
TABLE *table= tables->table;
CHARSET_INFO *scs= system_charset_info;
- for (cs= all_charsets ; cs < all_charsets+255 ; cs++ )
+ for (cs= all_charsets ;
+ cs < all_charsets + array_elements(all_charsets) ;
+ cs++ )
{
CHARSET_INFO **cl;
CHARSET_INFO *tmp_cs= cs[0];
if (!tmp_cs || !(tmp_cs->state & MY_CS_AVAILABLE) ||
!(tmp_cs->state & MY_CS_PRIMARY))
continue;
- for (cl= all_charsets; cl < all_charsets+255 ;cl ++)
+ for (cl= all_charsets;
+ cl < all_charsets + array_elements(all_charsets) ;
+ cl ++)
{
CHARSET_INFO *tmp_cl= cl[0];
if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) ||
=== modified file 'sql/table.cc'
--- a/sql/table.cc 2008-04-01 13:19:15 +0000
+++ b/sql/table.cc 2008-06-24 10:02:11 +0000
@@ -722,7 +722,8 @@
share->transactional= (ha_choice) (head[39] & 3);
share->page_checksum= (ha_choice) ((head[39] >> 2) & 3);
share->row_type= (row_type) head[40];
- share->table_charset= get_charset((uint) head[38],MYF(0));
+ share->table_charset= get_charset((((uint) head[41]) << 8) +
+ (uint) head[38],MYF(0));
share->null_field_first= 1;
}
if (!share->table_charset)
@@ -1259,12 +1260,13 @@
}
else
{
- if (!strpos[14])
+ uint csid= strpos[14] + (((uint) strpos[11]) << 8);
+ if (!csid)
charset= &my_charset_bin;
- else if (!(charset=get_charset((uint) strpos[14], MYF(0))))
+ else if (!(charset= get_charset(csid, MYF(0))))
{
error= 5; // Unknown or unavailable charset
- errarg= (int) strpos[14];
+ errarg= (int) csid;
goto err;
}
}
@@ -2533,8 +2535,7 @@
if ((file= my_create(name, CREATE_MODE, create_flags, MYF(0))) >= 0)
{
- uint key_length, tmp_key_length;
- uint tmp;
+ uint key_length, tmp_key_length, tmp, csid;
bzero((char*) fileinfo,64);
/* header */
fileinfo[0]=(uchar) 254;
@@ -2583,13 +2584,14 @@
fileinfo[32]=0; // No filename anymore
fileinfo[33]=5; // Mark for 5.0 frm file
int4store(fileinfo+34,create_info->avg_row_length);
- fileinfo[38]= (create_info->default_table_charset ?
- create_info->default_table_charset->number : 0);
+ csid= (create_info->default_table_charset ?
+ create_info->default_table_charset->number : 0);
+ fileinfo[38]= (uchar) csid;
fileinfo[39]= (uchar) ((uint) create_info->transactional |
((uint) create_info->page_checksum << 2));
fileinfo[40]= (uchar) create_info->row_type;
/* Next few bytes where for RAID support */
- fileinfo[41]= 0;
+ fileinfo[41]= (uchar) (csid >> 8);
fileinfo[42]= 0;
fileinfo[43]= 0;
fileinfo[44]= 0;
=== modified file 'sql/unireg.cc'
--- a/sql/unireg.cc 2008-04-01 13:19:15 +0000
+++ b/sql/unireg.cc 2008-06-24 10:02:11 +0000
@@ -875,20 +875,27 @@
recpos= field->offset+1 + (uint) data_offset;
int3store(buff+5,recpos);
int2store(buff+8,field->pack_flag);
- int2store(buff+10,field->unireg_check);
+ DBUG_ASSERT(field->unireg_check < 256);
+ buff[10]= (uchar) field->unireg_check;
buff[12]= (uchar) field->interval_id;
buff[13]= (uchar) field->sql_type;
if (field->sql_type == MYSQL_TYPE_GEOMETRY)
{
+ buff[11]= 0;
buff[14]= (uchar) field->geom_type;
#ifndef HAVE_SPATIAL
DBUG_ASSERT(0); // Should newer happen
#endif
}
else if (field->charset)
+ {
+ buff[11]= (uchar) (field->charset->number >> 8);
buff[14]= (uchar) field->charset->number;
+ }
else
- buff[14]= 0; // Numerical
+ {
+ buff[11]= buff[14]= 0; // Numerical
+ }
int2store(buff+15, field->comment.length);
comment_length+= field->comment.length;
set_if_bigger(int_count,field->interval_id);
=== modified file 'storage/myisam/mi_open.c'
--- a/storage/myisam/mi_open.c 2008-04-01 13:19:15 +0000
+++ b/storage/myisam/mi_open.c 2008-06-24 10:02:11 +0000
@@ -1120,10 +1120,10 @@
ulong pos;
*ptr++= keyseg->type;
- *ptr++= keyseg->language;
+ *ptr++= keyseg->language & 0xFF; /* Collation ID, low byte */
*ptr++= keyseg->null_bit;
*ptr++= keyseg->bit_start;
- *ptr++= keyseg->bit_end;
+ *ptr++= keyseg->language >> 8; /* Collation ID, high byte */
*ptr++= keyseg->bit_length;
mi_int2store(ptr,keyseg->flag); ptr+=2;
mi_int2store(ptr,keyseg->length); ptr+=2;
@@ -1142,12 +1142,13 @@
keyseg->language = *ptr++;
keyseg->null_bit = *ptr++;
keyseg->bit_start = *ptr++;
- keyseg->bit_end = *ptr++;
+ keyseg->language += ((uint16) (*ptr++)) << 8;
keyseg->bit_length = *ptr++;
keyseg->flag = mi_uint2korr(ptr); ptr +=2;
keyseg->length = mi_uint2korr(ptr); ptr +=2;
keyseg->start = mi_uint4korr(ptr); ptr +=4;
keyseg->null_pos = mi_uint4korr(ptr); ptr +=4;
+ keyseg->bit_end= 0;
keyseg->charset=0; /* Will be filled in later */
if (keyseg->null_bit)
keyseg->bit_pos= (uint16)(keyseg->null_pos + (keyseg->null_bit == 7));
=== modified file 'strings/conf_to_src.c'
--- a/strings/conf_to_src.c 2007-08-24 23:25:50 +0000
+++ b/strings/conf_to_src.c 2008-06-24 10:02:11 +0000
@@ -23,7 +23,7 @@
#define ROW16_LEN 8
#define MAX_BUF 64*1024
-static CHARSET_INFO all_charsets[256];
+static CHARSET_INFO all_charsets[512];
void
@@ -63,7 +63,9 @@
static int get_charset_number(const char *charset_name)
{
CHARSET_INFO *cs;
- for (cs= all_charsets; cs < all_charsets+255; ++cs)
+ for (cs= all_charsets;
+ cs < all_charsets + array_elements(all_charsets);
+ cs++)
{
if ( cs->name && !strcmp(cs->name, charset_name))
return cs->number;
@@ -290,7 +292,9 @@
sprintf(filename,"%s/%s",argv[1],"Index.xml");
my_read_charset_file(filename);
- for (cs=all_charsets; cs < all_charsets+256; cs++)
+ for (cs= all_charsets;
+ cs < all_charsets + array_elements(all_charsets);
+ cs++)
{
if (cs->number && !(cs->state & MY_CS_COMPILED))
{
@@ -315,7 +319,9 @@
fprintf(f,"#include <m_ctype.h>\n\n");
- for (cs=all_charsets; cs < all_charsets+256; cs++)
+ for (cs= all_charsets;
+ cs < all_charsets + array_elements(all_charsets);
+ cs++)
{
if (simple_cs_is_full(cs))
{
@@ -332,7 +338,9 @@
}
fprintf(f,"CHARSET_INFO compiled_charsets[] = {\n");
- for (cs=all_charsets; cs < all_charsets+256; cs++)
+ for (cs= all_charsets;
+ cs < all_charsets + array_elements(all_charsets);
+ cs++)
{
if (simple_cs_is_full(cs))
{