List:Commits« Previous MessageNext Message »
From:marko.makela Date:June 29 2010 11:33am
Subject:bzr commit into mysql-trunk-innodb branch (marko.makela:3126) Bug#52199
View as plain text  
#At file:///home/marko/innobase/dev/mysql2a/5.5-innodb/ based on revid:vasil.dimov@strippedst4532fdoix

 3126 Marko Mäkelä	2010-06-29
      Bug#52199 utf32: mbminlen=4, mbmaxlen=4, type->mbminlen=0, type->mbmaxlen=4
      
      Merge and adjust a forgotten change to fix this bug.
      rb://393 approved by Jimmy Yang
        ------------------------------------------------------------------------
        r3794 | marko | 2009-01-07 14:14:53 +0000 (Wed, 07 Jan 2009) | 18 lines
      
        branches/6.0: Allow the minimum length of a multi-byte character to be
        up to 4 bytes. (Bug #35391)
      
        dtype_t, dict_col_t: Replace mbminlen:2, mbmaxlen:3 with mbminmaxlen:5.
        In this way, the 5 bits can hold two values of 0..4, and the storage size
        of the fields will not cross the 64-bit boundary.  Encode the values as
        DATA_MBMAX * mbmaxlen + mbminlen.  Define the auxiliary macros
        DATA_MBMINLEN(mbminmaxlen), DATA_MBMAXLEN(mbminmaxlen), and
        DATA_MBMINMAXLEN(mbminlen, mbmaxlen).
      
        Try to trim and pad UTF-16 and UTF-32 with spaces as appropriate.
      
        Alexander Barkov suggested the use of cs->cset->fill(cs, buff, len, 0x20).
        ha_innobase::store_key_val_for_row() now does that, but the added function
        row_mysql_pad_col() does not, because it doesn't have the MySQL TABLE object.
      
        rb://49 approved by Heikki Tuuri
        ------------------------------------------------------------------------

    added:
      mysql-test/suite/innodb/r/innodb_bug52199.result
      mysql-test/suite/innodb/t/innodb_bug52199.test
    modified:
      storage/innobase/data/data0type.c
      storage/innobase/dict/dict0mem.c
      storage/innobase/handler/ha_innodb.cc
      storage/innobase/handler/handler0alter.cc
      storage/innobase/include/data0type.h
      storage/innobase/include/data0type.ic
      storage/innobase/include/dict0dict.h
      storage/innobase/include/dict0dict.ic
      storage/innobase/include/dict0mem.h
      storage/innobase/include/dict0mem.ic
      storage/innobase/include/row0mysql.h
      storage/innobase/row/row0ins.c
      storage/innobase/row/row0merge.c
      storage/innobase/row/row0mysql.c
      storage/innobase/row/row0row.c
      storage/innobase/row/row0sel.c
      storage/innobase/row/row0upd.c
      storage/innobase/trx/trx0trx.c
=== added file 'mysql-test/suite/innodb/r/innodb_bug52199.result'
--- a/mysql-test/suite/innodb/r/innodb_bug52199.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/innodb/r/innodb_bug52199.result	revid:marko.makela@strippedom-20100629113248-fvl48lnzr44z94gg
@@ -0,0 +1,5 @@
+CREATE TABLE bug52199 (a INT NOT NULL,
+b CHAR(125) CHARACTER SET utf32 COLLATE utf32_bin NOT NULL
+)ENGINE=InnoDB;
+CREATE UNIQUE INDEX idx ON bug52199(a);
+DROP TABLE bug52199;

=== added file 'mysql-test/suite/innodb/t/innodb_bug52199.test'
--- a/mysql-test/suite/innodb/t/innodb_bug52199.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/innodb/t/innodb_bug52199.test	revid:marko.makela@strippedm-20100629113248-fvl48lnzr44z94gg
@@ -0,0 +1,7 @@
+-- source include/have_innodb.inc
+
+CREATE TABLE bug52199 (a INT NOT NULL,
+b CHAR(125) CHARACTER SET utf32 COLLATE utf32_bin NOT NULL
+)ENGINE=InnoDB;
+CREATE UNIQUE INDEX idx ON bug52199(a);
+DROP TABLE bug52199;

=== modified file 'storage/innobase/data/data0type.c'
--- a/storage/innobase/data/data0type.c	revid:vasil.dimov@stripped125224-v6m9zst4532fdoix
+++ b/storage/innobase/data/data0type.c	revid:marko.makela@strippedlnzr44z94gg
@@ -49,10 +49,8 @@ ulint
 dtype_get_at_most_n_mbchars(
 /*========================*/
 	ulint		prtype,		/*!< in: precise type */
-	ulint		mbminlen,	/*!< in: minimum length of a
-					multi-byte character */
-	ulint		mbmaxlen,	/*!< in: maximum length of a
-					multi-byte character */
+	ulint		mbminmaxlen,	/*!< in: minimum and maximum length of
+					a multi-byte character */
 	ulint		prefix_len,	/*!< in: length of the requested
 					prefix, in characters, multiplied by
 					dtype_get_mbmaxlen(dtype) */
@@ -60,6 +58,9 @@ dtype_get_at_most_n_mbchars(
 	const char*	str)		/*!< in: the string whose prefix
 					length is being determined */
 {
+	ulint	mbminlen = DATA_MBMINLEN(mbminmaxlen);
+	ulint	mbmaxlen = DATA_MBMAXLEN(mbminmaxlen);
+
 	ut_a(data_len != UNIV_SQL_NULL);
 	ut_ad(!mbmaxlen || !(prefix_len % mbmaxlen));
 
@@ -180,7 +181,7 @@ dtype_validate(
 	}
 
 #ifndef UNIV_HOTBACKUP
-	ut_a(type->mbminlen <= type->mbmaxlen);
+	ut_a(dtype_get_mbminlen(type) <= dtype_get_mbmaxlen(type));
 #endif /* !UNIV_HOTBACKUP */
 
 	return(TRUE);

=== modified file 'storage/innobase/dict/dict0mem.c'
--- a/storage/innobase/dict/dict0mem.c	revid:vasil.dimov@stripped6m9zst4532fdoix
+++ b/storage/innobase/dict/dict0mem.c	revid:marko.makela@strippedgg
@@ -206,6 +206,37 @@ dict_mem_table_add_col(
 	dict_mem_fill_column_struct(col, i, mtype, prtype, len);
 }
 
+
+/**********************************************************************//**
+This function populates a dict_col_t memory structure with
+supplied information. */
+UNIV_INTERN
+void
+dict_mem_fill_column_struct(
+/*========================*/
+	dict_col_t*	column,		/*!< out: column struct to be
+					filled */
+	ulint		col_pos,	/*!< in: column position */
+	ulint		mtype,		/*!< in: main data type */
+	ulint		prtype,		/*!< in: precise type */
+	ulint		col_len)	/*!< in: column length */
+{
+#ifndef UNIV_HOTBACKUP
+	ulint	mbminlen;
+	ulint	mbmaxlen;
+#endif /* !UNIV_HOTBACKUP */
+
+	column->ind = (unsigned int) col_pos;
+	column->ord_part = 0;
+	column->mtype = (unsigned int) mtype;
+	column->prtype = (unsigned int) prtype;
+	column->len = (unsigned int) col_len;
+#ifndef UNIV_HOTBACKUP
+        dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen);
+	dict_col_set_mbminmaxlen(column, mbminlen, mbmaxlen);
+#endif /* !UNIV_HOTBACKUP */
+}
+
 /**********************************************************************//**
 Creates an index memory object.
 @return	own: index object */

=== modified file 'storage/innobase/handler/ha_innodb.cc'
--- a/storage/innobase/handler/ha_innodb.cc	revid:vasil.dimov@stripped
+++ b/storage/innobase/handler/ha_innodb.cc	revid:marko.makela@stripped
@@ -1036,6 +1036,8 @@ innobase_get_cset_width(
 	if (cs) {
 		*mbminlen = cs->mbminlen;
 		*mbmaxlen = cs->mbmaxlen;
+		ut_ad(*mbminlen < DATA_MBMAX);
+		ut_ad(*mbmaxlen < DATA_MBMAX);
 	} else {
 		THD*	thd = current_thd;
 
@@ -4433,15 +4435,14 @@ ha_innobase::store_key_val_for_row(
 			memcpy(buff, src_start, true_len);
 			buff += true_len;
 
-			/* Pad the unused space with spaces. Note that no
-			padding is ever needed for UCS-2 because in MySQL,
-			all UCS2 characters are 2 bytes, as MySQL does not
-			support surrogate pairs, which are needed to represent
-			characters in the range U+10000 to U+10FFFF. */
+			/* Pad the unused space with spaces. */
 
 			if (true_len < key_len) {
-				ulint pad_len = key_len - true_len;
-				memset(buff, ' ', pad_len);
+				ulint	pad_len = key_len - true_len;
+				ut_a(!(pad_len % cs->mbminlen));
+
+				cs->cset->fill(cs, buff, pad_len,
+					       0x20 /* space */);
 				buff += pad_len;
 			}
 		}
@@ -4550,6 +4551,7 @@ build_template(
 	/* Note that in InnoDB, i is the column number. MySQL calls columns
 	'fields'. */
 	for (i = 0; i < n_fields; i++) {
+		const dict_col_t* col = &index->table->cols[i];
 		templ = prebuilt->mysql_template + n_requested_fields;
 		field = table->field[i];
 
@@ -4598,7 +4600,7 @@ include_field:
 
 		if (index == clust_index) {
 			templ->rec_field_no = dict_col_get_clust_pos(
-				&index->table->cols[i], index);
+				col, index);
 		} else {
 			templ->rec_field_no = dict_index_get_nth_col_pos(
 								index, i);
@@ -4627,7 +4629,7 @@ include_field:
 			mysql_prefix_len = templ->mysql_col_offset
 				+ templ->mysql_col_len;
 		}
-		templ->type = index->table->cols[i].mtype;
+		templ->type = col->mtype;
 		templ->mysql_type = (ulint)field->type();
 
 		if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
@@ -4635,12 +4637,10 @@ include_field:
 				(((Field_varstring*)field)->length_bytes);
 		}
 
-		templ->charset = dtype_get_charset_coll(
-			index->table->cols[i].prtype);
-		templ->mbminlen = index->table->cols[i].mbminlen;
-		templ->mbmaxlen = index->table->cols[i].mbmaxlen;
-		templ->is_unsigned = index->table->cols[i].prtype
-							& DATA_UNSIGNED;
+		templ->charset = dtype_get_charset_coll(col->prtype);
+		templ->mbminlen = dict_col_get_mbminlen(col);
+		templ->mbmaxlen = dict_col_get_mbmaxlen(col);
+		templ->is_unsigned = col->prtype & DATA_UNSIGNED;
 		if (templ->type == DATA_BLOB) {
 			prebuilt->templ_contains_blob = TRUE;
 		}

=== modified file 'storage/innobase/handler/handler0alter.cc'
--- a/storage/innobase/handler/handler0alter.cc	revid:vasil.dimov@stripped
+++ b/storage/innobase/handler/handler0alter.cc	revid:marko.makela@stripped
@@ -99,8 +99,10 @@ innobase_col_to_mysql(
 #ifdef UNIV_DEBUG
 	case DATA_MYSQL:
 		ut_ad(flen >= len);
-		ut_ad(col->mbmaxlen >= col->mbminlen);
-		ut_ad(col->mbmaxlen > col->mbminlen || flen == len);
+		ut_ad(DATA_MBMAXLEN(col->mbminmaxlen)
+		      >= DATA_MBMINLEN(col->mbminmaxlen));
+		ut_ad(DATA_MBMAXLEN(col->mbminmaxlen)
+		      > DATA_MBMINLEN(col->mbminmaxlen) || flen == len);
 		memcpy(dest, data, len);
 		break;
 

=== modified file 'storage/innobase/include/data0type.h'
--- a/storage/innobase/include/data0type.h	revid:vasil.dimov@stripped9zst4532fdoix
+++ b/storage/innobase/include/data0type.h	revid:marko.makela@stripped94gg
@@ -168,6 +168,17 @@ SQL null*/
 store the charset-collation number; one byte is left unused, though */
 #define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE	6
 
+/* Maximum multi-byte character length in bytes, plus 1 */
+#define DATA_MBMAX	5
+
+/* Pack mbminlen, mbmaxlen to mbminmaxlen. */
+#define DATA_MBMINMAXLEN(mbminlen, mbmaxlen)	\
+	((mbmaxlen) * DATA_MBMAX + (mbminlen))
+/* Get mbminlen from mbminmaxlen. */
+#define DATA_MBMINLEN(mbminmaxlen) UNIV_EXPECT(((mbminmaxlen) % DATA_MBMAX), 1)
+/* Get mbmaxlen from mbminmaxlen. */
+#define DATA_MBMAXLEN(mbminmaxlen) ((mbminmaxlen) / DATA_MBMAX)
+
 #ifndef UNIV_HOTBACKUP
 /*********************************************************************//**
 Gets the MySQL type code from a dtype.
@@ -187,10 +198,8 @@ ulint
 dtype_get_at_most_n_mbchars(
 /*========================*/
 	ulint		prtype,		/*!< in: precise type */
-	ulint		mbminlen,	/*!< in: minimum length of a
-					multi-byte character */
-	ulint		mbmaxlen,	/*!< in: maximum length of a
-					multi-byte character */
+	ulint		mbminmaxlen,	/*!< in: minimum and maximum length of
+					a multi-byte character */
 	ulint		prefix_len,	/*!< in: length of the requested
 					prefix, in characters, multiplied by
 					dtype_get_mbmaxlen(dtype) */
@@ -335,6 +344,19 @@ dtype_get_mbmaxlen(
 /*===============*/
 	const dtype_t*	type);	/*!< in: type */
 /*********************************************************************//**
+Sets the minimum and maximum length of a character, in bytes. */
+UNIV_INLINE
+void
+dtype_set_mbminmaxlen(
+/*==================*/
+	dtype_t*	type,		/*!< in/out: type */
+	ulint		mbminlen,	/*!< in: minimum length of a char,
+					in bytes, or 0 if this is not
+					a character type */
+	ulint		mbmaxlen);	/*!< in: maximum length of a char,
+					in bytes, or 0 if this is not
+					a character type */
+/*********************************************************************//**
 Gets the padding character code for the type.
 @return	padding character code, or ULINT_UNDEFINED if no padding specified */
 UNIV_INLINE
@@ -354,8 +376,8 @@ dtype_get_fixed_size_low(
 	ulint	mtype,		/*!< in: main type */
 	ulint	prtype,		/*!< in: precise type */
 	ulint	len,		/*!< in: length */
-	ulint	mbminlen,	/*!< in: minimum length of a multibyte char */
-	ulint	mbmaxlen,	/*!< in: maximum length of a multibyte char */
+	ulint	mbminmaxlen,	/*!< in: minimum and maximum length of a
+				multibyte character, in bytes */
 	ulint	comp);		/*!< in: nonzero=ROW_FORMAT=COMPACT  */
 #ifndef UNIV_HOTBACKUP
 /***********************************************************************//**
@@ -368,8 +390,8 @@ dtype_get_min_size_low(
 	ulint	mtype,		/*!< in: main type */
 	ulint	prtype,		/*!< in: precise type */
 	ulint	len,		/*!< in: length */
-	ulint	mbminlen,	/*!< in: minimum length of a multibyte char */
-	ulint	mbmaxlen);	/*!< in: maximum length of a multibyte char */
+	ulint	mbminmaxlen);	/*!< in: minimum and maximum length of a
+				multibyte character */
 /***********************************************************************//**
 Returns the maximum size of a data type. Note: types in system tables may be
 incomplete and return incorrect information.
@@ -472,10 +494,11 @@ struct dtype_struct{
 					the string, MySQL uses 1 or 2
 					bytes to store the string length) */
 #ifndef UNIV_HOTBACKUP
-	unsigned	mbminlen:2;	/*!< minimum length of a
-					character, in bytes */
-	unsigned	mbmaxlen:3;	/*!< maximum length of a
-					character, in bytes */
+	unsigned	mbminmaxlen:5;	/*!< minimum and maximum length of a
+					character, in bytes;
+					DATA_MBMINMAXLEN(mbminlen,mbmaxlen);
+					mbminlen=DATA_MBMINLEN(mbminmaxlen);
+					mbmaxlen=DATA_MBMAXLEN(mbminmaxlen) */
 #endif /* !UNIV_HOTBACKUP */
 };
 

=== modified file 'storage/innobase/include/data0type.ic'
--- a/storage/innobase/include/data0type.ic	revid:vasil.dimov@stripped25224-v6m9zst4532fdoix
+++ b/storage/innobase/include/data0type.ic	revid:marko.makela@strippedl48lnzr44z94gg
@@ -93,14 +93,35 @@ dtype_get_mblen(
 		innobase_get_cset_width(dtype_get_charset_coll(prtype),
 					mbminlen, mbmaxlen);
 		ut_ad(*mbminlen <= *mbmaxlen);
-		ut_ad(*mbminlen <= 2); /* mbminlen in dtype_t is 0..3 */
-		ut_ad(*mbmaxlen < 1 << 3); /* mbmaxlen in dtype_t is 0..7 */
+		ut_ad(*mbminlen < DATA_MBMAX);
+		ut_ad(*mbmaxlen < DATA_MBMAX);
 	} else {
 		*mbminlen = *mbmaxlen = 0;
 	}
 }
 
 /*********************************************************************//**
+Sets the minimum and maximum length of a character, in bytes. */
+UNIV_INLINE
+void
+dtype_set_mbminmaxlen(
+/*==================*/
+	dtype_t*	type,		/*!< in/out: type */
+	ulint		mbminlen,	/*!< in: minimum length of a char,
+					in bytes, or 0 if this is not
+					a character type */
+	ulint		mbmaxlen)	/*!< in: maximum length of a char,
+					in bytes, or 0 if this is not
+					a character type */
+{
+	ut_ad(mbminlen < DATA_MBMAX);
+	ut_ad(mbmaxlen < DATA_MBMAX);
+	ut_ad(mbminlen <= mbmaxlen);
+
+	type->mbminmaxlen = DATA_MBMINMAXLEN(mbminlen, mbmaxlen);
+}
+
+/*********************************************************************//**
 Compute the mbminlen and mbmaxlen members of a data type structure. */
 UNIV_INLINE
 void
@@ -112,8 +133,7 @@ dtype_set_mblen(
 	ulint	mbmaxlen;
 
 	dtype_get_mblen(type->mtype, type->prtype, &mbminlen, &mbmaxlen);
-	type->mbminlen = mbminlen;
-	type->mbmaxlen = mbmaxlen;
+	dtype_set_mbminmaxlen(type, mbminlen, mbmaxlen);
 
 	ut_ad(dtype_validate(type));
 }
@@ -210,7 +230,7 @@ dtype_get_mbminlen(
 	const dtype_t*	type)	/*!< in: type */
 {
 	ut_ad(type);
-	return(type->mbminlen);
+	return(DATA_MBMINLEN(type->mbminmaxlen));
 }
 /*********************************************************************//**
 Gets the maximum length of a character, in bytes.
@@ -223,7 +243,7 @@ dtype_get_mbmaxlen(
 	const dtype_t*	type)	/*!< in: type */
 {
 	ut_ad(type);
-	return(type->mbmaxlen);
+	return(DATA_MBMAXLEN(type->mbminmaxlen));
 }
 
 /*********************************************************************//**
@@ -404,8 +424,8 @@ dtype_get_fixed_size_low(
 	ulint	mtype,		/*!< in: main type */
 	ulint	prtype,		/*!< in: precise type */
 	ulint	len,		/*!< in: length */
-	ulint	mbminlen,	/*!< in: minimum length of a multibyte char */
-	ulint	mbmaxlen,	/*!< in: maximum length of a multibyte char */
+	ulint	mbminmaxlen,	/*!< in: minimum and maximum length of
+				a multibyte character, in bytes */
 	ulint	comp)		/*!< in: nonzero=ROW_FORMAT=COMPACT  */
 {
 	switch (mtype) {
@@ -453,8 +473,9 @@ dtype_get_fixed_size_low(
 				dtype_get_charset_coll(prtype),
 				&i_mbminlen, &i_mbmaxlen);
 
-			if (UNIV_UNLIKELY(mbminlen != i_mbminlen)
-			    || UNIV_UNLIKELY(mbmaxlen != i_mbmaxlen)) {
+			if (UNIV_UNLIKELY
+			    (DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
+			     != mbminmaxlen)) {
 
 				ut_print_timestamp(stderr);
 				fprintf(stderr, "  InnoDB: "
@@ -464,10 +485,10 @@ dtype_get_fixed_size_low(
 					"type->mbmaxlen=%lu\n",
 					(ulong) i_mbminlen,
 					(ulong) i_mbmaxlen,
-					(ulong) mbminlen,
-					(ulong) mbmaxlen);
+					(ulong) DATA_MBMINLEN(mbminmaxlen),
+					(ulong) DATA_MBMAXLEN(mbminmaxlen));
 			}
-			if (mbminlen == mbmaxlen) {
+			if (i_mbminlen == i_mbmaxlen) {
 				return(len);
 			}
 		}
@@ -499,8 +520,8 @@ dtype_get_min_size_low(
 	ulint	mtype,		/*!< in: main type */
 	ulint	prtype,		/*!< in: precise type */
 	ulint	len,		/*!< in: length */
-	ulint	mbminlen,	/*!< in: minimum length of a multibyte char */
-	ulint	mbmaxlen)	/*!< in: maximum length of a multibyte char */
+	ulint	mbminmaxlen)	/*!< in: minimum and maximum length of a
+				multi-byte character */
 {
 	switch (mtype) {
 	case DATA_SYS:
@@ -527,14 +548,22 @@ dtype_get_min_size_low(
 	case DATA_DOUBLE:
 		return(len);
 	case DATA_MYSQL:
-		if ((prtype & DATA_BINARY_TYPE) || mbminlen == mbmaxlen) {
+		if (prtype & DATA_BINARY_TYPE) {
 			return(len);
+		} else {
+			ulint	mbminlen = DATA_MBMINLEN(mbminmaxlen);
+			ulint	mbmaxlen = DATA_MBMAXLEN(mbminmaxlen);
+
+			if (mbminlen == mbmaxlen) {
+				return(len);
+			}
+
+			/* this is a variable-length character set */
+			ut_a(mbminlen > 0);
+			ut_a(mbmaxlen > mbminlen);
+			ut_a(len % mbmaxlen == 0);
+			return(len * mbminlen / mbmaxlen);
 		}
-		/* this is a variable-length character set */
-		ut_a(mbminlen > 0);
-		ut_a(mbmaxlen > mbminlen);
-		ut_a(len % mbmaxlen == 0);
-		return(len * mbminlen / mbmaxlen);
 	case DATA_VARCHAR:
 	case DATA_BINARY:
 	case DATA_DECIMAL:
@@ -595,9 +624,9 @@ dtype_get_sql_null_size(
 {
 #ifndef UNIV_HOTBACKUP
 	return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
-					type->mbminlen, type->mbmaxlen, comp));
+					type->mbminmaxlen, comp));
 #else /* !UNIV_HOTBACKUP */
 	return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
-					0, 0, 0));
+					0, 0));
 #endif /* !UNIV_HOTBACKUP */
 }

=== modified file 'storage/innobase/include/dict0dict.h'
--- a/storage/innobase/include/dict0dict.h	revid:vasil.dimov@stripped532fdoix
+++ b/storage/innobase/include/dict0dict.h	revid:marko.makela@stripped
@@ -102,6 +102,33 @@ void
 dict_load_space_id_list(void);
 /*=========================*/
 /*********************************************************************//**
+Gets the minimum number of bytes per character.
+@return minimum multi-byte char size, in bytes */
+UNIV_INLINE
+ulint
+dict_col_get_mbminlen(
+/*==================*/
+	const dict_col_t*	col);	/*!< in: column */
+/*********************************************************************//**
+Gets the maximum number of bytes per character.
+@return maximum multi-byte char size, in bytes */
+UNIV_INLINE
+ulint
+dict_col_get_mbmaxlen(
+/*==================*/
+	const dict_col_t*	col);	/*!< in: column */
+/*********************************************************************//**
+Sets the minimum and maximum number of bytes per character. */
+UNIV_INLINE
+void
+dict_col_set_mbminmaxlen(
+/*=====================*/
+	dict_col_t*	col,		/*!< in/out: column */
+	ulint		mbminlen,	/*!< in: minimum multi-byte
+					character size, in bytes */
+	ulint		mbmaxlen);	/*!< in: maximum multi-byte
+					character size, in bytes */
+/*********************************************************************//**
 Gets the column data type. */
 UNIV_INLINE
 void

=== modified file 'storage/innobase/include/dict0dict.ic'
--- a/storage/innobase/include/dict0dict.ic	revid:vasil.dimov@stripped32fdoix
+++ b/storage/innobase/include/dict0dict.ic	revid:marko.makela@stripped
@@ -29,6 +29,46 @@ Created 1/8/1996 Heikki Tuuri
 #include "rem0types.h"
 
 /*********************************************************************//**
+Gets the minimum number of bytes per character.
+@return minimum multi-byte char size, in bytes */
+UNIV_INLINE
+ulint
+dict_col_get_mbminlen(
+/*==================*/
+	const dict_col_t*	col)	/*!< in: column */
+{
+	return(DATA_MBMINLEN(col->mbminmaxlen));
+}
+/*********************************************************************//**
+Gets the maximum number of bytes per character.
+@return maximum multi-byte char size, in bytes */
+UNIV_INLINE
+ulint
+dict_col_get_mbmaxlen(
+/*==================*/
+	const dict_col_t*	col)	/*!< in: column */
+{
+	return(DATA_MBMAXLEN(col->mbminmaxlen));
+}
+/*********************************************************************//**
+Sets the minimum and maximum number of bytes per character. */
+UNIV_INLINE
+void
+dict_col_set_mbminmaxlen(
+/*=====================*/
+	dict_col_t*	col,		/*!< in/out: column */
+	ulint		mbminlen,	/*!< in: minimum multi-byte
+					character size, in bytes */
+	ulint		mbmaxlen)	/*!< in: maximum multi-byte
+					character size, in bytes */
+{
+	ut_ad(mbminlen < DATA_MBMAX);
+	ut_ad(mbmaxlen < DATA_MBMAX);
+	ut_ad(mbminlen <= mbmaxlen);
+
+	col->mbminmaxlen = DATA_MBMINMAXLEN(mbminlen, mbmaxlen);
+}
+/*********************************************************************//**
 Gets the column data type. */
 UNIV_INLINE
 void
@@ -42,8 +82,7 @@ dict_col_copy_type(
 	type->mtype = col->mtype;
 	type->prtype = col->prtype;
 	type->len = col->len;
-	type->mbminlen = col->mbminlen;
-	type->mbmaxlen = col->mbmaxlen;
+	type->mbminmaxlen = col->mbminmaxlen;
 }
 #endif /* !UNIV_HOTBACKUP */
 
@@ -65,8 +104,7 @@ dict_col_type_assert_equal(
 	ut_ad(col->prtype == type->prtype);
 	ut_ad(col->len == type->len);
 # ifndef UNIV_HOTBACKUP
-	ut_ad(col->mbminlen == type->mbminlen);
-	ut_ad(col->mbmaxlen == type->mbmaxlen);
+	ut_ad(col->mbminmaxlen == type->mbminmaxlen);
 # endif /* !UNIV_HOTBACKUP */
 
 	return(TRUE);
@@ -84,7 +122,7 @@ dict_col_get_min_size(
 	const dict_col_t*	col)	/*!< in: column */
 {
 	return(dtype_get_min_size_low(col->mtype, col->prtype, col->len,
-				      col->mbminlen, col->mbmaxlen));
+				      col->mbminmaxlen));
 }
 /***********************************************************************//**
 Returns the maximum size of the column.
@@ -109,7 +147,7 @@ dict_col_get_fixed_size(
 	ulint			comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
 {
 	return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len,
-					col->mbminlen, col->mbmaxlen, comp));
+					col->mbminmaxlen, comp));
 }
 /***********************************************************************//**
 Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.

=== modified file 'storage/innobase/include/dict0mem.h'
--- a/storage/innobase/include/dict0mem.h	revid:vasil.dimov@oracle.com-20100628125224-v6m9zst4532fdoix
+++ b/storage/innobase/include/dict0mem.h	revid:marko.makela@strippedm-20100629113248-fvl48lnzr44z94gg
@@ -151,9 +151,9 @@ dict_mem_table_add_col(
 	ulint		prtype,	/*!< in: precise type */
 	ulint		len);	/*!< in: precision */
 /**********************************************************************//**
-This function poplulates a dict_col_t memory structure with
+This function populates a dict_col_t memory structure with
 supplied information. */
-UNIV_INLINE
+UNIV_INTERN
 void
 dict_mem_fill_column_struct(
 /*========================*/
@@ -162,7 +162,7 @@ dict_mem_fill_column_struct(
 	ulint		col_pos,	/*!< in: column position */
 	ulint		mtype,		/*!< in: main data type */
 	ulint		prtype,		/*!< in: precise type */
-	ulint		col_len);	/*!< in: column lenght */
+	ulint		col_len);	/*!< in: column length */
 /**********************************************************************//**
 This function poplulates a dict_index_t index memory structure with
 supplied information. */
@@ -249,10 +249,11 @@ struct dict_col_struct{
 					the string, MySQL uses 1 or 2
 					bytes to store the string length) */
 
-	unsigned	mbminlen:2;	/*!< minimum length of a
-					character, in bytes */
-	unsigned	mbmaxlen:3;	/*!< maximum length of a
-					character, in bytes */
+	unsigned	mbminmaxlen:5;	/*!< minimum and maximum length of a
+					character, in bytes;
+					DATA_MBMINMAXLEN(mbminlen,mbmaxlen);
+					mbminlen=DATA_MBMINLEN(mbminmaxlen);
+					mbmaxlen=DATA_MBMAXLEN(mbminmaxlen) */
 	/*----------------------*/
 	/* End of definitions copied from dtype_t */
 	/* @} */

=== modified file 'storage/innobase/include/dict0mem.ic'
--- a/storage/innobase/include/dict0mem.ic	revid:vasil.dimov@strippedm9zst4532fdoix
+++ b/storage/innobase/include/dict0mem.ic	revid:marko.makela@strippedz94gg
@@ -70,35 +70,3 @@ dict_mem_fill_index_struct(
         index->magic_n = DICT_INDEX_MAGIC_N;
 #endif /* UNIV_DEBUG */
 }
-
-/**********************************************************************//**
-This function poplulates a dict_col_t memory structure with
-supplied information. */
-UNIV_INLINE
-void
-dict_mem_fill_column_struct(
-/*========================*/
-	dict_col_t*	column,		/*!< out: column struct to be
-					filled */
-	ulint		col_pos,	/*!< in: column position */
-	ulint		mtype,		/*!< in: main data type */
-	ulint		prtype,		/*!< in: precise type */
-	ulint		col_len)	/*!< in: column lenght */
-{
-#ifndef UNIV_HOTBACKUP
-	ulint	mbminlen;
-	ulint	mbmaxlen;
-#endif /* !UNIV_HOTBACKUP */
-
-	column->ind = (unsigned int) col_pos;
-	column->ord_part = 0;
-	column->mtype = (unsigned int) mtype;
-	column->prtype = (unsigned int) prtype;
-	column->len = (unsigned int) col_len;
-#ifndef UNIV_HOTBACKUP
-        dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen);
-
-        column->mbminlen = (unsigned int) mbminlen;
-        column->mbmaxlen = (unsigned int) mbmaxlen;
-#endif /* !UNIV_HOTBACKUP */
-}

=== modified file 'storage/innobase/include/row0mysql.h'
--- a/storage/innobase/include/row0mysql.h	revid:vasil.dimov@strippedom-20100628125224-v6m9zst4532fdoix
+++ b/storage/innobase/include/row0mysql.h	revid:marko.makela@stripped29113248-fvl48lnzr44z94gg
@@ -103,6 +103,17 @@ row_mysql_read_blob_ref(
 	ulint		col_len);	/*!< in: BLOB reference length
 					(not BLOB length) */
 /**************************************************************//**
+Pad a column with spaces. */
+UNIV_INTERN
+void
+row_mysql_pad_col(
+/*==============*/
+	ulint	mbminlen,	/*!< in: minimum size of a character,
+				in bytes */
+	byte*	pad,		/*!< out: padded buffer */
+	ulint	len);		/*!< in: number of bytes to pad */
+
+/**************************************************************//**
 Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
 The counterpart of this function is row_sel_field_store_in_mysql_format() in
 row0sel.c.

=== modified file 'storage/innobase/row/row0ins.c'
--- a/storage/innobase/row/row0ins.c	revid:vasil.dimov@strippedoix
+++ b/storage/innobase/row/row0ins.c	revid:marko.makela@stripped
@@ -515,8 +515,7 @@ row_ins_cascade_calc_update_vec(
 
 				if (!dfield_is_null(&ufield->new_val)
 				    && dtype_get_at_most_n_mbchars(
-					col->prtype,
-					col->mbminlen, col->mbmaxlen,
+					col->prtype, col->mbminmaxlen,
 					col->len,
 					ufield_len,
 					dfield_get_data(&ufield->new_val))
@@ -539,49 +538,37 @@ row_ins_cascade_calc_update_vec(
 
 				if (min_size > ufield_len) {
 
-					char*		pad_start;
-					const char*	pad_end;
-					char*		padded_data
-						= mem_heap_alloc(
-							heap, min_size);
-					pad_start = padded_data + ufield_len;
-					pad_end = padded_data + min_size;
+					byte*	pad;
+					ulint	pad_len;
+					byte*	padded_data;
+					ulint	mbminlen;
+
+					padded_data = mem_heap_alloc(
+						heap, min_size);
+
+					pad = padded_data + ufield_len;
+					pad_len = min_size - ufield_len;
 
 					memcpy(padded_data,
 					       dfield_get_data(&ufield
 							       ->new_val),
-					       dfield_get_len(&ufield
-							      ->new_val));
+					       ufield_len);
 
-					switch (UNIV_EXPECT(col->mbminlen,1)) {
-					default:
-						ut_error;
-						return(ULINT_UNDEFINED);
-					case 1:
-						if (UNIV_UNLIKELY
-						    (dtype_get_charset_coll(
-							    col->prtype)
-						     == DATA_MYSQL_BINARY_CHARSET_COLL)) {
-							/* Do not pad BINARY
-							columns. */
-							return(ULINT_UNDEFINED);
-						}
+					mbminlen = dict_col_get_mbminlen(col);
+
+					ut_ad(!(ufield_len % mbminlen));
+					ut_ad(!(min_size % mbminlen));
 
-						/* space=0x20 */
-						memset(pad_start, 0x20,
-						       pad_end - pad_start);
-						break;
-					case 2:
-						/* space=0x0020 */
-						ut_a(!(ufield_len % 2));
-						ut_a(!(min_size % 2));
-						do {
-							*pad_start++ = 0x00;
-							*pad_start++ = 0x20;
-						} while (pad_start < pad_end);
-						break;
+					if (mbminlen == 1
+					    && dtype_get_charset_coll(
+						    col->prtype)
+					    == DATA_MYSQL_BINARY_CHARSET_COLL) {
+						/* Do not pad BINARY columns */
+						return(ULINT_UNDEFINED);
 					}
 
+					row_mysql_pad_col(mbminlen,
+							  pad, pad_len);
 					dfield_set_data(&ufield->new_val,
 							padded_data, min_size);
 				}
@@ -2232,7 +2219,7 @@ row_ins_index_entry_set_vals(
 				= dict_field_get_col(ind_field);
 
 			len = dtype_get_at_most_n_mbchars(
-				col->prtype, col->mbminlen, col->mbmaxlen,
+				col->prtype, col->mbminmaxlen,
 				ind_field->prefix_len,
 				len, dfield_get_data(row_field));
 

=== modified file 'storage/innobase/row/row0merge.c'
--- a/storage/innobase/row/row0merge.c	revid:vasil.dimov@oracle.com-20100628125224-v6m9zst4532fdoix
+++ b/storage/innobase/row/row0merge.c	revid:marko.makela@stripped13248-fvl48lnzr44z94gg
@@ -338,7 +338,7 @@ row_merge_buf_add(
 		if (ifield->prefix_len) {
 			len = dtype_get_at_most_n_mbchars(
 				col->prtype,
-				col->mbminlen, col->mbmaxlen,
+				col->mbminmaxlen,
 				ifield->prefix_len,
 				len, dfield_get_data(field));
 			dfield_set_len(field, len);

=== modified file 'storage/innobase/row/row0mysql.c'
--- a/storage/innobase/row/row0mysql.c	revid:vasil.dimov@stripped
+++ b/storage/innobase/row/row0mysql.c	revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg
@@ -266,6 +266,49 @@ row_mysql_read_blob_ref(
 }
 
 /**************************************************************//**
+Pad a column with spaces. */
+UNIV_INTERN
+void
+row_mysql_pad_col(
+/*==============*/
+	ulint	mbminlen,	/*!< in: minimum size of a character,
+				in bytes */
+	byte*	pad,		/*!< out: padded buffer */
+	ulint	len)		/*!< in: number of bytes to pad */
+{
+	const byte*	pad_end;
+
+	switch (UNIV_EXPECT(mbminlen, 1)) {
+	default:
+		ut_error;
+	case 1:
+		/* space=0x20 */
+		memset(pad, 0x20, len);
+		break;
+	case 2:
+		/* space=0x0020 */
+		pad_end = pad + len;
+		ut_a(!(len % 2));
+		do {
+			*pad++ = 0x00;
+			*pad++ = 0x20;
+		} while (pad < pad_end);
+		break;
+	case 4:
+		/* space=0x00000020 */
+		pad_end = pad + len;
+		ut_a(!(len % 4));
+		do {
+			*pad++ = 0x00;
+			*pad++ = 0x00;
+			*pad++ = 0x00;
+			*pad++ = 0x20;
+		} while (pad < pad_end);
+		break;
+	}
+}
+
+/**************************************************************//**
 Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
 The counterpart of this function is row_sel_field_store_in_mysql_format() in
 row0sel.c.
@@ -357,12 +400,28 @@ row_mysql_store_col_in_innobase_format(
 			/* Remove trailing spaces from old style VARCHAR
 			columns. */
 
-			/* Handle UCS2 strings differently. */
+			/* Handle Unicode strings differently. */
 			ulint	mbminlen	= dtype_get_mbminlen(dtype);
 
 			ptr = mysql_data;
 
-			if (mbminlen == 2) {
+			switch (mbminlen) {
+			default:
+				ut_error;
+			case 4:
+				/* space=0x00000020 */
+				/* Trim "half-chars", just in case. */
+				col_len &= ~3;
+
+				while (col_len >= 4
+				       && ptr[col_len - 4] == 0x00
+				       && ptr[col_len - 3] == 0x00
+				       && ptr[col_len - 2] == 0x00
+				       && ptr[col_len - 1] == 0x20) {
+					col_len -= 4;
+				}
+				break;
+			case 2:
 				/* space=0x0020 */
 				/* Trim "half-chars", just in case. */
 				col_len &= ~1;
@@ -371,8 +430,8 @@ row_mysql_store_col_in_innobase_format(
 				       && ptr[col_len - 1] == 0x20) {
 					col_len -= 2;
 				}
-			} else {
-				ut_a(mbminlen == 1);
+				break;
+			case 1:
 				/* space=0x20 */
 				while (col_len > 0
 				       && ptr[col_len - 1] == 0x20) {

=== modified file 'storage/innobase/row/row0row.c'
--- a/storage/innobase/row/row0row.c	revid:vasil.dimov@stripped5224-v6m9zst4532fdoix
+++ b/storage/innobase/row/row0row.c	revid:marko.makela@stripped4z94gg
@@ -156,7 +156,7 @@ row_build_index_entry(
 		}
 
 		len = dtype_get_at_most_n_mbchars(
-			col->prtype, col->mbminlen, col->mbmaxlen,
+			col->prtype, col->mbminmaxlen,
 			ind_field->prefix_len, len, dfield_get_data(dfield));
 		dfield_set_len(dfield, len);
 	}
@@ -514,8 +514,7 @@ row_build_row_ref(
 				dfield_set_len(dfield,
 					       dtype_get_at_most_n_mbchars(
 						       dtype->prtype,
-						       dtype->mbminlen,
-						       dtype->mbmaxlen,
+						       dtype->mbminmaxlen,
 						       clust_col_prefix_len,
 						       len, (char*) field));
 			}
@@ -629,8 +628,7 @@ notfound:
 				dfield_set_len(dfield,
 					       dtype_get_at_most_n_mbchars(
 						       dtype->prtype,
-						       dtype->mbminlen,
-						       dtype->mbmaxlen,
+						       dtype->mbminmaxlen,
 						       clust_col_prefix_len,
 						       len, (char*) field));
 			}

=== modified file 'storage/innobase/row/row0sel.c'
--- a/storage/innobase/row/row0sel.c	revid:vasil.dimov@stripped4532fdoix
+++ b/storage/innobase/row/row0sel.c	revid:marko.makela@stripped
@@ -88,10 +88,8 @@ row_sel_sec_rec_is_for_blob(
 /*========================*/
 	ulint		mtype,		/*!< in: main type */
 	ulint		prtype,		/*!< in: precise type */
-	ulint		mbminlen,	/*!< in: minimum length of a
-					multi-byte character */
-	ulint		mbmaxlen,	/*!< in: maximum length of a
-					multi-byte character */
+	ulint		mbminmaxlen,	/*!< in: minimum and maximum length of
+					a multi-byte character */
 	const byte*	clust_field,	/*!< in: the locally stored part of
 					the clustered index column, including
 					the BLOB pointer; the clustered
@@ -119,7 +117,7 @@ row_sel_sec_rec_is_for_blob(
 		return(FALSE);
 	}
 
-	len = dtype_get_at_most_n_mbchars(prtype, mbminlen, mbmaxlen,
+	len = dtype_get_at_most_n_mbchars(prtype, mbminmaxlen,
 					  sec_len, len, (const char*) buf);
 
 	return(!cmp_data_data(mtype, prtype, buf, len, sec_field, sec_len));
@@ -202,14 +200,14 @@ row_sel_sec_rec_is_for_clust_rec(
 			}
 
 			len = dtype_get_at_most_n_mbchars(
-				col->prtype, col->mbminlen, col->mbmaxlen,
+				col->prtype, col->mbminmaxlen,
 				ifield->prefix_len, len, (char*) clust_field);
 
 			if (rec_offs_nth_extern(clust_offs, clust_pos)
 			    && len < sec_len) {
 				if (!row_sel_sec_rec_is_for_blob(
 					    col->mtype, col->prtype,
-					    col->mbminlen, col->mbmaxlen,
+					    col->mbminmaxlen,
 					    clust_field, clust_len,
 					    sec_field, sec_len,
 					    dict_table_zip_size(
@@ -2508,13 +2506,13 @@ row_sel_field_store_in_mysql_format(
 	ulint		len)	/*!< in: length of the data */
 {
 	byte*	ptr;
-	byte*	field_end;
-	byte*	pad_ptr;
 
 	ut_ad(len != UNIV_SQL_NULL);
 	UNIV_MEM_ASSERT_RW(data, len);
 
 	switch (templ->type) {
+		const byte*	field_end;
+		byte*		pad;
 	case DATA_INT:
 		/* Convert integer data from Innobase to a little-endian
 		format, sign bit restored to normal */
@@ -2558,38 +2556,32 @@ row_sel_field_store_in_mysql_format(
 		unused end of a >= 5.0.3 true VARCHAR column, just in case
 		MySQL expects its contents to be deterministic. */
 
-		pad_ptr = dest + len;
+		pad = dest + len;
 
 		ut_ad(templ->mbminlen <= templ->mbmaxlen);
 
-		/* We handle UCS2 charset strings differently. */
-		if (templ->mbminlen == 2) {
-			/* A space char is two bytes, 0x0020 in UCS2 */
+		/* We treat some Unicode charset strings specially. */
+		switch (templ->mbminlen) {
+		case 4:
+			/* InnoDB should never have stripped partial
+			UTF-32 characters. */
+			ut_a(!(len & 3));
+			break;
+		case 2:
+			/* A space char is two bytes,
+			0x0020 in UCS2 and UTF-16 */
 
-			if (len & 1) {
+			if (UNIV_UNLIKELY(len & 1)) {
 				/* A 0x20 has been stripped from the column.
 				Pad it back. */
 
-				if (pad_ptr < field_end) {
-					*pad_ptr = 0x20;
-					pad_ptr++;
+				if (pad < field_end) {
+					*pad++ = 0x20;
 				}
 			}
-
-			/* Pad the rest of the string with 0x0020 */
-
-			while (pad_ptr < field_end) {
-				*pad_ptr = 0x00;
-				pad_ptr++;
-				*pad_ptr = 0x20;
-				pad_ptr++;
-			}
-		} else {
-			ut_ad(templ->mbminlen == 1);
-			/* space=0x20 */
-
-			memset(pad_ptr, 0x20, field_end - pad_ptr);
 		}
+
+		row_mysql_pad_col(templ->mbminlen, pad, field_end - pad);
 		break;
 
 	case DATA_BLOB:
@@ -2614,9 +2606,9 @@ row_sel_field_store_in_mysql_format(
 		      || !(templ->mysql_col_len % templ->mbmaxlen));
 		ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len);
 
-		if (templ->mbminlen != templ->mbmaxlen) {
+		if (templ->mbminlen == 1 && templ->mbmaxlen != 1) {
 			/* Pad with spaces. This undoes the stripping
-			done in row0mysql.ic, function
+			done in row0mysql.c, function
 			row_mysql_store_col_in_innobase_format(). */
 
 			memset(dest + len, 0x20, templ->mysql_col_len - len);

=== modified file 'storage/innobase/row/row0upd.c'
--- a/storage/innobase/row/row0upd.c	revid:vasil.dimov@stripped
+++ b/storage/innobase/row/row0upd.c	revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg
@@ -949,7 +949,7 @@ row_upd_index_replace_new_col_val(
 		}
 
 		len = dtype_get_at_most_n_mbchars(col->prtype,
-						  col->mbminlen, col->mbmaxlen,
+						  col->mbminmaxlen,
 						  field->prefix_len, len,
 						  (const char*) data);
 

=== modified file 'storage/innobase/trx/trx0trx.c'
--- a/storage/innobase/trx/trx0trx.c	revid:vasil.dimov@oracle.com-20100628125224-v6m9zst4532fdoix
+++ b/storage/innobase/trx/trx0trx.c	revid:marko.makela@stripped248-fvl48lnzr44z94gg
@@ -2024,7 +2024,7 @@ trx_get_trx_by_xid(
 	while (trx) {
 		/* Compare two X/Open XA transaction id's: their
 		length should be the same and binary comparison
-		of gtrid_lenght+bqual_length bytes should be
+		of gtrid_length+bqual_length bytes should be
 		the same */
 
 		if (xid->gtrid_length == trx->xid.gtrid_length

Attachment: [text/bzr-bundle] bzr/marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg.bundle
Thread
bzr commit into mysql-trunk-innodb branch (marko.makela:3126) Bug#52199 | marko.makela | 29 Jun