List: Commits « Previous Message | Next Message »
From: lars-erik.bjork  Date: December 2 2008 1:15pm
Subject: bzr commit into mysql-6.0-falcon-team branch (lars-erik.bjork:2922) Bug#34479
View as plain text  
#At file:///home/lb200670/devel/mysql/bouncer/ based on revid:john.embretsen@strippedw7

 2922 lars-erik.bjork@stripped	2008-12-02
      This is a patch for bug#34479
      Falcon: search failure with indexed ucs2 varchar
      
      When computing the key length, trailing spaces are removed. This was
      done by looking at a single byte at a time. With, for example, the ucs2
      character set (where every character is represented using two bytes),
      this would result in a character ending in 0x20, e.g. Ġ (0x0120),
      having its final byte 'trimmed'. I have now implemented a charset-aware
      version, taking into account the varying lengths of multi-byte
      sequences of different character sets.
      
      added file 'mysql-test/suite/falcon/t/falcon_bug_34479.test'
      ------------------------------------------------------------
      This is a test file that exercises the patch. It is based on the
      bug report.
      
      added file 'mysql-test/suite/falcon/r/falcon_bug_34479.result'
      --------------------------------------------------------------
      This is the result file for the test. It states the expected
      output.
      
      
      modified file 'storage/falcon/MySQLCollation.cpp'
      -------------------------------------------------
      Modified the function computeKeyLength( ... ) 
      
      Earlier, when removing trailing pad characters (' '), the function
      only looked at a single byte at a time. The new charset-aware
      implementation converts the pad character into the relevant
      character set and compares it against whole (possibly multi-byte)
      character sequences instead of single bytes.
      
      
      modified file 'storage/falcon/MySQLCollation.h'
      -----------------------------------------------
      Changed the function computeKeyLength( ...) not to be inline, because the
      implementation grew big enough to clutter the header file.
      
      
      modified file 'storage/falcon/ha_falcon.cpp'
      --------------------------------------------
      Added some functions giving access to the character set functions
      
      * int falcon_conv_uni_cs ( ... )
        - Converts a character to the given character set
      * unsigned int falcon_get_mbminlen ( ... )
        - Returns the minimum multi-byte sequence length for the given charset
      
      * uint falcon_get_mbcharlen( ... )
        - Returns the length of the current multi-byte sequence if the
          given pointer points to the first byte of a sequence, 0 otherwise
added:
  mysql-test/suite/falcon/r/falcon_bug_34479.result
  mysql-test/suite/falcon/t/falcon_bug_34479.test
modified:
  storage/falcon/MySQLCollation.cpp
  storage/falcon/MySQLCollation.h
  storage/falcon/ha_falcon.cpp

=== added file 'mysql-test/suite/falcon/r/falcon_bug_34479.result'
--- a/mysql-test/suite/falcon/r/falcon_bug_34479.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/falcon/r/falcon_bug_34479.result	2008-12-02 13:14:45 +0000
@@ -0,0 +1,25 @@
+*** Bug #34479 ***
+SET @@storage_engine = 'Falcon';
+DROP TABLE IF EXISTS t1;
+SET NAMES utf8;
+CREATE TABLE t1 (s1 varchar(5) character set ucs2);
+INSERT INTO t1 VALUES ('Ā'),('ā'),('Ă'),('ă'),('Ą');
+SELECT * FROM t1 WHERE s1 < 'Ġ';
+s1
+Ā
+ā
+Ă
+ă
+Ą
+CREATE INDEX i ON t1 (s1);
+SELECT * FROM t1 WHERE s1 < 'Ġ';
+s1
+Ā
+ā
+Ă
+ă
+Ą
+SELECT count(*) FROM t1;
+count(*)
+5
+DROP TABLE t1;
+
=== added file 'mysql-test/suite/falcon/t/falcon_bug_34479.test'
--- a/mysql-test/suite/falcon/t/falcon_bug_34479.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/falcon/t/falcon_bug_34479.test	2008-12-02 13:14:45 +0000
@@ -0,0 +1,42 @@
+--source include/have_falcon.inc
+
+#
+# Bug #34479: Falcon: search failure with indexed ucs2 varchar
+#
+--echo *** Bug #34479 ***
+
+# ----------------------------------------------------- #
+# --- Initialisation                                --- #
+# ----------------------------------------------------- #
+let $engine = 'Falcon';
+eval SET @@storage_engine = $engine;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# ----------------------------------------------------- #
+# --- Test                                          --- #
+# ----------------------------------------------------- #
+
+SET NAMES utf8;
+CREATE TABLE t1 (s1 varchar(5) character set ucs2);
+
+# The following values are: 0x0100, 0x0101, 0x0102, 0x0103, 0x0104
+INSERT INTO t1 VALUES ('Ā'),('ā'),('Ă'),('ă'),('Ą');
+SELECT * FROM t1 WHERE s1 < 'Ġ';
+
+# Before the fix, this indexed lookup would not find all records
+CREATE INDEX i ON t1 (s1);
+SELECT * FROM t1 WHERE s1 < 'Ġ';
+
+# ----------------------------------------------------- #
+# --- Check                                         --- #
+# ----------------------------------------------------- #
+
+SELECT count(*) FROM t1;
+
+# ----------------------------------------------------- #
+# --- Final cleanup                                 --- #
+# ----------------------------------------------------- #
+DROP TABLE t1;

=== modified file 'storage/falcon/MySQLCollation.cpp'
--- a/storage/falcon/MySQLCollation.cpp	2007-11-27 20:07:30 +0000
+++ b/storage/falcon/MySQLCollation.cpp	2008-12-02 13:14:45 +0000
@@ -13,6 +13,7 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
+#include <memory.h>
 #include "Engine.h"
 #include "MySQLCollation.h"
 #include "IndexKey.h"
@@ -43,8 +44,8 @@ int MySQLCollation::compare (Value *valu
 
 	if (!isBinary)
 		{
-		len1 = computeKeyLength(len1, string1, padChar, minSortChar);
-		len2 = computeKeyLength(len2, string2, padChar, minSortChar);
+		len1 = computeKeyLength(charset, len1, string1, padChar, minSortChar);
+		len2 = computeKeyLength(charset, len2, string2, padChar, minSortChar);
 		}
 
 	return falcon_strnncoll(charset, string1, len1, string2, len2, false);
@@ -67,7 +68,7 @@ int MySQLCollation::makeKey (Value *valu
 		srcLen = value->getString (sizeof(temp), temp);
 
 	if (!isBinary)
-		srcLen = computeKeyLength(srcLen, temp, padChar, minSortChar);
+		srcLen = computeKeyLength(charset, srcLen, temp, padChar, minSortChar);
 
 	// Since some collations make dstLen > srcLen, be sure dstLen is < partialKey.
 
@@ -114,3 +115,58 @@ int MySQLCollation::truncate(Value *valu
 
 	return len;
 }
+
+uint MySQLCollation::computeKeyLength (void *charset, uint length, const char *key,
+									   char padChar, char minSortChar)
+
+{
+	// We need to convert the padChar into the current character set
+	
+	unsigned char pad[20];
+	int padLen = falcon_conv_uni_cs (charset, padChar, pad, sizeof(pad));
+	
+	if (falcon_get_mbminlen(charset) == falcon_get_mbmaxlen(charset))
+		{
+		
+		// All characters are represented using the same number of bytes
+		// we don't have to worry about multi-byte sequences of different 
+		// lengths 
+		
+		int charLen = falcon_get_mbminlen(charset);
+		
+		for (const char *p = key + length; p > key; p = p - charLen)
+			{
+			if ((p[-charLen] != 0) && (memcmp(pad, p-charLen, charLen) != 0) 
+				&& (p[-charLen] != minSortChar))
+				{
+				return (uint) (p - key);
+				}
+			}		
+		}
+	else 
+		{
+		
+		// The number of bytes in each multi-byte sequence may vary
+		
+		int charLen = 0;
+		
+		for (const char *p = key + length; p > key; --p)
+			{
+			charLen = falcon_get_mbcharlen(charset, p-1);
+			
+			if (charLen == 0)
+				{
+				// Not at the beginning of a sequence
+				continue;
+				}
+			else if ((p[-1] != 0) && (padLen != charLen || memcmp(pad, p-1, charLen) != 0) 
+					 && (p[-1] != minSortChar))
+				{
+				return (uint) (p - key + (charLen-1));
+				}
+
+			}
+		}
+	
+	return 0;		
+}

=== modified file 'storage/falcon/MySQLCollation.h'
--- a/storage/falcon/MySQLCollation.h	2007-11-27 20:07:30 +0000
+++ b/storage/falcon/MySQLCollation.h	2008-12-02 13:14:45 +0000
@@ -26,9 +26,12 @@ extern int falcon_strnxfrm (void *cs, 
 							const char *src, uint srclen);
 
 extern char falcon_get_pad_char (void *cs);
+extern int falcon_conv_uni_cs (void *cs, char character, unsigned char* dst, int dstLen);
 extern int falcon_cs_is_binary (void *cs);
+extern unsigned int falcon_get_mbminlen (void *cs);
 extern unsigned int falcon_get_mbmaxlen (void *cs);
 extern char falcon_get_min_sort_char (void *cs);
+extern uint falcon_get_mbcharlen(void *cs, const char *s);
 extern uint falcon_strnchrlen(void *cs, const char *s, uint l);
 extern uint falcon_strnxfrmlen(void *cs, const char *s, uint srclen,
 							   int partialKey, int bufSize);
@@ -63,15 +66,9 @@ public:
 	uint	mbMaxLen;
 	char	minSortChar;
 	
-	static inline uint computeKeyLength (uint length, const char *key,
-										char padChar, char minSortChar)
-		{
-		for (const char *p = key + length; p > key; --p)
-			if ((p[-1] != 0) && (p[-1] != padChar) && (p[-1] != minSortChar))
-				return (uint) (p - key);
+	static uint computeKeyLength (void *charset, uint length, const char *key,
+								  char padChar, char minSortChar);
 
-		return 0;
-		}
 };
 
 #endif

=== modified file 'storage/falcon/ha_falcon.cpp'
--- a/storage/falcon/ha_falcon.cpp	2008-11-25 13:38:06 +0000
+++ b/storage/falcon/ha_falcon.cpp	2008-12-02 13:14:45 +0000
@@ -292,12 +292,37 @@ char falcon_get_pad_char (void *cs)
 	return (char) ((CHARSET_INFO*) cs)->pad_char;
 }
 
+int falcon_conv_uni_cs (void *cs, const char character, unsigned char* dst, int dstLen)
+{
+	
+	// Convert from unicode codepoint
+
+	CHARSET_INFO* charset = (CHARSET_INFO*) cs;	
+	int len = (charset->cset->wc_mb(charset, (my_wc_t)character, dst, dst + dstLen));
+
+	if (len <= 0)
+		{
+		// Should not happen
+		dst[0] = character;
+		return 1;
+		}
+	else
+		{
+		return len;
+		}
+}
+
 int falcon_cs_is_binary (void *cs)
 {
 	return (0 == strcmp(((CHARSET_INFO*) cs)->name, "binary"));
 //	return ((((CHARSET_INFO*) cs)->state & MY_CS_BINSORT) == MY_CS_BINSORT);
 }
 
+unsigned int falcon_get_mbminlen (void *cs)
+{
+	return ((CHARSET_INFO*) cs)->mbminlen;
+}
+
 unsigned int falcon_get_mbmaxlen (void *cs)
 {
 	return ((CHARSET_INFO*) cs)->mbmaxlen;
@@ -308,6 +333,13 @@ char falcon_get_min_sort_char (void *cs)
 	return (char) ((CHARSET_INFO*) cs)->min_sort_char;
 }
 
+uint falcon_get_mbcharlen(void *cs, const char *s)
+{
+	CHARSET_INFO *charset = (CHARSET_INFO*) cs;
+	uchar *ch = (uchar *) s;
+	return charset->cset->mbcharlen(charset, *ch);
+}
+
 // Return the actual number of characters in the string
 // Note, this is not the number of characters with collatable weight.
 

Thread
bzr commit into mysql-6.0-falcon-team branch (lars-erik.bjork:2922) Bug#34479 - lars-erik.bjork - 2 Dec