#At file:///home/lb200670/devel/mysql/bouncer/ based on revid:john.embretsen@strippedw7
2922 lars-erik.bjork@stripped 2008-12-02
This is a patch for bug#34479
Falcon: search failure with indexed ucs2 varchar
When computing the key length, trailing spaces are removed. This was
done by looking at a single byte at a time. With a character set such
as ucs2 (where every character is represented using two bytes), this
would result in a character ending in 0x20, for example Ġ (0x0120),
having its final byte 'trimmed'. I have now implemented a charset-aware
version that takes into account the varying lengths of multi-byte
sequences in different character sets.
added file 'mysql-test/suite/falcon/t/falcon_bug_34479.test'
------------------------------------------------------------
This is a test file for the patch. It is based on the
bug report.
added file 'mysql-test/suite/falcon/r/falcon_bug_34479.result'
--------------------------------------------------------------
This is the result file for the test. It states the expected
output.
modified file 'storage/falcon/MySQLCollation.cpp'
-------------------------------------------------
Modified the function computeKeyLength( ... )
Earlier, when removing trailing pad characters (' '), the function
only looked at a single byte at a time. I have implemented a
charset-aware version that translates the pad character into the
relevant character set and compares it against the (possibly)
multi-byte sequences of that character set.
modified file 'storage/falcon/MySQLCollation.h'
-----------------------------------------------
Changed the function computeKeyLength( ...) not to be inline, because the
implementation grew big enough to clutter the header file.
modified file 'storage/falcon/ha_falcon.cpp'
--------------------------------------------
Added some functions giving access to the character set functions
* int falcon_conv_uni_cs ( ... )
- Converts a character to the given character set
* unsigned int falcon_get_mbminlen ( ... )
- Returns the minimum multi-byte sequence length for the given charset
* uint falcon_get_mbcharlen( ... )
- Returns the length of the current multi-byte sequence if the
given pointer points to a valid header, 0 otherwise
added:
mysql-test/suite/falcon/r/falcon_bug_34479.result
mysql-test/suite/falcon/t/falcon_bug_34479.test
modified:
storage/falcon/MySQLCollation.cpp
storage/falcon/MySQLCollation.h
storage/falcon/ha_falcon.cpp
=== added file 'mysql-test/suite/falcon/r/falcon_bug_34479.result'
--- a/mysql-test/suite/falcon/r/falcon_bug_34479.result 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/falcon/r/falcon_bug_34479.result 2008-12-02 13:14:45 +0000
@@ -0,0 +1,25 @@
+*** Bug #34479 ***
+SET @@storage_engine = 'Falcon';
+DROP TABLE IF EXISTS t1;
+SET NAMES utf8;
+CREATE TABLE t1 (s1 varchar(5) character set ucs2);
+INSERT INTO t1 VALUES ('Ā'),('ā'),('Ă'),('ă'),('Ą');
+SELECT * FROM t1 WHERE s1 < 'Ġ';
+s1
+Ā
+ā
+Ă
+ă
+Ą
+CREATE INDEX i ON t1 (s1);
+SELECT * FROM t1 WHERE s1 < 'Ġ';
+s1
+Ā
+ā
+Ă
+ă
+
=== added file 'mysql-test/suite/falcon/t/falcon_bug_34479.test'
--- a/mysql-test/suite/falcon/t/falcon_bug_34479.test 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/falcon/t/falcon_bug_34479.test 2008-12-02 13:14:45 +0000
@@ -0,0 +1,42 @@
+--source include/have_falcon.inc
+
+#
+# Bug #34479: Falcon: search failure with indexed ucs2 varchar
+#
+--echo *** Bug #34479 ***
+
+# ----------------------------------------------------- #
+# --- Initialisation --- #
+# ----------------------------------------------------- #
+let $engine = 'Falcon';
+eval SET @@storage_engine = $engine;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# ----------------------------------------------------- #
+# --- Test --- #
+# ----------------------------------------------------- #
+
+SET NAMES utf8;
+CREATE TABLE t1 (s1 varchar(5) character set ucs2);
+
+# The following values are: 0x0100, 0x0101, 0x0102, 0x0103, 0x0104
+INSERT INTO t1 VALUES ('Ā'),('ā'),('Ă'),('ă'),('Ą');
+SELECT * FROM t1 WHERE s1 < 'Ġ';
+
+# This would not find all records
+CREATE INDEX i ON t1 (s1);
+SELECT * FROM t1 WHERE s1 < 'Ġ';
+
+# ----------------------------------------------------- #
+# --- Check --- #
+# ----------------------------------------------------- #
+
+SELECT count(*) FROM t1;
+
+# ----------------------------------------------------- #
+# --- Final cleanup --- #
+# ----------------------------------------------------- #
+DROP TABLE t1;
=== modified file 'storage/falcon/MySQLCollation.cpp'
--- a/storage/falcon/MySQLCollation.cpp 2007-11-27 20:07:30 +0000
+++ b/storage/falcon/MySQLCollation.cpp 2008-12-02 13:14:45 +0000
@@ -13,6 +13,7 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+#include <memory.h>
#include "Engine.h"
#include "MySQLCollation.h"
#include "IndexKey.h"
@@ -43,8 +44,8 @@ int MySQLCollation::compare (Value *valu
if (!isBinary)
{
- len1 = computeKeyLength(len1, string1, padChar, minSortChar);
- len2 = computeKeyLength(len2, string2, padChar, minSortChar);
+ len1 = computeKeyLength(charset, len1, string1, padChar, minSortChar);
+ len2 = computeKeyLength(charset, len2, string2, padChar, minSortChar);
}
return falcon_strnncoll(charset, string1, len1, string2, len2, false);
@@ -67,7 +68,7 @@ int MySQLCollation::makeKey (Value *valu
srcLen = value->getString (sizeof(temp), temp);
if (!isBinary)
- srcLen = computeKeyLength(srcLen, temp, padChar, minSortChar);
+ srcLen = computeKeyLength(charset, srcLen, temp, padChar, minSortChar);
// Since some collations make dstLen > srcLen, be sure dstLen is < partialKey.
@@ -114,3 +115,58 @@ int MySQLCollation::truncate(Value *valu
return len;
}
+
+uint MySQLCollation::computeKeyLength (void *charset, uint length, const char *key,
+ char padChar, char minSortChar)
+
+{
+ // We need to convert the padChar into the current character set
+
+ unsigned char pad[20];
+ int padLen = falcon_conv_uni_cs (charset, padChar, pad, sizeof(pad));
+
+ if (falcon_get_mbminlen(charset) == falcon_get_mbmaxlen(charset))
+ {
+
+ // All characters are represented using the same number of bytes
+ // we don't have to worry about multi-byte sequences of different
+ // lengths
+
+ int charLen = falcon_get_mbminlen(charset);
+
+ for (const char *p = key + length; p > key; p = p - charLen)
+ {
+ if ((p[-charLen] != 0) && (memcmp(pad, p-charLen, charLen) != 0)
+ && (p[-charLen] != minSortChar))
+ {
+ return (uint) (p - key);
+ }
+ }
+ }
+ else
+ {
+
+ // The number of bytes in each multi-byte sequence may vary
+
+ int charLen = 0;
+
+ for (const char *p = key + length; p > key; --p)
+ {
+ charLen = falcon_get_mbcharlen(charset, p-1);
+
+ if (charLen == 0)
+ {
+ // Not at the beginning of a sequence
+ continue;
+ }
+ else if ((p[-1] != 0) && (padLen != charLen || memcmp(pad, p-1, charLen) != 0)
+ && (p[-1] != minSortChar))
+ {
+ return (uint) (p - key + (charLen-1));
+ }
+
+ }
+ }
+
+ return 0;
+}
=== modified file 'storage/falcon/MySQLCollation.h'
--- a/storage/falcon/MySQLCollation.h 2007-11-27 20:07:30 +0000
+++ b/storage/falcon/MySQLCollation.h 2008-12-02 13:14:45 +0000
@@ -26,9 +26,12 @@ extern int falcon_strnxfrm (void *cs,
const char *src, uint srclen);
extern char falcon_get_pad_char (void *cs);
+extern int falcon_conv_uni_cs (void *cs, char character, unsigned char* dst, int dstLen);
extern int falcon_cs_is_binary (void *cs);
+extern unsigned int falcon_get_mbminlen (void *cs);
extern unsigned int falcon_get_mbmaxlen (void *cs);
extern char falcon_get_min_sort_char (void *cs);
+extern uint falcon_get_mbcharlen(void *cs, const char *s);
extern uint falcon_strnchrlen(void *cs, const char *s, uint l);
extern uint falcon_strnxfrmlen(void *cs, const char *s, uint srclen,
int partialKey, int bufSize);
@@ -63,15 +66,9 @@ public:
uint mbMaxLen;
char minSortChar;
- static inline uint computeKeyLength (uint length, const char *key,
- char padChar, char minSortChar)
- {
- for (const char *p = key + length; p > key; --p)
- if ((p[-1] != 0) && (p[-1] != padChar) && (p[-1] != minSortChar))
- return (uint) (p - key);
+ static uint computeKeyLength (void *charset, uint length, const char *key,
+ char padChar, char minSortChar);
- return 0;
- }
};
#endif
=== modified file 'storage/falcon/ha_falcon.cpp'
--- a/storage/falcon/ha_falcon.cpp 2008-11-25 13:38:06 +0000
+++ b/storage/falcon/ha_falcon.cpp 2008-12-02 13:14:45 +0000
@@ -292,12 +292,37 @@ char falcon_get_pad_char (void *cs)
return (char) ((CHARSET_INFO*) cs)->pad_char;
}
+int falcon_conv_uni_cs (void *cs, const char character, unsigned char* dst, int dstLen)
+{
+
+ // Convert from unicode codepoint
+
+ CHARSET_INFO* charset = (CHARSET_INFO*) cs;
+ int len = (charset->cset->wc_mb(charset, (my_wc_t)character, dst, dst + dstLen));
+
+ if (len <= 0)
+ {
+ // Should not happen
+ dst[0] = character;
+ return 1;
+ }
+ else
+ {
+ return len;
+ }
+}
+
int falcon_cs_is_binary (void *cs)
{
return (0 == strcmp(((CHARSET_INFO*) cs)->name, "binary"));
// return ((((CHARSET_INFO*) cs)->state & MY_CS_BINSORT) == MY_CS_BINSORT);
}
+unsigned int falcon_get_mbminlen (void *cs)
+{
+ return ((CHARSET_INFO*) cs)->mbminlen;
+}
+
unsigned int falcon_get_mbmaxlen (void *cs)
{
return ((CHARSET_INFO*) cs)->mbmaxlen;
@@ -308,6 +333,13 @@ char falcon_get_min_sort_char (void *cs)
return (char) ((CHARSET_INFO*) cs)->min_sort_char;
}
+uint falcon_get_mbcharlen(void *cs, const char *s)
+{
+ CHARSET_INFO *charset = (CHARSET_INFO*) cs;
+ uchar *ch = (uchar *) s;
+ return charset->cset->mbcharlen(charset, *ch);
+}
+
// Return the actual number of characters in the string
// Note, this is not the number of characters with collatable weight.
| Thread |
|---|
| • bzr commit into mysql-6.0-falcon-team branch (lars-erik.bjork:2922) Bug#34479 | lars-erik.bjork | 2 Dec |