Below is the list of changes that have just been committed into a local
6.0 repository of gkodinov. When gkodinov does a push these changes
will be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2008-02-13 16:53:24+02:00, gkodinov@stripped +8 -0
Bug #14637: trim trailing spaces processes data only byte wise
Use an int * where possible to scan for trailing spaces in a
string instead of always iterating char-by-char.
Using the attached benchmark file on a 32 bit Intel Core 2
Duo CPU I've got 43485 ms run with the fix compared to 44373
without it.
include/m_string.h@stripped, 2008-02-13 16:53:21+02:00, gkodinov@stripped +2 -0
Bug #14637: scan for space through ints
libmysql/Makefile.shared@stripped, 2008-02-13 16:53:21+02:00, gkodinov@stripped +2 -1
Bug 14637: include the skip_trailing_space
strings/Makefile.am@stripped, 2008-02-13 16:53:21+02:00, gkodinov@stripped +4 -4
Bug 14637: include the skip_trailing_space
strings/ctype-bin.c@stripped, 2008-02-13 16:53:22+02:00, gkodinov@stripped +1 -4
Bug #14637: scan for space through ints
strings/ctype-latin1.c@stripped, 2008-02-13 16:53:22+02:00, gkodinov@stripped +2 -3
Bug #14637: scan for space through ints
strings/ctype-mb.c@stripped, 2008-02-13 16:53:22+02:00, gkodinov@stripped +1 -4
Bug #14637: scan for space through ints
strings/ctype-simple.c@stripped, 2008-02-13 16:53:22+02:00, gkodinov@stripped +4 -6
Bug #14637: scan for space through ints
strings/stspace.c@stripped, 2008-02-13 16:53:22+02:00, gkodinov@stripped +92 -0
New BitKeeper file ``strings/stspace.c''
strings/stspace.c@stripped, 2008-02-13 16:53:22+02:00, gkodinov@stripped +0 -0
diff -Nrup a/include/m_string.h b/include/m_string.h
--- a/include/m_string.h 2007-11-30 15:52:49 +02:00
+++ b/include/m_string.h 2008-02-13 16:53:21 +02:00
@@ -263,4 +263,6 @@ typedef struct st_mysql_lex_string LEX_S
#define USTRING_WITH_LEN(X) ((uchar*) X), ((size_t) (sizeof(X) - 1))
#define C_STRING_WITH_LEN(X) ((char *) (X)), ((size_t) (sizeof(X) - 1))
+const uchar *skip_trailing_space(const uchar *ptr,size_t len); /* stspace.c */
+
#endif
diff -Nrup a/libmysql/Makefile.shared b/libmysql/Makefile.shared
--- a/libmysql/Makefile.shared 2007-12-07 12:57:22 +02:00
+++ b/libmysql/Makefile.shared 2008-02-13 16:53:21 +02:00
@@ -46,7 +46,8 @@ mystringsobjects = strmov.lo strxmov.lo
ctype-win1250ch.lo ctype-utf8.lo ctype-extra.lo \
ctype-ucs2.lo ctype-gb2312.lo ctype-gbk.lo \
ctype-sjis.lo ctype-tis620.lo ctype-ujis.lo \
- ctype-uca.lo xml.lo my_strtoll10.lo str_alloc.lo
+ ctype-uca.lo xml.lo my_strtoll10.lo str_alloc.lo \
+ stspace.lo
mystringsextra= strto.c
dbugobjects = dbug.lo # IT IS IN SAFEMALLOC.C sanity.lo
diff -Nrup a/strings/Makefile.am b/strings/Makefile.am
--- a/strings/Makefile.am 2006-12-31 02:06:43 +02:00
+++ b/strings/Makefile.am 2008-02-13 16:53:21 +02:00
@@ -21,19 +21,19 @@ pkglib_LIBRARIES = libmystrings.a
# Exact one of ASSEMBLER_X
if ASSEMBLER_x86
ASRCS = strings-x86.s longlong2str-x86.s my_strtoll10-x86.s
-CSRCS = bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c
strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c
ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c
ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c
ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c
my_vsnprintf.c xml.c decimal.c ctype-extra.c str_alloc.c longlong2str_asm.c my_strchr.c
+CSRCS = bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c
strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c
ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c
ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c
ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c
my_vsnprintf.c xml.c decimal.c ctype-extra.c str_alloc.c longlong2str_asm.c my_strchr.c
stspace.c
else
if ASSEMBLER_sparc32
# These file MUST all be on the same line!! Otherwise automake
# generats a very broken makefile
ASRCS = bmove_upp-sparc.s strappend-sparc.s strend-sparc.s strinstr-sparc.s
strmake-sparc.s strmov-sparc.s strnmov-sparc.s strstr-sparc.s
-CSRCS = strcont.c strfill.c strcend.c is_prefix.c longlong2str.c bfill.c bmove.c
bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c
strtoul.c strtoll.c strtoull.c llstr.c strnlen.c strxmov.c ctype.c ctype-simple.c
ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c
ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c
ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c
xml.c decimal.c ctype-extra.c my_strtoll10.c str_alloc.c my_strchr.c
+CSRCS = strcont.c strfill.c strcend.c is_prefix.c longlong2str.c bfill.c bmove.c
bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c
strtoul.c strtoll.c strtoull.c llstr.c strnlen.c strxmov.c ctype.c ctype-simple.c
ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c
ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c
ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c
xml.c decimal.c ctype-extra.c my_strtoll10.c str_alloc.c my_strchr.c stspace.c
else
#no assembler
ASRCS =
# These file MUST all be on the same line!! Otherwise automake
# generats a very broken makefile
-CSRCS = strxmov.c bmove_upp.c strappend.c strcont.c strend.c strfill.c strcend.c
is_prefix.c strstr.c strinstr.c strmake.c strnmov.c strmov.c longlong2str.c bfill.c
bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c
strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c
ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c
ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c
ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c
xml.c decimal.c ctype-extra.c my_strtoll10.c str_alloc.c my_strchr.c
+CSRCS = strxmov.c bmove_upp.c strappend.c strcont.c strend.c strfill.c strcend.c
is_prefix.c strstr.c strinstr.c strmake.c strnmov.c strmov.c longlong2str.c bfill.c
bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c
strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c
ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c
ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c
ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c
xml.c decimal.c ctype-extra.c my_strtoll10.c str_alloc.c my_strchr.c stspace.c
endif
endif
@@ -53,7 +53,7 @@ EXTRA_DIST = ctype-big5.c ctype-cp932.c
bmove_upp-sparc.s strappend-sparc.s strend-sparc.s \
strinstr-sparc.s strmake-sparc.s strmov-sparc.s \
strnmov-sparc.s strstr-sparc.s strxmov-sparc.s \
- t_ctype.h my_strchr.c CMakeLists.txt
+ t_ctype.h my_strchr.c CMakeLists.txt stspace.c
libmystrings_a_LIBADD=
conf_to_src_SOURCES = conf_to_src.c xml.c ctype.c bcmp.c
diff -Nrup a/strings/ctype-bin.c b/strings/ctype-bin.c
--- a/strings/ctype-bin.c 2007-06-21 12:59:24 +03:00
+++ b/strings/ctype-bin.c 2008-02-13 16:53:22 +02:00
@@ -278,14 +278,11 @@ void my_hash_sort_8bit_bin(CHARSET_INFO
{
const uchar *pos = key;
- key+= len;
-
/*
Remove trailing spaces. We have to do this to be able to compare
'A ' and 'A' as identical
*/
- while (key > pos && key[-1] == ' ')
- key--;
+ key= skip_trailing_space(key, len);
for (; pos < (uchar*) key ; pos++)
{
diff -Nrup a/strings/ctype-latin1.c b/strings/ctype-latin1.c
--- a/strings/ctype-latin1.c 2007-06-21 12:59:24 +03:00
+++ b/strings/ctype-latin1.c 2008-02-13 16:53:22 +02:00
@@ -683,13 +683,12 @@ void my_hash_sort_latin1_de(CHARSET_INFO
const uchar *key, size_t len,
ulong *nr1, ulong *nr2)
{
- const uchar *end= key+len;
+ const uchar *end;
/*
Remove end space. We have to do this to be able to compare
'AE' and 'Ä' as identical
*/
- while (end > key && end[-1] == ' ')
- end--;
+ end= skip_trailing_space(key, len);
for (; key < end ; key++)
{
diff -Nrup a/strings/ctype-mb.c b/strings/ctype-mb.c
--- a/strings/ctype-mb.c 2007-10-22 14:43:29 +03:00
+++ b/strings/ctype-mb.c 2008-02-13 16:53:22 +02:00
@@ -565,14 +565,11 @@ void my_hash_sort_mb_bin(CHARSET_INFO *c
{
const uchar *pos = key;
- key+= len;
-
/*
Remove trailing spaces. We have to do this to be able to compare
'A ' and 'A' as identical
*/
- while (key > pos && key[-1] == ' ')
- key--;
+ key= skip_trailing_space(key, len);
for (; pos < (uchar*) key ; pos++)
{
diff -Nrup a/strings/ctype-simple.c b/strings/ctype-simple.c
--- a/strings/ctype-simple.c 2007-12-07 12:43:57 +02:00
+++ b/strings/ctype-simple.c 2008-02-13 16:53:22 +02:00
@@ -305,14 +305,13 @@ void my_hash_sort_simple(CHARSET_INFO *c
ulong *nr1, ulong *nr2)
{
register uchar *sort_order=cs->sort_order;
- const uchar *end= key + len;
+ const uchar *end;
/*
Remove end space. We have to do this to be able to compare
'A ' and 'A' as identical
*/
- while (end > key && end[-1] == ' ')
- end--;
+ end= skip_trailing_space(key, len);
for (; key < (uchar*) end ; key++)
{
@@ -1166,9 +1165,8 @@ size_t my_well_formed_len_8bit(CHARSET_I
size_t my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)),
const char *ptr, size_t length)
{
- const char *end= ptr+length;
- while (end > ptr && end[-1] == ' ')
- end--;
+ const char *end;
+ end= (const char *) skip_trailing_space((const uchar *)ptr, length);
return (size_t) (end-ptr);
}
diff -Nrup a/strings/stspace.c b/strings/stspace.c
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/strings/stspace.c 2008-02-13 16:53:22 +02:00
@@ -0,0 +1,92 @@
+/* Copyright (C) 2000 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Defines: skip_trailing_space()
+*/
+
+
+#include <my_global.h>
+#include "m_string.h"
+
+/* SPACE_INT is an int-sized word in which every byte is a space (0x20) */
+#if SIZEOF_INT == 4
+#define SPACE_INT 0x20202020
+#elif SIZEOF_INT == 8
+#define SPACE_INT 0x2020202020202020
+#else
+#error define the appropriate constant for a word full of spaces
+#endif
+
+/**
+  Skip trailing space.
+
+  On most systems reading memory in larger chunks (ideally equal to the size of
+  the chunks that the machine physically reads from memory) causes fewer memory
+  accesses and hence better performance. This is why the 'int' type is used :
+  it's closest to that (according to how it's defined in C).
+  To find the trailing whitespace of a string we do the following :
+    1. We divide the string into 3 zones :
+      a) from the start of the string (start) to the first multiple
+        of sizeof(int)  (start_words)
+      b) from the end of the string (end) to the last multiple of sizeof(int)
+        (end_words)
+      c) the int-aligned zone in between, safely readable through an int *
+    2. We start comparing backwards from (b) char-by-char. If all we find is
+       space then we continue
+    3. If there are elements in zone (c) we compare them as unsigned ints to
+       an int mask (SPACE_INT) consisting of all spaces
+    4. Finally we compare the remaining part of the string char by char.
+       This covers zone (a) and the last non-space unsigned int from 3.
+       (if any)
+
+  This works well for longer strings but slows down short ones (because of the
+  additional calculations and checks compared to the naive method), so the
+  word-wise scan is used only when len > 20.
+
+  Not declared 'inline' : the callers are in other files and need the external
+  definition, which a C99 inline definition does not provide.
+
+  @param     ptr   pointer to the input string
+  @param     len   the length of the string
+  @return          pointer one past the last non-space character (ptr if none)
+*/
+
+const uchar *skip_trailing_space(const uchar *ptr,size_t len)
+{
+  const uchar *start= ptr;
+  const uchar *end= ptr + len;
+
+  if (len > 20)
+  {
+    const uchar *end_words= (const uchar *)
+      (((intptr)end) / SIZEOF_INT * SIZEOF_INT);
+    const uchar *start_words= (const uchar *)
+      ((((intptr)start) + SIZEOF_INT - 1) / SIZEOF_INT * SIZEOF_INT);
+
+    DBUG_ASSERT(((intptr)start) >= SIZEOF_INT);
+    if (end_words > start)
+    {
+      while (end > end_words && end[-1] == 0x20)        /* zone (b) */
+        end--;
+      if (end[-1] == 0x20 && start_words < end_words)   /* zone (c) */
+        while (end > start_words && ((const unsigned *)end)[-1] == SPACE_INT)
+          end -= SIZEOF_INT;
+    }
+  }
+  while (end > start && end[-1] == 0x20)                /* the rest */
+    end--;
+  return (end);
+}