Below is the list of changes that have just been committed into a
4.0 repository of monty. When monty does a push, they will be propagated to
the main repository and, within 24 hours after the push, to the public repository.
For information on how to access the public repository
see http://www.mysql.com/doc/I/n/Installing_source_tree.html
ChangeSet@stripped, 2001-09-11 01:40:52+03:00, monty@stripped
Fixes for German sorting order.
mysql-test/r/ctype_latin1_de.result
1.1 01/09/11 01:40:52 monty@stripped +168 -0
mysql-test/t/ctype_latin1_de-master.opt
1.1 01/09/11 01:40:52 monty@stripped +1 -0
mysql-test/t/ctype_latin1_de.test
1.1 01/09/11 01:40:52 monty@stripped +36 -0
configure.in
1.149 01/09/11 01:40:52 monty@stripped +1 -1
Don't make the German sort order default
myisam/mi_delete_all.c
1.4 01/09/11 01:40:52 monty@stripped +3 -1
Truncate files on DELETE FROM table_name to not get warnings when checking files
myisam/mi_search.c
1.26 01/09/11 01:40:52 monty@stripped +13 -6
Fix for multi-byte character sets.
mysql-test/r/ctype_latin1_de.result
1.0 01/09/11 01:40:52 monty@stripped +0 -0
BitKeeper file /my/bk/mysql-4.0/mysql-test/r/ctype_latin1_de.result
mysql-test/t/ctype_latin1_de-master.opt
1.0 01/09/11 01:40:52 monty@stripped +0 -0
BitKeeper file /my/bk/mysql-4.0/mysql-test/t/ctype_latin1_de-master.opt
mysql-test/t/ctype_latin1_de.test
1.0 01/09/11 01:40:52 monty@stripped +0 -0
BitKeeper file /my/bk/mysql-4.0/mysql-test/t/ctype_latin1_de.test
sql/item_cmpfunc.cc
1.16 01/09/11 01:40:52 monty@stripped +1 -1
Use current character set when using STRCMP()
strings/ctype-latin1_de.c
1.3 01/09/11 01:40:52 monty@stripped +73 -145
F
Docs/manual.texi
1.543 01/09/11 01:40:51 monty@stripped +32 -1
Update for German sorting
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: monty
# Host: hundin.mysql.fi
# Root: /my/bk/mysql-4.0
--- 1.542/Docs/manual.texi Thu Sep 6 15:19:43 2001
+++ 1.543/Docs/manual.texi Tue Sep 11 01:40:51 2001
@@ -748,7 +748,7 @@
@item
Full support for several different character sets, including
-ISO-8859-1 (Latin1), big5, ujis, and more. For example, the
+ISO-8859-1 (Latin1), german, big5, ujis, and more. For example, the
Scandinavian characters `@ringaccent{a}', `@"a' and `@"o' are allowed
in table and column names.
@@ -20442,6 +20442,35 @@
but normally this is never needed.
+@menu
+* German character set::
+@end menu
+
+@node German character set, , Character sets, Character sets
+@subsubsection German character set
+
+To get German sorting order, you should start @code{mysqld} with
+@code{--default-character-set=latin1_de}. This will give you the following
+characteristics.
+
+When sorting and comparing strings, the following mapping is done on the
+strings before doing the comparison:
+
+@example
+@end example
+
+their un-accented counterpart. All letters are converted to uppercase.
+
+When comparing strings with @code{LIKE}, the one -> two character mapping
+is not done. All letters are converted to uppercase. Accents are removed
+
@node Languages, Adding character set, Character sets, Localization
@subsection Non-English Error Messages
@@ -46752,6 +46781,8 @@
@cindex changes, version 4.0
@itemize @bullet
+@item
+New character set @code{latin1_de} which provides correct German sorting.
@item
@code{TRUNCATE TABLE} and @code{DELETE FROM table_name} are now separate
functions. One bonus is that @code{DELETE FROM table_name} now returns
--- 1.148/configure.in Sun Sep 9 03:53:34 2001
+++ 1.149/configure.in Tue Sep 11 01:40:52 2001
@@ -1826,7 +1826,7 @@
latin1 latin1_de latin2 latin5 sjis swe7 tis620 ujis
usa7 win1250 win1251ukr"
CHARSETS_DEPRECATED="win1251"
-DEFAULT_CHARSET=latin1_de
+DEFAULT_CHARSET=latin1
AC_DIVERT_POP
AC_ARG_WITH(charset,
--- 1.3/myisam/mi_delete_all.c Wed Aug 30 22:42:08 2000
+++ 1.4/myisam/mi_delete_all.c Tue Sep 11 01:40:52 2001
@@ -15,7 +15,7 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/* Remove all rows from a MyISAM table */
-/* This only clears the status information; The files are not truncated */
+/* This only clears the status information and truncates the data file */
#include "myisamdef.h"
@@ -50,6 +50,8 @@
myisam_log_command(MI_LOG_DELETE_ALL,info,(byte*) 0,0,0);
VOID(_mi_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
+ if (my_chsize(info->dfile, 0, MYF(MY_WME)))
+ goto err;
allow_break(); /* Allow SIGHUP & SIGINT */
DBUG_RETURN(0);
--- 1.25/myisam/mi_search.c Mon Jul 2 22:52:21 2001
+++ 1.26/myisam/mi_search.c Tue Sep 11 01:40:52 2001
@@ -657,19 +657,19 @@
int _mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length,
uchar *b, uint b_length, my_bool part_key)
{
- uint length= min(a_length,b_length);
- uchar *end= a+ length;
int flag;
#ifdef USE_STRCOLL
if (use_strcoll(charset_info))
{
- if ((flag = my_strnncoll(charset_info, a, a_length, b, b_length)))
- return flag;
+ /* QQ: This needs to work with part keys at some point */
+ return my_strnncoll(charset_info, a, a_length, b, b_length);
}
else
#endif
{
+ uint length= min(a_length,b_length);
+ uchar *end= a+ length;
uchar *sort_order=charset_info->sort_order;
while (a < end)
if ((flag= (int) sort_order[*a++] - (int) sort_order[*b++]))
@@ -768,8 +768,15 @@
}
else
{
- uint length=(uint) (end-a);
- if ((flag=_mi_compare_text(keyseg->charset,a,length,b,length,
+ uint length=(uint) (end-a), a_length=length, b_length=length;
+ if (!(nextflag & SEARCH_PREFIX))
+ {
+ while (a_length && a[a_length-1] == ' ')
+ a_length--;
+ while (b_length && b[b_length-1] == ' ')
+ b_length--;
+ }
+ if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
--- 1.15/sql/item_cmpfunc.cc Thu Aug 2 06:29:42 2001
+++ 1.16/sql/item_cmpfunc.cc Tue Sep 11 01:40:52 2001
@@ -254,7 +254,7 @@
null_value=1;
return 0;
}
- int value=stringcmp(a,b);
+ int value= binary ? stringcmp(a,b) : sortcmp(a,b);
null_value=0;
return !value ? 0 : (value < 0 ? (longlong) -1 : (longlong) 1);
}
--- New file ---
+++ mysql-test/r/ctype_latin1_de.result 01/09/11 01:40:52
a b
a 35
ac 2
ad 4
ae 3
aeae 33
aeb 6
o 37
oc 15
od 18
oe 17
oeb 20
q 34
s 21
ss 22
ssa 25
ssc 26
u 36
uc 8
ud 10
ue 9
ueb 12
uf 13
a b
a 35
ac 2
ad 4
ae 3
aeae 33
aeb 6
o 37
oc 15
od 18
oe 17
oeb 20
q 34
s 21
ss 22
ssa 25
ssc 26
u 36
uc 8
ud 10
ue 9
ueb 12
uf 13
a
uf
ueb
ue
ud
uc
u
ssc
ssa
ss
s
q
oeb
oe
od
oc
o
aeb
aeae
ae
ad
ac
a
Table Op Msg_type Msg_text
test.t1 check status OK
a b
a b
a b
a 35
ac 2
ad 4
ae 3
aeae 33
aeb 6
ssa 25
a b
u 36
uc 8
ud 10
ue 9
ueb 12
uf 13
a b
ss 22
ssa 25
ssc 26
0 0 0 0
0 0 0 0
-1 -1 -1 -1
-1 -1 -1 -1
-1 -1 -1
1 1 1 1
1 1 1 1
1 1
--- New file ---
+++ mysql-test/t/ctype_latin1_de-master.opt 01/09/11 01:40:52
--default-character-set=latin1_de
--- New file ---
+++ mysql-test/t/ctype_latin1_de.test 01/09/11 01:40:52
#
# Test latin1_de character set
#
drop table if exists t1;
create table t1 (a char (20) not null, b int not null auto_increment, index (a,b),index(b));
select a,b from t1 order by a,b;
select a,b from t1 order by upper(a),b;
select a from t1 order by a desc;
check table t1;
select * from t1 where a like "%U%";
select * from t1 where a like "%ss%";
drop table t1;
# The following should all be true
# The following should all return -1
# The following should all return 1
--- 1.2/strings/ctype-latin1_de.c Fri Sep 7 23:45:05 2001
+++ 1.3/strings/ctype-latin1_de.c Tue Sep 11 01:40:52 2001
@@ -99,12 +99,10 @@
* This is a simple latin1 mapping table, which maps all accented
* characters to their non-accented equivalents. Note: in this
- * accented characters are treated the same way.
- *
- * accented 's', is mapped to 'S', to simplify the sorting
- * functions.
+ * accented characters except the following are treated the same way.
*/
+
uchar sort_order_latin1_de[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -118,10 +116,10 @@
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
- 65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
- 68, 78, 79, 79, 79, 79, 79,215,216, 85, 85, 85, 85, 89,222, 83,
- 65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
- 68, 78, 79, 79, 79, 79, 79,247,216, 85, 85, 85, 85, 89,222, 89
+ 65, 65, 65, 65,196, 65, 92, 67, 69,201, 69, 69, 73, 73, 73, 73,
+ 68, 78, 79, 79, 79, 79,214,215,216, 85, 85, 85,220, 89,222,223,
+ 65, 65, 65, 65,196, 65, 92, 67, 69,201, 69, 69, 73, 73, 73, 73,
+ 68, 78, 79, 79, 79, 79,214,247,216, 85, 85, 85,220, 89,222, 89
};
#define L1_AE 196
@@ -132,6 +130,39 @@
#define L1_ue 252
#define L1_ss 223
+
+/*
+ Some notes about the following comparison rules:
+ By definition, my_strnncoll_latin1_de must work exactly as if we had called
+ my_strnxfrm_latin1_de() on both strings and compared the result strings.
+
+ This means that:
+ both to AE.
+
+ The other option would be to not do any accent removal in
+ sort_order_latin1_de[] at all
+*/
+
+
+#define CHECK_S1_COMBO(ch1, ch2, str1, str1_end, res_if_str1_smaller, str2, fst, snd, accent) \
+ /* Invariant: ch1 == fst == sort_order_latin1_de[accent] && ch1 != ch2 */ \
+ if (ch2 != accent) \
+ { \
+ ch1= fst; \
+ goto normal; \
+ } \
+ if (str1 == str1_end) \
+ return res_if_str1_smaller; \
+ { \
+ int diff = (int) sort_order_latin1_de[*str1] - snd; \
+ if (diff) \
+ return diff*(-(res_if_str1_smaller)); \
+ str1++; \
+ }
+
+
int my_strnncoll_latin1_de(const uchar * s1, int len1,
const uchar * s2, int len2)
{
@@ -140,172 +171,71 @@
while (s1 < e1 && s2 < e2)
{
- /* to_upper is used instead of sort_order, because we don't want
- * sort_order tables together, but that is acceptable. */
- uchar c1 = to_upper_latin1_de[*s1];
- uchar c2 = to_upper_latin1_de[*s2];
+ /*
+ can use it here.
+ */
+ uchar c1 = sort_order_latin1_de[*s1++];
+ uchar c2 = sort_order_latin1_de[*s2++];
if (c1 != c2)
{
- switch (c1)
- {
-
-#define CHECK_S1_COMBO(fst, snd, accent) \
- /* Invariant: c1 == fst == sort_order_latin1_de[accent] && c1 != c2 */ \
- if (c2 == accent) \
- { \
- if (s1 + 1 < e1) \
- { \
- if (to_upper_latin1_de[*(s1 + 1)] == snd) \
- { \
- s1 += 2; \
- s2 += 1; \
- } \
- else \
- { \
- int diff = sort_order_latin1_de[*(s1 + 1)] - snd; \
- if (diff) \
- return diff; \
- else \
- return 1; \
- } \
- } \
- else \
- return -1; \
- } \
- else \
- return fst - sort_order_latin1_de[c2]
-
+ switch (c1) {
case 'A':
- CHECK_S1_COMBO('A', 'E', L1_AE);
+ CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'A', 'E', L1_AE);
break;
case 'O':
- CHECK_S1_COMBO('O', 'E', L1_OE);
+ CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'O', 'E', L1_OE);
break;
case 'U':
- CHECK_S1_COMBO('U', 'E', L1_UE);
+ CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'U', 'E', L1_UE);
break;
case 'S':
- CHECK_S1_COMBO('S', 'S', L1_ss);
+ CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'S', 'S', L1_ss);
break;
-
-#define CHECK_S2_COMBO(fst, snd) \
- /* Invariant: sort_order_latin1_de[c1] == fst && c1 != c2 */ \
- if (c2 == fst) \
- { \
- if (s2 + 1 < e2) \
- { \
- if (to_upper_latin1_de[*(s2 + 1)] == snd) \
- { \
- s1 += 1; \
- s2 += 2; \
- } \
- else \
- { \
- int diff = sort_order_latin1_de[*(s1 + 1)] - snd; \
- if (diff) \
- return diff; \
- else \
- return -1; \
- } \
- } \
- else \
- return 1; \
- } \
- else \
- return fst - sort_order_latin1_de[c2]
-
case L1_AE:
- CHECK_S2_COMBO('A', 'E');
+ CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'A', 'E', 'A');
break;
case L1_OE:
- CHECK_S2_COMBO('O', 'E');
+ CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'O', 'E', 'O');
break;
case L1_UE:
- CHECK_S2_COMBO('U', 'E');
+ CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'U', 'E', 'U');
break;
case L1_ss:
- CHECK_S2_COMBO('S', 'S');
+ CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'S', 'S', 'S');
break;
default:
+ /*
+ Handle the case where 'c2' is a special character
+ If this is true, we know that c1 can't match this character.
+ */
+ normal:
switch (c2) {
case L1_AE:
+ return (int) c1 - (int) 'A';
case L1_OE:
+ return (int) c1 - (int) 'O';
case L1_UE:
+ return (int) c1 - (int) 'U';
case L1_ss:
- return sort_order_latin1_de[c1] - sort_order_latin1_de[c2];
- break;
+ return (int) c1 - (int) 'S';
default:
- if (sort_order_latin1_de[*s1] != sort_order_latin1_de[*s2])
- return sort_order_latin1_de[*s1] - sort_order_latin1_de[*s2];
- ++s1;
- ++s2;
- break;
+ {
+ int diff= (int) c1 - (int) c2;
+ if (diff)
+ return diff;
}
break;
-
-#undef CHECK_S1_COMBO
-#undef CHECK_S2_COMBO
-
- }
- }
- else
- {
- * letter in a combo really is the unaccented 'e' (or 's' for
- * "ss") and is not an accented character with the same sort_order. */
- ++s1;
- ++s2;
- if (s1 < e1 && s2 < e2)
- {
- switch (c1)
- {
- case 'A':
- case 'O':
- case 'U':
- if (sort_order_latin1_de[*s1] == 'E' &&
- to_upper_latin1_de[*s1] != 'E' &&
- to_upper_latin1_de[*s2] == 'E')
- return 1;
- if (sort_order_latin1_de[*s2] == 'E' &&
- to_upper_latin1_de[*s2] != 'E' &&
- to_upper_latin1_de[*s1] == 'E')
- return -1;
- break;
- case 'S':
- if (sort_order_latin1_de[*s1] == 'S' &&
- to_upper_latin1_de[*s1] != 'S' &&
- to_upper_latin1_de[*s2] == 'S')
- return 1;
- if (sort_order_latin1_de[*s2] == 'S' &&
- to_upper_latin1_de[*s2] != 'S' &&
- to_upper_latin1_de[*s1] == 'S')
- return -1;
- break;
- default:
- break;
}
}
}
}
-
/* A simple test of string lengths won't work -- we test to see
* which string ran out first */
return s1 < e1 ? 1 : s2 < e2 ? -1 : 0;
}
+
int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
{
const uchar *dest_orig = dest;
@@ -313,22 +243,19 @@
const uchar *se = src + srclen;
while (src < se && dest < de)
{
- switch (*src)
- {
+ uchar chr=sort_order_latin1_de[*src];
+ switch (chr) {
case L1_AE:
- case L1_ae:
*dest++ = 'A';
if (dest < de)
*dest++ = 'E';
break;
case L1_OE:
- case L1_oe:
*dest++ = 'O';
if (dest < de)
*dest++ = 'E';
break;
case L1_UE:
- case L1_ue:
*dest++ = 'U';
if (dest < de)
*dest++ = 'E';
@@ -339,13 +266,14 @@
*dest++ = 'S';
break;
default:
- *dest++ = sort_order_latin1_de[*src];
+ *dest++= chr;
break;
}
++src;
}
return dest - dest_orig;
}
+
int my_strcoll_latin1_de(const uchar * s1, const uchar * s2)
{
| Thread |
|---|
| • bk commit into 4.0 tree | monty | 11 Sep |