3947 Alexander Barkov 2012-06-05
Preparatory refactoring for multi-level collations:
- Introducing MY_UCA_WEIGHT_LEVEL, moving one level related data
from MY_UCA_INFO to MY_UCA_WEIGHT_LEVEL
- Splitting big functions into smaller ones
modified:
include/m_ctype.h
strings/ctype-mb.c
strings/ctype-uca.c
3946 Nuno Carvalho 2012-06-04
WL#5223: Binary Log Group Commit
MYSQL_BIN_LOG::flush_and_sync(bool force) was ignoring "force" argument
and always flushing binlog to disk.
Fix flush_and_sync(bool force) implementation by taking "force" into
account.
modified:
sql/binlog.cc
=== modified file 'include/m_ctype.h'
--- a/include/m_ctype.h 2012-03-06 14:29:42 +0000
+++ b/include/m_ctype.h 2012-06-05 04:28:21 +0000
@@ -78,6 +78,7 @@ extern MY_UNICASE_INFO my_unicase_unicod
#define MY_UCA_MAX_CONTRACTION 6
#define MY_UCA_MAX_WEIGHT_SIZE 8
+#define MY_UCA_WEIGHT_LEVELS 1
typedef struct my_contraction_t
{
@@ -96,13 +97,25 @@ typedef struct my_contraction_list_t
} MY_CONTRACTIONS;
+my_bool my_uca_can_be_contraction_head(const MY_CONTRACTIONS *c, my_wc_t wc);
+my_bool my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc);
+uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c,
+ my_wc_t wc1, my_wc_t wc2);
-typedef struct uca_info_st
+
+/* Collation weights on a single level (e.g. primary, secondary, tertiary) */
+typedef struct my_uca_level_info_st
{
my_wc_t maxchar;
uchar *lengths;
uint16 **weights;
MY_CONTRACTIONS contractions;
+} MY_UCA_WEIGHT_LEVEL;
+
+
+typedef struct uca_info_st
+{
+ MY_UCA_WEIGHT_LEVEL level[MY_UCA_WEIGHT_LEVELS];
/* Logical positions */
my_wc_t first_non_ignorable;
@@ -122,12 +135,6 @@ typedef struct uca_info_st
-my_bool my_uca_have_contractions(MY_UCA_INFO *uca);
-my_bool my_uca_can_be_contraction_head(MY_UCA_INFO *uca, my_wc_t wc);
-my_bool my_uca_can_be_contraction_tail(MY_UCA_INFO *uca, my_wc_t wc);
-uint16 *my_uca_contraction2_weight(MY_UCA_INFO *uca, my_wc_t wc1, my_wc_t wc2);
-
-
extern MY_UCA_INFO my_uca_v400;
@@ -702,6 +709,9 @@ size_t my_strxfrm_pad_desc_and_reverse(c
my_bool my_charset_is_ascii_compatible(const CHARSET_INFO *cs);
+const MY_CONTRACTIONS *my_charset_get_contractions(const CHARSET_INFO *cs,
+ int level);
+
extern size_t my_vsnprintf_ex(const CHARSET_INFO *cs, char *to, size_t n,
const char* fmt, va_list ap);
=== modified file 'strings/ctype-mb.c'
--- a/strings/ctype-mb.c 2011-12-09 21:08:37 +0000
+++ b/strings/ctype-mb.c 2012-06-05 04:28:21 +0000
@@ -774,7 +774,7 @@ my_bool my_like_range_mb(const CHARSET_I
char *min_end= min_str + res_length;
char *max_end= max_str + res_length;
size_t maxcharlen= res_length / cs->mbmaxlen;
- my_bool have_contractions= my_uca_have_contractions(cs->uca);
+ const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
{
@@ -842,8 +842,8 @@ fill_max_and_min:
'ab\min\min\min\min' and 'ab\max\max\max\max'.
*/
- if (have_contractions && ptr + 1 < end &&
- my_uca_can_be_contraction_head(cs->uca, (uchar) *ptr))
+ if (contractions && ptr + 1 < end &&
+ my_uca_can_be_contraction_head(contractions, (uchar) *ptr))
{
/* Ptr[0] is a contraction head. */
@@ -865,9 +865,8 @@ fill_max_and_min:
is not a contraction, then we put only ptr[0],
and continue with ptr[1] on the next loop.
*/
- if (my_uca_can_be_contraction_tail(cs->uca, (uchar) ptr[1]) &&
- my_uca_contraction2_weight(cs->uca,
- (uchar) ptr[0], ptr[1]))
+ if (my_uca_can_be_contraction_tail(contractions, (uchar) ptr[1]) &&
+ my_uca_contraction2_weight(contractions, (uchar) ptr[0], ptr[1]))
{
/* Contraction found */
if (maxcharlen == 1 || min_str + 1 >= min_end)
@@ -932,7 +931,7 @@ my_like_range_generic(const CHARSET_INFO
char *max_end= max_str + res_length;
size_t charlen= res_length / cs->mbmaxlen;
size_t res_length_diff;
- my_bool have_contractions= cs->uca ? my_uca_have_contractions(cs->uca) : 0;
+ const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
for ( ; charlen > 0; charlen--)
{
@@ -1000,8 +999,8 @@ my_like_range_generic(const CHARSET_INFO
goto pad_min_max;
}
- if (have_contractions &&
- my_uca_can_be_contraction_head(cs->uca, wc) &&
+ if (contractions &&
+ my_uca_can_be_contraction_head(contractions, wc) &&
(res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
{
uint16 *weight;
@@ -1012,8 +1011,8 @@ my_like_range_generic(const CHARSET_INFO
goto pad_min_max;
}
- if (my_uca_can_be_contraction_tail(cs->uca, wc2) &&
- (weight= my_uca_contraction2_weight(cs->uca, wc, wc2)) && weight[0])
+ if (my_uca_can_be_contraction_tail(contractions, wc2) &&
+ (weight= my_uca_contraction2_weight(contractions, wc, wc2)) && weight[0])
{
/* Contraction found */
if (charlen == 1)
=== modified file 'strings/ctype-uca.c'
--- a/strings/ctype-uca.c 2012-03-29 13:11:42 +0000
+++ b/strings/ctype-uca.c 2012-06-05 04:28:21 +0000
@@ -6523,13 +6523,17 @@ page0FCdata,page0FDdata,page0FEdata,page
MY_UCA_INFO my_uca_v400=
{
- 0xFFFF, /* maxchar */
- uca_length,
- uca_weight,
- { /* Contractions: */
- 0, /* nitems */
- NULL, /* item */
- NULL /* flags */
+ {
+ {
+ 0xFFFF, /* maxchar */
+ uca_length,
+ uca_weight,
+ { /* Contractions: */
+ 0, /* nitems */
+ NULL, /* item */
+ NULL /* flags */
+ }
+ },
},
/* Logical positions */
@@ -19095,13 +19099,17 @@ NULL ,NULL ,NULL ,NULL
MY_UCA_INFO my_uca_v520=
{
- 0x10FFFF, /* maxchar */
- uca520_length,
- uca520_weight,
- { /* Contractions: */
- 0, /* nitems */
- NULL, /* item */
- NULL /* flags */
+ {
+ {
+ 0x10FFFF, /* maxchar */
+ uca520_length,
+ uca520_weight,
+ { /* Contractions: */
+ 0, /* nitems */
+ NULL, /* item */
+ NULL /* flags */
+ }
+ },
},
0x0009, /* first_non_ignorable p != ignore */
@@ -19453,7 +19461,7 @@ typedef struct my_uca_scanner_st
const uint16 *wbeg; /* Beginning of the current weight string */
const uchar *sbeg; /* Beginning of the input string */
const uchar *send; /* End of the input string */
- MY_UCA_INFO *uca;
+ const MY_UCA_WEIGHT_LEVEL *level;
uint16 implicit[2];
int page;
int code;
@@ -19467,6 +19475,7 @@ typedef struct my_uca_scanner_st
typedef struct my_uca_scanner_handler_st
{
void (*init)(my_uca_scanner *scanner, const CHARSET_INFO *cs,
+ const MY_UCA_WEIGHT_LEVEL *level,
const uchar *str, size_t length);
int (*next)(my_uca_scanner *scanner);
} my_uca_scanner_handler;
@@ -19499,9 +19508,9 @@ static uint16 nochar[]= {0,0};
*/
static inline void
-my_uca_add_contraction_flag(MY_UCA_INFO *uca, my_wc_t wc, int flag)
+my_uca_add_contraction_flag(MY_CONTRACTIONS *list, my_wc_t wc, int flag)
{
- uca->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK]|= flag;
+ list->flags[wc & MY_UCA_CNT_FLAG_MASK]|= flag;
}
@@ -19517,10 +19526,9 @@ my_uca_add_contraction_flag(MY_UCA_INFO
*/
static MY_CONTRACTION *
-my_uca_add_contraction(MY_UCA_INFO *uca, my_wc_t *wc, size_t len,
+my_uca_add_contraction(MY_CONTRACTIONS *list, my_wc_t *wc, size_t len,
my_bool with_context)
{
- MY_CONTRACTIONS *list= &uca->contractions;
MY_CONTRACTION *next= &list->item[list->nitems];
size_t i;
/*
@@ -19559,39 +19567,41 @@ my_uca_add_contraction(MY_UCA_INFO *uca,
*/
static my_bool
-my_uca_alloc_contractions(MY_UCA_INFO *uca,
+my_uca_alloc_contractions(MY_CONTRACTIONS *contractions,
MY_CHARSET_LOADER *loader, size_t n)
{
uint size= n * sizeof(MY_CONTRACTION);
- if (!(uca->contractions.item= (loader->once_alloc)(size)) ||
- !(uca->contractions.flags= (char *) (loader->once_alloc)(MY_UCA_CNT_FLAG_SIZE)))
+ if (!(contractions->item= (loader->once_alloc)(size)) ||
+ !(contractions->flags= (char *) (loader->once_alloc)(MY_UCA_CNT_FLAG_SIZE)))
return 1;
- memset(uca->contractions.item, 0, size);
- memset(uca->contractions.flags, 0, MY_UCA_CNT_FLAG_SIZE);
+ memset(contractions->item, 0, size);
+ memset(contractions->flags, 0, MY_UCA_CNT_FLAG_SIZE);
return 0;
}
/**
- Check if UCA data has contractions (public version)
+ Return UCA contraction data for a CHARSET_INFO structure.
- @param uca Pointer to UCA data
- @retval 0 - no contraction, 1 - have contractions.
+ @param cs Pointer to CHARSET_INFO structure
+ @retval Pointer to contraction data
+ @retval NULL, if this collation does not have UCA contraction
*/
-my_bool
-my_uca_have_contractions(MY_UCA_INFO *uca)
+const MY_CONTRACTIONS *
+my_charset_get_contractions(const CHARSET_INFO *cs, int level)
{
- return (uca != NULL) && (uca->contractions.nitems > 0);
+ return (cs->uca != NULL) && (cs->uca->level[level].contractions.nitems > 0) ?
+ &cs->uca->level[level].contractions : NULL;
}
/**
- Check if UCA data has contractions (static version)
+ Check if UCA level data has contractions (static version)
Static quick version of my_uca_have_contractions(),
optimized for performance purposes, also marked as "inline".
- @param uca Pointer to UCA data
+ @param level Pointer to UCA level data
@return Flags indicating if UCA with contractions
@retval 0 - no contractions
@@ -19599,9 +19609,9 @@ my_uca_have_contractions(MY_UCA_INFO *uc
*/
static inline my_bool
-my_uca_have_contractions_quick(MY_UCA_INFO *uca)
+my_uca_have_contractions_quick(const MY_UCA_WEIGHT_LEVEL *level)
{
- return (uca->contractions.nitems > 0);
+ return (level->contractions.nitems > 0);
}
@@ -19609,7 +19619,7 @@ my_uca_have_contractions_quick(MY_UCA_IN
/**
Check if a character can be contraction head
- @param uca Pointer to UCA data
+ @param c Pointer to UCA contraction data
@param wc Code point
@retval 0 - cannot be contraction head
@@ -19617,16 +19627,16 @@ my_uca_have_contractions_quick(MY_UCA_IN
*/
my_bool
-my_uca_can_be_contraction_head(MY_UCA_INFO *uca, my_wc_t wc)
+my_uca_can_be_contraction_head(const MY_CONTRACTIONS *c, my_wc_t wc)
{
- return uca->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_HEAD;
+ return c->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_HEAD;
}
/**
Check if a character can be contraction tail
- @param uca Pointer to UCA data
+ @param c Pointer to UCA contraction data
@param wc Code point
@retval 0 - cannot be contraction tail
@@ -19634,33 +19644,33 @@ my_uca_can_be_contraction_head(MY_UCA_IN
*/
my_bool
-my_uca_can_be_contraction_tail(MY_UCA_INFO *uca, my_wc_t wc)
+my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc)
{
- return uca->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_TAIL;
+ return c->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_TAIL;
}
/**
Check if a character can be contraction part
- @param uca Pointer to UCA data
+ @param c Pointer to UCA contraction data
@param wc Code point
@retval 0 - cannot be contraction part
@retval 1 - can be contraction part
*/
-my_bool
-my_uca_can_be_contraction_part(MY_UCA_INFO *uca, my_wc_t wc, int flag)
+static inline my_bool
+my_uca_can_be_contraction_part(const MY_CONTRACTIONS *c, my_wc_t wc, int flag)
{
- return uca->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK] & flag;
+ return c->flags[wc & MY_UCA_CNT_FLAG_MASK] & flag;
}
/**
- Find a contraction and return its weight array
+ Find a contraction consisting of two characters and return its weight array
- @param uca Pointer to UCA data
+ @param list Pointer to UCA contraction data
@param wc1 First character
@param wc2 Second character
@@ -19670,9 +19680,8 @@ my_uca_can_be_contraction_part(MY_UCA_IN
*/
uint16 *
-my_uca_contraction2_weight(MY_UCA_INFO *uca, my_wc_t wc1, my_wc_t wc2)
+my_uca_contraction2_weight(const MY_CONTRACTIONS *list, my_wc_t wc1, my_wc_t wc2)
{
- MY_CONTRACTIONS *list= &uca->contractions;
MY_CONTRACTION *c, *last;
for (c= list->item, last= c + list->nitems; c < last; c++)
{
@@ -19688,7 +19697,7 @@ my_uca_contraction2_weight(MY_UCA_INFO *
/**
Check if a character can be previous context head
- @param uca Pointer to UCA data
+ @param list Pointer to UCA contraction data
@param wc Code point
@return
@@ -19697,17 +19706,16 @@ my_uca_contraction2_weight(MY_UCA_INFO *
*/
my_bool
-my_uca_can_be_previous_context_head(MY_UCA_INFO *uca, my_wc_t wc)
+my_uca_can_be_previous_context_head(const MY_CONTRACTIONS *list, my_wc_t wc)
{
- return uca->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK] &
- MY_UCA_PREVIOUS_CONTEXT_HEAD;
+ return list->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_HEAD;
}
/**
Check if a character can be previois context tail
- @param uca Pointer to UCA data
+ @param list Pointer to UCA contraction data
@param wc Code point
@return
@@ -19716,10 +19724,9 @@ my_uca_can_be_previous_context_head(MY_U
*/
my_bool
-my_uca_can_be_previous_context_tail(MY_UCA_INFO *uca, my_wc_t wc)
+my_uca_can_be_previous_context_tail(const MY_CONTRACTIONS *list, my_wc_t wc)
{
- return uca->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK] &
- MY_UCA_PREVIOUS_CONTEXT_TAIL;
+ return list->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_TAIL;
}
@@ -19746,7 +19753,7 @@ my_wmemcmp(my_wc_t *a, my_wc_t *b, size_
Check if a string is a contraction,
and return its weight array on success.
- @param uca Pointer to UCA data
+ @param list Pointer to UCA contraction data
@param wc Pointer to wide string
@param len String length
@@ -19755,10 +19762,9 @@ my_wmemcmp(my_wc_t *a, my_wc_t *b, size_
@retval ptr - contraction weight array
*/
-uint16 *
-my_uca_contraction_weight(MY_UCA_INFO *uca, my_wc_t *wc, size_t len)
+static inline uint16 *
+my_uca_contraction_weight(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
{
- MY_CONTRACTIONS *list= &uca->contractions;
MY_CONTRACTION *c, *last;
for (c= list->item, last= c + list->nitems; c < last; c++)
{
@@ -19803,7 +19809,8 @@ my_uca_scanner_contraction_find(my_uca_s
s, scanner->send)) <= 0)
break;
beg[clen]= s= s + mblen;
- if (!my_uca_can_be_contraction_part(scanner->uca, wc[clen++], flag))
+ if (!my_uca_can_be_contraction_part(&scanner->level->contractions,
+ wc[clen++], flag))
break;
}
@@ -19811,8 +19818,10 @@ my_uca_scanner_contraction_find(my_uca_s
for ( ; clen > 1; clen--)
{
uint16 *cweight;
- if (my_uca_can_be_contraction_tail(scanner->uca, wc[clen - 1]) &&
- (cweight= my_uca_contraction_weight(scanner->uca, wc, clen)))
+ if (my_uca_can_be_contraction_tail(&scanner->level->contractions,
+ wc[clen - 1]) &&
+ (cweight= my_uca_contraction_weight(&scanner->level->contractions,
+ wc, clen)))
{
scanner->wbeg= cweight + 1;
scanner->sbeg= beg[clen - 1];
@@ -19841,7 +19850,7 @@ uint16 *
my_uca_previous_context_find(my_uca_scanner *scanner,
my_wc_t wc0, my_wc_t wc1)
{
- MY_CONTRACTIONS *list= &scanner->uca->contractions;
+ const MY_CONTRACTIONS *list= &scanner->level->contractions;
MY_CONTRACTION *c, *last;
for (c= list->item, last= c + list->nitems; c < last; c++)
{
@@ -19891,14 +19900,16 @@ my_uca_scanner_next_implicit(my_uca_scan
The same two functions for any character set
*/
static void
-my_uca_scanner_init_any(my_uca_scanner *scanner, const CHARSET_INFO *cs,
+my_uca_scanner_init_any(my_uca_scanner *scanner,
+ const CHARSET_INFO *cs,
+ const MY_UCA_WEIGHT_LEVEL *level,
const uchar *str, size_t length)
{
/* Note, no needs to initialize scanner->wbeg */
scanner->sbeg= str;
scanner->send= str + length;
scanner->wbeg= nochar;
- scanner->uca= cs->uca;
+ scanner->level= level;
scanner->cs= cs;
}
@@ -19926,14 +19937,14 @@ static int my_uca_scanner_next_any(my_uc
return -1;
scanner->sbeg+= mblen;
- if (wc[0] > scanner->uca->maxchar)
+ if (wc[0] > scanner->level->maxchar)
{
/* Return 0xFFFD as weight for all characters outside BMP */
scanner->wbeg= nochar;
return 0xFFFD;
}
- if (my_uca_have_contractions_quick(scanner->uca))
+ if (my_uca_have_contractions_quick(scanner->level))
{
uint16 *cweight;
/*
@@ -19945,9 +19956,10 @@ static int my_uca_scanner_next_any(my_uc
Note, we support only 2-character long sequences with previous
context at the moment. CLDR does not have longer sequences.
*/
- if (my_uca_can_be_previous_context_tail(scanner->uca, wc[0]) &&
+ if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
+ wc[0]) &&
scanner->wbeg != nochar && /* if not the very first character */
- my_uca_can_be_previous_context_head(scanner->uca,
+ my_uca_can_be_previous_context_head(&scanner->level->contractions,
(wc[1]= ((scanner->page << 8) +
scanner->code))) &&
(cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
@@ -19955,7 +19967,8 @@ static int my_uca_scanner_next_any(my_uc
scanner->page= scanner->code= 0; /* Clear for the next character */
return *cweight;
}
- else if (my_uca_can_be_contraction_head(scanner->uca, wc[0]))
+ else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
+ wc[0]))
{
/* Check if w[0] starts a contraction */
if ((cweight= my_uca_scanner_contraction_find(scanner, wc)))
@@ -19968,12 +19981,12 @@ static int my_uca_scanner_next_any(my_uc
scanner->code= wc[0] & 0xFF;
/* If weight page for w[0] does not exist, then calculate algoritmically */
- if (!(wpage= scanner->uca->weights[scanner->page]))
+ if (!(wpage= scanner->level->weights[scanner->page]))
return my_uca_scanner_next_implicit(scanner);
/* Calculate pointer to w[0]'s weight, using page and offset */
scanner->wbeg= wpage +
- scanner->code * scanner->uca->lengths[scanner->page];
+ scanner->code * scanner->level->lengths[scanner->page];
} while (!scanner->wbeg[0]); /* Skip ignorable characters */
return *scanner->wbeg++;
@@ -20038,8 +20051,8 @@ static int my_strnncoll_uca(const CHARSE
int s_res;
int t_res;
- scanner_handler->init(&sscanner, cs, s, slen);
- scanner_handler->init(&tscanner, cs, t, tlen);
+ scanner_handler->init(&sscanner, cs, &cs->uca->level[0], s, slen);
+ scanner_handler->init(&tscanner, cs, &cs->uca->level[0], t, tlen);
do
{
@@ -20052,9 +20065,9 @@ static int my_strnncoll_uca(const CHARSE
static inline int
-my_space_weight(const CHARSET_INFO *cs)
+my_space_weight(const CHARSET_INFO *cs)// W3-TODO
{
- return cs->uca->weights[0][0x20 * cs->uca->lengths[0]];
+ return cs->uca->level[0].weights[0][0x20 * cs->uca->level[0].lengths[0]];
}
@@ -20072,12 +20085,12 @@ my_space_weight(const CHARSET_INFO *cs)
*/
static inline uint16 *
-my_char_weight_addr(MY_UCA_INFO *uca, uint wc)
+my_char_weight_addr(MY_UCA_WEIGHT_LEVEL *level, uint wc)
{
uint page, ofst;
- return wc > uca->maxchar ? NULL :
- (uca->weights[page= (wc >> 8)] ?
- uca->weights[page] + (ofst= (wc & 0xFF)) * uca->lengths[page] :
+ return wc > level->maxchar ? NULL :
+ (level->weights[page= (wc >> 8)] ?
+ level->weights[page] + (ofst= (wc & 0xFF)) * level->lengths[page] :
NULL);
}
@@ -20143,8 +20156,8 @@ static int my_strnncollsp_uca(const CHAR
diff_if_only_endspace_difference= 0;
#endif
- scanner_handler->init(&sscanner, cs, s, slen);
- scanner_handler->init(&tscanner, cs, t, tlen);
+ scanner_handler->init(&sscanner, cs, &cs->uca->level[0], s, slen);
+ scanner_handler->init(&tscanner, cs, &cs->uca->level[0], t, tlen);
do
{
@@ -20217,7 +20230,7 @@ static void my_hash_sort_uca(const CHARS
my_uca_scanner scanner;
slen= cs->cset->lengthsp(cs, (char*) s, slen);
- scanner_handler->init(&scanner, cs, s, slen);
+ scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen);
while ((s_res= scanner_handler->next(&scanner)) >0)
{
@@ -20271,7 +20284,7 @@ my_strnxfrm_uca(const CHARSET_INFO *cs,
uchar *de= dst + dstlen;
int s_res;
my_uca_scanner scanner;
- scanner_handler->init(&scanner, cs, src, srclen);
+ scanner_handler->init(&scanner, cs, &cs->uca->level[0], src, srclen);
for (; dst < de && nweights &&
(s_res= scanner_handler->next(&scanner)) > 0 ; nweights--)
@@ -20318,8 +20331,8 @@ my_strnxfrm_uca(const CHARSET_INFO *cs,
static int my_uca_charcmp(const CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
{
size_t length1, length2;
- uint16 *weight1= my_char_weight_addr(cs->uca, wc1);
- uint16 *weight2= my_char_weight_addr(cs->uca, wc2);
+ uint16 *weight1= my_char_weight_addr(&cs->uca->level[0], wc1);// W3-TODO
+ uint16 *weight2= my_char_weight_addr(&cs->uca->level[0], wc2);
/* Check if some of the characters does not have implicit weights */
if (!weight1 || !weight2)
@@ -20330,8 +20343,8 @@ static int my_uca_charcmp(const CHARSET_
return 1;
/* Thoroughly compare all weights */
- length1= cs->uca->lengths[wc1 >> MY_UCA_PSHIFT];
- length2= cs->uca->lengths[wc2 >> MY_UCA_PSHIFT];
+ length1= cs->uca->level[0].lengths[wc1 >> MY_UCA_PSHIFT];//W3-TODO
+ length2= cs->uca->level[0].lengths[wc2 >> MY_UCA_PSHIFT];
if (length1 > length2)
return memcmp((const void*)weight1, (const void*)weight2, length2*2) ?
@@ -20344,8 +20357,7 @@ static int my_uca_charcmp(const CHARSET_
return memcmp((const void*)weight1, (const void*)weight2, length1*2);
}
-/*
-** Compare string against string with wildcard
+/*** Compare string against string with wildcard
** 0 if matched
** -1 if not matched with wildcard
** 1 if matched with wildcard
@@ -21676,7 +21688,7 @@ my_coll_rule_parse(MY_COLL_RULES *rules,
*/
static size_t
-my_char_weight_put(MY_UCA_INFO *dst_uca,
+my_char_weight_put(MY_UCA_WEIGHT_LEVEL *dst,
uint16 *to, size_t to_length,
my_wc_t *str, size_t len)
{
@@ -21692,7 +21704,7 @@ my_char_weight_put(MY_UCA_INFO *dst_uca,
for (chlen= len; chlen > 1; chlen--)
{
- if ((from= my_uca_contraction_weight(dst_uca, str, chlen)))
+ if ((from= my_uca_contraction_weight(&dst->contractions, str, chlen)))
{
str+= chlen;
len-= chlen;
@@ -21702,7 +21714,7 @@ my_char_weight_put(MY_UCA_INFO *dst_uca,
if (!from)
{
- from= my_char_weight_addr(dst_uca, *str);
+ from= my_char_weight_addr(dst, *str);
str++;
len--;
}
@@ -21732,140 +21744,201 @@ my_char_weight_put(MY_UCA_INFO *dst_uca,
*/
static my_bool
my_uca_copy_page(MY_CHARSET_LOADER *loader,
- MY_UCA_INFO *src_uca,
- MY_UCA_INFO *dst_uca,
+ const MY_UCA_WEIGHT_LEVEL *src,
+ MY_UCA_WEIGHT_LEVEL *dst,
size_t page)
{
- uint chc, size= 256 * dst_uca->lengths[page] * sizeof(uint16);
- if (!(dst_uca->weights[page]= (uint16 *) (loader->once_alloc)(size)))
+ uint chc, size= 256 * dst->lengths[page] * sizeof(uint16);
+ if (!(dst->weights[page]= (uint16 *) (loader->once_alloc)(size)))
return TRUE;
- DBUG_ASSERT(src_uca->lengths[page] <= dst_uca->lengths[page]);
- memset(dst_uca->weights[page], 0, size);
+ DBUG_ASSERT(src->lengths[page] <= dst->lengths[page]);
+ memset(dst->weights[page], 0, size);
for (chc=0 ; chc < 256; chc++)
{
- memcpy(dst_uca->weights[page] + chc * dst_uca->lengths[page],
- src_uca->weights[page] + chc * src_uca->lengths[page],
- src_uca->lengths[page] * sizeof(uint16));
+ memcpy(dst->weights[page] + chc * dst->lengths[page],
+ src->weights[page] + chc * src->lengths[page],
+ src->lengths[page] * sizeof(uint16));
}
return FALSE;
}
-/*
- This function copies an UCS2 collation from
- the default Unicode Collation Algorithm (UCA)
- weights applying tailorings, i.e. a set of
- alternative weights for some characters.
-
- The default UCA weights are stored in uca_weight/uca_length.
- They consist of 256 pages, 256 character each.
-
- If a page is not overwritten by tailoring rules,
- it is copies as is from UCA as is.
-
- If a page contains some overwritten characters, it is
- allocated. Untouched characters are copied from the
- default weights.
-*/
-
static my_bool
-create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader)
+apply_shift(MY_CHARSET_LOADER *loader,
+ MY_COLL_RULES *rules, MY_COLL_RULE *r, int level,
+ uint16 *to, size_t nweights)
{
- MY_COLL_RULES rules;
- MY_COLL_RULE *r, *rlast;
- MY_UCA_INFO new_uca, *src_uca= NULL;
- int rc= 0, ncontractions= 0;
- size_t npages, i;
-
- *loader->error= '\0';
-
- if (!cs->tailoring)
- return 0; /* Ok to add a collation without tailoring */
-
- memset(&rules, 0, sizeof(rules));
- rules.loader= loader;
- rules.uca= cs->uca ? cs->uca : &my_uca_v400; /* For logical positions, etc */
- memset(&new_uca, 0, sizeof(new_uca));
-
- /* Parse ICU Collation Customization expression */
- if ((rc= my_coll_rule_parse(&rules,
- cs->tailoring,
- cs->tailoring + strlen(cs->tailoring))))
- goto ex;
-
- rlast= rules.rule + rules.nrules;
-
- if (rules.version == 520) /* Unicode-5.2.0 requested */
- {
- src_uca= &my_uca_v520;
- cs->caseinfo= &my_unicase_unicode520;
- }
- else if (rules.version == 400) /* Unicode-4.0.0 requested */
+ /* Apply level difference. */
+ if (nweights)
{
- src_uca= &my_uca_v400;
- cs->caseinfo= &my_unicase_default;
+ to[nweights - 1]+= r->diff[level];
+ if (r->before_level == 1) /* Apply "&[before primary]" */
+ {
+ if (nweights >= 2)
+ {
+ to[nweights - 2]--; /* Reset before */
+ if (rules->shift_after_method == my_shift_method_expand)
+ {
+ /*
+ Special case. Don't let characters shifted after X
+ and before next(X) intermix to each other.
+
+ For example:
+ "[shift-after-method expand] &0 < a &[before primary]1 < A".
+ I.e. we reorder 'a' after '0', and then 'A' before '1'.
+ 'a' must be sorted before 'A'.
+
+ Note, there are no real collations in CLDR which shift
+ after and before two neighbouring characters. We need this
+ just in case. Reserving 4096 (0x1000) weights for such
+ cases is perfectly enough.
+ */
+ to[nweights - 1]+= 0x1000; //W3-TODO: const may vary on levels 2,3
+ }
+ }
+ else
+ {
+ my_snprintf(loader->error, sizeof(loader->error),
+ "Can't reset before "
+ "a primary ignorable character U+%04lX", r->base[0]);
+ return TRUE;
+ }
+ }
}
- else /* No Unicode version specified */
+ else
{
- src_uca= cs->uca ? cs->uca : &my_uca_v400;
- if (!cs->caseinfo)
- cs->caseinfo= &my_unicase_default;
+ /* Shift to an ignorable character, e.g.: & \u0000 < \u0001 */
+ DBUG_ASSERT(to[0] == 0);
+ to[0]= r->diff[level];
}
+ return FALSE;
+}
- new_uca.maxchar= src_uca->maxchar;
- npages= (src_uca->maxchar + 1) / 256;
- /* Allocate memory for pages and their lengths */
- if (!(new_uca.lengths= (uchar *) (loader->once_alloc)(npages)) ||
- !(new_uca.weights= (uint16 **) (loader->once_alloc)(npages *
- sizeof(uint16 *))))
+static my_bool
+apply_one_rule(MY_CHARSET_LOADER *loader,
+ MY_COLL_RULES *rules, MY_COLL_RULE *r, int level,
+ MY_UCA_WEIGHT_LEVEL *dst)
+{
+ size_t nweights;
+ size_t nreset= my_coll_rule_reset_length(r); /* Length of reset sequence */
+ size_t nshift= my_coll_rule_shift_length(r); /* Length of shift sequence */
+ uint16 *to;
+
+ if (nshift >= 2) /* Contraction */
+ {
+ size_t i;
+ int flag;
+ MY_CONTRACTIONS *contractions= &dst->contractions;
+ /* Add HEAD, MID and TAIL flags for the contraction parts */
+ my_uca_add_contraction_flag(contractions, r->curr[0],
+ r->with_context ?
+ MY_UCA_PREVIOUS_CONTEXT_HEAD :
+ MY_UCA_CNT_HEAD);
+ for (i= 1, flag= MY_UCA_CNT_MID1; i < nshift - 1; i++, flag<<= 1)
+ my_uca_add_contraction_flag(contractions, r->curr[i], flag);
+ my_uca_add_contraction_flag(contractions, r->curr[i],
+ r->with_context ?
+ MY_UCA_PREVIOUS_CONTEXT_TAIL :
+ MY_UCA_CNT_TAIL);
+ /* Add new contraction to the contraction list */
+ to= my_uca_add_contraction(contractions, r->curr, nshift,
+ r->with_context)->weight;
+ /* Store weights of the "reset to" character */
+ dst->contractions.nitems--; /* Temporarily hide - it's incomplete */
+ nweights= my_char_weight_put(dst, to, MY_UCA_MAX_WEIGHT_SIZE,
+ r->base, nreset);
+ dst->contractions.nitems++; /* Activate, now it's complete */
+ }
+ else
{
- rc= 1;
- goto ex;
+ my_wc_t pagec= (r->curr[0] >> 8);
+ DBUG_ASSERT(dst->weights[pagec]);
+ to= my_char_weight_addr(dst, r->curr[0]);
+ /* Store weights of the "reset to" character */
+ nweights= my_char_weight_put(dst, to, dst->lengths[pagec], r->base, nreset);
}
- /* Copy pages lengths and page pointers from the default UCA weights */
- memcpy(new_uca.lengths, src_uca->lengths, npages);
- memcpy(new_uca.weights, src_uca->weights, npages * sizeof(uint16 *));
+ /* Apply level difference. */
+ return apply_shift(loader, rules, r, level, to, nweights);
+}
- /*
- Calculate maximum lenghts for the pages which will be overwritten.
- Mark pages that will be otherwriten as NULL.
- We'll allocate their own memory.
- */
- for (r= rules.rule; r < rlast; r++)
+
+/**
+ Check if collation rules are valid,
+ i.e. characters are not outside of the collation supported range.
+*/
+static int
+check_rules(MY_CHARSET_LOADER *loader,
+ const MY_COLL_RULES *rules,
+ const MY_UCA_WEIGHT_LEVEL *dst, const MY_UCA_WEIGHT_LEVEL *src)
+{
+ const MY_COLL_RULE *r, *rlast;
+ for (r= rules->rule, rlast= rules->rule + rules->nrules; r < rlast; r++)
{
- if (r->curr[0] > new_uca.maxchar)
+ if (r->curr[0] > dst->maxchar)
{
my_snprintf(loader->error, sizeof(loader->error),
"Shift character out of range: u%04X", (uint) r->curr[0]);
- rc= 1;
- goto ex;
+ return TRUE;
}
- else if (r->base[0] > src_uca->maxchar)
+ else if (r->base[0] > src->maxchar)
{
my_snprintf(loader->error, sizeof(loader->error),
"Reset character out of range: u%04X", (uint) r->base[0]);
- rc= 1;
- goto ex;
+ return TRUE;
}
+ }
+ return FALSE;
+}
+
+
+static my_bool
+init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules, int level,
+ MY_UCA_WEIGHT_LEVEL *dst, const MY_UCA_WEIGHT_LEVEL *src)
+{
+ MY_COLL_RULE *r, *rlast;
+ int ncontractions= 0;
+ size_t i, npages= (src->maxchar + 1) / 256;
+
+ dst->maxchar= src->maxchar;
+
+ if (check_rules(loader, rules, dst, src))
+ return TRUE;
+
+ /* Allocate memory for pages and their lengths */
+ if (!(dst->lengths= (uchar *) (loader->once_alloc)(npages)) ||
+ !(dst->weights= (uint16 **) (loader->once_alloc)(npages *
+ sizeof(uint16 *))))
+ return TRUE;
+ /* Copy pages lengths and page pointers from the default UCA weights */
+ memcpy(dst->lengths, src->lengths, npages);
+ memcpy(dst->weights, src->weights, npages * sizeof(uint16 *));
+
+ /*
+ Calculate maximum lengths for the pages which will be overwritten.
+ Mark pages that will be overwritten as NULL.
+ We'll allocate their own memory.
+ */
+ for (r= rules->rule, rlast= rules->rule + rules->nrules; r < rlast; r++)
+ {
if (!r->curr[1]) /* If not a contraction */
{
uint pagec= (r->curr[0] >> 8);
if (r->base[1]) /* Expansion */
{
/* Reserve space for maximum possible length */
- new_uca.lengths[pagec]= MY_UCA_MAX_WEIGHT_SIZE;
+ dst->lengths[pagec]= MY_UCA_MAX_WEIGHT_SIZE;
}
else
{
uint pageb= (r->base[0] >> 8);
- if (new_uca.lengths[pagec] < src_uca->lengths[pageb])
- new_uca.lengths[pagec]= src_uca->lengths[pageb];
+ if (dst->lengths[pagec] < src->lengths[pageb])
+ dst->lengths[pagec]= src->lengths[pageb];
}
- new_uca.weights[pagec]= NULL; /* Mark that we'll overwrite this page */
+ dst->weights[pagec]= NULL; /* Mark that we'll overwrite this page */
}
else
ncontractions++;
@@ -21874,23 +21947,20 @@ create_tailoring(CHARSET_INFO *cs, MY_CH
/* Allocate pages that we'll overwrite and copy default weights */
for (i= 0; i < npages; i++)
{
+ my_bool rc;
/*
Don't touch pages with lengths[i]==0, they have implicit weights
calculated algorithmically.
*/
- if (!new_uca.weights[i] && new_uca.lengths[i] &&
- (rc= my_uca_copy_page(loader, src_uca, &new_uca, i)))
- goto ex;
+ if (!dst->weights[i] && dst->lengths[i] &&
+ (rc= my_uca_copy_page(loader, src, dst, i)))
+ return rc;
}
-
if (ncontractions)
{
- if (my_uca_alloc_contractions(&new_uca, loader, ncontractions))
- {
- rc= 1;
- goto ex;
- }
+ if (my_uca_alloc_contractions(&dst->contractions, loader, ncontractions))
+ return TRUE;
}
/*
@@ -21901,93 +21971,75 @@ create_tailoring(CHARSET_INFO *cs, MY_CH
Now iterate through the rules, overwrite weights for the characters
that appear in the rules, and put all contractions into contraction list.
*/
- for (r= rules.rule; r < rlast; r++)
+ for (r= rules->rule; r < rlast; r++)
{
- size_t nweights;
- size_t nreset= my_coll_rule_reset_length(r); /* Length of reset sequence */
- size_t nshift= my_coll_rule_shift_length(r); /* Length of shift sequence */
- uint16 *to;
+ if (apply_one_rule(loader, rules, r, level, dst))
+ return TRUE;
+ }
+ return FALSE;
+}
- if (nshift >= 2) /* Contraction */
- {
- size_t i;
- int flag;
- /* Add HEAD, MID and TAIL flags for the contraction parts */
- my_uca_add_contraction_flag(&new_uca, r->curr[0],
- r->with_context ?
- MY_UCA_PREVIOUS_CONTEXT_HEAD :
- MY_UCA_CNT_HEAD);
- for (i= 1, flag= MY_UCA_CNT_MID1; i < nshift - 1; i++, flag<<= 1)
- my_uca_add_contraction_flag(&new_uca, r->curr[i], flag);
- my_uca_add_contraction_flag(&new_uca, r->curr[i],
- r->with_context ?
- MY_UCA_PREVIOUS_CONTEXT_TAIL :
- MY_UCA_CNT_TAIL);
- /* Add new contraction to the contraction list */
- to= my_uca_add_contraction(&new_uca, r->curr, nshift,
- r->with_context)->weight;
- /* Store weights of the "reset to" character */
- new_uca.contractions.nitems--; /* Temporarily hide - it's incomplete */
- nweights= my_char_weight_put(&new_uca, to, MY_UCA_MAX_WEIGHT_SIZE,
- r->base, nreset);
- new_uca.contractions.nitems++; /* Activate, now it's complete */
- }
- else
- {
- my_wc_t pagec= (r->curr[0] >> 8);
- DBUG_ASSERT(new_uca.weights[pagec]);
- to= my_char_weight_addr(&new_uca, r->curr[0]);
- /* Store weights of the "reset to" character */
- nweights= my_char_weight_put(&new_uca, to, new_uca.lengths[pagec],
- r->base, nreset);
- }
- /* Apply primary difference. */
- if (nweights)
- {
- to[nweights - 1]+= r->diff[0];
- if (r->before_level == 1) /* Apply "&[before primary]" */
- {
- if (nweights >= 2)
- {
- to[nweights - 2]--; /* Reset before */
- if (rules.shift_after_method == my_shift_method_expand)
- {
- /*
- Special case. Don't let characters shifted after X
- and before next(X) intermix to each other.
-
- For example:
- "[shift-after-method expand] &0 < a &[before primary]1 < A".
- I.e. we reorder 'a' after '0', and then 'A' before '1'.
- 'a' must be sorted before 'A'.
-
- Note, there are no real collations in CLDR which shift
- after and before two neighbourgh characters. We need this
- just in case. Reserving 4096 (0x1000) weights for such
- cases is perfectly enough.
- */
- to[nweights - 1]+= 0x1000;
- }
- }
- else
- {
- my_snprintf(loader->error, sizeof(loader->error),
- "Can't reset before "
- "a primary ignorable character U+%04lX", r->base[0]);
- rc= 1;
- goto ex;
- }
- }
- }
- else
- {
- /* Shift to a primary ignorable character, e.g.: & \u0000 < \u0001 */
- DBUG_ASSERT(to[0] == 0);
- to[0]= r->diff[0];
- }
+/*
+ This function copies an UCS2 collation from
+ the default Unicode Collation Algorithm (UCA)
+ weights applying tailorings, i.e. a set of
+ alternative weights for some characters.
+
+ The default UCA weights are stored in uca_weight/uca_length.
+ They consist of 256 pages, 256 character each.
+
+ If a page is not overwritten by tailoring rules,
+ it is copied as is from UCA.
+
+ If a page contains some overwritten characters, it is
+ allocated. Untouched characters are copied from the
+ default weights.
+*/
+
+static my_bool
+create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader)
+{
+ MY_COLL_RULES rules;
+ MY_UCA_INFO new_uca, *src_uca= NULL;
+ int rc= 0;
+
+ *loader->error= '\0';
+
+ if (!cs->tailoring)
+ return 0; /* Ok to add a collation without tailoring */
+
+ memset(&rules, 0, sizeof(rules));
+ rules.loader= loader;
+ rules.uca= cs->uca ? cs->uca : &my_uca_v400; /* For logical positions, etc */
+ memset(&new_uca, 0, sizeof(new_uca));
+
+ /* Parse ICU Collation Customization expression */
+ if ((rc= my_coll_rule_parse(&rules,
+ cs->tailoring,
+ cs->tailoring + strlen(cs->tailoring))))
+ goto ex;
+
+ if (rules.version == 520) /* Unicode-5.2.0 requested */
+ {
+ src_uca= &my_uca_v520;
+ cs->caseinfo= &my_unicase_unicode520;
+ }
+ else if (rules.version == 400) /* Unicode-4.0.0 requested */
+ {
+ src_uca= &my_uca_v400;
+ cs->caseinfo= &my_unicase_default;
+ }
+ else /* No Unicode version specified */
+ {
+ src_uca= cs->uca ? cs->uca : &my_uca_v400;
+ if (!cs->caseinfo)
+ cs->caseinfo= &my_unicase_default;
}
+ if ((rc= init_weight_level(loader, &rules, 0,
+ &new_uca.level[0], &src_uca->level[0])))
+ goto ex;
if (!(cs->uca= (MY_UCA_INFO *) (loader->once_alloc)(sizeof(MY_UCA_INFO))))
{
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-trunk branch (alexander.barkov:3946 to 3947) | Alexander Barkov | 5 Jun |