List:Commits« Previous MessageNext Message »
From:Alexander Barkov Date:June 5 2012 4:29am
Subject:bzr push into mysql-trunk branch (alexander.barkov:3946 to 3947)
View as plain text  
 3947 Alexander Barkov	2012-06-05
      Preparatory refactoring for multi-level collations:
      - Introducing MY_UCA_WEIGHT_LEVEL, moving one level related data
        from MY_UCA_INFO to MY_UCA_WEIGHT_LEVEL
      - Splitting big functions into smaller ones

    modified:
      include/m_ctype.h
      strings/ctype-mb.c
      strings/ctype-uca.c
 3946 Nuno Carvalho	2012-06-04
      WL#5223: Binary Log Group Commit
      
      MYSQL_BIN_LOG::flush_and_sync(bool force) was ignoring "force" argument
      and always flushing binlog to disk.
      Fix flush_and_sync(bool force) implementation by taking "force" into
      account.

    modified:
      sql/binlog.cc
=== modified file 'include/m_ctype.h'
--- a/include/m_ctype.h	2012-03-06 14:29:42 +0000
+++ b/include/m_ctype.h	2012-06-05 04:28:21 +0000
@@ -78,6 +78,7 @@ extern MY_UNICASE_INFO my_unicase_unicod
 
 #define MY_UCA_MAX_CONTRACTION 6
 #define MY_UCA_MAX_WEIGHT_SIZE 8
+#define MY_UCA_WEIGHT_LEVELS   1
 
 typedef struct my_contraction_t
 {
@@ -96,13 +97,25 @@ typedef struct my_contraction_list_t
 } MY_CONTRACTIONS;
 
 
+my_bool my_uca_can_be_contraction_head(const MY_CONTRACTIONS *c, my_wc_t wc);
+my_bool my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc);
+uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c,
+                                   my_wc_t wc1, my_wc_t wc2);
 
-typedef struct uca_info_st
+
+/* Collation weights on a single level (e.g. primary, secondary, tertiarty) */
+typedef struct my_uca_level_info_st
 {
   my_wc_t maxchar;
   uchar   *lengths;
   uint16  **weights;
   MY_CONTRACTIONS contractions;
+} MY_UCA_WEIGHT_LEVEL;
+
+
+typedef struct uca_info_st
+{
+  MY_UCA_WEIGHT_LEVEL level[MY_UCA_WEIGHT_LEVELS];
 
   /* Logical positions */
   my_wc_t first_non_ignorable;
@@ -122,12 +135,6 @@ typedef struct uca_info_st
 
 
 
-my_bool my_uca_have_contractions(MY_UCA_INFO *uca);
-my_bool my_uca_can_be_contraction_head(MY_UCA_INFO *uca, my_wc_t wc);
-my_bool my_uca_can_be_contraction_tail(MY_UCA_INFO *uca, my_wc_t wc);
-uint16 *my_uca_contraction2_weight(MY_UCA_INFO *uca, my_wc_t wc1, my_wc_t wc2);
-
-
 extern MY_UCA_INFO my_uca_v400;
 
 
@@ -702,6 +709,9 @@ size_t my_strxfrm_pad_desc_and_reverse(c
 
 my_bool my_charset_is_ascii_compatible(const CHARSET_INFO *cs);
 
+const MY_CONTRACTIONS *my_charset_get_contractions(const CHARSET_INFO *cs,
+                                                   int level);
+
 extern size_t my_vsnprintf_ex(const CHARSET_INFO *cs, char *to, size_t n,
                               const char* fmt, va_list ap);
 

=== modified file 'strings/ctype-mb.c'
--- a/strings/ctype-mb.c	2011-12-09 21:08:37 +0000
+++ b/strings/ctype-mb.c	2012-06-05 04:28:21 +0000
@@ -774,7 +774,7 @@ my_bool my_like_range_mb(const CHARSET_I
   char *min_end= min_str + res_length;
   char *max_end= max_str + res_length;
   size_t maxcharlen= res_length / cs->mbmaxlen;
-  my_bool have_contractions= my_uca_have_contractions(cs->uca);
+  const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
 
   for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
   {
@@ -842,8 +842,8 @@ fill_max_and_min:
         'ab\min\min\min\min' and 'ab\max\max\max\max'.
 
       */
-      if (have_contractions && ptr + 1 < end &&
-          my_uca_can_be_contraction_head(cs->uca, (uchar) *ptr))
+      if (contractions && ptr + 1 < end &&
+          my_uca_can_be_contraction_head(contractions, (uchar) *ptr))
       {
         /* Ptr[0] is a contraction head. */
         
@@ -865,9 +865,8 @@ fill_max_and_min:
           is not a contraction, then we put only ptr[0],
           and continue with ptr[1] on the next loop.
         */
-        if (my_uca_can_be_contraction_tail(cs->uca, (uchar) ptr[1]) &&
-            my_uca_contraction2_weight(cs->uca,
-                                       (uchar) ptr[0], ptr[1]))
+        if (my_uca_can_be_contraction_tail(contractions, (uchar) ptr[1]) &&
+            my_uca_contraction2_weight(contractions, (uchar) ptr[0], ptr[1]))
         {
           /* Contraction found */
           if (maxcharlen == 1 || min_str + 1 >= min_end)
@@ -932,7 +931,7 @@ my_like_range_generic(const CHARSET_INFO
   char *max_end= max_str + res_length;
   size_t charlen= res_length / cs->mbmaxlen;
   size_t res_length_diff;
-  my_bool have_contractions= cs->uca ? my_uca_have_contractions(cs->uca) : 0;
+  const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
 
   for ( ; charlen > 0; charlen--)
   {
@@ -1000,8 +999,8 @@ my_like_range_generic(const CHARSET_INFO
       goto pad_min_max;
     }
 
-    if (have_contractions &&
-        my_uca_can_be_contraction_head(cs->uca, wc) &&
+    if (contractions &&
+        my_uca_can_be_contraction_head(contractions, wc) &&
         (res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
     {
       uint16 *weight;
@@ -1012,8 +1011,8 @@ my_like_range_generic(const CHARSET_INFO
         goto pad_min_max;
       }
 
-      if (my_uca_can_be_contraction_tail(cs->uca, wc2) &&
-          (weight= my_uca_contraction2_weight(cs->uca, wc, wc2)) && weight[0])
+      if (my_uca_can_be_contraction_tail(contractions, wc2) &&
+          (weight= my_uca_contraction2_weight(contractions, wc, wc2)) && weight[0])
       {
         /* Contraction found */
         if (charlen == 1)

=== modified file 'strings/ctype-uca.c'
--- a/strings/ctype-uca.c	2012-03-29 13:11:42 +0000
+++ b/strings/ctype-uca.c	2012-06-05 04:28:21 +0000
@@ -6523,13 +6523,17 @@ page0FCdata,page0FDdata,page0FEdata,page
 
 MY_UCA_INFO my_uca_v400=
 {
-  0xFFFF,    /* maxchar           */
-  uca_length,
-  uca_weight,
-  {          /* Contractions:     */
-    0,       /*   nitems          */
-    NULL,    /*   item            */
-    NULL     /*   flags           */
+  {
+    {
+      0xFFFF,    /* maxchar           */
+      uca_length,
+      uca_weight,
+      {          /* Contractions:     */
+        0,       /*   nitems          */
+        NULL,    /*   item            */
+        NULL     /*   flags           */
+      }
+    },
   },
 
   /* Logical positions */
@@ -19095,13 +19099,17 @@ NULL       ,NULL       ,NULL       ,NULL
 
 MY_UCA_INFO my_uca_v520=
 {
-  0x10FFFF,      /* maxchar           */
-  uca520_length,
-  uca520_weight,
-  {              /* Contractions:     */
-    0,           /*   nitems          */
-    NULL,        /*   item            */
-    NULL         /*   flags           */
+  {
+    {
+      0x10FFFF,      /* maxchar           */
+      uca520_length,
+      uca520_weight,
+      {              /* Contractions:     */
+        0,           /*   nitems          */
+        NULL,        /*   item            */
+        NULL         /*   flags           */
+      }
+    },
   },
 
   0x0009,    /* first_non_ignorable       p != ignore                       */
@@ -19453,7 +19461,7 @@ typedef struct my_uca_scanner_st
   const uint16 *wbeg;	/* Beginning of the current weight string */
   const uchar  *sbeg;	/* Beginning of the input string          */
   const uchar  *send;	/* End of the input string                */
-  MY_UCA_INFO  *uca;
+  const MY_UCA_WEIGHT_LEVEL *level;
   uint16 implicit[2];
   int page;
   int code;
@@ -19467,6 +19475,7 @@ typedef struct my_uca_scanner_st
 typedef struct my_uca_scanner_handler_st 
 {
   void (*init)(my_uca_scanner *scanner, const CHARSET_INFO *cs, 
+               const MY_UCA_WEIGHT_LEVEL *level,
                const uchar *str, size_t length);
   int (*next)(my_uca_scanner *scanner);
 } my_uca_scanner_handler;
@@ -19499,9 +19508,9 @@ static uint16 nochar[]= {0,0};
 */
 
 static inline void
-my_uca_add_contraction_flag(MY_UCA_INFO *uca, my_wc_t wc, int flag)
+my_uca_add_contraction_flag(MY_CONTRACTIONS *list, my_wc_t wc, int flag)
 {
-  uca->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK]|= flag;
+  list->flags[wc & MY_UCA_CNT_FLAG_MASK]|= flag;
 }
 
 
@@ -19517,10 +19526,9 @@ my_uca_add_contraction_flag(MY_UCA_INFO
 */
 
 static MY_CONTRACTION *
-my_uca_add_contraction(MY_UCA_INFO *uca, my_wc_t *wc, size_t len,
+my_uca_add_contraction(MY_CONTRACTIONS *list, my_wc_t *wc, size_t len,
                        my_bool with_context)
 {
-  MY_CONTRACTIONS *list= &uca->contractions;
   MY_CONTRACTION *next= &list->item[list->nitems];
   size_t i;
   /*
@@ -19559,39 +19567,41 @@ my_uca_add_contraction(MY_UCA_INFO *uca,
 */
 
 static my_bool
-my_uca_alloc_contractions(MY_UCA_INFO *uca,
+my_uca_alloc_contractions(MY_CONTRACTIONS *contractions,
                           MY_CHARSET_LOADER *loader, size_t n)
 {
   uint size= n * sizeof(MY_CONTRACTION);
-  if (!(uca->contractions.item= (loader->once_alloc)(size)) ||
-      !(uca->contractions.flags= (char *) (loader->once_alloc)(MY_UCA_CNT_FLAG_SIZE)))
+  if (!(contractions->item= (loader->once_alloc)(size)) ||
+      !(contractions->flags= (char *) (loader->once_alloc)(MY_UCA_CNT_FLAG_SIZE)))
     return 1;
-  memset(uca->contractions.item, 0, size);
-  memset(uca->contractions.flags, 0, MY_UCA_CNT_FLAG_SIZE);
+  memset(contractions->item, 0, size);
+  memset(contractions->flags, 0, MY_UCA_CNT_FLAG_SIZE);
   return 0;
 }
 
 
 /**
-  Check if UCA data has contractions (public version)
+  Return UCA contraction data for a CHARSET_INFO structure.
 
-  @param uca      Pointer to UCA data
-  @retval   0 - no contraction, 1 - have contractions.
+  @param cs       Pointer to CHARSET_INFO structure
+  @retval         Pointer to contraction data
+  @retval         NULL, if this collation does not have UCA contraction
 */
 
-my_bool
-my_uca_have_contractions(MY_UCA_INFO *uca)
+const MY_CONTRACTIONS *
+my_charset_get_contractions(const CHARSET_INFO *cs, int level)
 {
-  return (uca != NULL) && (uca->contractions.nitems > 0);
+  return (cs->uca != NULL) && (cs->uca->level[level].contractions.nitems > 0) ?
+          &cs->uca->level[level].contractions : NULL;
 }
 
 
 /**
-  Check if UCA data has contractions (static version)
+  Check if UCA level data has contractions (static version)
   Static quick version of my_uca_have_contractions(),
   optimized for performance purposes, also marked as "inline".
   
-  @param uca      Pointer to UCA data
+  @param level    Pointer to UCA level data
   
   @return   Flags indicating if UCA with contractions
   @retval   0 - no contractions
@@ -19599,9 +19609,9 @@ my_uca_have_contractions(MY_UCA_INFO *uc
 */
 
 static inline my_bool
-my_uca_have_contractions_quick(MY_UCA_INFO *uca)
+my_uca_have_contractions_quick(const MY_UCA_WEIGHT_LEVEL *level)
 {
-  return (uca->contractions.nitems > 0);
+  return (level->contractions.nitems > 0);
 }
 
 
@@ -19609,7 +19619,7 @@ my_uca_have_contractions_quick(MY_UCA_IN
 /**
   Check if a character can be contraction head
   
-  @param uca      Pointer to UCA data
+  @param c        Pointer to UCA contraction data
   @param wc       Code point
   
   @retval   0 - cannot be contraction head
@@ -19617,16 +19627,16 @@ my_uca_have_contractions_quick(MY_UCA_IN
 */
 
 my_bool
-my_uca_can_be_contraction_head(MY_UCA_INFO *uca, my_wc_t wc)
+my_uca_can_be_contraction_head(const MY_CONTRACTIONS *c, my_wc_t wc)
 {
-  return uca->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_HEAD;
+  return c->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_HEAD;
 }
 
 
 /**
   Check if a character can be contraction tail
   
-  @param uca      Pointer to UCA data
+  @param c        Pointer to UCA contraction data
   @param wc       Code point
   
   @retval   0 - cannot be contraction tail
@@ -19634,33 +19644,33 @@ my_uca_can_be_contraction_head(MY_UCA_IN
 */
 
 my_bool
-my_uca_can_be_contraction_tail(MY_UCA_INFO *uca, my_wc_t wc)
+my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc)
 {
-  return uca->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_TAIL;
+  return c->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_TAIL;
 }
 
 
 /**
   Check if a character can be contraction part
 
-  @param uca      Pointer to UCA data
+  @param c        Pointer to UCA contraction data
   @param wc       Code point
 
   @retval   0 - cannot be contraction part
   @retval   1 - can be contraction part
 */
 
-my_bool
-my_uca_can_be_contraction_part(MY_UCA_INFO *uca, my_wc_t wc, int flag)
+static inline my_bool
+my_uca_can_be_contraction_part(const MY_CONTRACTIONS *c, my_wc_t wc, int flag)
 {
-  return uca->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK] & flag;
+  return c->flags[wc & MY_UCA_CNT_FLAG_MASK] & flag;
 }
 
 
 /**
-  Find a contraction and return its weight array
+  Find a contraction consisting of two characters and return its weight array
 
-  @param uca      Pointer to UCA data
+  @param list     Pointer to UCA contraction data
   @param wc1      First character
   @param wc2      Second character
 
@@ -19670,9 +19680,8 @@ my_uca_can_be_contraction_part(MY_UCA_IN
 */
 
 uint16 *
-my_uca_contraction2_weight(MY_UCA_INFO *uca, my_wc_t wc1, my_wc_t wc2)
+my_uca_contraction2_weight(const MY_CONTRACTIONS *list, my_wc_t wc1, my_wc_t wc2)
 {
-  MY_CONTRACTIONS *list= &uca->contractions;
   MY_CONTRACTION *c, *last;
   for (c= list->item, last= c + list->nitems; c < last; c++)
   {
@@ -19688,7 +19697,7 @@ my_uca_contraction2_weight(MY_UCA_INFO *
 /**
   Check if a character can be previous context head
 
-  @param uca      Pointer to UCA data
+  @param list     Pointer to UCA contraction data
   @param wc       Code point
 
   @return
@@ -19697,17 +19706,16 @@ my_uca_contraction2_weight(MY_UCA_INFO *
 */
 
 my_bool
-my_uca_can_be_previous_context_head(MY_UCA_INFO *uca, my_wc_t wc)
+my_uca_can_be_previous_context_head(const MY_CONTRACTIONS *list, my_wc_t wc)
 {
-  return uca->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK] &
-         MY_UCA_PREVIOUS_CONTEXT_HEAD;
+  return list->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_HEAD;
 }
 
 
 /**
   Check if a character can be previois context tail
 
-  @param uca      Pointer to UCA data
+  @param uca      Pointer to UCA contraction data
   @param wc       Code point
 
   @return
@@ -19716,10 +19724,9 @@ my_uca_can_be_previous_context_head(MY_U
 */
 
 my_bool
-my_uca_can_be_previous_context_tail(MY_UCA_INFO *uca, my_wc_t wc)
+my_uca_can_be_previous_context_tail(const MY_CONTRACTIONS *list, my_wc_t wc)
 {
-  return uca->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK] &
-         MY_UCA_PREVIOUS_CONTEXT_TAIL;
+  return list->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_TAIL;
 }
 
 
@@ -19746,7 +19753,7 @@ my_wmemcmp(my_wc_t *a, my_wc_t *b, size_
   Check if a string is a contraction,
   and return its weight array on success.
 
-  @param uca    Pointer to UCA data
+  @param list   Pointer to UCA contraction data
   @param wc     Pointer to wide string
   @param len    String length
 
@@ -19755,10 +19762,9 @@ my_wmemcmp(my_wc_t *a, my_wc_t *b, size_
   @retval       ptr  - contraction weight array
 */
 
-uint16 *
-my_uca_contraction_weight(MY_UCA_INFO *uca, my_wc_t *wc, size_t len)
+static inline uint16 *
+my_uca_contraction_weight(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
 {
-  MY_CONTRACTIONS *list= &uca->contractions;
   MY_CONTRACTION *c, *last;
   for (c= list->item, last= c + list->nitems; c < last; c++)
   {
@@ -19803,7 +19809,8 @@ my_uca_scanner_contraction_find(my_uca_s
                                          s, scanner->send)) <= 0)
       break;
     beg[clen]= s= s + mblen;
-    if (!my_uca_can_be_contraction_part(scanner->uca, wc[clen++], flag))
+    if (!my_uca_can_be_contraction_part(&scanner->level->contractions,
+                                        wc[clen++], flag))
       break;
   }
 
@@ -19811,8 +19818,10 @@ my_uca_scanner_contraction_find(my_uca_s
   for ( ; clen > 1; clen--)
   {
     uint16 *cweight;
-    if (my_uca_can_be_contraction_tail(scanner->uca, wc[clen - 1]) &&
-        (cweight= my_uca_contraction_weight(scanner->uca, wc, clen)))
+    if (my_uca_can_be_contraction_tail(&scanner->level->contractions,
+                                       wc[clen - 1]) &&
+        (cweight= my_uca_contraction_weight(&scanner->level->contractions,
+                                            wc, clen)))
     {
       scanner->wbeg= cweight + 1;
       scanner->sbeg= beg[clen - 1];
@@ -19841,7 +19850,7 @@ uint16 *
 my_uca_previous_context_find(my_uca_scanner *scanner,
                              my_wc_t wc0, my_wc_t wc1)
 {
-  MY_CONTRACTIONS *list= &scanner->uca->contractions;
+  const MY_CONTRACTIONS *list= &scanner->level->contractions;
   MY_CONTRACTION *c, *last;
   for (c= list->item, last= c + list->nitems; c < last; c++)
   {
@@ -19891,14 +19900,16 @@ my_uca_scanner_next_implicit(my_uca_scan
   The same two functions for any character set
 */
 static void
-my_uca_scanner_init_any(my_uca_scanner *scanner, const CHARSET_INFO *cs,
+my_uca_scanner_init_any(my_uca_scanner *scanner,
+                        const CHARSET_INFO *cs,
+                        const MY_UCA_WEIGHT_LEVEL *level,
                         const uchar *str, size_t length)
 {
   /* Note, no needs to initialize scanner->wbeg */
   scanner->sbeg= str;
   scanner->send= str + length;
   scanner->wbeg= nochar; 
-  scanner->uca= cs->uca;
+  scanner->level= level;
   scanner->cs= cs;
 }
 
@@ -19926,14 +19937,14 @@ static int my_uca_scanner_next_any(my_uc
       return -1;
 
     scanner->sbeg+= mblen;
-    if (wc[0] > scanner->uca->maxchar)
+    if (wc[0] > scanner->level->maxchar)
     {
       /* Return 0xFFFD as weight for all characters outside BMP */
       scanner->wbeg= nochar;
       return 0xFFFD;
     }
 
-    if (my_uca_have_contractions_quick(scanner->uca))
+    if (my_uca_have_contractions_quick(scanner->level))
     {
       uint16 *cweight;
       /*
@@ -19945,9 +19956,10 @@ static int my_uca_scanner_next_any(my_uc
         Note, we support only 2-character long sequences with previous
         context at the moment. CLDR does not have longer sequences.
       */
-      if (my_uca_can_be_previous_context_tail(scanner->uca, wc[0]) &&
+      if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
+                                              wc[0]) &&
           scanner->wbeg != nochar &&     /* if not the very first character */
-          my_uca_can_be_previous_context_head(scanner->uca,
+          my_uca_can_be_previous_context_head(&scanner->level->contractions,
                                               (wc[1]= ((scanner->page << 8) +
                                                         scanner->code))) &&
           (cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
@@ -19955,7 +19967,8 @@ static int my_uca_scanner_next_any(my_uc
         scanner->page= scanner->code= 0; /* Clear for the next character */
         return *cweight;
       }
-      else if (my_uca_can_be_contraction_head(scanner->uca, wc[0]))
+      else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
+                                              wc[0]))
       {
         /* Check if w[0] starts a contraction */
         if ((cweight= my_uca_scanner_contraction_find(scanner, wc)))
@@ -19968,12 +19981,12 @@ static int my_uca_scanner_next_any(my_uc
     scanner->code= wc[0] & 0xFF;
 
     /* If weight page for w[0] does not exist, then calculate algoritmically */
-    if (!(wpage= scanner->uca->weights[scanner->page]))
+    if (!(wpage= scanner->level->weights[scanner->page]))
       return my_uca_scanner_next_implicit(scanner);
 
     /* Calculate pointer to w[0]'s weight, using page and offset */
     scanner->wbeg= wpage +
-                   scanner->code * scanner->uca->lengths[scanner->page];
+                   scanner->code * scanner->level->lengths[scanner->page];
   } while (!scanner->wbeg[0]); /* Skip ignorable characters */
 
   return *scanner->wbeg++;
@@ -20038,8 +20051,8 @@ static int my_strnncoll_uca(const CHARSE
   int s_res;
   int t_res;
   
-  scanner_handler->init(&sscanner, cs, s, slen);
-  scanner_handler->init(&tscanner, cs, t, tlen);
+  scanner_handler->init(&sscanner, cs, &cs->uca->level[0], s, slen);
+  scanner_handler->init(&tscanner, cs, &cs->uca->level[0], t, tlen);
   
   do
   {
@@ -20052,9 +20065,9 @@ static int my_strnncoll_uca(const CHARSE
 
 
 static inline int
-my_space_weight(const CHARSET_INFO *cs)
+my_space_weight(const CHARSET_INFO *cs)// W3-TODO
 {
-  return cs->uca->weights[0][0x20 * cs->uca->lengths[0]];
+  return cs->uca->level[0].weights[0][0x20 * cs->uca->level[0].lengths[0]];
 }
 
 
@@ -20072,12 +20085,12 @@ my_space_weight(const CHARSET_INFO *cs)
 */
 
 static inline uint16 *
-my_char_weight_addr(MY_UCA_INFO *uca, uint wc)
+my_char_weight_addr(MY_UCA_WEIGHT_LEVEL *level, uint wc)
 {
   uint page, ofst;
-  return wc > uca->maxchar ? NULL :
-         (uca->weights[page= (wc >> 8)] ?
-          uca->weights[page] + (ofst= (wc & 0xFF)) * uca->lengths[page] :
+  return wc > level->maxchar ? NULL :
+         (level->weights[page= (wc >> 8)] ?
+          level->weights[page] + (ofst= (wc & 0xFF)) * level->lengths[page] :
           NULL);
 }
 
@@ -20143,8 +20156,8 @@ static int my_strnncollsp_uca(const CHAR
   diff_if_only_endspace_difference= 0;
 #endif
 
-  scanner_handler->init(&sscanner, cs, s, slen);
-  scanner_handler->init(&tscanner, cs, t, tlen);
+  scanner_handler->init(&sscanner, cs, &cs->uca->level[0], s, slen);
+  scanner_handler->init(&tscanner, cs, &cs->uca->level[0], t, tlen);
   
   do
   {
@@ -20217,7 +20230,7 @@ static void my_hash_sort_uca(const CHARS
   my_uca_scanner scanner;
   
   slen= cs->cset->lengthsp(cs, (char*) s, slen);
-  scanner_handler->init(&scanner, cs, s, slen);
+  scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen);
   
   while ((s_res= scanner_handler->next(&scanner)) >0)
   {
@@ -20271,7 +20284,7 @@ my_strnxfrm_uca(const CHARSET_INFO *cs,
   uchar *de= dst + dstlen;
   int   s_res;
   my_uca_scanner scanner;
-  scanner_handler->init(&scanner, cs, src, srclen);
+  scanner_handler->init(&scanner, cs, &cs->uca->level[0], src, srclen);
   
   for (; dst < de && nweights &&
          (s_res= scanner_handler->next(&scanner)) > 0 ; nweights--)
@@ -20318,8 +20331,8 @@ my_strnxfrm_uca(const CHARSET_INFO *cs,
 static int my_uca_charcmp(const CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
 {
   size_t length1, length2;
-  uint16 *weight1= my_char_weight_addr(cs->uca, wc1);
-  uint16 *weight2= my_char_weight_addr(cs->uca, wc2);
+  uint16 *weight1= my_char_weight_addr(&cs->uca->level[0], wc1);// W3-TODO
+  uint16 *weight2= my_char_weight_addr(&cs->uca->level[0], wc2);
   
   /* Check if some of the characters does not have implicit weights */
   if (!weight1 || !weight2)
@@ -20330,8 +20343,8 @@ static int my_uca_charcmp(const CHARSET_
     return 1;
   
   /* Thoroughly compare all weights */
-  length1= cs->uca->lengths[wc1 >> MY_UCA_PSHIFT];
-  length2= cs->uca->lengths[wc2 >> MY_UCA_PSHIFT];
+  length1= cs->uca->level[0].lengths[wc1 >> MY_UCA_PSHIFT];//W3-TODO
+  length2= cs->uca->level[0].lengths[wc2 >> MY_UCA_PSHIFT];
   
   if (length1 > length2)
     return memcmp((const void*)weight1, (const void*)weight2, length2*2) ?
@@ -20344,8 +20357,7 @@ static int my_uca_charcmp(const CHARSET_
   return memcmp((const void*)weight1, (const void*)weight2, length1*2);
 }
 
-/*
-** Compare string against string with wildcard
+/*** Compare string against string with wildcard
 **	0 if matched
 **	-1 if not matched with wildcard
 **	 1 if matched with wildcard
@@ -21676,7 +21688,7 @@ my_coll_rule_parse(MY_COLL_RULES *rules,
 */
 
 static size_t
-my_char_weight_put(MY_UCA_INFO *dst_uca,
+my_char_weight_put(MY_UCA_WEIGHT_LEVEL *dst,
                    uint16 *to, size_t to_length,
                    my_wc_t *str, size_t len)
 {
@@ -21692,7 +21704,7 @@ my_char_weight_put(MY_UCA_INFO *dst_uca,
 
     for (chlen= len; chlen > 1; chlen--)
     {
-      if ((from= my_uca_contraction_weight(dst_uca, str, chlen)))
+      if ((from= my_uca_contraction_weight(&dst->contractions, str, chlen)))
       {
         str+= chlen;
         len-= chlen;
@@ -21702,7 +21714,7 @@ my_char_weight_put(MY_UCA_INFO *dst_uca,
 
     if (!from)
     {
-      from= my_char_weight_addr(dst_uca, *str);
+      from= my_char_weight_addr(dst, *str);
       str++;
       len--;
     }
@@ -21732,140 +21744,201 @@ my_char_weight_put(MY_UCA_INFO *dst_uca,
 */
 static my_bool
 my_uca_copy_page(MY_CHARSET_LOADER *loader,
-                 MY_UCA_INFO *src_uca,
-                 MY_UCA_INFO *dst_uca,
+                 const MY_UCA_WEIGHT_LEVEL *src,
+                 MY_UCA_WEIGHT_LEVEL *dst,
                  size_t page)
 {
-  uint chc, size= 256 * dst_uca->lengths[page] * sizeof(uint16);
-  if (!(dst_uca->weights[page]= (uint16 *) (loader->once_alloc)(size)))
+  uint chc, size= 256 * dst->lengths[page] * sizeof(uint16);
+  if (!(dst->weights[page]= (uint16 *) (loader->once_alloc)(size)))
     return TRUE;
 
-  DBUG_ASSERT(src_uca->lengths[page] <= dst_uca->lengths[page]);
-  memset(dst_uca->weights[page], 0, size);
+  DBUG_ASSERT(src->lengths[page] <= dst->lengths[page]);
+  memset(dst->weights[page], 0, size);
   for (chc=0 ; chc < 256; chc++)
   {
-    memcpy(dst_uca->weights[page] + chc * dst_uca->lengths[page],
-           src_uca->weights[page] + chc * src_uca->lengths[page],
-           src_uca->lengths[page] * sizeof(uint16));
+    memcpy(dst->weights[page] + chc * dst->lengths[page],
+           src->weights[page] + chc * src->lengths[page],
+           src->lengths[page] * sizeof(uint16));
   }
   return FALSE;
 }
 
 
-/*
-  This function copies an UCS2 collation from
-  the default Unicode Collation Algorithm (UCA)
-  weights applying tailorings, i.e. a set of
-  alternative weights for some characters. 
-  
-  The default UCA weights are stored in uca_weight/uca_length.
-  They consist of 256 pages, 256 character each.
-  
-  If a page is not overwritten by tailoring rules,
-  it is copies as is from UCA as is.
-  
-  If a page contains some overwritten characters, it is
-  allocated. Untouched characters are copied from the
-  default weights.
-*/
-
 static my_bool
-create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader)
+apply_shift(MY_CHARSET_LOADER *loader,
+            MY_COLL_RULES *rules, MY_COLL_RULE *r, int level,
+            uint16 *to, size_t nweights)
 {
-  MY_COLL_RULES rules;
-  MY_COLL_RULE *r, *rlast;
-  MY_UCA_INFO new_uca, *src_uca= NULL;
-  int rc= 0, ncontractions= 0;
-  size_t npages, i;
-
-  *loader->error= '\0';
-
-  if (!cs->tailoring)
-    return 0; /* Ok to add a collation without tailoring */
-
-  memset(&rules, 0, sizeof(rules));
-  rules.loader= loader;
-  rules.uca= cs->uca ? cs->uca : &my_uca_v400; /* For logical positions, etc */
-  memset(&new_uca, 0, sizeof(new_uca));
-
-  /* Parse ICU Collation Customization expression */
-  if ((rc= my_coll_rule_parse(&rules,
-                              cs->tailoring,
-                              cs->tailoring + strlen(cs->tailoring))))
-    goto ex;
-
-  rlast= rules.rule + rules.nrules;
-
-  if (rules.version == 520)           /* Unicode-5.2.0 requested */
-  {
-    src_uca= &my_uca_v520;
-    cs->caseinfo= &my_unicase_unicode520;
-  }
-  else if (rules.version == 400)      /* Unicode-4.0.0 requested */
+  /* Apply level difference. */
+  if (nweights)
   {
-    src_uca= &my_uca_v400;
-    cs->caseinfo= &my_unicase_default;
+    to[nweights - 1]+= r->diff[level];
+    if (r->before_level == 1) /* Apply "&[before primary]" */
+    {
+      if (nweights >= 2)
+      {
+        to[nweights - 2]--; /* Reset before */
+        if (rules->shift_after_method == my_shift_method_expand)
+        {
+          /*
+            Special case. Don't let characters shifted after X
+            and before next(X) intermix to each other.
+            
+            For example:
+            "[shift-after-method expand] &0 < a &[before primary]1 < A".
+            I.e. we reorder 'a' after '0', and then 'A' before '1'.
+            'a' must be sorted before 'A'.
+            
+            Note, there are no real collations in CLDR which shift
+            after and before two neighbourgh characters. We need this
+            just in case. Reserving 4096 (0x1000) weights for such
+            cases is perfectly enough.
+          */
+          to[nweights - 1]+= 0x1000; //W3-TODO: const may vary on levels 2,3
+        }
+      }
+      else
+      {
+        my_snprintf(loader->error, sizeof(loader->error),
+                    "Can't reset before "
+                    "a primary ignorable character U+%04lX", r->base[0]);
+        return TRUE;
+      }
+    }
   }
-  else                                /* No Unicode version specified */
+  else
   {
-    src_uca= cs->uca ? cs->uca : &my_uca_v400;
-    if (!cs->caseinfo)
-      cs->caseinfo= &my_unicase_default;
+    /* Shift to an ignorable character, e.g.: & \u0000 < \u0001 */
+    DBUG_ASSERT(to[0] == 0);
+    to[0]= r->diff[level];
   }
+  return FALSE; 
+}
 
-  new_uca.maxchar= src_uca->maxchar;
-  npages= (src_uca->maxchar + 1) / 256;
 
-  /* Allocate memory for pages and their lengths */
-  if (!(new_uca.lengths= (uchar *) (loader->once_alloc)(npages)) ||
-      !(new_uca.weights= (uint16 **) (loader->once_alloc)(npages *
-                                                          sizeof(uint16 *))))
+static my_bool
+apply_one_rule(MY_CHARSET_LOADER *loader,
+               MY_COLL_RULES *rules, MY_COLL_RULE *r, int level,
+               MY_UCA_WEIGHT_LEVEL *dst)
+{
+  size_t nweights;
+  size_t nreset= my_coll_rule_reset_length(r); /* Length of reset sequence */
+  size_t nshift= my_coll_rule_shift_length(r); /* Length of shift sequence */
+  uint16 *to;
+
+  if (nshift >= 2) /* Contraction */
+  {
+    size_t i;
+    int flag;
+    MY_CONTRACTIONS *contractions= &dst->contractions;
+    /* Add HEAD, MID and TAIL flags for the contraction parts */
+    my_uca_add_contraction_flag(contractions, r->curr[0],
+                                r->with_context ?
+                                MY_UCA_PREVIOUS_CONTEXT_HEAD :
+                                MY_UCA_CNT_HEAD);
+    for (i= 1, flag= MY_UCA_CNT_MID1; i < nshift - 1; i++, flag<<= 1)
+      my_uca_add_contraction_flag(contractions, r->curr[i], flag);
+    my_uca_add_contraction_flag(contractions, r->curr[i],
+                                r->with_context ?
+                                MY_UCA_PREVIOUS_CONTEXT_TAIL :
+                                MY_UCA_CNT_TAIL);
+    /* Add new contraction to the contraction list */
+    to= my_uca_add_contraction(contractions, r->curr, nshift,
+                               r->with_context)->weight;
+    /* Store weights of the "reset to" character */
+    dst->contractions.nitems--; /* Temporarily hide - it's incomplete */
+    nweights= my_char_weight_put(dst, to, MY_UCA_MAX_WEIGHT_SIZE,
+                                 r->base, nreset);
+    dst->contractions.nitems++; /* Activate, now it's complete */
+  }
+  else
   {
-    rc= 1;
-    goto ex;
+    my_wc_t pagec= (r->curr[0] >> 8);
+    DBUG_ASSERT(dst->weights[pagec]);
+    to= my_char_weight_addr(dst, r->curr[0]);
+    /* Store weights of the "reset to" character */
+    nweights= my_char_weight_put(dst, to, dst->lengths[pagec], r->base, nreset);
   }
 
-  /* Copy pages lengths and page pointers from the default UCA weights */ 
-  memcpy(new_uca.lengths, src_uca->lengths, npages);
-  memcpy(new_uca.weights, src_uca->weights, npages * sizeof(uint16 *));
+  /* Apply level difference. */
+  return apply_shift(loader, rules, r, level, to, nweights);
+}
 
-  /*
-    Calculate maximum lenghts for the pages which will be overwritten.
-    Mark pages that will be otherwriten as NULL.
-    We'll allocate their own memory.
-  */
-  for (r= rules.rule; r < rlast; r++)
+
+/**
+  Check if collation rules are valid,
+  i.e. characters are not outside of the collation suported range.
+*/
+static int
+check_rules(MY_CHARSET_LOADER *loader,
+            const MY_COLL_RULES *rules,
+            const MY_UCA_WEIGHT_LEVEL *dst, const MY_UCA_WEIGHT_LEVEL *src)
+{
+  const MY_COLL_RULE *r, *rlast;
+  for (r= rules->rule, rlast= rules->rule + rules->nrules; r < rlast; r++)
   {
-    if (r->curr[0] > new_uca.maxchar)
+    if (r->curr[0] > dst->maxchar)
     {
       my_snprintf(loader->error, sizeof(loader->error),
                   "Shift character out of range: u%04X", (uint) r->curr[0]);
-      rc= 1;
-      goto ex;
+      return TRUE;
     }
-    else if (r->base[0] > src_uca->maxchar)
+    else if (r->base[0] > src->maxchar)
     {
       my_snprintf(loader->error, sizeof(loader->error),
                   "Reset character out of range: u%04X", (uint) r->base[0]);
-      rc= 1;
-      goto ex;
+      return TRUE;
     }
+  }
+  return FALSE;
+}
+
+
+static my_bool
+init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules, int level,
+                  MY_UCA_WEIGHT_LEVEL *dst, const MY_UCA_WEIGHT_LEVEL *src)
+{
+  MY_COLL_RULE *r, *rlast;
+  int ncontractions= 0;
+  size_t i, npages= (src->maxchar + 1) / 256;
+
+  dst->maxchar= src->maxchar;
+
+  if (check_rules(loader, rules, dst, src))
+    return TRUE;
+
+  /* Allocate memory for pages and their lengths */
+  if (!(dst->lengths= (uchar *) (loader->once_alloc)(npages)) ||
+      !(dst->weights= (uint16 **) (loader->once_alloc)(npages *
+                                                       sizeof(uint16 *))))
+    return TRUE;
 
+  /* Copy pages lengths and page pointers from the default UCA weights */ 
+  memcpy(dst->lengths, src->lengths, npages);
+  memcpy(dst->weights, src->weights, npages * sizeof(uint16 *));
+
+  /*
+    Calculate maximum lenghts for the pages which will be overwritten.
+    Mark pages that will be otherwriten as NULL.
+    We'll allocate their own memory.
+  */
+  for (r= rules->rule, rlast= rules->rule + rules->nrules; r < rlast; r++)
+  {
     if (!r->curr[1]) /* If not a contraction */
     {
       uint pagec= (r->curr[0] >> 8);
       if (r->base[1]) /* Expansion */
       {
         /* Reserve space for maximum possible length */
-        new_uca.lengths[pagec]= MY_UCA_MAX_WEIGHT_SIZE;
+        dst->lengths[pagec]= MY_UCA_MAX_WEIGHT_SIZE;
       }
       else
       {
         uint pageb= (r->base[0] >> 8);
-        if (new_uca.lengths[pagec] < src_uca->lengths[pageb])
-          new_uca.lengths[pagec]= src_uca->lengths[pageb];
+        if (dst->lengths[pagec] < src->lengths[pageb])
+          dst->lengths[pagec]= src->lengths[pageb];
       }
-      new_uca.weights[pagec]= NULL; /* Mark that we'll overwrite this page */
+      dst->weights[pagec]= NULL; /* Mark that we'll overwrite this page */
     }
     else
       ncontractions++;
@@ -21874,23 +21947,20 @@ create_tailoring(CHARSET_INFO *cs, MY_CH
   /* Allocate pages that we'll overwrite and copy default weights */
   for (i= 0; i < npages; i++)
   {
+    my_bool rc;
     /*
       Don't touch pages with lengths[i]==0, they have implicit weights
       calculated algorithmically.
     */
-    if (!new_uca.weights[i] && new_uca.lengths[i] &&
-        (rc= my_uca_copy_page(loader, src_uca, &new_uca, i)))
-      goto ex;
+    if (!dst->weights[i] && dst->lengths[i] &&
+        (rc= my_uca_copy_page(loader, src, dst, i)))
+      return rc;
   }
 
-
   if (ncontractions)
   {
-    if (my_uca_alloc_contractions(&new_uca, loader, ncontractions))
-    {
-      rc= 1;
-      goto ex;
-    }
+    if (my_uca_alloc_contractions(&dst->contractions, loader, ncontractions))
+      return TRUE;
   }
 
   /*
@@ -21901,93 +21971,75 @@ create_tailoring(CHARSET_INFO *cs, MY_CH
     Now iterate through the rules, overwrite weights for the characters
     that appear in the rules, and put all contractions into contraction list.
   */
-  for (r= rules.rule; r < rlast;  r++)
+  for (r= rules->rule; r < rlast;  r++)
   {
-    size_t nweights;
-    size_t nreset= my_coll_rule_reset_length(r); /* Length of reset sequence */
-    size_t nshift= my_coll_rule_shift_length(r); /* Length of shift sequence */
-    uint16 *to;
+    if (apply_one_rule(loader, rules, r, level, dst))
+      return TRUE;
+  }
+  return FALSE;
+}
 
-    if (nshift >= 2) /* Contraction */
-    {
-      size_t i;
-      int flag;
-      /* Add HEAD, MID and TAIL flags for the contraction parts */
-      my_uca_add_contraction_flag(&new_uca, r->curr[0],
-                                  r->with_context ?
-                                  MY_UCA_PREVIOUS_CONTEXT_HEAD :
-                                  MY_UCA_CNT_HEAD);
-      for (i= 1, flag= MY_UCA_CNT_MID1; i < nshift - 1; i++, flag<<= 1)
-        my_uca_add_contraction_flag(&new_uca, r->curr[i], flag);
-      my_uca_add_contraction_flag(&new_uca, r->curr[i],
-                                  r->with_context ?
-                                  MY_UCA_PREVIOUS_CONTEXT_TAIL :
-                                  MY_UCA_CNT_TAIL);
-      /* Add new contraction to the contraction list */
-      to= my_uca_add_contraction(&new_uca, r->curr, nshift,
-                                 r->with_context)->weight;
-      /* Store weights of the "reset to" character */
-      new_uca.contractions.nitems--; /* Temporarily hide - it's incomplete */
-      nweights= my_char_weight_put(&new_uca, to, MY_UCA_MAX_WEIGHT_SIZE,
-                                   r->base, nreset);
-      new_uca.contractions.nitems++; /* Activate, now it's complete */
-    }
-    else
-    {
-      my_wc_t pagec= (r->curr[0] >> 8);
-      DBUG_ASSERT(new_uca.weights[pagec]);
-      to= my_char_weight_addr(&new_uca, r->curr[0]);
-      /* Store weights of the "reset to" character */
-      nweights= my_char_weight_put(&new_uca, to, new_uca.lengths[pagec],
-                                   r->base, nreset);
-    }
 
-    /* Apply primary difference. */
-    if (nweights)
-    {
-      to[nweights - 1]+= r->diff[0];
-      if (r->before_level == 1) /* Apply "&[before primary]" */
-      {
-        if (nweights >= 2)
-        {
-          to[nweights - 2]--; /* Reset before */
-          if (rules.shift_after_method == my_shift_method_expand)
-          {
-            /*
-              Special case. Don't let characters shifted after X
-              and before next(X) intermix to each other.
-              
-              For example:
-              "[shift-after-method expand] &0 < a &[before primary]1 < A".
-              I.e. we reorder 'a' after '0', and then 'A' before '1'.
-              'a' must be sorted before 'A'.
-              
-              Note, there are no real collations in CLDR which shift
-              after and before two neighbourgh characters. We need this
-              just in case. Reserving 4096 (0x1000) weights for such
-              cases is perfectly enough.
-            */
-            to[nweights - 1]+= 0x1000;
-          }
-        }
-        else
-        {
-          my_snprintf(loader->error, sizeof(loader->error),
-                      "Can't reset before "
-                      "a primary ignorable character U+%04lX", r->base[0]);
-          rc= 1;
-          goto ex;
-        }
-      }
-    }
-    else
-    {
-      /* Shift to a primary ignorable character, e.g.: & \u0000 < \u0001 */
-      DBUG_ASSERT(to[0] == 0);
-      to[0]= r->diff[0];
-    }
+/*
+  This function copies an UCS2 collation from
+  the default Unicode Collation Algorithm (UCA)
+  weights applying tailorings, i.e. a set of
+  alternative weights for some characters. 
+  
+  The default UCA weights are stored in uca_weight/uca_length.
+  They consist of 256 pages, 256 character each.
+  
+  If a page is not overwritten by tailoring rules,
+  it is copies as is from UCA as is.
+  
+  If a page contains some overwritten characters, it is
+  allocated. Untouched characters are copied from the
+  default weights.
+*/
+
+static my_bool
+create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader)
+{
+  MY_COLL_RULES rules;
+  MY_UCA_INFO new_uca, *src_uca= NULL;
+  int rc= 0;
+
+  *loader->error= '\0';
+
+  if (!cs->tailoring)
+    return 0; /* Ok to add a collation without tailoring */
+
+  memset(&rules, 0, sizeof(rules));
+  rules.loader= loader;
+  rules.uca= cs->uca ? cs->uca : &my_uca_v400; /* For logical positions, etc */
+  memset(&new_uca, 0, sizeof(new_uca));
+
+  /* Parse ICU Collation Customization expression */
+  if ((rc= my_coll_rule_parse(&rules,
+                              cs->tailoring,
+                              cs->tailoring + strlen(cs->tailoring))))
+    goto ex;
+
+  if (rules.version == 520)           /* Unicode-5.2.0 requested */
+  {
+    src_uca= &my_uca_v520;
+    cs->caseinfo= &my_unicase_unicode520;
+  }
+  else if (rules.version == 400)      /* Unicode-4.0.0 requested */
+  {
+    src_uca= &my_uca_v400;
+    cs->caseinfo= &my_unicase_default;
+  }
+  else                                /* No Unicode version specified */
+  {
+    src_uca= cs->uca ? cs->uca : &my_uca_v400;
+    if (!cs->caseinfo)
+      cs->caseinfo= &my_unicase_default;
   }
 
+  if ((rc= init_weight_level(loader, &rules, 0,
+                             &new_uca.level[0], &src_uca->level[0])))
+    goto ex;
 
   if (!(cs->uca= (MY_UCA_INFO *) (loader->once_alloc)(sizeof(MY_UCA_INFO))))
   {

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-trunk branch (alexander.barkov:3946 to 3947) Alexander Barkov5 Jun