List: Commits    « Previous Message | Next Message »
From: paul    Date: May 23 2008 7:54pm
Subject:svn commit - mysqldoc@docsrva: r10833 - in trunk: . refman-6.0
View as plain text  
Author: paul
Date: 2008-05-23 21:54:05 +0200 (Fri, 23 May 2008)
New Revision: 10833

Log:
 r31668@frost:  paul | 2008-05-23 11:09:36 -0500
 Adding-collation revisions


Modified:
   trunk/refman-6.0/collation-tmp.xml

Property changes on: trunk
___________________________________________________________________
Name: svk:merge
   - 4767c598-dc10-0410-bea0-d01b485662eb:/mysqldoc-local/mysqldoc/trunk:35828
7d8d2c4e-af1d-0410-ab9f-b038ce55645b:/mysqldoc-local/mysqldoc:31651
b5ec3a16-e900-0410-9ad2-d183a3acac99:/mysqldoc-local/mysqldoc/trunk:14218
bf112a9c-6c03-0410-a055-ad865cd57414:/mysqldoc-local/mysqldoc/trunk:31442
   + 4767c598-dc10-0410-bea0-d01b485662eb:/mysqldoc-local/mysqldoc/trunk:35828
7d8d2c4e-af1d-0410-ab9f-b038ce55645b:/mysqldoc-local/mysqldoc:31668
b5ec3a16-e900-0410-9ad2-d183a3acac99:/mysqldoc-local/mysqldoc/trunk:14218
bf112a9c-6c03-0410-a055-ad865cd57414:/mysqldoc-local/mysqldoc/trunk:31442


Modified: trunk/refman-6.0/collation-tmp.xml
===================================================================
--- trunk/refman-6.0/collation-tmp.xml	2008-05-23 15:40:20 UTC (rev 10832)
+++ trunk/refman-6.0/collation-tmp.xml	2008-05-23 19:54:05 UTC (rev 10833)
Changed blocks: 29, Lines Added: 192, Lines Deleted: 106; 18234 bytes

@@ -233,7 +233,7 @@
     </para>
 
     <remark role="todo">
-      pre-6.0 example
+      Pre-6.0 example
     </remark>
 
 <programlisting>

@@ -291,10 +291,11 @@
     </para>
 
     <para>
-      For characters in the ASCII range, character codes map to weights
-      in case-insensitive fashion. For multi-byte characters outside the
-      ASCII range, there are two types of relationship between character
-      codes and weights:
+      For this type of collation, 8-bit (single-byte) and multi-byte
+      characters are handled differently. For 8-bit characters,
+      character codes map to weights in case-insensitive fashion. For
+      multi-byte characters, there are two types of relationship between
+      character codes and weights:
     </para>
 
     <itemizedlist>

@@ -303,11 +304,16 @@
         <para>
           Weights equal character codes.
           <literal>sjis_japanese_ci</literal> is an example of this kind
-          of collation.
+          of collation. The single-byte characters
+          <literal>'a'</literal> and <literal>'A'</literal> both have a
+          weight of <literal>0x41</literal>. The multi-byte character
+          <literal>'&#x3062;'</literal> has a character code of
+          <literal>0x82C0</literal>, and the weight is also
+          <literal>0x82C0</literal>.
         </para>
 
         <remark role="todo">
-          Example will not work pre-6.0.
+          6.0 example
         </remark>
 
 <programlisting>

@@ -340,11 +346,16 @@
           Character codes map one-to-one to weights, but a code is not
           necessarily equal to the weight.
           <literal>gbk_chinese_ci</literal> is an example of this kind
-          of collation.
+          of collation. The single-byte characters
+          <literal>'a'</literal> and <literal>'A'</literal> both have a
+          weight of <literal>0x41</literal>. The multi-byte character
+          <literal>'&#x81b0;'</literal> has a character code of
+          <literal>0x81B0</literal> but the weight is different,
+          <literal>0xC286</literal>.
         </para>
 
         <remark role="todo">
-          Example will not work pre-6.0.
+          6.0 example
         </remark>
 
 <programlisting>

@@ -354,11 +365,11 @@
 -->
 mysql&gt; <userinput>CREATE TABLE t1</userinput>
     -&gt; <userinput>(c1 VARCHAR(2) CHARACTER SET gbk COLLATE gbk_chinese_ci);</userinput>
-Query OK, 0 rows affected (0.01 sec)
+Query OK, 0 rows affected (0.33 sec)
 
-mysql&gt; <userinput>INSERT INTO t1 VALUES ('a'),('A'),(0x81B0),(0x81B1);</userinput>
-Query OK, 4 rows affected (0.00 sec)
-Records: 4  Duplicates: 0  Warnings: 0
+mysql&gt; <userinput>INSERT INTO t1 VALUES ('a'),('A'),(0x81B0);</userinput>
+Query OK, 3 rows affected (0.00 sec)
+Records: 3  Duplicates: 0  Warnings: 0
 
 mysql&gt; <userinput>SELECT c1, HEX(c1), HEX(WEIGHT_STRING(c1)) FROM t1;</userinput>
 +------+---------+------------------------+

@@ -367,9 +378,8 @@
 | a    | 61      | 41                     | 
 | A    | 41      | 41                     | 
 | &#x81b0;    | 81B0    | C286                   | 
-| &#x81b1;    | 81B1    | CACC                   | 
 +------+---------+------------------------+
-4 rows in set (0.00 sec)
+3 rows in set (0.00 sec)
 </programlisting>
       </listitem>
 

@@ -381,8 +391,8 @@
     </para>
 
     <para>
-      Some of these are based on the Unicode Collation Algorithm (UCA).
-      Others are not.
+      Some of these collations are based on the Unicode Collation
+      Algorithm (UCA); others are not.
     </para>
 
     <para>

@@ -396,7 +406,7 @@
     </para>
 
     <remark role="todo">
-      6.0: Use slide 9. Pre-6.0: Use explicit comparisons?
+      Pre-6.0 example
     </remark>
 
 <programlisting>

@@ -412,6 +422,10 @@
 1 row in set (0.06 sec)
 </programlisting>
 
+    <remark role="todo">
+      6.0 example
+    </remark>
+
 <programlisting>
 <!--
 mysql> DROP TABLE IF EXISTS t1;

@@ -456,7 +470,7 @@
         <para>
           A character may have zero weights (or an empty weight). In
           this case, the character is ignorable. Example: "U+0000 NULL"
-          does not have a weight and is ignorable
+          does not have a weight and is ignorable.
         </para>
       </listitem>
 

@@ -484,7 +498,7 @@
         <para>
           A character may have many weights. This is an expansion.
           Example: German letter <literal>'ß'</literal> (SZ LIGATURE, or
-          SHARP S)
+          SHARP S).
         </para>
 
 <programlisting>

@@ -504,7 +518,7 @@
       <listitem>
         <para>
           Many characters may have one weight. This is a contraction.
-          Example: <literal>'ch'</literal> is a single letter in Czech
+          Example: <literal>'ch'</literal> is a single letter in Czech.
         </para>
 
 <programlisting>

@@ -543,12 +557,103 @@
 
     <title>Choosing a Collation ID</title>
 
+    <remark role="todo">
+      What is the maximum ID value that you can choose?
+    </remark>
+
     <para>
-      Each collation must have a unique ID. Therefore, to add a
-      collation, you must choose an ID value that is not currently used.
+      Each collation must have a unique ID. To add a new collation, you
+      must choose an ID value that is not currently used. The ID that
+      you choose is the value that will show up in these contexts:
     </para>
 
+    <itemizedlist>
+
+      <listitem>
+        <para>
+          The <literal>Id</literal> column of <literal>SHOW
+          COLLATION</literal> output
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          <remark role="todo">
+            5.0 and up only
+          </remark>
+
+          The <literal>ID</literal> column of the
+          <literal>INFORMATION_SCHEMA.COLLATIONS</literal> table
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          The <literal>charsetnr</literal> member of the
+          <literal>MYSQL_FIELD</literal> C API data structure
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          <remark role="todo">
+            5.0 and up only
+          </remark>
+
+          The <literal>number</literal> member of the
+          <literal>MY_CHARSET_INFO</literal> data structure returned by
+          the
+          <function role="capi">mysql_get_character_set_info()</function>
+          C API function
+        </para>
+      </listitem>
+
+    </itemizedlist>
+
+    <remark role="todo">
+      4.1-only
+    </remark>
+
     <para>
+      To display a list of the currently used collation IDs, use this
+      statement:
+    </para>
+
+<programlisting>
+mysql&gt; <userinput>SHOW COLLATION;</userinput>
++----------------------+----------+-----+---------+----------+---------+
+| Collation            | Charset  | Id  | Default | Compiled | Sortlen |
++----------------------+----------+-----+---------+----------+---------+
+| big5_chinese_ci      | big5     |   1 | Yes     | Yes      |       1 | 
+| big5_bin             | big5     |  84 |         | Yes      |       1 | 
+...
+| latin1_german1_ci    | latin1   |   5 |         | Yes      |       1 | 
+| latin1_swedish_ci    | latin1   |   8 | Yes     | Yes      |       1 | 
+| latin1_danish_ci     | latin1   |  15 |         | Yes      |       1 | 
+| latin1_german2_ci    | latin1   |  31 |         | Yes      |       2 | 
+| latin1_bin           | latin1   |  47 |         | Yes      |       1 | 
+| latin1_general_ci    | latin1   |  48 |         | Yes      |       1 | 
+| latin1_general_cs    | latin1   |  49 |         | Yes      |       1 | 
+| latin1_spanish_ci    | latin1   |  94 |         | Yes      |       1 | 
+| latin2_czech_cs      | latin2   |   2 |         | Yes      |       4 | 
+| latin2_general_ci    | latin2   |   9 | Yes     | Yes      |       1 | 
+| latin2_hungarian_ci  | latin2   |  21 |         | Yes      |       1 | 
+| latin2_croatian_ci   | latin2   |  27 |         | Yes      |       1 | 
+| latin2_bin           | latin2   |  77 |         | Yes      |       1 | 
+...
++----------------------+----------+-----+---------+----------+---------+
+</programlisting>
+
+    <para>
+      Look through the values in the <literal>Id</literal> column and
+      pick a value that is not used.
+    </para>
+
+    <remark role="todo">
+      5.1 and up
+    </remark>
+
+    <para>
       To determine the largest currently used ID, issue the following
       statement:
     </para>

@@ -601,46 +706,6 @@
       could choose an ID higher than 210.
     </para>
 
-    <para>
-      The collation ID that you choose is the value that will show up in
-      these contexts:
-    </para>
-
-    <itemizedlist>
-
-      <listitem>
-        <para>
-          The <literal>Id</literal> column of <literal>SHOW
-          COLLATION</literal> output
-        </para>
-      </listitem>
-
-      <listitem>
-        <para>
-          The <literal>ID</literal> column of the
-          <literal>INFORMATION_SCHEMA.COLLATIONS</literal> table
-        </para>
-      </listitem>
-
-      <listitem>
-        <para>
-          The <literal>charsetnr</literal> member of the
-          <literal>MYSQL_FIELD</literal> C API data structure
-        </para>
-      </listitem>
-
-      <listitem>
-        <para>
-          The <literal>number</literal> member of the
-          <literal>MY_CHARSET_INFO</literal> data structure returned by
-          the
-          <function role="capi">mysql_get_character_set_info()</function>
-          C API function
-        </para>
-      </listitem>
-
-    </itemizedlist>
-
     <warning>
       <para>
         If you upgrade MySQL, you may find that the collation ID you

@@ -656,6 +721,11 @@
 
     <title>Adding a Simple Collation for an 8-Bit Character Set</title>
 
+    <remark role="todo">
+      I tried these instructions and the collation did not appear to
+      register.
+    </remark>
+
     <para>
       To add a simple collation for an 8-bit character set without
       recompiling MySQL, use the following procedure. The example adds a

@@ -669,7 +739,7 @@
         <para>
           Choose a collation ID, as shown in
           <xref linkend="adding-collation-choosing-id"/>. The following
-          discussion uses an ID of 56.
+          steps use an ID of 56.
         </para>
       </listitem>
 

@@ -685,11 +755,11 @@
 
 <programlisting>
 mysql&gt; <userinput>SHOW VARIABLES LIKE 'character_sets_dir';</userinput>
-+--------------------+----------------------------------------+
-| Variable_name      | Value                                  |
-+--------------------+----------------------------------------+
-| character_sets_dir | /user/local/mysqlshare/mysql/charsets/ | 
-+--------------------+----------------------------------------+
++--------------------+-----------------------------------------+
+| Variable_name      | Value                                   |
++--------------------+-----------------------------------------+
+| character_sets_dir | /usr/local/mysql/share/mysql/charsets/  | 
++--------------------+-----------------------------------------+
 </programlisting>
       </listitem>
 

@@ -716,9 +786,11 @@
       <listitem>
         <para>
           In the <filename>latin1.xml</filename> configuration file, add
-          a <literal>&lt;collation&gt;</literal> element containing a
-          <literal>&lt;map&gt;</literal> element that defines a
-          character code-to-weight mapping table:
+          a <literal>&lt;collation&gt;</literal> element that names the
+          collation and that contains a <literal>&lt;map&gt;</literal>
+          element that defines a character code-to-weight mapping table.
+          Each word within the <literal>&lt;map&gt;</literal> element
+          must be a number in hexadecimal format.
         </para>
 
 <programlisting>

@@ -770,6 +842,10 @@
     <title>Adding a UCA Collation for a Unicode Character Set</title>
 
     <remark role="todo">
+      5.0 and up only
+    </remark>
+
+    <remark role="todo">
       Fix next para per manual version.
     </remark>
 

@@ -800,20 +876,20 @@
             <entry><emphasis role="bold">Base Collation</emphasis></entry>
           </row>
           <row>
-            <entry>utf8</entry>
-            <entry>utf8_unicode_ci</entry>
+            <entry><literal>utf8</literal></entry>
+            <entry><literal>utf8_unicode_ci</literal></entry>
           </row>
           <row>
-            <entry>ucs2</entry>
-            <entry>ucs2_unicode_ci</entry>
+            <entry><literal>ucs2</literal></entry>
+            <entry><literal>ucs2_unicode_ci</literal></entry>
           </row>
           <row>
-            <entry>utf16</entry>
-            <entry>utf16_unicode_ci</entry>
+            <entry><literal>utf16</literal></entry>
+            <entry><literal>utf16_unicode_ci</literal></entry>
           </row>
           <row>
-            <entry>utf32</entry>
-            <entry>utf32_unicode_ci</entry>
+            <entry><literal>utf32</literal></entry>
+            <entry><literal>utf32_unicode_ci</literal></entry>
           </row>
         </tbody>
       </tgroup>

@@ -835,10 +911,10 @@
 
       <listitem>
         <para>
-          Characters in these rules can be written literally or in
-          <literal>\uNNNN</literal> format, where
-          <literal>NNNN</literal> is the hexadecimal Unicode code point
-          value.
+          Characters named in these rules can be written literally or in
+          <literal>\u<replaceable>nnnn</replaceable></literal> format,
+          where <replaceable>nnnn</replaceable> is the hexadecimal
+          Unicode code point value.
         </para>
       </listitem>
 

@@ -847,14 +923,15 @@
           A reset rule does not specify any ordering in and of itself.
           Instead, it <quote>resets</quote> the ordering for subsequent
           shift rules to cause them to be taken in relation to a given
-          character. Either of these rules reset subsequent shift rules
-          to be taken in relation to the letter <literal>'A'</literal>:
+          character. Either of the following rules resets subsequent
+          shift rules to be taken in relation to the letter
+          <literal>'A'</literal>:
         </para>
 
 <programlisting>
 &lt;reset&gt;A&lt;/reset&gt;
 
-&lt;reset&gt;\u004a&lt;/reset&gt;
+&lt;reset&gt;\u0041&lt;/reset&gt;
 </programlisting>
       </listitem>
 

@@ -864,8 +941,8 @@
           differences of a character from another character. They are
           specified using <literal>&lt;p&gt;</literal>,
           <literal>&lt;s&gt;</literal>, and <literal>&lt;t&gt;</literal>
-          elements. The following elements specify a primary shift rule
-          for the <literal>'G'</literal> character:
+          elements. Either of the following rules specifies a primary
+          shift rule for the <literal>'G'</literal> character:
         </para>
 
 <programlisting>

@@ -911,9 +988,9 @@
       recompiling MySQL, use the following procedure. The example adds a
       collation named <literal>utf8_phone_ci</literal> to the
       <literal>utf8</literal> character set. The collation is designed
-      for a scenario in which we have a web application in which users
-      post their names and phone numbers. Phone numbers can be given in
-      very different formats:
+      for a scenario involving a Web application for which users post
+      their names and phone numbers. Phone numbers can be given in very
+      different formats:
     </para>
 
 <programlisting>

@@ -937,7 +1014,7 @@
         <para>
           Choose a collation ID, as shown in
           <xref linkend="adding-collation-choosing-id"/>. The following
-          discussion uses an ID of 252.
+          steps use an ID of 252.
         </para>
       </listitem>
 

@@ -952,11 +1029,11 @@
 
 <programlisting>
 mysql&gt; <userinput>SHOW VARIABLES LIKE 'character_sets_dir';</userinput>
-+--------------------+----------------------------------------+
-| Variable_name      | Value                                  |
-+--------------------+----------------------------------------+
-| character_sets_dir | /user/local/mysqlshare/mysql/charsets/ | 
-+--------------------+----------------------------------------+
++--------------------+-----------------------------------------+
+| Variable_name      | Value                                   |
++--------------------+-----------------------------------------+
+| character_sets_dir | /usr/local/mysql/share/mysql/charsets/  | 
++--------------------+-----------------------------------------+
 </programlisting>
       </listitem>
 

@@ -981,11 +1058,11 @@
   &lt;collation name="utf8_phone_ci" id="252"&gt;
     &lt;rules&gt;
       &lt;reset&gt;\u0000&lt;/reset&gt;
-      &lt;s&gt;\u0020&lt;/s&gt; &lt;!-- space --&gt;
-      &lt;s&gt;\u0028&lt;/s&gt; &lt;!-- left parenthesis --&gt;
-      &lt;s&gt;\u0029&lt;/s&gt; &lt;!-- right parenthesis --&gt;
-      &lt;s&gt;\u002B&lt;/s&gt; &lt;!-- plus --&gt;
-      &lt;s&gt;\u002D&lt;/s&gt; &lt;!-- hyphen --&gt;
+        &lt;s&gt;\u0020&lt;/s&gt; &lt;!-- space --&gt;
+        &lt;s&gt;\u0028&lt;/s&gt; &lt;!-- left parenthesis --&gt;
+        &lt;s&gt;\u0029&lt;/s&gt; &lt;!-- right parenthesis --&gt;
+        &lt;s&gt;\u002B&lt;/s&gt; &lt;!-- plus --&gt;
+        &lt;s&gt;\u002D&lt;/s&gt; &lt;!-- hyphen --&gt;
     &lt;/rules&gt;
   &lt;/collation&gt;
   ...

@@ -997,9 +1074,10 @@
         <para>
           If you want a similar collation for other Unicode character
           sets, add other <literal>&lt;collation&gt;</literal> elements.
-          For example, to define <literal>ucs2_phone_ci</literal>, add
-          an element to the <literal>&lt;charset
-          name="ucs2"&gt;</literal> element.
+          For example, to define <literal>ucs2_phone_ci</literal>, add a
+          <literal>&lt;collation&gt;</literal> element to the
+          <literal>&lt;charset name="ucs2"&gt;</literal> element.
+          Remember that each of these must have its own unique ID.
         </para>
       </listitem>
 

@@ -1091,6 +1169,14 @@
 | Bar  | +7-912-800-80-01 | 
 +------+------------------+
 1 row in set (0.00 sec)
+
+mysql&gt; <userinput>SELECT * FROM phonebook WHERE phone='7 9 1 2 8 0 0 8 0 0 1';</userinput>
++------+------------------+
+| name | phone            |
++------+------------------+
+| Bar  | +7-912-800-80-01 | 
++------+------------------+
+1 row in set (0.00 sec)
 </programlisting>
 
     <remark role="todo">


Thread
svn commit - mysqldoc@docsrva: r10833 - in trunk: . refman-6.0    paul    24 May