Below is the list of changes that have just been committed into a local
5.1 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-05-04 14:52:31+05:00, bar@stripped +2 -0
WL#3759 Optimize identifier conversion in client-server protocol
send_fields() when character_set_results != utf8
is now about twice as fast for ASCII column/table/database
names as it used to be.
sql/protocol.cc@stripped, 2007-05-04 14:52:29+05:00, bar@stripped +89 -3
Adding quick method to store string into protocol
buffer with character set conversion, especially
optimized for ASCII characters 0x00..0x7F.
sql/protocol.h@stripped, 2007-05-04 14:52:29+05:00, bar@stripped +2 -0
Adding new method prototype.
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: bar
# Host: bar.myoffice.izhnet.ru
# Root: /home/bar/mysql-5.2.wl3759
--- 1.124/sql/protocol.cc 2007-01-29 03:47:30 +04:00
+++ 1.125/sql/protocol.cc 2007-05-04 14:52:29 +05:00
@@ -55,6 +55,79 @@
/*
+ net_store_data() - extended version with character set conversion,
+ optimized for quick copying of ASCII characters in the range 0x00..0x7F.
+
+ It currently works only for short strings whose length after
+ conversion is guaranteed to be less than 251, which occupies
+ exactly one byte to store length. This is good enough to optimize
+ send_fields() because column, table, database names fit into this limit.
+
+ Extending this function to support longer strings (e.g. to send
+ long VARCHAR or TEXT values) may not bring benefits,
+ because we don't know the result length until conversion is actually done:
+
+ We calculate maximum possible result length using this formula:
+ conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen
+ while the real result string after conversion can be shorter.
+
+ For example, if from_cs is utf8 (mbminlen=1) and to_cs is latin1
+ (mbmaxlen=1), and the source length is 300 bytes, then
+ maximum length will be 1*300/1=300. Length=300 uses 3 bytes in
+ protocol, so we have to reserve 3 bytes for length and then convert.
+ After conversion, the real length can be only 100 bytes
+ (if the source string is 100 characters, each 3 byte-long).
+ Length=100 needs only one byte to store in protocol.
+ So we expected length to occupy three bytes, but in reality it needs
+ only one byte in this example. Which means we'd have to move the
+ converted buffer two bytes left, which would degrade
+ performance.
+*/
+
+#ifndef EMBEDDED_LIBRARY
+bool Protocol::net_store_data(const char *from, uint length,
+ CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
+{
+ uint conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen; /* worst-case size of the converted string, in bytes */
+ ulong packet_length= packet->length(); /* current end of packet; new data is appended here */
+ ulong new_length= packet_length + conv_length + 1; /* +1 reserves the single length byte */
+ DBUG_ASSERT(conv_length < 251); /* NOTE(review): the call-site guard computes length / tocs->mbminlen * fromcs->mbmaxlen, which is not this formula -- confirm the two bounds agree */
+
+ if (new_length > packet->alloced_length() && packet->realloc(new_length)) /* grow only when the worst case does not fit */
+ return 1; /* realloc() returned non-zero: give up */
+
+ char *length_pos= (char*) packet->ptr() + packet_length; /* slot for the length byte */
+ uint dummy_error; /* conversion error count is collected but never inspected */
+ char *to= length_pos + 1; /* converted data goes right after the length byte */
+
+ if (to_cs->mbminlen == 1 && from_cs->mbminlen == 1) /* byte-copy fast path only when both charsets have mbminlen 1 */
+ {
+ for (; ; *to++= *from++, length--) /* copies one byte per iteration */
+ {
+ if (!length)
+ goto store_length; /* every input byte was <= 127: nothing left to convert */
+ if (*((unsigned char*) from) > 127)
+ break; /* A non-ASCII character found, exit the loop */
+ }
+ }
+ /*
+ It's ok to pass the original value of conv_length,
+ because we've allocated enough memory to convert
+ whole string. Conversion loop inside copy_and_convert()
+ will be limited by "length".
+ */
+ to+= copy_and_convert(to, conv_length, to_cs,
+ from, length, from_cs, &dummy_error); /* converts the remaining "length" bytes starting at "from" */
+
+store_length:
+ net_store_length(length_pos, to - length_pos - 1); /* actual data size = bytes written after the length byte */
+ packet->length((uint) (to - packet->ptr())); /* packet now ends at "to" */
+ return 0;
+}
+#endif
+
+
+/*
Send a error string to client
Design note:
@@ -793,9 +866,22 @@
fromcs != &my_charset_bin &&
tocs != &my_charset_bin)
{
- uint dummy_errors;
- return convert->copy(from, length, fromcs, tocs, &dummy_errors) ||
- net_store_data(convert->ptr(), convert->length());
+#ifndef EMBEDDED_LIBRARY
+ if ((length / tocs->mbminlen * fromcs->mbmaxlen) < 251)
+ {
+ /*
+ If the length after conversion is less than 251 bytes
+ we can use the quick method.
+ */
+ return net_store_data(from, length, fromcs, tocs);
+ }
+ else
+#endif
+ {
+ uint dummy_errors;
+ return convert->copy(from, length, fromcs, tocs, &dummy_errors) ||
+ net_store_data(convert->ptr(), convert->length());
+ }
}
return net_store_data(from, length);
}
--- 1.37/sql/protocol.h 2007-02-01 21:34:06 +04:00
+++ 1.38/sql/protocol.h 2007-05-04 14:52:29 +05:00
@@ -36,6 +36,8 @@
uint field_count;
#ifndef EMBEDDED_LIBRARY
bool net_store_data(const char *from, uint length);
+ bool net_store_data(const char *from, uint length,
+ CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
#else
virtual bool net_store_data(const char *from, uint length);
char **next_field;
| Thread |
|---|
| • bk commit into 5.1 tree (bar:1.2478) | bar | 4 May |