Dominic Mitchell wrote:
> Are there any plans to introduce UTF-8 support to DBD::mysql? At the
> moment, when I retrieve UTF-8 data from MySQL, it doesn't have the UTF-8
> flag set on the returned string. This means that I have to litter my
> code with calls to Encode::decode_utf8().
>
> Ideally, I'm looking for something like PostgreSQL's pg_enable_utf8
> flag: http://search.cpan.org/~dbdpg/DBD-Pg-1.44/Pg.pm#pg_enable_utf8
>
> If it's agreed that the functionality is desired, I'm happy to create a
> patch based on the PostgreSQL support.
Ok, this is a reworking of the previous UTF-8 patch, with the following
features:
* Only turned on if defined(sv_decode_utf8) && MYSQL_VERSION_ID >=
SERVER_PREPARE_VERSION.
The first requirement is so that the functions are actually available
in Perl. The second is because there doesn't appear to be any column
type information available in the older branch of dbd_st_fetch. If
somebody could point out how to do that, I would be most appreciative.
* The patch includes a way to retrieve the value of the flag as well as
set it. :-)
* This patch includes the fix for quote() that I posted last week.
* I've included a test to ensure that we can get UTF-8 out of the
database again.
This is all patched against DBD::mysql 3.0002_5.
-Dom
diff -urN DBD-mysql-3.0002_5/MANIFEST DBD-mysql-3.0002_5.dom/MANIFEST
--- DBD-mysql-3.0002_5/MANIFEST 2005-09-27 00:15:01.000000000 +0100
+++ DBD-mysql-3.0002_5.dom/MANIFEST 2006-03-07 11:03:43.000000000 +0000
@@ -21,6 +21,7 @@
t/mysql.dbtest
t/mysql.t
t/mysql2.t
+t/utf8.t
lib/DBD/mysql/GetInfo.pm
lib/DBD/mysql/INSTALL.pod
lib/DBD/mysql.pm
diff -urN DBD-mysql-3.0002_5/dbdimp.c DBD-mysql-3.0002_5.dom/dbdimp.c
--- DBD-mysql-3.0002_5/dbdimp.c 2006-02-01 22:49:02.000000000 +0000
+++ DBD-mysql-3.0002_5.dom/dbdimp.c 2006-03-07 13:48:57.846450211 +0000
@@ -1383,6 +1383,17 @@
imp_dbh->use_server_side_prepare);
#endif
+#if defined(sv_utf8_decode) && MYSQL_VERSION_ID >=SERVER_PREPARE_VERSION
+ if ((svp = hv_fetch(hv, "mysql_enable_utf8", 17, FALSE)) && *svp) {
+ /* Do not touch imp_dbh->enable_utf8 as we are called earlier
+ * than it is set and mysql_options() must be before:
+ * mysql_real_connect()
+ */
+ mysql_options(sock, MYSQL_SET_CHARSET_NAME,
+ (SvTRUE(*svp) ? "utf8" : "latin1"));
+ }
+#endif
+
#if defined(DBD_MYSQL_WITH_SSL) && !defined(DBD_MYSQL_EMBEDDED) && \
(defined(CLIENT_SSL) || (MYSQL_VERSION_ID >= 40000))
if ((svp = hv_fetch(hv, "mysql_ssl", 9, FALSE)) && *svp)
@@ -1587,6 +1598,10 @@
/* Safer we flip this to TRUE perl side if we detect a mod_perl env. */
imp_dbh->auto_reconnect = FALSE;
+#if defined(sv_utf8_decode) && MYSQL_VERSION_ID >=SERVER_PREPARE_VERSION
+ imp_dbh->enable_utf8 = FALSE; /* initialize mysql_enable_utf8 */
+#endif
+
if (!my_login(dbh, imp_dbh))
{
do_error(dbh, mysql_errno(&imp_dbh->mysql),
@@ -1903,6 +1918,10 @@
else if (kl == 31 && strEQ(key,"mysql_unsafe_bind_type_guessing"))
imp_dbh->bind_type_guessing = SvIV(valuesv);
+#if defined(sv_utf8_decode) && MYSQL_VERSION_ID >=SERVER_PREPARE_VERSION
+ else if (kl == 17 && strEQ(key, "mysql_enable_utf8"))
+ imp_dbh->enable_utf8 = bool_value;
+#endif
else
return FALSE; /* Unknown key */
@@ -1993,6 +2012,10 @@
const char* msg = mysql_error(&imp_dbh->mysql);
result= sv_2mortal(newSVpv(msg, strlen(msg)));
}
+#if defined(sv_utf8_decode) && MYSQL_VERSION_ID >=SERVER_PREPARE_VERSION
+ else if (kl == strlen("enable_utf8") && strEQ(key, "enable_utf8"))
+ result = sv_2mortal(newSViv(imp_dbh->enable_utf8));
+#endif
break;
case 'd':
@@ -3287,6 +3310,10 @@
if (dbis->debug >= 2)
PerlIO_printf(DBILOGFP, "st_fetch string data %s\n", fbh->data);
sv_setpvn(sv, fbh->data, fbh->length);
+#ifdef sv_utf8_decode
+ if(imp_dbh->enable_utf8)
+ sv_utf8_decode(sv);
+#endif
break;
default:
@@ -4256,6 +4283,10 @@
ptr= SvPV(str, len);
result= newSV(len*2+3);
+#ifdef SvUTF8
+ if (SvUTF8(str)) SvUTF8_on(result);
+#endif
+
sptr= SvPVX(result);
*sptr++ = '\'';
diff -urN DBD-mysql-3.0002_5/dbdimp.h DBD-mysql-3.0002_5.dom/dbdimp.h
--- DBD-mysql-3.0002_5/dbdimp.h 2006-02-01 19:52:25.000000000 +0000
+++ DBD-mysql-3.0002_5.dom/dbdimp.h 2006-03-07 11:02:01.000000000 +0000
@@ -143,6 +143,9 @@
*/
int use_server_side_prepare;
int has_autodetect_prepare;
+#if defined(sv_utf8_decode) && MYSQL_VERSION_ID >=SERVER_PREPARE_VERSION
+ bool enable_utf8;
+#endif
};
diff -urN DBD-mysql-3.0002_5/lib/DBD/mysql.pm DBD-mysql-3.0002_5.dom/lib/DBD/mysql.pm
--- DBD-mysql-3.0002_5/lib/DBD/mysql.pm 2006-02-01 22:47:24.000000000 +0000
+++ DBD-mysql-3.0002_5.dom/lib/DBD/mysql.pm 2006-03-07 12:51:48.000000000 +0000
@@ -955,6 +955,20 @@
It is possible to set/unset the C<mysql_use_result> attribute after
creation of statement handle. See below.
+=item mysql_enable_utf8
+
+This attribute determines whether DBD::mysql should assume strings
+stored in the database are utf8. This feature defaults to off.
+
+When set, a data retrieved from a textual column type (char, varchar,
+etc) will have the UTF-8 flag turned on if necessary. This enables
+character semantics on that string.
+
+Additionally, turning on this flag tells MySQL that incoming data should
+be treated as UTF-8. This will only take effect if used as part of the
+call to connect().
+
+This option is experimental and may change in future versions.
=head1 STATEMENT HANDLES
diff -urN DBD-mysql-3.0002_5/t/utf8.t DBD-mysql-3.0002_5.dom/t/utf8.t
--- DBD-mysql-3.0002_5/t/utf8.t 1970-01-01 01:00:00.000000000 +0100
+++ DBD-mysql-3.0002_5.dom/t/utf8.t 2006-03-07 13:47:05.706560135 +0000
@@ -0,0 +1,123 @@
+#!/usr/local/bin/perl
+#
+# $Id$
+#
+# This checks for UTF-8 support.
+#
+
+
+#
+# Make -w happy
+#
+use vars qw($test_dsn $test_user $test_password $mdriver $verbose $state
+ $dbdriver);
+use vars qw($COL_NULLABLE $COL_KEY);
+$test_dsn = '';
+$test_user = '';
+$test_password = '';
+
+
+#
+# Include lib.pl
+#
+use DBI;
+use strict;
+$mdriver = "";
+{
+ my $file;
+ foreach $file ("lib.pl", "t/lib.pl") {
+ do $file; if ($@) { print STDERR "Error while executing lib.pl: $@\n";
+ exit 10;
+ }
+ if ($mdriver ne '') {
+ last;
+ }
+ }
+}
+
+sub ServerError() {
+ print STDERR ("Cannot connect: ", $DBI::errstr, "\n",
+ "\tEither your server is not up and running or you have no\n",
+ "\tpermissions for acessing the DSN $test_dsn.\n",
+ "\tThis test requires a running server and write permissions.\n",
+ "\tPlease make sure your server is running and you have\n",
+ "\tpermissions, then retry.\n");
+ exit 10;
+}
+
+#
+# Main loop; leave this untouched, put tests after creating
+# the new table.
+#
+while (Testing()) {
+ my ($dbh, $sth, $query);
+
+ #
+ # Connect to the database
+ Test($state or ($dbh = DBI->connect($test_dsn, $test_user,
+ $test_password, {mysql_enable_utf8 => 1})))
+ or ServerError();
+
+ #
+ # Find a possible new table name
+ #
+ my $table = '';
+ Test($state or $table = FindNewTable($dbh))
+ or ErrMsgF("Cannot determine a legal table name: Error %s.\n",
+ $dbh->errstr);
+
+ #
+ # Create a new table; EDIT THIS!
+ #
+ Test($state or ($query = TableDefinition($table,
+ ["name", "CHAR", 64, $COL_NULLABLE]),
+ $dbh->do($query)))
+ or ErrMsgF("Cannot create table: Error %s.\n",
+ $dbh->errstr);
+
+
+ #
+ # and here's the right place for inserting new tests:
+ #
+
+ my $utf8_str = "\x{0100}dam"; # "Adam" with a macron.
+ my $quoted_utf8_str = "'\x{0100}dam'";
+ Test( $state or ( $dbh->quote( $utf8_str ) eq $quoted_utf8_str ) )
+ or ErrMsg( "Failed to retain UTF-8 flag when quoteing.\n" );
+
+ Test( $state or ( $dbh->{ mysql_enable_utf8 } ) )
+ or ErrMsg( "mysql_enable_utf8 didn't survive connect()\n" );
+
+ $query = qq{INSERT INTO $table (name) VALUES (?)};
+ Test( $state or $dbh->do( $query, {}, $utf8_str ) )
+ or ErrMsgF( "INSERT failed: query $query, error %s.\n", $dbh->errstr );
+
+ $query = "SELECT name FROM $table LIMIT 1";
+ Test( $state or ($sth = $dbh->prepare( $query ) ) )
+ or ErrMsgF( "prepare failed: query $query, error %s.\n", $dbh->errstr );
+
+ Test($state or $sth->execute)
+ or ErrMsgF( "execute failed: query $query, error %s.\n", $dbh->errstr );
+
+ my $ref;
+ Test( $state or defined( $ref = $sth->fetchrow_arrayref ) )
+ or ErrMsgF( "fetch failed: query $query, error %s.\n", $sth->errstr );
+
+ # Finally, check that we got back UTF-8 correctly.
+ Test( $state or ($ref->[0] eq $utf8_str) )
+ or ErrMsgF( "got back '$ref->[0]' instead of '$utf8_str'.\n" );
+
+ Test( $state or $sth->finish )
+ or ErrMsgF( "Cannot finish: %s.\n", $sth->errstr );
+
+ #
+ # Finally drop the test table.
+ #
+ Test($state or $dbh->do("DROP TABLE $table"))
+ or ErrMsgF("Cannot DROP test table $table: %s.\n",
+ $dbh->errstr);
+
+ # ... and disconnect
+ Test($state or $dbh->disconnect)
+ or ErrMsgF("Cannot disconnect: %s.\n", $dbh->errmsg);
+}