From: Date: June 9 2004 10:01pm Subject: [PATCH] Re: blessing db data as utf8 List-Archive: http://lists.mysql.com/perl/3006 Message-Id: <20040609200103.GA17923@sike.forum2.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii On Wed, Jun 09, 2004 at 04:01:09PM +0300, Gaal Yahas wrote: > What do the maintainers of DBD::mysql say? Should the same style of fix > be added to DBD::mysql? I'm willing to work on a patch if nobody else steps > forward. Patch follows. This works for me; thanks to Dominic Mitchell for the Pg version this is based on. -- Gaal Yahas http://gaal.livejournal.com/ diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.c ../DBD-mysql-2.9003/dbdimp.c --- /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.c 2003-10-17 19:20:50.000000000 +0200 +++ ../DBD-mysql-2.9003/dbdimp.c 2004-06-09 22:15:03.000000000 +0300 @@ -848,6 +848,9 @@ imp_dbh->has_transactions = TRUE; imp_dbh->auto_reconnect = FALSE; /* Safer we flip this to TRUE perl side if we detect a mod_perl env. */ +#ifdef is_utf8_string + imp_dbh->enable_utf8 = FALSE; /* initialize mysql_enable_utf8 */ +#endif DBIc_set(imp_dbh, DBIcf_AutoCommit, &sv_yes); if (sv && SvROK(sv)) { @@ -1333,6 +1336,10 @@ /*XXX: Does DBI handle the magic ? */ imp_dbh->auto_reconnect = bool_value; /* imp_dbh->mysql.reconnect=0; */ +#ifdef is_utf8_string + } else if (strEQ(key, "mysql_enable_utf8")) { + imp_dbh->enable_utf8 = bool_value; +#endif } else { return FALSE; } @@ -1413,6 +1420,8 @@ /* Obsolete, as of 2.09! */ const char* msg = mysql_error(&imp_dbh->mysql); result = sv_2mortal(newSVpv(msg, strlen(msg))); + } else if (strEQ(key, "enable_utf8")) { + result = sv_2mortal(newSViv(imp_dbh->enable_utf8)); } break; case 'd': @@ -1748,7 +1757,14 @@ * **************************************************************************/ +int is_high_bit_set(char *val) { + while (*val++) + if (*val & 0x80) return 1; + return 0; +} + AV* dbd_st_fetch(SV* sth, imp_sth_t* imp_sth) { + D_imp_dbh_from_sth; int num_fields; int ChopBlanks; int i; @@ -1797,6 +1813,12 @@ } sv_setpvn(sv, col, len); + +#ifdef is_utf8_string + if (imp_dbh->enable_utf8 && + is_high_bit_set(col) && is_utf8_string(col, len)) + SvUTF8_on(sv); +#endif } else { (void) SvOK_off(sv); /* Field is NULL, return undef */ } diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.h ../DBD-mysql-2.9003/dbdimp.h --- /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.h 2003-10-17 19:20:50.000000000 +0200 +++ ../DBD-mysql-2.9003/dbdimp.h 2004-06-09 22:06:06.000000000 +0300 @@ -114,6 +114,9 @@ unsigned int auto_reconnects_ok; unsigned int auto_reconnects_failed; } stats; +#ifdef is_utf8_string + bool enable_utf8; /* should we attempt to make utf8 strings? */ +#endif }; diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/lib/DBD/mysql.pm ../DBD-mysql-2.9003/lib/DBD/mysql.pm --- /home/roo/.cpan/build/DBD-mysql-2.9003/lib/DBD/mysql.pm 2003-10-27 05:26:08.000000000 +0200 +++ ../DBD-mysql-2.9003/lib/DBD/mysql.pm 2004-06-09 22:54:21.000000000 +0300 @@ -867,6 +867,18 @@ AutoCommit is turned off, and when AutoCommit is turned off, DBD::mysql will not automatically reconnect to the server. +=item mysql_enable_utf8 + +This attribute determines whether DBD::mysql should assume strings stored +in the database are utf8. This feature defaults to off. When set, and if +a retrieved string validates as utf8, then the magic flag on the string +is turned on, making perl use character semantics on it. You need to +turn this on if you store your data as utf8; otherwise you may notice +that although data is displayed correctly when retrieved, length() +returns results that are too large. + +This option is experimental and may change in future versions. + =head1 STATEMENT HANDLES The statement handles of DBD::mysql support a number diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/mysql-utf8.0.patch ../DBD-mysql-2.9003/mysql-utf8.0.patch --- /home/roo/.cpan/build/DBD-mysql-2.9003/mysql-utf8.0.patch 1970-01-01 02:00:00.000000000 +0200 +++ ../DBD-mysql-2.9003/mysql-utf8.0.patch 2004-06-09 22:55:00.000000000 +0300 @@ -0,0 +1,96 @@ +diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.c ../DBD-mysql-2.9003/dbdimp.c +--- /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.c 2003-10-17 19:20:50.000000000 +0200 ++++ ../DBD-mysql-2.9003/dbdimp.c 2004-06-09 22:15:03.000000000 +0300 +@@ -848,6 +848,9 @@ + imp_dbh->has_transactions = TRUE; + imp_dbh->auto_reconnect = FALSE; /* Safer we flip this to TRUE perl side + if we detect a mod_perl env. */ ++#ifdef is_utf8_string ++ imp_dbh->enable_utf8 = FALSE; /* initialize mysql_enable_utf8 */ ++#endif + + DBIc_set(imp_dbh, DBIcf_AutoCommit, &sv_yes); + if (sv && SvROK(sv)) { +@@ -1333,6 +1336,10 @@ + /*XXX: Does DBI handle the magic ? */ + imp_dbh->auto_reconnect = bool_value; + /* imp_dbh->mysql.reconnect=0; */ ++#ifdef is_utf8_string ++ } else if (strEQ(key, "mysql_enable_utf8")) { ++ imp_dbh->enable_utf8 = bool_value; ++#endif + } else { + return FALSE; + } +@@ -1413,6 +1420,8 @@ + /* Obsolete, as of 2.09! */ + const char* msg = mysql_error(&imp_dbh->mysql); + result = sv_2mortal(newSVpv(msg, strlen(msg))); ++ } else if (strEQ(key, "enable_utf8")) { ++ result = sv_2mortal(newSViv(imp_dbh->enable_utf8)); + } + break; + case 'd': +@@ -1748,7 +1757,14 @@ + * + **************************************************************************/ + ++int is_high_bit_set(char *val) { ++ while (*val++) ++ if (*val & 0x80) return 1; ++ return 0; ++} ++ + AV* dbd_st_fetch(SV* sth, imp_sth_t* imp_sth) { ++ D_imp_dbh_from_sth; + int num_fields; + int ChopBlanks; + int i; +@@ -1797,6 +1813,12 @@ + } + + sv_setpvn(sv, col, len); ++ ++#ifdef is_utf8_string ++ if (imp_dbh->enable_utf8 && ++ is_high_bit_set(col) && is_utf8_string(col, len)) ++ SvUTF8_on(sv); ++#endif + } else { + (void) SvOK_off(sv); /* Field is NULL, return undef */ + } +diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.h ../DBD-mysql-2.9003/dbdimp.h +--- /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.h 2003-10-17 19:20:50.000000000 +0200 ++++ ../DBD-mysql-2.9003/dbdimp.h 2004-06-09 22:06:06.000000000 +0300 +@@ -114,6 +114,9 @@ + unsigned int auto_reconnects_ok; + unsigned int auto_reconnects_failed; + } stats; ++#ifdef is_utf8_string ++ bool enable_utf8; /* should we attempt to make utf8 strings? */ ++#endif + }; + + +diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/lib/DBD/mysql.pm ../DBD-mysql-2.9003/lib/DBD/mysql.pm +--- /home/roo/.cpan/build/DBD-mysql-2.9003/lib/DBD/mysql.pm 2003-10-27 05:26:08.000000000 +0200 ++++ ../DBD-mysql-2.9003/lib/DBD/mysql.pm 2004-06-09 22:54:21.000000000 +0300 +@@ -867,6 +867,18 @@ + AutoCommit is turned off, and when AutoCommit is turned off, DBD::mysql will + not automatically reconnect to the server. + ++=item mysql_enable_utf8 ++ ++This attribute determines whether DBD::mysql should assume strings stored ++in the database are utf8. This feature defaults to off. When set, and if ++a retrieved string validates as utf8, then the magic flag on the string ++is turned on, making perl use character semantics on it. You need to ++turn this on if you store your data as utf8; otherwise you may notice ++that although data is displayed correctly when retrieved, length() ++returns results that are too large. ++ ++This option is experimental and may change in future versions. ++ + =head1 STATEMENT HANDLES + + The statement handles of DBD::mysql support a number