From: Date: June 25 2005 2:16pm Subject: [patch] DBD::mysql UTF-8 handling List-Archive: http://lists.mysql.com/perl/3563 Message-Id: <20050625121615.GA8358@kashome.dyn.jankratochvil.net> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="C7zPtVaVf+AK4Oqc" --C7zPtVaVf+AK4Oqc Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Hi, yet another mail in the utf8 saga. The attached patch is a follow-up to: http://lists.mysql.com/perl/3006?f=plain but it also implements proper _writing_ of UTF-8 data to MySQL. The patch above worked fine for me for _reading_, but I was unable to find any way to write the data properly in UTF-8. Be aware that for testing you need to start the client as follows (setting %ENV{LC_*/LANG} is not enough): mysql --default-character-set=utf8 ... No tests have been written for this patch. Regards, Lace --C7zPtVaVf+AK4Oqc Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="DBD-mysql-2.9008-lace_utf8_0.patch" diff -ru DBD-mysql-2.9008-orig/dbdimp.c DBD-mysql-2.9008/dbdimp.c --- DBD-mysql-2.9008-orig/dbdimp.c 2005-05-25 07:11:08.000000000 +0200 +++ DBD-mysql-2.9008/dbdimp.c 2005-06-25 13:34:25.000000000 +0200 @@ -934,6 +934,15 @@ client_flag &= ~CLIENT_FOUND_ROWS; } } + if ((svp = hv_fetch(hv, "mysql_enable_utf8", 17, + FALSE)) && *svp) { + /* Do not: imp_dbh->enable_utf8 ? + * as we are called earlier than it is set + * and mysql_options() must be before: mysql_real_connect() + */ + mysql_options(sock, MYSQL_SET_CHARSET_NAME, + (SvTRUE(*svp) ? "utf8" : "latin1")); + } #if defined(DBD_MYSQL_WITH_SSL) && \ (defined(CLIENT_SSL) || (MYSQL_VERSION_ID >= 40000)) if ((svp = hv_fetch(hv, "mysql_ssl", 9, FALSE)) && *svp) { @@ -1119,6 +1128,9 @@ imp_dbh->has_transactions = TRUE; imp_dbh->auto_reconnect = FALSE; /* Safer we flip this to TRUE perl side if we detect a mod_perl env. 
*/ +#ifdef is_utf8_string + imp_dbh->enable_utf8 = FALSE; /* initialize mysql_enable_utf8 */ +#endif if (!_MyLogin(imp_dbh)) { do_error(dbh, mysql_errno(&imp_dbh->mysql), @@ -1399,6 +1411,10 @@ } else if (key_len == 31 && strEQ(key,"mysql_unsafe_bind_type_guessing") ) imp_dbh->bind_type_guessing = SvIV(valuesv); +#ifdef is_utf8_string + else if (strEQ(key, "mysql_enable_utf8")) + imp_dbh->enable_utf8 = bool_value; +#endif else return FALSE; @@ -1483,6 +1499,8 @@ /* Obsolete, as of 2.09! */ const char* msg = mysql_error(&imp_dbh->mysql); result = sv_2mortal(newSVpv(msg, strlen(msg))); + } else if (strEQ(key, "enable_utf8")) { + result = sv_2mortal(newSViv(imp_dbh->enable_utf8)); } break; case 'd': @@ -1843,6 +1861,12 @@ * **************************************************************************/ +int is_high_bit_set(char *val) { + while (*val++) + if (*val & 0x80) return 1; + return 0; +} + AV* dbd_st_fetch(SV* sth, imp_sth_t* imp_sth) { int num_fields; int ChopBlanks; @@ -1893,6 +1917,12 @@ } sv_setpvn(sv, col, len); + +#ifdef is_utf8_string + if (imp_dbh->enable_utf8 && + is_high_bit_set(col) && is_utf8_string(col, len)) + SvUTF8_on(sv); +#endif } else { (void) SvOK_off(sv); /* Field is NULL, return undef */ } diff -ru DBD-mysql-2.9008-orig/dbdimp.h DBD-mysql-2.9008/dbdimp.h --- DBD-mysql-2.9008-orig/dbdimp.h 2005-04-13 04:22:41.000000000 +0200 +++ DBD-mysql-2.9008/dbdimp.h 2005-06-25 11:26:37.000000000 +0200 @@ -115,6 +115,9 @@ unsigned int auto_reconnects_failed; } stats; unsigned short int bind_type_guessing; +#ifdef is_utf8_string + bool enable_utf8; /* should we attempt to make utf8 strings? 
*/ +#endif }; diff -ru DBD-mysql-2.9008-orig/lib/DBD/mysql.pm DBD-mysql-2.9008/lib/DBD/mysql.pm --- DBD-mysql-2.9008-orig/lib/DBD/mysql.pm 2005-06-06 02:57:25.000000000 +0200 +++ DBD-mysql-2.9008/lib/DBD/mysql.pm 2005-06-25 11:26:01.000000000 +0200 @@ -879,6 +879,18 @@ AutoCommit is turned off, and when AutoCommit is turned off, DBD::mysql will not automatically reconnect to the server. +=item mysql_enable_utf8 + +This attribute determines whether DBD::mysql should assume strings stored +in the database are utf8. This feature defaults to off. When set, and if +a retrieved string validates as utf8, then the magic flag on the string +is turned on, making perl use character semantics on it. You need to +turn this on if you store your data as utf8; otherwise you may notice +that although data is displayed correctly when retrieved, length() +returns results that are too large. + +This option is experimental and may change in future versions. + =head1 STATEMENT HANDLES The statement handles of DBD::mysql support a number --C7zPtVaVf+AK4Oqc--