On Wed, Jun 09, 2004 at 04:01:09PM +0300, Gaal Yahas wrote:
> What do the maintainers of DBD::mysql say? Should the same style of fix
> be added to DBD::mysql? I'm willing to work on a patch if nobody else steps
> forward.
Patch follows. This works for me; thanks to Dominic Mitchell
<dom@stripped> for the Pg version this is based on.
--
Gaal Yahas <gaal@stripped>
http://gaal.livejournal.com/
diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.c ../DBD-mysql-2.9003/dbdimp.c
--- /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.c 2003-10-17 19:20:50.000000000 +0200
+++ ../DBD-mysql-2.9003/dbdimp.c 2004-06-09 22:15:03.000000000 +0300
@@ -848,6 +848,9 @@
imp_dbh->has_transactions = TRUE;
imp_dbh->auto_reconnect = FALSE; /* Safer we flip this to TRUE perl side
if we detect a mod_perl env. */
+#ifdef is_utf8_string
+ imp_dbh->enable_utf8 = FALSE; /* initialize mysql_enable_utf8 */
+#endif
DBIc_set(imp_dbh, DBIcf_AutoCommit, &sv_yes);
if (sv && SvROK(sv)) {
@@ -1333,6 +1336,10 @@
/*XXX: Does DBI handle the magic ? */
imp_dbh->auto_reconnect = bool_value;
/* imp_dbh->mysql.reconnect=0; */
+#ifdef is_utf8_string
+ } else if (strEQ(key, "mysql_enable_utf8")) {
+ imp_dbh->enable_utf8 = bool_value;
+#endif
} else {
return FALSE;
}
@@ -1413,6 +1420,8 @@
/* Obsolete, as of 2.09! */
const char* msg = mysql_error(&imp_dbh->mysql);
result = sv_2mortal(newSVpv(msg, strlen(msg)));
+ } else if (strEQ(key, "enable_utf8")) {
+ result = sv_2mortal(newSViv(imp_dbh->enable_utf8));
}
break;
case 'd':
@@ -1748,7 +1757,14 @@
*
**************************************************************************/
+int is_high_bit_set(char *val) { /* 1 if any byte of NUL-terminated val is >= 0x80, else 0 */
+    for (; *val; val++) /* test, then advance: the first byte must be checked too */
+        if (*val & 0x80) return 1;
+    return 0;
+}
+
AV* dbd_st_fetch(SV* sth, imp_sth_t* imp_sth) {
+ D_imp_dbh_from_sth;
int num_fields;
int ChopBlanks;
int i;
@@ -1797,6 +1813,12 @@
}
sv_setpvn(sv, col, len);
+
+#ifdef is_utf8_string
+ if (imp_dbh->enable_utf8 &&
+ is_high_bit_set(col) && is_utf8_string(col, len))
+ SvUTF8_on(sv);
+#endif
} else {
(void) SvOK_off(sv); /* Field is NULL, return undef */
}
diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.h ../DBD-mysql-2.9003/dbdimp.h
--- /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.h 2003-10-17 19:20:50.000000000 +0200
+++ ../DBD-mysql-2.9003/dbdimp.h 2004-06-09 22:06:06.000000000 +0300
@@ -114,6 +114,9 @@
unsigned int auto_reconnects_ok;
unsigned int auto_reconnects_failed;
} stats;
+#ifdef is_utf8_string
+ bool enable_utf8; /* should we attempt to make utf8 strings? */
+#endif
};
diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/lib/DBD/mysql.pm ../DBD-mysql-2.9003/lib/DBD/mysql.pm
--- /home/roo/.cpan/build/DBD-mysql-2.9003/lib/DBD/mysql.pm 2003-10-27 05:26:08.000000000 +0200
+++ ../DBD-mysql-2.9003/lib/DBD/mysql.pm 2004-06-09 22:54:21.000000000 +0300
@@ -867,6 +867,18 @@
AutoCommit is turned off, and when AutoCommit is turned off, DBD::mysql will
not automatically reconnect to the server.
+=item mysql_enable_utf8
+
+This attribute determines whether DBD::mysql should assume strings stored
+in the database are utf8. This feature defaults to off. When set, and if
+a retrieved string validates as utf8, then perl's UTF8 flag on the string
+is turned on, making perl use character semantics on it. You need to
+turn this on if you store your data as utf8; otherwise you may notice
+that although data is displayed correctly when retrieved, length()
+returns results that are too large.
+
+This option is experimental and may change in future versions.
+
=head1 STATEMENT HANDLES
The statement handles of DBD::mysql support a number
diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/mysql-utf8.0.patch ../DBD-mysql-2.9003/mysql-utf8.0.patch
--- /home/roo/.cpan/build/DBD-mysql-2.9003/mysql-utf8.0.patch 1970-01-01 02:00:00.000000000 +0200
+++ ../DBD-mysql-2.9003/mysql-utf8.0.patch 2004-06-09 22:55:00.000000000 +0300
@@ -0,0 +1,96 @@
+diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.c ../DBD-mysql-2.9003/dbdimp.c
+--- /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.c 2003-10-17 19:20:50.000000000 +0200
++++ ../DBD-mysql-2.9003/dbdimp.c 2004-06-09 22:15:03.000000000 +0300
+@@ -848,6 +848,9 @@
+ imp_dbh->has_transactions = TRUE;
+ imp_dbh->auto_reconnect = FALSE; /* Safer we flip this to TRUE perl side
+ if we detect a mod_perl env. */
++#ifdef is_utf8_string
++ imp_dbh->enable_utf8 = FALSE; /* initialize mysql_enable_utf8 */
++#endif
+
+ DBIc_set(imp_dbh, DBIcf_AutoCommit, &sv_yes);
+ if (sv && SvROK(sv)) {
+@@ -1333,6 +1336,10 @@
+ /*XXX: Does DBI handle the magic ? */
+ imp_dbh->auto_reconnect = bool_value;
+ /* imp_dbh->mysql.reconnect=0; */
++#ifdef is_utf8_string
++ } else if (strEQ(key, "mysql_enable_utf8")) {
++ imp_dbh->enable_utf8 = bool_value;
++#endif
+ } else {
+ return FALSE;
+ }
+@@ -1413,6 +1420,8 @@
+ /* Obsolete, as of 2.09! */
+ const char* msg = mysql_error(&imp_dbh->mysql);
+ result = sv_2mortal(newSVpv(msg, strlen(msg)));
++ } else if (strEQ(key, "enable_utf8")) {
++ result = sv_2mortal(newSViv(imp_dbh->enable_utf8));
+ }
+ break;
+ case 'd':
+@@ -1748,7 +1757,14 @@
+ *
+ **************************************************************************/
+
++int is_high_bit_set(char *val) { /* 1 if any byte of NUL-terminated val is >= 0x80, else 0 */
++    for (; *val; val++) /* test, then advance: the first byte must be checked too */
++        if (*val & 0x80) return 1;
++    return 0;
++}
++
+ AV* dbd_st_fetch(SV* sth, imp_sth_t* imp_sth) {
++ D_imp_dbh_from_sth;
+ int num_fields;
+ int ChopBlanks;
+ int i;
+@@ -1797,6 +1813,12 @@
+ }
+
+ sv_setpvn(sv, col, len);
++
++#ifdef is_utf8_string
++ if (imp_dbh->enable_utf8 &&
++ is_high_bit_set(col) && is_utf8_string(col, len))
++ SvUTF8_on(sv);
++#endif
+ } else {
+ (void) SvOK_off(sv); /* Field is NULL, return undef */
+ }
+diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.h ../DBD-mysql-2.9003/dbdimp.h
+--- /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.h 2003-10-17 19:20:50.000000000 +0200
++++ ../DBD-mysql-2.9003/dbdimp.h 2004-06-09 22:06:06.000000000 +0300
+@@ -114,6 +114,9 @@
+ unsigned int auto_reconnects_ok;
+ unsigned int auto_reconnects_failed;
+ } stats;
++#ifdef is_utf8_string
++ bool enable_utf8; /* should we attempt to make utf8 strings? */
++#endif
+ };
+
+
+diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/lib/DBD/mysql.pm ../DBD-mysql-2.9003/lib/DBD/mysql.pm
+--- /home/roo/.cpan/build/DBD-mysql-2.9003/lib/DBD/mysql.pm 2003-10-27 05:26:08.000000000 +0200
++++ ../DBD-mysql-2.9003/lib/DBD/mysql.pm 2004-06-09 22:54:21.000000000 +0300
+@@ -867,6 +867,18 @@
+ AutoCommit is turned off, and when AutoCommit is turned off, DBD::mysql will
+ not automatically reconnect to the server.
+
++=item mysql_enable_utf8
++
++This attribute determines whether DBD::mysql should assume strings stored
++in the database are utf8. This feature defaults to off. When set, and if
++a retrieved string validates as utf8, then perl's UTF8 flag on the string
++is turned on, making perl use character semantics on it. You need to
++turn this on if you store your data as utf8; otherwise you may notice
++that although data is displayed correctly when retrieved, length()
++returns results that are too large.
++
++This option is experimental and may change in future versions.
++
+ =head1 STATEMENT HANDLES
+
+ The statement handles of DBD::mysql support a number