List: MySQL and Perl    « Previous Message | Next Message »
From: Gaal Yahas    Date: June 9 2004 8:01pm
Subject: [PATCH] Re: blessing db data as utf8
View as plain text  
On Wed, Jun 09, 2004 at 04:01:09PM +0300, Gaal Yahas wrote:
> What do the maintainers of DBD::mysql say? Should the same style of fix
> be added to DBD::mysql? I'm willing to work on a patch if nobody else steps
> forward.

Patch follows. This works for me; thanks to Dominic Mitchell
<dom@stripped> for the Pg version this is based on.

-- 
Gaal Yahas <gaal@stripped>
http://gaal.livejournal.com/


diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.c ../DBD-mysql-2.9003/dbdimp.c
--- /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.c	2003-10-17 19:20:50.000000000 +0200
+++ ../DBD-mysql-2.9003/dbdimp.c	2004-06-09 22:15:03.000000000 +0300
@@ -848,6 +848,9 @@
       imp_dbh->has_transactions = TRUE;
       imp_dbh->auto_reconnect = FALSE; /* Safer we flip this to TRUE perl side 
                                          if we detect a mod_perl env. */
+#ifdef is_utf8_string
+      imp_dbh->enable_utf8 = FALSE;  /* initialize mysql_enable_utf8 */
+#endif
 
       DBIc_set(imp_dbh, DBIcf_AutoCommit, &sv_yes);
       if (sv  &&  SvROK(sv)) {
@@ -1333,6 +1336,10 @@
         /*XXX: Does DBI handle the magic ? */
 	imp_dbh->auto_reconnect = bool_value;
 	/* imp_dbh->mysql.reconnect=0; */
+#ifdef is_utf8_string
+    } else if (strEQ(key, "mysql_enable_utf8")) {
+	    imp_dbh->enable_utf8 = bool_value;
+#endif
     } else {
         return FALSE;
     }
@@ -1413,6 +1420,8 @@
 	/* Obsolete, as of 2.09! */
 	const char* msg = mysql_error(&imp_dbh->mysql);
 	result = sv_2mortal(newSVpv(msg, strlen(msg)));
+      } else if (strEQ(key, "enable_utf8")) {
+                result = sv_2mortal(newSViv(imp_dbh->enable_utf8));
       }
       break;
     case 'd':
@@ -1748,7 +1757,14 @@
  *
  **************************************************************************/
 
+int is_high_bit_set(char *val) {  /* 1 if any byte before the NUL has bit 7 set, else 0 */
+    for (; *val; val++)           /* test each byte before advancing, so the first is not skipped */
+      if (*val & 0x80) return 1;
+    return 0;
+}
+
 AV* dbd_st_fetch(SV* sth, imp_sth_t* imp_sth) {
+    D_imp_dbh_from_sth;
     int num_fields;
     int ChopBlanks;
     int i;
@@ -1797,6 +1813,12 @@
 	}
 
 	sv_setpvn(sv, col, len);
+
+#ifdef is_utf8_string
+        if (imp_dbh->enable_utf8 &&
+            is_high_bit_set(col) && is_utf8_string(col, len))
+          SvUTF8_on(sv);
+#endif
       } else {
 	(void) SvOK_off(sv);  /*  Field is NULL, return undef  */
       }
diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.h ../DBD-mysql-2.9003/dbdimp.h
--- /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.h	2003-10-17 19:20:50.000000000 +0200
+++ ../DBD-mysql-2.9003/dbdimp.h	2004-06-09 22:06:06.000000000 +0300
@@ -114,6 +114,9 @@
 	    unsigned int auto_reconnects_ok;
 	    unsigned int auto_reconnects_failed;
     } stats;
+#ifdef is_utf8_string
+    bool enable_utf8;       /* should we attempt to make utf8 strings? */
+#endif
 };
 
 
diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/lib/DBD/mysql.pm ../DBD-mysql-2.9003/lib/DBD/mysql.pm
--- /home/roo/.cpan/build/DBD-mysql-2.9003/lib/DBD/mysql.pm	2003-10-27 05:26:08.000000000 +0200
+++ ../DBD-mysql-2.9003/lib/DBD/mysql.pm	2004-06-09 22:54:21.000000000 +0300
@@ -867,6 +867,18 @@
 AutoCommit is turned off, and when AutoCommit is turned off, DBD::mysql will
 not automatically reconnect to the server.
 
+=item mysql_enable_utf8
+
+This attribute determines whether DBD::mysql should assume strings stored
+in the database are utf8. This feature defaults to off. When set, and if
+a retrieved string validates as utf8, then the magic flag on the string
+is turned on, making perl use character semantics on it. You need to
+turn this on if you store your data as utf8; otherwise you may notice
+that although data is displayed correctly when retrieved, length()
+returns results that are too large.
+
+This option is experimental and may change in future versions.
+
 =head1 STATEMENT HANDLES
 
 The statement handles of DBD::mysql support a number
diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/mysql-utf8.0.patch ../DBD-mysql-2.9003/mysql-utf8.0.patch
--- /home/roo/.cpan/build/DBD-mysql-2.9003/mysql-utf8.0.patch	1970-01-01 02:00:00.000000000 +0200
+++ ../DBD-mysql-2.9003/mysql-utf8.0.patch	2004-06-09 22:55:00.000000000 +0300
@@ -0,0 +1,96 @@
+diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.c ../DBD-mysql-2.9003/dbdimp.c
+--- /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.c	2003-10-17 19:20:50.000000000 +0200
++++ ../DBD-mysql-2.9003/dbdimp.c	2004-06-09 22:15:03.000000000 +0300
+@@ -848,6 +848,9 @@
+       imp_dbh->has_transactions = TRUE;
+       imp_dbh->auto_reconnect = FALSE; /* Safer we flip this to TRUE perl side 
+                                          if we detect a mod_perl env. */
++#ifdef is_utf8_string
++      imp_dbh->enable_utf8 = FALSE;  /* initialize mysql_enable_utf8 */
++#endif
+ 
+       DBIc_set(imp_dbh, DBIcf_AutoCommit, &sv_yes);
+       if (sv  &&  SvROK(sv)) {
+@@ -1333,6 +1336,10 @@
+         /*XXX: Does DBI handle the magic ? */
+ 	imp_dbh->auto_reconnect = bool_value;
+ 	/* imp_dbh->mysql.reconnect=0; */
++#ifdef is_utf8_string
++    } else if (strEQ(key, "mysql_enable_utf8")) {
++	    imp_dbh->enable_utf8 = bool_value;
++#endif
+     } else {
+         return FALSE;
+     }
+@@ -1413,6 +1420,8 @@
+ 	/* Obsolete, as of 2.09! */
+ 	const char* msg = mysql_error(&imp_dbh->mysql);
+ 	result = sv_2mortal(newSVpv(msg, strlen(msg)));
++      } else if (strEQ(key, "enable_utf8")) {
++                result = sv_2mortal(newSViv(imp_dbh->enable_utf8));
+       }
+       break;
+     case 'd':
+@@ -1748,7 +1757,14 @@
+  *
+  **************************************************************************/
+ 
++int is_high_bit_set(char *val) {  /* 1 if any byte before the NUL has bit 7 set, else 0 */
++    for (; *val; val++)           /* test each byte before advancing, so the first is not skipped */
++      if (*val & 0x80) return 1;
++    return 0;
++}
++
+ AV* dbd_st_fetch(SV* sth, imp_sth_t* imp_sth) {
++    D_imp_dbh_from_sth;
+     int num_fields;
+     int ChopBlanks;
+     int i;
+@@ -1797,6 +1813,12 @@
+ 	}
+ 
+ 	sv_setpvn(sv, col, len);
++
++#ifdef is_utf8_string
++        if (imp_dbh->enable_utf8 &&
++            is_high_bit_set(col) && is_utf8_string(col, len))
++          SvUTF8_on(sv);
++#endif
+       } else {
+ 	(void) SvOK_off(sv);  /*  Field is NULL, return undef  */
+       }
+diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.h ../DBD-mysql-2.9003/dbdimp.h
+--- /home/roo/.cpan/build/DBD-mysql-2.9003/dbdimp.h	2003-10-17 19:20:50.000000000 +0200
++++ ../DBD-mysql-2.9003/dbdimp.h	2004-06-09 22:06:06.000000000 +0300
+@@ -114,6 +114,9 @@
+ 	    unsigned int auto_reconnects_ok;
+ 	    unsigned int auto_reconnects_failed;
+     } stats;
++#ifdef is_utf8_string
++    bool enable_utf8;       /* should we attempt to make utf8 strings? */
++#endif
+ };
+ 
+ 
+diff -uraN -X /home/roo/diff-exclude /home/roo/.cpan/build/DBD-mysql-2.9003/lib/DBD/mysql.pm ../DBD-mysql-2.9003/lib/DBD/mysql.pm
+--- /home/roo/.cpan/build/DBD-mysql-2.9003/lib/DBD/mysql.pm	2003-10-27 05:26:08.000000000 +0200
++++ ../DBD-mysql-2.9003/lib/DBD/mysql.pm	2004-06-09 22:54:21.000000000 +0300
+@@ -867,6 +867,18 @@
+ AutoCommit is turned off, and when AutoCommit is turned off, DBD::mysql will
+ not automatically reconnect to the server.
+ 
++=item mysql_enable_utf8
++
++This attribute determines whether DBD::mysql should assume strings stored
++in the database are utf8. This feature defaults to off. When set, and if
++a retrieved string validates as utf8, then the magic flag on the string
++is turned on, making perl use character semantics on it. You need to
++turn this on if you store your data as utf8; otherwise you may notice
++that although data is displayed correctly when retrieved, length()
++returns results that are too large.
++
++This option is experimental and may change in future versions.
++
+ =head1 STATEMENT HANDLES
+ 
+ The statement handles of DBD::mysql support a number
Thread
blessing db data as utf8 — Gaal Yahas, 9 Jun
  • Re: blessing db data as utf8 — Jochen Wiedmann, 9 Jun
    • Re: blessing db data as utf8 — Gaal Yahas, 9 Jun
      • Re: blessing db data as utf8 — Jochen Wiedmann, 9 Jun
        • Re: blessing db data as utf8 — Gaal Yahas, 9 Jun
  • [PATCH] Re: blessing db data as utf8 — Gaal Yahas, 9 Jun
    • Re: [PATCH] Re: blessing db data as utf8 — Jochen Wiedmann, 10 Jun
      • Re: [PATCH] Re: blessing db data as utf8 — Gaal Yahas, 10 Jun
      • Re: [PATCH] Re: blessing db data as utf8 — Steve Hay, 10 Jun
        • Re: [PATCH] Re: blessing db data as utf8 — Gaal Yahas, 10 Jun
          • Re: [PATCH] Re: blessing db data as utf8 — Steve Hay, 11 Jun
            • Re: [PATCH] Re: blessing db data as utf8 — Gaal Yahas, 11 Jun
              • Re: [PATCH] Re: blessing db data as utf8 — Steve Hay, 11 Jun
Re: blessing db data as utf8 — Gaal Yahas, 10 Jun