MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:Ingo Struewing Date:September 30 2009 10:28am
Subject:bzr commit into mysql-5.4.5-next-mr branch (ingo.struewing:2880)
Bug#37267
View as plain text  
#At file:///home2/mydev/bzrroot/mysql-5.4-bug37267-2/ based on revid:mikael@stripped

 2880 Ingo Struewing	2009-09-30
      Bug#37267 - connect() EINPROGRESS failures mishandled in client library
      
      We cann connect() in a non-blocking mode to be able to specify a
      non-standard timeout.
      
      The problem was that we did not fetch the status from the
      non-blocking connect(). We assumed that poll() would not return
      a POLLIN flag if the connect failed. But on some platforms this
      is not true.
      
      After a successful poll() we do now retrieve the status value
      from connect() with getsockopt(...SO_ERROR...). Now we do know
      if (and how) the connect failed.
      
      The test case for my investigation was rpl.rlp_ssl1 on an
      Ubuntu 9.04 x86_64 machine. Both, IPV4 and IPV6 were active.
      'localhost' resolved first for IPV6 and then for IPV4. The
      connection over IPV6 was blocked. rpl.rlp_ssl1 timed out
      as it did not notice the failed connect(). The first read()
      failed, which was interpreted as a master crash and the
      connection was tried to reestablish with the same result
      until the retry limit was reached.
      
      With the fix, the connect() problem is immediately recognized,
      and the connect() is retried on the second resolution for
      'localhost', which is successful.
     @ libmysqld/libmysqld.c
        Bug#37267 - connect() EINPROGRESS failures mishandled in client library
        Changed a DBUG print string to distinguish the two mysql_real_connect()
        implementations in DBUG traces.
     @ mysql-test/include/wait_for_slave_param.inc
        Bug#37267 - connect() EINPROGRESS failures mishandled in client library
        Made timeout value available in error message.
     @ mysql-test/suite/rpl/r/rpl_get_master_version_and_clock.result
        Bug#37267 - connect() EINPROGRESS failures mishandled in client library
        Fixed test result. Connect error is now detected as CR_CONN_HOST_ERROR
        (2003) instead of CR_SERVER_LOST (2013).
     @ sql-common/client.c
        Bug#37267 - connect() EINPROGRESS failures mishandled in client library
        Added retrieval of the error code from the non-blocking connect().
        Added DBUG.
        Added comment.

    modified:
      libmysqld/libmysqld.c
      mysql-test/include/wait_for_slave_param.inc
      mysql-test/suite/rpl/r/rpl_get_master_version_and_clock.result
      sql-common/client.c
=== modified file 'libmysqld/libmysqld.c'
--- a/libmysqld/libmysqld.c	2008-02-28 17:55:46 +0000
+++ b/libmysqld/libmysqld.c	2009-09-30 10:28:15 +0000
@@ -99,7 +99,7 @@ mysql_real_connect(MYSQL *mysql,const ch
   char name_buff[USERNAME_LENGTH];
 
   DBUG_ENTER("mysql_real_connect");
-  DBUG_PRINT("enter",("host: %s  db: %s  user: %s",
+  DBUG_PRINT("enter",("host: %s  db: %s  user: %s (libmysqld)",
 		      host ? host : "(Null)",
 		      db ? db : "(Null)",
 		      user ? user : "(Null)"));

=== modified file 'mysql-test/include/wait_for_slave_param.inc'
--- a/mysql-test/include/wait_for_slave_param.inc	2009-01-09 14:12:31 +0000
+++ b/mysql-test/include/wait_for_slave_param.inc	2009-09-30 10:28:15 +0000
@@ -49,6 +49,8 @@ if (!$_slave_timeout_counter)
 {
   let $_slave_timeout_counter= 3000;
 }
+# Save resulting counter for later use.
+let $slave_tcnt= $_slave_timeout_counter;
 
 let $_slave_param_comparison= $slave_param_comparison;
 if (`SELECT '$_slave_param_comparison' = ''`)
@@ -70,7 +72,7 @@ while (`SELECT NOT('$_show_slave_status_
 # This has to be outside the loop until BUG#41913 has been fixed
 if (!$_slave_timeout_counter)
 {
-  --echo **** ERROR: timeout after $slave_timeout seconds while waiting for slave parameter $slave_param $_slave_param_comparison $slave_param_value ****
+  --echo **** ERROR: timeout after $slave_tcnt deci-seconds while waiting for slave parameter $slave_param $_slave_param_comparison $slave_param_value ****
   if (`SELECT '$slave_error_message' != ''`)
   {
     --echo Message: $slave_error_message

=== modified file 'mysql-test/suite/rpl/r/rpl_get_master_version_and_clock.result'
--- a/mysql-test/suite/rpl/r/rpl_get_master_version_and_clock.result	2009-09-05 16:50:31 +0000
+++ b/mysql-test/suite/rpl/r/rpl_get_master_version_and_clock.result	2009-09-30 10:28:15 +0000
@@ -18,7 +18,7 @@ start slave;
 SELECT RELEASE_LOCK("debug_lock.before_get_UNIX_TIMESTAMP");
 RELEASE_LOCK("debug_lock.before_get_UNIX_TIMESTAMP")
 1
-Slave_IO_Errno= 2013
+Slave_IO_Errno= 2003
 SELECT IS_FREE_LOCK("debug_lock.before_get_SERVER_ID");
 IS_FREE_LOCK("debug_lock.before_get_SERVER_ID")
 1
@@ -31,7 +31,7 @@ start slave;
 SELECT RELEASE_LOCK("debug_lock.before_get_SERVER_ID");
 RELEASE_LOCK("debug_lock.before_get_SERVER_ID")
 1
-Slave_IO_Errno= 2013
+Slave_IO_Errno= 2003
 set global debug= '';
 reset master;
 include/stop_slave.inc

=== modified file 'sql-common/client.c'
--- a/sql-common/client.c	2009-08-31 19:40:33 +0000
+++ b/sql-common/client.c	2009-09-30 10:28:15 +0000
@@ -145,9 +145,12 @@ int my_connect(my_socket fd, const struc
 	       uint timeout)
 {
 #if defined(__WIN__) || defined(__NETWARE__)
-  return connect(fd, (struct sockaddr*) name, namelen);
+  DBUG_ENTER("my_connect");
+  DBUG_RETURN(connect(fd, (struct sockaddr*) name, namelen));
 #else
   int flags, res, s_err;
+  DBUG_ENTER("my_connect");
+  DBUG_PRINT("enter", ("fd: %d  timeout: %u", fd, timeout));
 
   /*
     If they passed us a timeout of zero, we should behave
@@ -155,24 +158,26 @@ int my_connect(my_socket fd, const struc
   */
 
   if (timeout == 0)
-    return connect(fd, (struct sockaddr*) name, namelen);
+    DBUG_RETURN(connect(fd, (struct sockaddr*) name, namelen));
 
   flags = fcntl(fd, F_GETFL, 0);	  /* Set socket to not block */
 #ifdef O_NONBLOCK
   fcntl(fd, F_SETFL, flags | O_NONBLOCK);  /* and save the flags..  */
 #endif
 
+  DBUG_PRINT("info", ("connecting non-blocking"));
   res= connect(fd, (struct sockaddr*) name, namelen);
+  DBUG_PRINT("info", ("connect result: %d  errno: %d", res, errno));
   s_err= errno;			/* Save the error... */
   fcntl(fd, F_SETFL, flags);
   if ((res != 0) && (s_err != EINPROGRESS))
   {
     errno= s_err;			/* Restore it */
-    return(-1);
+    DBUG_RETURN(-1);
   }
   if (res == 0)				/* Connected quickly! */
-    return(0);
-  return wait_for_data(fd, timeout);
+    DBUG_RETURN(0);
+  DBUG_RETURN(wait_for_data(fd, timeout));
 #endif
 }
 
@@ -191,26 +196,58 @@ static int wait_for_data(my_socket fd, u
 #ifdef HAVE_POLL
   struct pollfd ufds;
   int res;
+  DBUG_ENTER("wait_for_data");
 
+  DBUG_PRINT("info", ("polling"));
   ufds.fd= fd;
   ufds.events= POLLIN | POLLPRI;
   if (!(res= poll(&ufds, 1, (int) timeout*1000)))
   {
+    DBUG_PRINT("info", ("poll timed out"));
     errno= EINTR;
-    return -1;
+    DBUG_RETURN(-1);
   }
+  DBUG_PRINT("info",
+             ("poll result: %d  errno: %d  revents: 0x%02d  events: 0x%02d",
+              res, errno, ufds.revents, ufds.events));
   if (res < 0 || !(ufds.revents & (POLLIN | POLLPRI)))
-    return -1;
-  return 0;
+    DBUG_RETURN(-1);
+  /*
+    At this point, we know that something happened on the socket.
+    But this does not means that everything is alright.
+    The connect might have failed. We need to retrieve the error code
+    from the socket layer. We must return success only if we are sure
+    that it was really a success. Otherwise we might prevent the caller
+    from trying another address to connect to.
+  */
+  {
+    int         s_err;
+    socklen_t   s_len= sizeof(s_err);
+
+    DBUG_PRINT("info", ("Get SO_ERROR from non-blocked connected socket."));
+    res= getsockopt(fd, SOL_SOCKET, SO_ERROR, &s_err, &s_len);
+    DBUG_PRINT("info", ("getsockopt res: %d  s_err: %d", res, s_err));
+    if (res)
+      DBUG_RETURN(res);
+    /* getsockopt() was successful, check the retrieved status value. */
+    if (s_err)
+    {
+      errno= s_err;
+      DBUG_RETURN(-1);
+    }
+    /* Status from connect() is zero. Socket is successfully connected. */
+  }
+  DBUG_RETURN(0);
 #else
   SOCKOPT_OPTLEN_TYPE s_err_size = sizeof(uint);
   fd_set sfds;
   struct timeval tv;
   time_t start_time, now_time;
   int res, s_err;
+  DBUG_ENTER("wait_for_data");
 
   if (fd >= FD_SETSIZE)				/* Check if wrong error */
-    return 0;					/* Can't use timeout */
+    DBUG_RETURN(0);					/* Can't use timeout */
 
   /*
     Our connection is "in progress."  We can use the select() call to wait
@@ -250,11 +287,11 @@ static int wait_for_data(my_socket fd, u
       break;
 #endif
     if (res == 0)					/* timeout */
-      return -1;
+      DBUG_RETURN(-1);
     now_time= my_time(0);
     timeout-= (uint) (now_time - start_time);
     if (errno != EINTR || (int) timeout <= 0)
-      return -1;
+      DBUG_RETURN(-1);
   }
 
   /*
@@ -265,14 +302,14 @@ static int wait_for_data(my_socket fd, u
 
   s_err=0;
   if (getsockopt(fd, SOL_SOCKET, SO_ERROR, (char*) &s_err, &s_err_size) != 0)
-    return(-1);
+    DBUG_RETURN(-1);
 
   if (s_err)
   {						/* getsockopt could succeed */
     errno = s_err;
-    return(-1);					/* but return an error... */
+    DBUG_RETURN(-1);					/* but return an error... */
   }
-  return (0);					/* ok */
+  DBUG_RETURN(0);					/* ok */
 #endif /* HAVE_POLL */
 }
 #endif /* defined(__WIN__) || defined(__NETWARE__) */
@@ -1877,7 +1914,7 @@ CLI_MYSQL_REAL_CONNECT(MYSQL *mysql,cons
   init_sigpipe_variables
   DBUG_ENTER("mysql_real_connect");
 
-  DBUG_PRINT("enter",("host: %s  db: %s  user: %s",
+  DBUG_PRINT("enter",("host: %s  db: %s  user: %s (client)",
 		      host ? host : "(Null)",
 		      db ? db : "(Null)",
 		      user ? user : "(Null)"));
@@ -1927,6 +1964,7 @@ CLI_MYSQL_REAL_CONNECT(MYSQL *mysql,cons
     unix_socket=mysql->options.unix_socket;
 
   mysql->server_status=SERVER_STATUS_AUTOCOMMIT;
+  DBUG_PRINT("info", ("Connecting"));
 
   /*
     Part 0: Grab a socket and connect it to the server
@@ -1936,6 +1974,7 @@ CLI_MYSQL_REAL_CONNECT(MYSQL *mysql,cons
        mysql->options.protocol == MYSQL_PROTOCOL_MEMORY) &&
       (!host || !strcmp(host,LOCAL_HOST)))
   {
+    DBUG_PRINT("info", ("Using shared memory"));
     if ((create_shared_memory(mysql,net, mysql->options.connect_timeout)) ==
 	INVALID_HANDLE_VALUE)
     {
@@ -2034,6 +2073,8 @@ CLI_MYSQL_REAL_CONNECT(MYSQL *mysql,cons
     }
   }
 #endif
+  DBUG_PRINT("info", ("net->vio: %p  protocol: %d",
+                      net->vio, mysql->options.protocol));
   if (!net->vio &&
       (!mysql->options.protocol ||
        mysql->options.protocol == MYSQL_PROTOCOL_TCP))
@@ -2105,6 +2146,11 @@ CLI_MYSQL_REAL_CONNECT(MYSQL *mysql,cons
                min(sizeof(sock_addr.sin_addr), (size_t) hp->h_length));
         DBUG_PRINT("info",("Trying %s...",
                           (my_inet_ntoa(sock_addr.sin_addr, ipaddr), ipaddr)));
+        /*
+          Here we rely on my_connect() to return success only if the
+          connect attempt was really successful. Otherwise we would stop
+          trying another address, believing we were successful.
+        */
         status= my_connect(sock, (struct sockaddr *) &sock_addr,
                            sizeof(sock_addr), mysql->options.connect_timeout);
       }
@@ -2163,6 +2209,7 @@ CLI_MYSQL_REAL_CONNECT(MYSQL *mysql,cons
   /*
     Part 1: Connection established, read and parse first packet
   */
+  DBUG_PRINT("info", ("Read first packet."));
 
   if ((pkt_length=cli_safe_read(mysql)) == packet_error)
   {


Attachment: [text/bzr-bundle] bzr/ingo.struewing@sun.com-20090930102815-y6u20igtxwwx4pai.bundle
Thread
bzr commit into mysql-5.4.5-next-mr branch (ingo.struewing:2880)Bug#37267Ingo Struewing30 Sep