#At file:///media/sdb2/hezx/work/mysql/bzrwork/semisync/6.0-rpl/ based on revid:zhenxing.he@stripped
2858 He Zhenxing 2009-05-31
WL#1720 Semi-synchronous replication
Rewrite semi-sync patch from google with the semi-synchronous
replication interface.
A mysql-test/suite/rpl/r/rpl_semi_sync.result
A mysql-test/suite/rpl/t/rpl_semi_sync-master.opt
A mysql-test/suite/rpl/t/rpl_semi_sync-slave.opt
A mysql-test/suite/rpl/t/rpl_semi_sync.test
A plugin/semisync/
A plugin/semisync/Makefile.am
A plugin/semisync/configure.in
A plugin/semisync/plug.in
A plugin/semisync/semisync.cc
A plugin/semisync/semisync.h
A plugin/semisync/semisync_master.cc
A plugin/semisync/semisync_master.h
A plugin/semisync/semisync_master_plugin.cc
A plugin/semisync/semisync_slave.cc
A plugin/semisync/semisync_slave.h
A plugin/semisync/semisync_slave_plugin.cc
=== added file 'mysql-test/suite/rpl/r/rpl_semi_sync.result'
--- a/mysql-test/suite/rpl/r/rpl_semi_sync.result 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/r/rpl_semi_sync.result 2009-05-31 08:35:06 +0000
@@ -0,0 +1,217 @@
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+[ on master ]
+[ default state of semi-sync on master should be OFF ]
+show variables like 'rpl_semi_sync_master_enabled';
+Variable_name Value
+rpl_semi_sync_master_enabled OFF
+[ enable semi-sync on master ]
+set global rpl_semi_sync_master_enabled = 1;
+show variables like 'rpl_semi_sync_master_enabled';
+Variable_name Value
+rpl_semi_sync_master_enabled ON
+[ on slave ]
+stop slave;
+[ default state of semi-sync on slave should be OFF ]
+show variables like 'rpl_semi_sync_slave_enabled';
+Variable_name Value
+rpl_semi_sync_slave_enabled OFF
+[ enable semi-sync on slave ]
+set global rpl_semi_sync_slave_enabled = 1;
+show variables like 'rpl_semi_sync_slave_enabled';
+Variable_name Value
+rpl_semi_sync_slave_enabled ON
+start slave;
+[ on master ]
+[ initial master state after the semi-sync slave connected ]
+show status like 'Rpl_semi_sync_master_clients';
+Variable_name Value
+Rpl_semi_sync_master_clients 1
+show status like 'Rpl_semi_sync_master_status';
+Variable_name Value
+Rpl_semi_sync_master_status ON
+show status like 'Rpl_semi_sync_master_no_tx';
+Variable_name Value
+Rpl_semi_sync_master_no_tx 0
+show status like 'Rpl_semi_sync_master_yes_tx';
+Variable_name Value
+Rpl_semi_sync_master_yes_tx 0
+create table t1(n int) engine = ENGINE_TYPE;
+[ master state after CREATE TABLE statement ]
+show status like 'Rpl_semi_sync_master_status';
+Variable_name Value
+Rpl_semi_sync_master_status ON
+show status like 'Rpl_semi_sync_master_no_tx';
+Variable_name Value
+Rpl_semi_sync_master_no_tx 0
+show status like 'Rpl_semi_sync_master_yes_tx';
+Variable_name Value
+Rpl_semi_sync_master_yes_tx 1
+[ insert records to table ]
+[ master status after inserts ]
+show status like 'Rpl_semi_sync_master_status';
+Variable_name Value
+Rpl_semi_sync_master_status ON
+show status like 'Rpl_semi_sync_master_no_tx';
+Variable_name Value
+Rpl_semi_sync_master_no_tx 0
+show status like 'Rpl_semi_sync_master_yes_tx';
+Variable_name Value
+Rpl_semi_sync_master_yes_tx 301
+[ on slave ]
+[ slave status after replicated inserts ]
+show status like 'Rpl_semi_sync_slave_status';
+Variable_name Value
+Rpl_semi_sync_slave_status ON
+select count(distinct n) from t1;
+count(distinct n)
+300
+select min(n) from t1;
+min(n)
+1
+select max(n) from t1;
+max(n)
+300
+stop slave;
+[ on master ]
+[ master status should be ON ]
+show status like 'Rpl_semi_sync_master_status';
+Variable_name Value
+Rpl_semi_sync_master_status ON
+show status like 'Rpl_semi_sync_master_no_tx';
+Variable_name Value
+Rpl_semi_sync_master_no_tx 0
+show status like 'Rpl_semi_sync_master_yes_tx';
+Variable_name Value
+Rpl_semi_sync_master_yes_tx 301
+show status like 'Rpl_semi_sync_master_clients';
+Variable_name Value
+Rpl_semi_sync_master_clients 1
+[ semi-sync replication of these transactions will fail ]
+begin;
+insert into t1 values (500);
+delete from t1 where n < 500;
+commit;
+insert into t1 values (100);
+[ master status should be OFF ]
+show status like 'Rpl_semi_sync_master_status';
+Variable_name Value
+Rpl_semi_sync_master_status OFF
+show status like 'Rpl_semi_sync_master_no_tx';
+Variable_name Value
+Rpl_semi_sync_master_no_tx 2
+show status like 'Rpl_semi_sync_master_yes_tx';
+Variable_name Value
+Rpl_semi_sync_master_yes_tx 301
+show status like 'Rpl_semi_sync_master_clients';
+Variable_name Value
+Rpl_semi_sync_master_clients 0
+[ on slave ]
+[ slave status should be OFF ]
+show status like 'Rpl_semi_sync_slave_status';
+Variable_name Value
+Rpl_semi_sync_slave_status OFF
+start slave;
+[ slave status should be ON ]
+show status like 'Rpl_semi_sync_slave_status';
+Variable_name Value
+Rpl_semi_sync_slave_status ON
+select count(distinct n) from t1;
+count(distinct n)
+2
+select min(n) from t1;
+min(n)
+100
+select max(n) from t1;
+max(n)
+500
+[ on master ]
+[ do something to activate semi-sync ]
+drop table t1;
+[ master status should be ON again ]
+show status like 'Rpl_semi_sync_master_status';
+Variable_name Value
+Rpl_semi_sync_master_status ON
+show status like 'Rpl_semi_sync_master_no_tx';
+Variable_name Value
+Rpl_semi_sync_master_no_tx 2
+show status like 'Rpl_semi_sync_master_yes_tx';
+Variable_name Value
+Rpl_semi_sync_master_yes_tx 302
+show status like 'Rpl_semi_sync_master_clients';
+Variable_name Value
+Rpl_semi_sync_master_clients 1
+[ on slave ]
+stop slave;
+[ on master ]
+show master logs;
+Log_name master-bin.000001
+File_size #
+show variables like 'rpl_semi_sync_master_enabled';
+Variable_name Value
+rpl_semi_sync_master_enabled ON
+[ disable semi-sync on the fly ]
+set global rpl_semi_sync_master_enabled=0;
+show variables like 'rpl_semi_sync_master_enabled';
+Variable_name Value
+rpl_semi_sync_master_enabled OFF
+show status like 'Rpl_semi_sync_master_status';
+Variable_name Value
+Rpl_semi_sync_master_status OFF
+[ enable semi-sync on the fly ]
+set global rpl_semi_sync_master_enabled=1;
+show variables like 'rpl_semi_sync_master_enabled';
+Variable_name Value
+rpl_semi_sync_master_enabled ON
+show status like 'Rpl_semi_sync_master_status';
+Variable_name Value
+Rpl_semi_sync_master_status ON
+[ on slave ]
+start slave;
+[ on master ]
+create table t1 (a int) engine = ENGINE_TYPE;
+drop table t1;
+show status like 'Rpl_relay%';
+Variable_name Value
+[ test reset master ]
+[ on master]
+reset master;
+show status like 'Rpl_semi_sync_master_status';
+Variable_name Value
+Rpl_semi_sync_master_status ON
+show status like 'Rpl_semi_sync_master_no_tx';
+Variable_name Value
+Rpl_semi_sync_master_no_tx 0
+show status like 'Rpl_semi_sync_master_yes_tx';
+Variable_name Value
+Rpl_semi_sync_master_yes_tx 0
+[ on slave ]
+stop slave;
+reset slave;
+start slave;
+[ on master ]
+create table t1 (a int) engine = ENGINE_TYPE;
+insert into t1 values (1);
+insert into t1 values (2), (3);
+[ on slave ]
+select * from t1;
+a
+1
+2
+3
+[ on master ]
+show status like 'Rpl_semi_sync_master_status';
+Variable_name Value
+Rpl_semi_sync_master_status ON
+show status like 'Rpl_semi_sync_master_no_tx';
+Variable_name Value
+Rpl_semi_sync_master_no_tx 0
+show status like 'Rpl_semi_sync_master_yes_tx';
+Variable_name Value
+Rpl_semi_sync_master_yes_tx 3
+[ cleanup ]
+drop table t1;
=== added file 'mysql-test/suite/rpl/t/rpl_semi_sync-master.opt'
--- a/mysql-test/suite/rpl/t/rpl_semi_sync-master.opt 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/t/rpl_semi_sync-master.opt 2009-05-31 08:35:06 +0000
@@ -0,0 +1 @@
+--plugin_dir=../plugin/semisync/.libs
=== added file 'mysql-test/suite/rpl/t/rpl_semi_sync-slave.opt'
--- a/mysql-test/suite/rpl/t/rpl_semi_sync-slave.opt 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/t/rpl_semi_sync-slave.opt 2009-05-31 08:35:06 +0000
@@ -0,0 +1 @@
+--plugin_dir=../plugin/semisync/.libs
=== added file 'mysql-test/suite/rpl/t/rpl_semi_sync.test'
--- a/mysql-test/suite/rpl/t/rpl_semi_sync.test 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/t/rpl_semi_sync.test 2009-05-31 08:35:06 +0000
@@ -0,0 +1,257 @@
+source include/have_innodb.inc;
+source include/master-slave.inc;
+
+let $engine_type= InnoDB;
+
+connection master;
+echo [ on master ];
+
+disable_query_log;
+let $value = query_get_value(show variables like 'rpl_semi_sync_master_enabled', Value, 1);
+if (`select '$value' = 'No such row'`)
+{
+ set sql_log_bin=0;
+ INSTALL PLUGIN rpl_semi_sync_master SONAME 'libsemisync_master.so';
+ set global rpl_semi_sync_master_timeout= 5000; /* 5s */
+ set sql_log_bin=1;
+}
+enable_query_log;
+
+echo [ default state of semi-sync on master should be OFF ];
+show variables like 'rpl_semi_sync_master_enabled';
+
+echo [ enable semi-sync on master ];
+set global rpl_semi_sync_master_enabled = 1;
+show variables like 'rpl_semi_sync_master_enabled';
+
+connection slave;
+echo [ on slave ];
+
+# Restart I/O thread to make sure that semi-sync is caught up.
+stop slave;
+source include/wait_for_slave_to_stop.inc;
+
+disable_query_log;
+let $value= query_get_value(show variables like 'rpl_semi_sync_slave_enabled', Value, 1);
+if (`select '$value' = 'No such row'`)
+{
+ set sql_log_bin=0;
+ INSTALL PLUGIN rpl_semi_sync_slave SONAME 'libsemisync_slave.so';
+ set sql_log_bin=1;
+}
+enable_query_log;
+
+echo [ default state of semi-sync on slave should be OFF ];
+show variables like 'rpl_semi_sync_slave_enabled';
+
+echo [ enable semi-sync on slave ];
+set global rpl_semi_sync_slave_enabled = 1;
+show variables like 'rpl_semi_sync_slave_enabled';
+start slave;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+echo [ on master ];
+
+# NOTE: Rpl_semi_sync_master_client will only be updated when
+# semi-sync slave has started binlog dump request
+let $status_var= Rpl_semi_sync_master_clients;
+let $status_var_value= 1;
+source include/wait_for_status_var.inc;
+
+echo [ initial master state after the semi-sync slave connected ];
+show status like 'Rpl_semi_sync_master_clients';
+show status like 'Rpl_semi_sync_master_status';
+show status like 'Rpl_semi_sync_master_no_tx';
+show status like 'Rpl_semi_sync_master_yes_tx';
+
+replace_result $engine_type ENGINE_TYPE;
+eval create table t1(n int) engine = $engine_type;
+
+echo [ master state after CREATE TABLE statement ];
+show status like 'Rpl_semi_sync_master_status';
+show status like 'Rpl_semi_sync_master_no_tx';
+show status like 'Rpl_semi_sync_master_yes_tx';
+
+let $i=300;
+echo [ insert records to table ];
+disable_query_log;
+while ($i)
+{
+ eval insert into t1 values ($i);
+ dec $i;
+}
+enable_query_log;
+
+echo [ master status after inserts ];
+show status like 'Rpl_semi_sync_master_status';
+show status like 'Rpl_semi_sync_master_no_tx';
+show status like 'Rpl_semi_sync_master_yes_tx';
+
+sync_slave_with_master;
+echo [ on slave ];
+
+echo [ slave status after replicated inserts ];
+show status like 'Rpl_semi_sync_slave_status';
+
+select count(distinct n) from t1;
+select min(n) from t1;
+select max(n) from t1;
+
+stop slave;
+source include/wait_for_slave_to_stop.inc;
+
+connection master;
+echo [ on master ];
+
+# The first semi-sync check should be on because after slave stop,
+# there are no transactions on the master.
+echo [ master status should be ON ];
+show status like 'Rpl_semi_sync_master_status';
+show status like 'Rpl_semi_sync_master_no_tx';
+show status like 'Rpl_semi_sync_master_yes_tx';
+show status like 'Rpl_semi_sync_master_clients';
+
+echo [ semi-sync replication of these transactions will fail ];
+begin;
+insert into t1 values (500);
+delete from t1 where n < 500;
+commit;
+insert into t1 values (100);
+
+# The second semi-sync check should be off because one transaction
+# times out during waiting.
+echo [ master status should be OFF ];
+show status like 'Rpl_semi_sync_master_status';
+show status like 'Rpl_semi_sync_master_no_tx';
+show status like 'Rpl_semi_sync_master_yes_tx';
+show status like 'Rpl_semi_sync_master_clients';
+
+# Save the master position for later use.
+save_master_pos;
+
+connection slave;
+echo [ on slave ];
+
+echo [ slave status should be OFF ];
+show status like 'Rpl_semi_sync_slave_status';
+start slave;
+sync_with_master;
+
+echo [ slave status should be ON ];
+show status like 'Rpl_semi_sync_slave_status';
+
+select count(distinct n) from t1;
+select min(n) from t1;
+select max(n) from t1;
+
+connection master;
+echo [ on master ];
+
+echo [ do something to activate semi-sync ];
+drop table t1;
+
+# The third semi-sync check should be on again.
+echo [ master status should be ON again ];
+show status like 'Rpl_semi_sync_master_status';
+show status like 'Rpl_semi_sync_master_no_tx';
+show status like 'Rpl_semi_sync_master_yes_tx';
+show status like 'Rpl_semi_sync_master_clients';
+
+sync_slave_with_master;
+echo [ on slave ];
+
+stop slave;
+source include/wait_for_slave_to_stop.inc;
+
+connection master;
+echo [ on master ];
+
+source include/show_master_logs.inc;
+show variables like 'rpl_semi_sync_master_enabled';
+
+echo [ disable semi-sync on the fly ];
+set global rpl_semi_sync_master_enabled=0;
+show variables like 'rpl_semi_sync_master_enabled';
+show status like 'Rpl_semi_sync_master_status';
+
+echo [ enable semi-sync on the fly ];
+set global rpl_semi_sync_master_enabled=1;
+show variables like 'rpl_semi_sync_master_enabled';
+show status like 'Rpl_semi_sync_master_status';
+
+connection slave;
+echo [ on slave ];
+
+start slave;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+echo [ on master ];
+
+replace_result $engine_type ENGINE_TYPE;
+eval create table t1 (a int) engine = $engine_type;
+drop table t1;
+
+##show status like 'Rpl_semi_sync_master_status';
+
+sync_slave_with_master;
+--replace_column 2 #
+show status like 'Rpl_relay%';
+
+echo [ test reset master ];
+connection master;
+echo [ on master];
+
+reset master;
+
+show status like 'Rpl_semi_sync_master_status';
+show status like 'Rpl_semi_sync_master_no_tx';
+show status like 'Rpl_semi_sync_master_yes_tx';
+
+connection slave;
+echo [ on slave ];
+
+stop slave;
+source include/wait_for_slave_to_stop.inc;
+reset slave;
+start slave;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+echo [ on master ];
+
+replace_result $engine_type ENGINE_TYPE;
+eval create table t1 (a int) engine = $engine_type;
+insert into t1 values (1);
+insert into t1 values (2), (3);
+
+sync_slave_with_master;
+echo [ on slave ];
+
+select * from t1;
+
+connection master;
+echo [ on master ];
+
+show status like 'Rpl_semi_sync_master_status';
+show status like 'Rpl_semi_sync_master_no_tx';
+show status like 'Rpl_semi_sync_master_yes_tx';
+
+echo [ cleanup ];
+drop table t1;
+sync_slave_with_master;
+
+# disable_query_log;
+# stop slave;
+# reset slave;
+# UNINSTALL PLUGIN rpl_semi_sync_slave;
+
+# connection master;
+# reset master;
+# set sql_log_bin=0;
+# disable_warnings;
+# UNINSTALL PLUGIN rpl_semi_sync_master;
+# enable_warnings;
+# set sql_log_bin=1;
+# enable_query_log;
=== added directory 'plugin/semisync'
=== added file 'plugin/semisync/Makefile.am'
--- a/plugin/semisync/Makefile.am 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/Makefile.am 2009-05-31 08:35:06 +0000
@@ -0,0 +1,35 @@
+# Copyright (C) 2006 MySQL AB
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+## Makefile.am for semi-synchronous replication
+
+pkgplugindir = $(pkglibdir)/plugin
+INCLUDES = -I$(top_srcdir)/include \
+ -I$(top_srcdir)/sql \
+ -I$(srcdir)
+
+#noinst_HEADERS = semisync.h semisync_master.h semisync_slave.h
+
+pkgplugin_LTLIBRARIES = libsemisync_master.la libsemisync_slave.la
+
+libsemisync_master_la_LDFLAGS = -module
+libsemisync_master_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
+libsemisync_master_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
+libsemisync_master_la_SOURCES = semisync.cc semisync_master.cc semisync_master_plugin.cc
+
+libsemisync_slave_la_LDFLAGS = -module
+libsemisync_slave_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
+libsemisync_slave_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
+libsemisync_slave_la_SOURCES = semisync.cc semisync_slave.cc semisync_slave_plugin.cc
=== added file 'plugin/semisync/configure.in'
--- a/plugin/semisync/configure.in 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/configure.in 2009-05-31 08:35:06 +0000
@@ -0,0 +1,9 @@
+# configure.in for semi-synchronous replication
+
+AC_INIT(mysql-semi-sync-plugin, 0.1)
+AM_INIT_AUTOMAKE
+AC_DISABLE_STATIC
+AC_PROG_LIBTOOL
+AC_CONFIG_FILES([Makefile])
+AC_OUTPUT
+
=== added file 'plugin/semisync/plug.in'
--- a/plugin/semisync/plug.in 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/plug.in 2009-05-31 08:35:06 +0000
@@ -0,0 +1,3 @@
+MYSQL_PLUGIN(semisync,[Semi-synchronous Replication Plugin],
+ [Semi-synchronous replication plugin.])
+MYSQL_PLUGIN_DYNAMIC(semisync, [libsemisync_master.la libsemisync_slave.la])
=== added file 'plugin/semisync/semisync.cc'
--- a/plugin/semisync/semisync.cc 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync.cc 2009-05-31 08:35:06 +0000
@@ -0,0 +1,30 @@
+/* Copyright (C) 2007 Google Inc.
+ Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#include "semisync.h"
+
+const unsigned char ReplSemiSyncBase::kPacketMagicNum = 0xef;
+const unsigned char ReplSemiSyncBase::kPacketFlagSync = 0x01;
+
+
+const unsigned long Trace::kTraceGeneral = 0x0001;
+const unsigned long Trace::kTraceDetail = 0x0010;
+const unsigned long Trace::kTraceNetWait = 0x0020;
+const unsigned long Trace::kTraceFunction = 0x0040;
+
+const char ReplSemiSyncBase::kSyncHeader[2] =
+ {ReplSemiSyncBase::kPacketMagicNum, 0};
=== added file 'plugin/semisync/semisync.h'
--- a/plugin/semisync/semisync.h 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync.h 2009-05-31 08:35:06 +0000
@@ -0,0 +1,93 @@
+/* Copyright (C) 2007 Google Inc.
+ Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#ifndef SEMISYNC_H
+#define SEMISYNC_H
+
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdio.h>
+#include <pthread.h>
+#include <mysql.h>
+
+typedef uint32_t uint32;
+typedef unsigned long long my_off_t;
+#define FN_REFLEN 512 /* Max length of full path-name */
+void sql_print_error(const char *format, ...);
+void sql_print_warning(const char *format, ...);
+void sql_print_information(const char *format, ...);
+extern unsigned long max_connections;
+
+#define MYSQL_SERVER 1
+#define HAVE_REPLICATION 1
+#include <mysql/plugin.h>
+#include <replication.h>
+
+typedef struct st_mysql_show_var SHOW_VAR;
+typedef struct st_mysql_sys_var SYS_VAR;
+
+
+/**
+ This class is used to trace function calls and other process
+ information
+*/
+class Trace {
+public:
+ static const unsigned long kTraceFunction;
+ static const unsigned long kTraceGeneral;
+ static const unsigned long kTraceDetail;
+ static const unsigned long kTraceNetWait;
+
+ unsigned long trace_level_; /* the level for tracing */
+
+ inline void function_enter(const char *func_name)
+ {
+ if (trace_level_ & kTraceFunction)
+ sql_print_information("---> %s enter", func_name);
+ }
+ inline int function_exit(const char *func_name, int exit_code)
+ {
+ if (trace_level_ & kTraceFunction)
+ sql_print_information("<--- %s exit (%d)", func_name, exit_code);
+ return exit_code;
+ }
+
+ Trace()
+ :trace_level_(0L)
+ {}
+ Trace(unsigned long trace_level)
+ :trace_level_(trace_level)
+ {}
+};
+
+/**
+ Base class for semi-sync master and slave classes
+*/
+class ReplSemiSyncBase
+ :public Trace {
+public:
+ static const char kSyncHeader[2]; /* three byte packet header */
+
+ /* Constants in network packet header. */
+ static const unsigned char kPacketMagicNum;
+ static const unsigned char kPacketFlagSync;
+};
+
+#endif /* SEMISYNC_H */
=== added file 'plugin/semisync/semisync_master.cc'
--- a/plugin/semisync/semisync_master.cc 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync_master.cc 2009-05-31 08:35:06 +0000
@@ -0,0 +1,1184 @@
+/* Copyright (C) 2007 Google Inc.
+ Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#include "semisync_master.h"
+
+#define TIME_THOUSAND 1000
+#define TIME_MILLION 1000000
+#define TIME_BILLION 1000000000
+
+/* This indicates whether semi-synchronous replication is enabled. */
+char rpl_semi_sync_master_enabled;
+unsigned long rpl_semi_sync_master_timeout;
+unsigned long rpl_semi_sync_master_trace_level;
+unsigned long rpl_semi_sync_master_status = 0;
+unsigned long rpl_semi_sync_master_yes_transactions = 0;
+unsigned long rpl_semi_sync_master_no_transactions = 0;
+unsigned long rpl_semi_sync_master_off_times = 0;
+unsigned long rpl_semi_sync_master_timefunc_fails = 0;
+unsigned long rpl_semi_sync_master_num_timeouts = 0;
+unsigned long rpl_semi_sync_master_wait_sessions = 0;
+unsigned long rpl_semi_sync_master_back_wait_pos = 0;
+unsigned long rpl_semi_sync_master_trx_wait_time = 0;
+unsigned long long rpl_semi_sync_master_trx_wait_num = 0;
+unsigned long rpl_semi_sync_master_net_wait_time = 0;
+unsigned long long rpl_semi_sync_master_net_wait_num = 0;
+unsigned long rpl_semi_sync_master_clients = 0;
+unsigned long long rpl_semi_sync_master_net_wait_total_time = 0;
+unsigned long long rpl_semi_sync_master_trx_wait_total_time = 0;
+
+
+static int getWaitTime(const struct timeval& start_tv);
+
+/*******************************************************************************
+ *
+ * <ActiveTranx> class : manage all active transaction nodes
+ *
+ ******************************************************************************/
+
+ActiveTranx::ActiveTranx(int max_connections,
+ pthread_mutex_t *lock,
+ unsigned long trace_level)
+ : Trace(trace_level), num_transactions_(max_connections),
+ num_entries_(max_connections << 1),
+ lock_(lock)
+{
+ /* Allocate the memory for the array */
+ node_array_ = new TranxNode[num_transactions_];
+ for (int idx = 0; idx < num_transactions_; ++idx)
+ {
+ node_array_[idx].log_pos_ = 0;
+ node_array_[idx].hash_next_ = NULL;
+ node_array_[idx].next_ = node_array_ + idx + 1;
+
+ node_array_[idx].log_name_ = new char[FN_REFLEN];
+ node_array_[idx].log_name_[0] = '\x0';
+ }
+ node_array_[num_transactions_-1].next_ = NULL;
+
+ /* All nodes in the array go to the pool initially. */
+ free_pool_ = node_array_;
+
+ /* No transactions are in the list initially. */
+ trx_front_ = NULL;
+ trx_rear_ = NULL;
+
+ /* Create the hash table to find a transaction's ending event. */
+ trx_htb_ = new TranxNode *[num_entries_];
+ for (int idx = 0; idx < num_entries_; ++idx)
+ trx_htb_[idx] = NULL;
+
+ sql_print_information("Semi-sync replication initialized for %d "
+ "transactions.", num_transactions_);
+}
+
+ActiveTranx::~ActiveTranx()
+{
+ for (int idx = 0; idx < num_transactions_; ++idx)
+ {
+ delete node_array_[idx].log_name_;
+ node_array_[idx].log_name_ = NULL;
+ }
+
+ delete [] node_array_;
+ delete [] trx_htb_;
+
+ node_array_ = NULL;
+ trx_htb_ = NULL;
+ num_transactions_ = 0;
+ num_entries_ = 0;
+}
+
+unsigned int ActiveTranx::calc_hash(const unsigned char *key,
+ unsigned int length)
+{
+ unsigned int nr = 1, nr2 = 4;
+
+ /* The hash implementation comes from calc_hashnr() in mysys/hash.c. */
+ while (length--)
+ {
+ nr ^= (((nr & 63)+nr2)*((unsigned int) (unsigned char) *key++))+ (nr << 8);
+ nr2 += 3;
+ }
+ return((unsigned int) nr);
+}
+
+unsigned int ActiveTranx::get_hash_value(const char *log_file_name,
+ my_off_t log_file_pos)
+{
+ unsigned int hash1 = calc_hash((const unsigned char *)log_file_name,
+ strlen(log_file_name));
+ unsigned int hash2 = calc_hash((const unsigned char *)(&log_file_pos),
+ sizeof(log_file_pos));
+
+ return (hash1 + hash2) % num_entries_;
+}
+
+ActiveTranx::TranxNode* ActiveTranx::alloc_tranx_node()
+{
+ TranxNode *ptr = free_pool_;
+
+ if (free_pool_)
+ {
+ free_pool_ = free_pool_->next_;
+ ptr->next_ = NULL;
+ ptr->hash_next_ = NULL;
+ }
+
+ return ptr;
+}
+
+int ActiveTranx::compare(const char *log_file_name1, my_off_t log_file_pos1,
+ const char *log_file_name2, my_off_t log_file_pos2)
+{
+ int cmp = strcmp(log_file_name1, log_file_name2);
+
+ if (cmp != 0)
+ return cmp;
+
+ if (log_file_pos1 > log_file_pos2)
+ return 1;
+ else if (log_file_pos1 < log_file_pos2)
+ return -1;
+ return 0;
+}
+
+int ActiveTranx::insert_tranx_node(const char *log_file_name,
+ my_off_t log_file_pos)
+{
+ const char *kWho = "ActiveTranx:insert_tranx_node";
+ TranxNode *ins_node;
+ int result = 0;
+ unsigned int hash_val;
+
+ function_enter(kWho);
+
+ ins_node = alloc_tranx_node();
+ if (!ins_node)
+ {
+ sql_print_error("%s: transaction node allocation failed for: (%s, %lu)",
+ kWho, log_file_name, (unsigned long)log_file_pos);
+ result = -1;
+ goto l_end;
+ }
+
+ /* insert the binlog position in the active transaction list. */
+ strcpy(ins_node->log_name_, log_file_name);
+ ins_node->log_pos_ = log_file_pos;
+
+ if (!trx_front_)
+ {
+ /* The list is empty. */
+ trx_front_ = trx_rear_ = ins_node;
+ }
+ else
+ {
+ int cmp = compare(ins_node, trx_rear_);
+ if (cmp > 0)
+ {
+ /* Compare with the tail first. If the transaction happens later in
+ * binlog, then make it the new tail.
+ */
+ trx_rear_->next_ = ins_node;
+ trx_rear_ = ins_node;
+ }
+ else
+ {
+ /* Otherwise, it is an error because the transaction should hold the
+ * mysql_bin_log.LOCK_log when appending events.
+ */
+ sql_print_error("%s: binlog write out-of-order, tail (%s, %lu), "
+ "new node (%s, %lu)", kWho,
+ trx_rear_->log_name_, (unsigned long)trx_rear_->log_pos_,
+ ins_node->log_name_, (unsigned long)ins_node->log_pos_);
+ result = -1;
+ goto l_end;
+ }
+ }
+
+ hash_val = get_hash_value(ins_node->log_name_, ins_node->log_pos_);
+ ins_node->hash_next_ = trx_htb_[hash_val];
+ trx_htb_[hash_val] = ins_node;
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: insert (%s, %lu) in entry(%u)", kWho,
+ ins_node->log_name_, (unsigned long)ins_node->log_pos_,
+ hash_val);
+
+ l_end:
+ return function_exit(kWho, result);
+}
+
+bool ActiveTranx::is_tranx_end_pos(const char *log_file_name,
+ my_off_t log_file_pos)
+{
+ const char *kWho = "ActiveTranx::is_tranx_end_pos";
+ function_enter(kWho);
+
+ unsigned int hash_val = get_hash_value(log_file_name, log_file_pos);
+ TranxNode *entry = trx_htb_[hash_val];
+
+ while (entry != NULL)
+ {
+ if (compare(entry, log_file_name, log_file_pos) == 0)
+ break;
+
+ entry = entry->hash_next_;
+ }
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: probe (%s, %lu) in entry(%u)", kWho,
+ log_file_name, (unsigned long)log_file_pos, hash_val);
+
+ function_exit(kWho, (entry != NULL));
+ return (entry != NULL);
+}
+
+int ActiveTranx::clear_active_tranx_nodes(const char *log_file_name,
+ my_off_t log_file_pos)
+{
+ const char *kWho = "ActiveTranx::::clear_active_tranx_nodes";
+ TranxNode *new_front;
+
+ function_enter(kWho);
+
+ if (log_file_name != NULL)
+ {
+ new_front = trx_front_;
+
+ while (new_front)
+ {
+ if (compare(new_front, log_file_name, log_file_pos) > 0)
+ break;
+ new_front = new_front->next_;
+ }
+ }
+ else
+ {
+ /* If log_file_name is NULL, clear everything. */
+ new_front = NULL;
+ }
+
+ if (new_front == NULL)
+ {
+ /* No active transaction nodes after the call. */
+
+ /* Clear the hash table. */
+ memset(trx_htb_, 0, num_entries_ * sizeof(TranxNode *));
+
+ /* Clear the active transaction list. */
+ if (trx_front_ != NULL)
+ {
+ trx_rear_->next_ = free_pool_;
+ free_pool_ = trx_front_;
+ trx_front_ = NULL;
+ trx_rear_ = NULL;
+ }
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: free all nodes back to free list", kWho);
+ }
+ else if (new_front != trx_front_)
+ {
+ TranxNode *curr_node, *next_node;
+
+ /* Delete all transaction nodes before the confirmation point. */
+ int n_frees = 0;
+ curr_node = trx_front_;
+ while (curr_node != new_front)
+ {
+ next_node = curr_node->next_;
+
+ /* Put the node in the memory pool. */
+ curr_node->next_ = free_pool_;
+ free_pool_ = curr_node;
+ n_frees++;
+
+ /* Remove the node from the hash table. */
+ unsigned int hash_val = get_hash_value(curr_node->log_name_, curr_node->log_pos_);
+ TranxNode **hash_ptr = &(trx_htb_[hash_val]);
+ while ((*hash_ptr) != NULL)
+ {
+ if ((*hash_ptr) == curr_node)
+ {
+ (*hash_ptr) = curr_node->hash_next_;
+ break;
+ }
+ hash_ptr = &((*hash_ptr)->hash_next_);
+ }
+
+ curr_node = next_node;
+ }
+
+ trx_front_ = new_front;
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: free %d nodes back until pos (%s, %lu)",
+ kWho, n_frees,
+ trx_front_->log_name_, (unsigned long)trx_front_->log_pos_);
+ }
+
+ return function_exit(kWho, 0);
+}
+
+
+/*******************************************************************************
+ *
+ * <ReplSemiSyncMaster> class: the basic code layer for sync-replication master.
+ * <ReplSemiSyncSlave> class: the basic code layer for sync-replication slave.
+ *
+ * The most important functions during semi-syn replication listed:
+ *
+ * Master:
+ * . reportReplyBinlog(): called by the binlog dump thread when it receives
+ * the slave's status information.
+ * . updateSyncHeader(): based on transaction waiting information, decide
+ * whether to request the slave to reply.
+ * . readSlaveReply(): read the slave's sync reply and decide how to
+ * resume the waiting transaction threads.
+ * . writeTraxInBinlog(): called by the transaction thread when it finishes
+ * writing all transaction events in binlog.
+ * . commitTrx(): transaction thread wait for the slave reply.
+ *
+ * Slave:
+ * . slaveReadSyncHeader(): read the semi-sync header from the master, get the
+ * sync status and get the payload for events.
+ * . slaveReply(): reply to the master about the replication progress.
+ *
+ ******************************************************************************/
+
+ReplSemiSyncMaster::ReplSemiSyncMaster()
+ : active_tranxs_(NULL),
+ init_done_(false),
+ reply_file_name_inited_(false),
+ reply_file_pos_(0L),
+ wait_file_name_inited_(false),
+ wait_file_pos_(0),
+ master_enabled_(false),
+ wait_timeout_(0L),
+ state_(0),
+ enabled_transactions_(0),
+ disabled_transactions_(0),
+ switched_off_times_(0),
+ timefunc_fails_(0),
+ wait_sessions_(0),
+ wait_backtraverse_(0),
+ total_trx_wait_num_(0),
+ total_trx_wait_time_(0),
+ total_net_wait_num_(0),
+ total_net_wait_time_(0),
+ max_transactions_(0L)
+{
+ strcpy(reply_file_name_, "");
+ strcpy(wait_file_name_, "");
+}
+
+int ReplSemiSyncMaster::initObject()
+{
+ int result;
+ const char *kWho = "ReplSemiSyncMaster::initObject";
+
+ if (init_done_)
+ {
+ fprintf(stderr, "%s called twice\n", kWho);
+ return 1;
+ }
+ init_done_ = true;
+
+ /* References to the parameter works after set_options(). */
+ setWaitTimeout(rpl_semi_sync_master_timeout);
+ setTraceLevel(rpl_semi_sync_master_trace_level);
+ max_transactions_ = (int)max_connections;
+
+ /* Mutex initialization can only be done after MY_INIT(). */
+ pthread_mutexattr_t mutexattr;
+ pthread_mutexattr_init(&mutexattr);
+ pthread_mutexattr_settype(&mutexattr,
+ PTHREAD_MUTEX_FAST_NP);
+// pthread_mutex_init(&LOCK_binlog_, MY_MUTEX_INIT_FAST);
+ pthread_mutex_init(&LOCK_binlog_, &mutexattr);
+ pthread_cond_init(&COND_binlog_send_, NULL);
+
+ if (rpl_semi_sync_master_enabled)
+ result = enableMaster();
+ else
+ result = disableMaster();
+
+ return result;
+}
+
+int ReplSemiSyncMaster::enableMaster()
+{
+ int result = 0;
+
+ /* Must have the lock when we do enable of disable. */
+ lock();
+
+ if (!getMasterEnabled())
+ {
+ active_tranxs_ = new ActiveTranx(max_connections,
+ &LOCK_binlog_,
+ trace_level_);
+ if (active_tranxs_ != NULL)
+ {
+ commit_file_name_inited_ = false;
+ reply_file_name_inited_ = false;
+ wait_file_name_inited_ = false;
+
+ set_master_enabled(true);
+ state_ = true;
+ sql_print_information("Semi-sync replication enabled on the master.");
+ }
+ else
+ {
+ sql_print_information("Semi-sync replication not able to allocate memory.");
+ result = -1;
+ }
+ }
+
+ unlock();
+
+ return result;
+}
+
+int ReplSemiSyncMaster::disableMaster()
+{
+ /* Must have the lock when we do enable of disable. */
+ lock();
+
+ if (getMasterEnabled())
+ {
+ /* Switch off the semi-sync first so that waiting transaction will be
+ * waken up.
+ */
+ switch_off();
+
+ assert(active_tranxs_ != NULL);
+ delete active_tranxs_;
+ active_tranxs_ = NULL;
+
+ reply_file_name_inited_ = false;
+ wait_file_name_inited_ = false;
+ commit_file_name_inited_ = false;
+
+ set_master_enabled(false);
+ sql_print_information("Semi-sync replication disabled on the master.");
+ }
+
+ unlock();
+
+ return 0;
+}
+
+ReplSemiSyncMaster::~ReplSemiSyncMaster()
+{
+ if (init_done_)
+ {
+ pthread_mutex_destroy(&LOCK_binlog_);
+ pthread_cond_destroy(&COND_binlog_send_);
+ }
+
+ delete active_tranxs_;
+}
+
+void ReplSemiSyncMaster::lock()
+{
+ pthread_mutex_lock(&LOCK_binlog_);
+}
+
+void ReplSemiSyncMaster::unlock()
+{
+ pthread_mutex_unlock(&LOCK_binlog_);
+}
+
+void ReplSemiSyncMaster::cond_broadcast()
+{
+ pthread_cond_broadcast(&COND_binlog_send_);
+}
+
+int ReplSemiSyncMaster::cond_timewait(struct timespec *wait_time)
+{
+ const char *kWho = "ReplSemiSyncMaster::cond_timewait()";
+ int wait_res;
+
+ function_enter(kWho);
+ wait_res = pthread_cond_timedwait(&COND_binlog_send_,
+ &LOCK_binlog_, wait_time);
+ return function_exit(kWho, wait_res);
+}
+
+void ReplSemiSyncMaster::add_slave()
+{
+ lock();
+ rpl_semi_sync_master_clients++;
+ unlock();
+}
+
+void ReplSemiSyncMaster::remove_slave()
+{
+ lock();
+ rpl_semi_sync_master_clients--;
+ unlock();
+}
+
+bool ReplSemiSyncMaster::is_semi_sync_slave()
+{
+ int null_value;
+ long long val= 0;
+ get_user_var_int("rpl_semi_sync_slave", &val, &null_value);
+ return val;
+}
+
+int ReplSemiSyncMaster::reportReplyBinlog(const char *log_file_pos)
+{
+ char log_name[FN_REFLEN];
+ char *endptr;
+ my_off_t log_pos= strtoull(log_file_pos, &endptr, 10);
+ if (!log_pos || !endptr || *endptr != ':' )
+ return 1;
+ endptr++; // skip the ':' seperator
+ strncpy(log_name, endptr, FN_REFLEN);
+ uint32 server_id= 0;
+ return reportReplyBinlog(server_id, log_name, log_pos);
+}
+
+int ReplSemiSyncMaster::reportReplyBinlog(uint32 server_id,
+ const char *log_file_name,
+ my_off_t log_file_pos)
+{
+ const char *kWho = "ReplSemiSyncMaster::reportReplyBinlog";
+ int cmp;
+ bool can_release_threads = false;
+ bool need_copy_send_pos = true;
+
+ if (!(getMasterEnabled()))
+ return 0;
+
+ function_enter(kWho);
+
+ lock();
+
+ /* This is the real check inside the mutex. */
+ if (!getMasterEnabled())
+ goto l_end;
+
+ if (!is_on())
+ /* We check to see whether we can switch semi-sync ON. */
+ try_switch_on(server_id, log_file_name, log_file_pos);
+
+ /* The position should increase monotonically, if there is only one
+ * thread sending the binlog to the slave.
+ * In reality, to improve the transaction availability, we allow multiple
+ * sync replication slaves. So, if any one of them get the transaction,
+ * the transaction session in the primary can move forward.
+ */
+ if (reply_file_name_inited_)
+ {
+ cmp = ActiveTranx::compare(log_file_name, log_file_pos,
+ reply_file_name_, reply_file_pos_);
+
+ /* If the requested position is behind the sending binlog position,
+ * would not adjust sending binlog position.
+ * We based on the assumption that there are multiple semi-sync slave,
+ * and at least one of them shou/ld be up to date.
+ * If all semi-sync slaves are behind, at least initially, the primary
+ * can find the situation after the waiting timeout. After that, some
+ * slaves should catch up quickly.
+ */
+ if (cmp < 0)
+ {
+ /* If the position is behind, do not copy it. */
+ need_copy_send_pos = false;
+ }
+ }
+
+ if (need_copy_send_pos)
+ {
+ strcpy(reply_file_name_, log_file_name);
+ reply_file_pos_ = log_file_pos;
+ reply_file_name_inited_ = true;
+
+ /* Remove all active transaction nodes before this point. */
+ assert(active_tranxs_ != NULL);
+ active_tranxs_->clear_active_tranx_nodes(log_file_name, log_file_pos);
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: Got reply at (%s, %lu)", kWho,
+ log_file_name, (unsigned long)log_file_pos);
+ }
+
+ if (wait_sessions_ > 0)
+ {
+ /* Let us check if some of the waiting threads doing a trx
+ * commit can now proceed.
+ */
+ cmp = ActiveTranx::compare(reply_file_name_, reply_file_pos_,
+ wait_file_name_, wait_file_pos_);
+ if (cmp >= 0)
+ {
+ /* Yes, at least one waiting thread can now proceed:
+ * let us release all waiting threads with a broadcast
+ */
+ can_release_threads = true;
+ wait_file_name_inited_ = false;
+ }
+ }
+
+ l_end:
+ unlock();
+
+ if (can_release_threads)
+ {
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: signal all waiting threads.", kWho);
+
+ cond_broadcast();
+ }
+
+ return function_exit(kWho, 0);
+}
+
+int ReplSemiSyncMaster::commitTrx(const char* trx_wait_binlog_name,
+ my_off_t trx_wait_binlog_pos)
+{
+ const char *kWho = "ReplSemiSyncMaster::commitTrx";
+
+ function_enter(kWho);
+
+ if (getMasterEnabled() && trx_wait_binlog_name)
+ {
+ struct timeval start_tv;
+ struct timespec abstime;
+ int wait_result, start_time_err;
+
+ start_time_err = gettimeofday(&start_tv, 0);
+
+ /* Acquire the mutex. */
+ lock();
+
+ /* This is the real check inside the mutex. */
+ if (!getMasterEnabled() || !is_on() || !rpl_semi_sync_master_clients)
+ goto l_end;
+
+ if (trace_level_ & kTraceDetail)
+ {
+ sql_print_information("%s: wait pos (%s, %lu), repl(%d)\n", kWho,
+ trx_wait_binlog_name, (unsigned long)trx_wait_binlog_pos,
+ (int)is_on());
+ }
+
+ while (is_on())
+ {
+ int cmp = ActiveTranx::compare(reply_file_name_, reply_file_pos_,
+ trx_wait_binlog_name, trx_wait_binlog_pos);
+ if (cmp >= 0)
+ {
+ /* We have already sent the relevant binlog to the slave: no need to
+ * wait here.
+ */
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: Binlog reply is ahead (%s, %lu),",
+ kWho, reply_file_name_, (unsigned long)reply_file_pos_);
+ break;
+ }
+
+ /* Let us update the info about the minimum binlog position of waiting
+ * threads.
+ */
+ if (wait_file_name_inited_)
+ {
+ cmp = ActiveTranx::compare(trx_wait_binlog_name, trx_wait_binlog_pos,
+ wait_file_name_, wait_file_pos_);
+ if (cmp <= 0)
+ {
+ /* This thd has a lower position, let's update the minimum info. */
+ strcpy(wait_file_name_, trx_wait_binlog_name);
+ wait_file_pos_ = trx_wait_binlog_pos;
+
+ wait_backtraverse_++;
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: move back wait position (%s, %lu),",
+ kWho, wait_file_name_, (unsigned long)wait_file_pos_);
+ }
+ }
+ else
+ {
+ strcpy(wait_file_name_, trx_wait_binlog_name);
+ wait_file_pos_ = trx_wait_binlog_pos;
+ wait_file_name_inited_ = true;
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: init wait position (%s, %lu),",
+ kWho, wait_file_name_, (unsigned long)wait_file_pos_);
+ }
+
+ if (start_time_err == 0)
+ {
+ int diff_usecs = start_tv.tv_usec + wait_timeout_ * TIME_THOUSAND;
+
+ /* Calcuate the waiting period. */
+ abstime.tv_sec = start_tv.tv_sec;
+ if (diff_usecs < TIME_MILLION)
+ {
+ abstime.tv_nsec = diff_usecs * TIME_THOUSAND;
+ }
+ else
+ {
+ while (diff_usecs >= TIME_MILLION)
+ {
+ abstime.tv_sec++;
+ diff_usecs -= TIME_MILLION;
+ }
+ abstime.tv_nsec = diff_usecs * TIME_THOUSAND;
+ }
+
+ /* In semi-synchronous replication, we wait until the binlog-dump
+ * thread has received the reply on the relevant binlog segment from the
+ * replication slave.
+ *
+ * Let us suspend this thread to wait on the condition;
+ * when replication has progressed far enough, we will release
+ * these waiting threads.
+ */
+ wait_sessions_++;
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: wait %lu ms for binlog sent (%s, %lu)",
+ kWho, wait_timeout_,
+ wait_file_name_, (unsigned long)wait_file_pos_);
+
+ wait_result = cond_timewait(&abstime);
+ wait_sessions_--;
+
+ if (wait_result != 0)
+ {
+ if (trace_level_ & kTraceGeneral)
+ {
+ /* This is a real wait timeout. */
+ sql_print_warning("Replication semi-sync not sent binlog to "
+ "slave within the timeout %lu ms - OFF.",
+ wait_timeout_);
+ sql_print_warning(" semi-sync up to file %s, position %lu",
+ reply_file_name_, (unsigned long)reply_file_pos_);
+ sql_print_warning(" transaction needs file %s, position %lu",
+ trx_wait_binlog_name, (unsigned long)trx_wait_binlog_pos);
+ }
+ total_wait_timeouts_++;
+
+ /* switch semi-sync off */
+ switch_off();
+ }
+ else
+ {
+ int wait_time;
+
+ wait_time = getWaitTime(start_tv);
+ if (wait_time < 0)
+ {
+ if (trace_level_ & kTraceGeneral)
+ {
+ /* This is a time/gettimeofday function call error. */
+ sql_print_error("Replication semi-sync gettimeofday fail1 at "
+ "wait position (%s, %lu)",
+ trx_wait_binlog_name, (unsigned long)trx_wait_binlog_pos);
+ }
+ timefunc_fails_++;
+ }
+ else
+ {
+ total_trx_wait_num_++;
+ total_trx_wait_time_ += wait_time;
+ }
+ }
+ }
+ else
+ {
+ if (trace_level_ & kTraceGeneral)
+ {
+ /* This is a gettimeofday function call error. */
+ sql_print_error("Replication semi-sync gettimeofday fail2 at "
+ "wait position (%s, %lu)",
+ trx_wait_binlog_name, (unsigned long)trx_wait_binlog_pos);
+ }
+ timefunc_fails_++;
+
+ /* switch semi-sync off */
+ switch_off();
+ }
+ }
+
+ l_end:
+ /* Update the status counter. */
+ if (is_on())
+ enabled_transactions_++;
+ else
+ disabled_transactions_++;
+
+ unlock();
+ }
+
+ return function_exit(kWho, 0);
+}
+
+/* Indicate that semi-sync replication is OFF now.
+ *
+ * What should we do when it is disabled? The problem is that we want
+ * the semi-sync replication enabled again when the slave catches up
+ * later. But, it is not that easy to detect that the slave has caught
+ * up. This is caused by the fact that MySQL's replication protocol is
+ * asynchronous, meaning that if the master does not use the semi-sync
+ * protocol, the slave would not send anything to the master.
+ * Still, if the master is sending (N+1)-th event, we assume that it is
+ * an indicator that the slave has received N-th event and earlier ones.
+ *
+ * If semi-sync is disabled, all transactions still update the wait
+ * position with the last position in binlog. But no transactions will
+ * wait for confirmations and the active transaction list would not be
+ * maintained. In binlog dump thread, updateSyncHeader() checks whether
+ * the current sending event catches up with last wait position. If it
+ * does match, semi-sync will be switched on again.
+ */
+int ReplSemiSyncMaster::switch_off()
+{
+ const char *kWho = "ReplSemiSyncMaster::switch_off";
+ int result;
+
+ function_enter(kWho);
+ state_ = false;
+
+ /* Clear the active transaction list. */
+ assert(active_tranxs_ != NULL);
+ result = active_tranxs_->clear_active_tranx_nodes(NULL, 0);
+
+ switched_off_times_++;
+ wait_file_name_inited_ = false;
+ reply_file_name_inited_ = false;
+ commit_file_name_inited_ = false;
+ cond_broadcast(); /* wake up all waiting threads */
+
+ return function_exit(kWho, result);
+}
+
+int ReplSemiSyncMaster::try_switch_on(int server_id,
+ const char *log_file_name,
+ my_off_t log_file_pos)
+{
+ const char *kWho = "ReplSemiSyncMaster::try_switch_on";
+ bool semi_sync_on = false;
+
+ function_enter(kWho);
+
+ /* If the current sending event's position is larger than or equal to the
+ * 'largest' commit transaction binlog position, the slave is already
+ * catching up now and we can switch semi-sync on here.
+ * If commit_file_name_inited_ indicates there are no recent transactions,
+ * we can enable semi-sync immediately.
+ */
+ if (commit_file_name_inited_)
+ {
+ int cmp = ActiveTranx::compare(log_file_name, log_file_pos,
+ commit_file_name_, commit_file_pos_);
+ semi_sync_on = (cmp >= 0);
+ }
+ else
+ {
+ semi_sync_on = true;
+ }
+
+ if (semi_sync_on)
+ {
+ /* Switch semi-sync replication on. */
+ state_ = true;
+
+ if (trace_level_ & kTraceGeneral)
+ sql_print_information("%s switch semi-sync ON with server(%d) "
+ "at (%s, %lu), repl(%d)",
+ kWho, server_id, log_file_name,
+ (unsigned long)log_file_pos, (int)is_on());
+ }
+
+ return function_exit(kWho, 0);
+}
+
+int ReplSemiSyncMaster::reserveSyncHeader(unsigned char *header,
+ unsigned long size)
+{
+ const char *kWho = "ReplSemiSyncMaster::reserveSyncHeader";
+ function_enter(kWho);
+
+ int hlen=0;
+ if (!is_semi_sync_slave())
+ {
+ hlen= 0;
+ }
+ else
+ {
+ /* No enough space for the extra header, disable semi-sync master */
+ if (sizeof(kSyncHeader) > size)
+ {
+ disableMaster();
+ return 0;
+ }
+
+ /* Set the magic number and the sync status. By default, no sync
+ * is required.
+ */
+ memcpy(header, kSyncHeader, sizeof(kSyncHeader));
+ hlen= sizeof(kSyncHeader);
+ }
+ return function_exit(kWho, hlen);
+}
+
+int ReplSemiSyncMaster::updateSyncHeader(unsigned char *packet,
+ const char *log_file_name,
+ my_off_t log_file_pos,
+ uint32 server_id)
+{
+ const char *kWho = "ReplSemiSyncMaster::updateSyncHeader";
+ int cmp = 0;
+ bool sync = false;
+
+ /* If the semi-sync master is not enabled, or the slave is not a semi-sync
+ * target, do not request replies from the slave.
+ */
+ if (!getMasterEnabled() || !is_semi_sync_slave())
+ {
+ sync = false;
+ return 0;
+ }
+
+ function_enter(kWho);
+
+ lock();
+
+ /* This is the real check inside the mutex. */
+ if (!getMasterEnabled())
+ {
+ sync = false;
+ goto l_end;
+ }
+
+ if (is_on())
+ {
+ /* semi-sync is ON */
+ sync = false; /* No sync unless a transaction is involved. */
+
+ if (reply_file_name_inited_)
+ {
+ cmp = ActiveTranx::compare(log_file_name, log_file_pos,
+ reply_file_name_, reply_file_pos_);
+ if (cmp <= 0)
+ {
+ /* If we have already got the reply for the event, then we do
+ * not need to sync the transaction again.
+ */
+ goto l_end;
+ }
+ }
+
+ if (wait_file_name_inited_)
+ {
+ cmp = ActiveTranx::compare(log_file_name, log_file_pos,
+ wait_file_name_, wait_file_pos_);
+ }
+ else
+ {
+ cmp = 1;
+ }
+
+ /* If we are already waiting for some transaction replies which
+ * are later in binlog, do not wait for this one event.
+ */
+ if (cmp >= 0)
+ {
+ /*
+ * We only wait if the event is a transaction's ending event.
+ */
+ assert(active_tranxs_ != NULL);
+ sync = active_tranxs_->is_tranx_end_pos(log_file_name,
+ log_file_pos);
+ }
+ }
+ else
+ {
+ if (commit_file_name_inited_)
+ {
+ int cmp = ActiveTranx::compare(log_file_name, log_file_pos,
+ commit_file_name_, commit_file_pos_);
+ sync = (cmp >= 0);
+ }
+ else
+ {
+ sync = true;
+ }
+ }
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: server(%d), (%s, %lu) sync(%d), repl(%d)",
+ kWho, server_id, log_file_name,
+ (unsigned long)log_file_pos, sync, (int)is_on());
+
+ l_end:
+ unlock();
+
+ /* We do not need to clear sync flag because we set it to 0 when we
+ * reserve the packet header.
+ */
+ if (sync)
+ (packet)[2] = kPacketFlagSync;
+
+ return function_exit(kWho, 0);
+}
+
+int ReplSemiSyncMaster::writeTranxInBinlog(const char* log_file_name,
+ my_off_t log_file_pos)
+{
+ const char *kWho = "ReplSemiSyncMaster::writeTranxInBinlog";
+ int result = 0;
+
+ function_enter(kWho);
+
+ lock();
+
+ /* This is the real check inside the mutex. */
+ if (!getMasterEnabled())
+ goto l_end;
+
+ /* Update the 'largest' transaction commit position seen so far even
+ * though semi-sync is switched off.
+ * It is much better that we update commit_file_* here, instead of
+ * inside commitTrx(). This is mostly because updateSyncHeader()
+ * will watch for commit_file_* to decide whether to switch semi-sync
+ * on. The detailed reason is explained in function updateSyncHeader().
+ */
+ if (commit_file_name_inited_)
+ {
+ int cmp = ActiveTranx::compare(log_file_name, log_file_pos,
+ commit_file_name_, commit_file_pos_);
+ if (cmp > 0)
+ {
+ /* This is a larger position, let's update the maximum info. */
+ strcpy(commit_file_name_, log_file_name);
+ commit_file_pos_ = log_file_pos;
+ }
+ }
+ else
+ {
+ strcpy(commit_file_name_, log_file_name);
+ commit_file_pos_ = log_file_pos;
+ commit_file_name_inited_ = true;
+ }
+
+ if (is_on())
+ {
+ assert(active_tranxs_ != NULL);
+ result = active_tranxs_->insert_tranx_node(log_file_name, log_file_pos);
+ }
+
+ l_end:
+ unlock();
+
+ return function_exit(kWho, result);
+}
+
+int ReplSemiSyncMaster::resetMaster()
+{
+ const char *kWho = "ReplSemiSyncMaster::resetMaster";
+ int result = 0;
+
+ function_enter(kWho);
+
+
+ lock();
+
+ state_ = getMasterEnabled()? 1 : 0;
+
+ wait_file_name_inited_ = false;
+ reply_file_name_inited_ = false;
+ commit_file_name_inited_ = false;
+
+ enabled_transactions_ = 0;
+ disabled_transactions_ = 0;
+ switched_off_times_ = 0;
+ timefunc_fails_ = 0;
+ wait_sessions_ = 0;
+ wait_backtraverse_ = 0;
+ total_trx_wait_num_ = 0;
+ total_trx_wait_time_ = 0;
+ total_net_wait_num_ = 0;
+ total_net_wait_time_ = 0;
+
+ unlock();
+
+ return function_exit(kWho, result);
+}
+
+void ReplSemiSyncMaster::setExportStats()
+{
+ lock();
+
+ rpl_semi_sync_master_status = state_ ? 1 : 0;
+ rpl_semi_sync_master_yes_transactions = enabled_transactions_;
+ rpl_semi_sync_master_no_transactions = disabled_transactions_;
+ rpl_semi_sync_master_off_times = switched_off_times_;
+ rpl_semi_sync_master_timefunc_fails = timefunc_fails_;
+ rpl_semi_sync_master_num_timeouts = total_wait_timeouts_;
+ rpl_semi_sync_master_wait_sessions = wait_sessions_;
+ rpl_semi_sync_master_back_wait_pos = wait_backtraverse_;
+ rpl_semi_sync_master_trx_wait_num = total_trx_wait_num_;
+ rpl_semi_sync_master_trx_wait_time =
+ ((total_trx_wait_num_) ?
+ (unsigned long)((double)total_trx_wait_time_ /
+ ((double)total_trx_wait_num_)) : 0);
+ rpl_semi_sync_master_net_wait_num = total_net_wait_num_;
+ rpl_semi_sync_master_net_wait_time =
+ ((total_net_wait_num_) ?
+ (unsigned long)((double)total_net_wait_time_ /
+ ((double)total_net_wait_num_)) : 0);
+
+ rpl_semi_sync_master_net_wait_total_time = total_net_wait_time_;
+ rpl_semi_sync_master_trx_wait_total_time = total_trx_wait_time_;
+
+ unlock();
+}
+
+/* Get the waiting time given the wait's staring time.
+ *
+ * Return:
+ * >= 0: the waiting time in microsecons(us)
+ * < 0: error in gettimeofday or time back traverse
+ */
+static int getWaitTime(const struct timeval& start_tv)
+{
+ unsigned long long start_usecs, end_usecs;
+ struct timeval end_tv;
+ int end_time_err;
+
+ /* Starting time in microseconds(us). */
+ start_usecs = start_tv.tv_sec * TIME_MILLION + start_tv.tv_usec;
+
+ /* Get the wait time interval. */
+ end_time_err = gettimeofday(&end_tv, 0);
+
+ /* Ending time in microseconds(us). */
+ end_usecs = end_tv.tv_sec * TIME_MILLION + end_tv.tv_usec;
+
+ if (end_time_err != 0 || end_usecs < start_usecs)
+ return -1;
+
+ return (int)(end_usecs - start_usecs);
+}
=== added file 'plugin/semisync/semisync_master.h'
--- a/plugin/semisync/semisync_master.h 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync_master.h 2009-05-31 08:35:06 +0000
@@ -0,0 +1,380 @@
+/* Copyright (C) 2007 Google Inc.
+ Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#ifndef SEMISYNC_MASTER_H
+#define SEMISYNC_MASTER_H
+
+#include "semisync.h"
+
+/**
+ This class manages memory for active transaction list.
+
+ We record each active transaction with a TranxNode. Because each
+ session can only have only one open transaction, the total active
+ transaction nodes can not exceed the maximum sessions. Currently
+ in MySQL, sessions are the same as connections.
+*/
+class ActiveTranx
+ :public Trace {
+private:
+ struct TranxNode {
+ char *log_name_;
+ my_off_t log_pos_;
+ struct TranxNode *next_; /* the next node in the sorted list */
+ struct TranxNode *hash_next_; /* the next node during hash collision */
+ };
+
+ /* The following data structure maintains an active transaction list. */
+ TranxNode *node_array_;
+ TranxNode *free_pool_;
+
+ /* These two record the active transaction list in sort order. */
+ TranxNode *trx_front_, *trx_rear_;
+
+ TranxNode **trx_htb_; /* A hash table on active transactions. */
+
+ int num_transactions_; /* maximum transactions */
+ int num_entries_; /* maximum hash table entries */
+ pthread_mutex_t *lock_; /* mutex lock */
+
+ inline void assert_lock_owner();
+
+ inline TranxNode* alloc_tranx_node();
+
+ inline unsigned int calc_hash(const unsigned char *key,unsigned int length);
+ unsigned int get_hash_value(const char *log_file_name, my_off_t log_file_pos);
+
+ int compare(const char *log_file_name1, my_off_t log_file_pos1,
+ const TranxNode *node2) {
+ return compare(log_file_name1, log_file_pos1,
+ node2->log_name_, node2->log_pos_);
+ }
+ int compare(const TranxNode *node1,
+ const char *log_file_name2, my_off_t log_file_pos2) {
+ return compare(node1->log_name_, node1->log_pos_,
+ log_file_name2, log_file_pos2);
+ }
+ int compare(const TranxNode *node1, const TranxNode *node2) {
+ return compare(node1->log_name_, node1->log_pos_,
+ node2->log_name_, node2->log_pos_);
+ }
+
+public:
+ ActiveTranx(int max_connections, pthread_mutex_t *lock,
+ unsigned long trace_level);
+ ~ActiveTranx();
+
+ /* Insert an active transaction node with the specified position.
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int insert_tranx_node(const char *log_file_name, my_off_t log_file_pos);
+
+ /* Clear the active transaction nodes until(inclusive) the specified
+ * position.
+ * If log_file_name is NULL, everything will be cleared: the sorted
+ * list and the hash table will be reset to empty.
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int clear_active_tranx_nodes(const char *log_file_name,
+ my_off_t log_file_pos);
+
+ /* Given a position, check to see whether the position is an active
+ * transaction's ending position by probing the hash table.
+ */
+ bool is_tranx_end_pos(const char *log_file_name, my_off_t log_file_pos);
+
+ /* Given two binlog positions, compare which one is bigger based on
+ * (file_name, file_position).
+ */
+ static int compare(const char *log_file_name1, my_off_t log_file_pos1,
+ const char *log_file_name2, my_off_t log_file_pos2);
+
+};
+
+/**
+ The extension class for the master of semi-synchronous replication
+*/
+class ReplSemiSyncMaster
+ :public ReplSemiSyncBase {
+ private:
+ ActiveTranx *active_tranxs_; /* active transaction list: the list will
+ be cleared when semi-sync switches off. */
+
+ /* True when initObject has been called */
+ bool init_done_;
+
+ /* This cond variable is signaled when enough binlog has been sent to slave,
+ * so that a waiting trx can return the 'ok' to the client for a commit.
+ */
+ pthread_cond_t COND_binlog_send_;
+
+ /* Mutex that protects the following state variables and the active
+ * transaction list.
+ * Under no cirumstances we can acquire mysql_bin_log.LOCK_log if we are
+ * already holding LOCK_binlog_ because it can cause deadlocks.
+ */
+ pthread_mutex_t LOCK_binlog_;
+
+ /* This is set to true when reply_file_name_ contains meaningful data. */
+ bool reply_file_name_inited_;
+
+ /* The binlog name up to which we have received replies from any slaves. */
+ char reply_file_name_[FN_REFLEN];
+
+ /* The position in that file up to which we have the reply from any slaves. */
+ my_off_t reply_file_pos_;
+
+ /* This is set to true when we know the 'smallest' wait position. */
+ bool wait_file_name_inited_;
+
+ /* NULL, or the 'smallest' filename that a transaction is waiting for
+ * slave replies.
+ */
+ char wait_file_name_[FN_REFLEN];
+
+ /* The smallest position in that file that a trx is waiting for: the trx
+ * can proceed and send an 'ok' to the client when the master has got the
+ * reply from the slave indicating that it already got the binlog events.
+ */
+ my_off_t wait_file_pos_;
+
+ /* This is set to true when we know the 'largest' transaction commit
+ * position in the binlog file.
+ * We always maintain the position no matter whether semi-sync is switched
+ * on switched off. When a transaction wait timeout occurs, semi-sync will
+ * switch off. Binlog-dump thread can use the three fields to detect when
+ * slaves catch up on replication so that semi-sync can switch on again.
+ */
+ bool commit_file_name_inited_;
+
+ /* The 'largest' binlog filename that a commit transaction is seeing. */
+ char commit_file_name_[FN_REFLEN];
+
+ /* The 'largest' position in that file that a commit transaction is seeing. */
+ my_off_t commit_file_pos_;
+
+ /* All global variables which can be set by parameters. */
+ bool master_enabled_; /* semi-sync is enabled on the master */
+ unsigned long wait_timeout_; /* timeout period(ms) during tranx wait */
+
+ /* All status variables. */
+ bool state_; /* whether semi-sync is switched */
+ unsigned long enabled_transactions_; /* semi-sync'ed tansactions */
+ unsigned long disabled_transactions_; /* non-semi-sync'ed tansactions */
+ unsigned long switched_off_times_; /* how many times are switched off? */
+ unsigned long timefunc_fails_; /* how many time function fails? */
+ unsigned long total_wait_timeouts_; /* total number of wait timeouts */
+ unsigned long wait_sessions_; /* how many sessions wait for replies? */
+ unsigned long wait_backtraverse_; /* wait position back traverses */
+ unsigned long long total_trx_wait_num_; /* total trx waits: non-timeout ones */
+ unsigned long long total_trx_wait_time_; /* total trx wait time: in us */
+ unsigned long long total_net_wait_num_; /* total network waits */
+ unsigned long long total_net_wait_time_; /* total network wait time */
+
+ /* The number of maximum active transactions. This should be the same as
+ * maximum connections because MySQL does not do connection sharing now.
+ */
+ int max_transactions_;
+
+ void lock();
+ void unlock();
+ void cond_broadcast();
+ int cond_timewait(struct timespec *wait_time);
+
+ /* Is semi-sync replication on? */
+ bool is_on() {
+ return (state_);
+ }
+
+ void set_master_enabled(bool enabled) {
+ master_enabled_ = enabled;
+ }
+
+ /* Switch semi-sync off because of timeout in transaction waiting. */
+ int switch_off();
+
+ /* Switch semi-sync on when slaves catch up. */
+ int try_switch_on(int server_id,
+ const char *log_file_name, my_off_t log_file_pos);
+
+ public:
+ ReplSemiSyncMaster();
+ ~ReplSemiSyncMaster();
+
+ bool getMasterEnabled() {
+ return master_enabled_;
+ }
+ void setTraceLevel(unsigned long trace_level) {
+ trace_level_ = trace_level;
+ if (active_tranxs_)
+ active_tranxs_->trace_level_ = trace_level;
+ }
+
+ /* Set the transaction wait timeout period, in milliseconds. */
+ void setWaitTimeout(unsigned long wait_timeout) {
+ wait_timeout_ = wait_timeout;
+ }
+
+ /* Initialize this class after MySQL parameters are initialized. this
+ * function should be called once at bootstrap time.
+ */
+ int initObject();
+
+ /* Enable the object to enable semi-sync replication inside the master. */
+ int enableMaster();
+
+ /* Enable the object to enable semi-sync replication inside the master. */
+ int disableMaster();
+
+ /* Add a semi-sync replication slave */
+ void add_slave();
+
+ /* Remove a semi-sync replication slave */
+ void remove_slave();
+
+ /* Is the slave servered by the thread requested semi-sync */
+ bool is_semi_sync_slave();
+
+ int reportReplyBinlog(const char *log_file_pos);
+
+ /* In semi-sync replication, reports up to which binlog position we have
+ * received replies from the slave indicating that it already get the events.
+ *
+ * Input:
+ * server_id - (IN) master server id number
+ * log_file_name - (IN) binlog file name
+ * end_offset - (IN) the offset in the binlog file up to which we have
+ * the replies from the slave
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int reportReplyBinlog(uint32 server_id,
+ const char* log_file_name,
+ my_off_t end_offset);
+
+ /* Commit a transaction in the final step. This function is called from
+ * InnoDB before returning from the low commit. If semi-sync is switch on,
+ * the function will wait to see whether binlog-dump thread get the reply for
+ * the events of the transaction. Remember that this is not a direct wait,
+ * instead, it waits to see whether the binlog-dump thread has reached the
+ * point. If the wait times out, semi-sync status will be switched off and
+ * all other transaction would not wait either.
+ *
+ * Input: (the transaction events' ending binlog position)
+ * trx_wait_binlog_name - (IN) ending position's file name
+ * trx_wait_binlog_pos - (IN) ending position's file offset
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int commitTrx(const char* trx_wait_binlog_name,
+ my_off_t trx_wait_binlog_pos);
+
+ /* Reserve space in the replication event packet header:
+ * . slave semi-sync off: 1 byte - (0)
+ * . slave semi-sync on: 3 byte - (0, 0xef, 0/1}
+ *
+ * Input:
+ * header - (IN) the header buffer
+ * size - (IN) size of the header buffer
+ *
+ * Return:
+ * size of the bytes reserved for header
+ */
+ int reserveSyncHeader(unsigned char *header, unsigned long size);
+
+ /* Update the sync bit in the packet header to indicate to the slave whether
+ * the master will wait for the reply of the event. If semi-sync is switched
+ * off and we detect that the slave is catching up, we switch semi-sync on.
+ *
+ * Input:
+ * packet - (IN) the packet containing the replication event
+ * log_file_name - (IN) the event ending position's file name
+ * log_file_pos - (IN) the event ending position's file offset
+ * server_id - (IN) master server id number
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int updateSyncHeader(unsigned char *packet,
+ const char *log_file_name,
+ my_off_t log_file_pos,
+ uint32 server_id);
+
+ /* Read the slave's reply so that we know how much progress the slave makes
+ * on receive replication events.
+ *
+ * Input:
+ * server_id - (IN) master server id number
+ * read_errmsg - (OUT) error message if an error occurs
+ * read_errno - (OUT) error number if an error occurs
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int readSlaveReply(uint32 server_id, const char **read_errmsg,
+ int *read_errno);
+
+ /* Called when a transaction finished writing binlog events.
+ * . update the 'largest' transactions' binlog event position
+ * . insert the ending position in the active transaction list if
+ * semi-sync is on
+ *
+ * Input: (the transaction events' ending binlog position)
+ * log_file_name - (IN) transaction ending position's file name
+ * log_file_pos - (IN) transaction ending position's file offset
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int writeTranxInBinlog(const char* log_file_name, my_off_t log_file_pos);
+
+ /* Export internal statistics for semi-sync replication. */
+ void setExportStats();
+
+ /* 'reset master' command is issued from the user and semi-sync need to
+ * go off for that.
+ */
+ int resetMaster();
+};
+
+/* System and status variables for the master component */
+extern char rpl_semi_sync_master_enabled;
+extern unsigned long rpl_semi_sync_master_timeout;
+extern unsigned long rpl_semi_sync_master_trace_level;
+extern unsigned long rpl_semi_sync_master_status;
+extern unsigned long rpl_semi_sync_master_yes_transactions;
+extern unsigned long rpl_semi_sync_master_no_transactions;
+extern unsigned long rpl_semi_sync_master_off_times;
+extern unsigned long rpl_semi_sync_master_timefunc_fails;
+extern unsigned long rpl_semi_sync_master_num_timeouts;
+extern unsigned long rpl_semi_sync_master_wait_sessions;
+extern unsigned long rpl_semi_sync_master_back_wait_pos;
+extern unsigned long rpl_semi_sync_master_trx_wait_time;
+extern unsigned long rpl_semi_sync_master_net_wait_time;
+extern unsigned long long rpl_semi_sync_master_net_wait_num;
+extern unsigned long long rpl_semi_sync_master_trx_wait_num;
+extern unsigned long long rpl_semi_sync_master_net_wait_total_time;
+extern unsigned long long rpl_semi_sync_master_trx_wait_total_time;
+extern unsigned long rpl_semi_sync_master_clients;
+
+#endif /* SEMISYNC_MASTER_H */
=== added file 'plugin/semisync/semisync_master_plugin.cc'
--- a/plugin/semisync/semisync_master_plugin.cc 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync_master_plugin.cc 2009-05-31 08:35:06 +0000
@@ -0,0 +1,373 @@
+/* Copyright (C) 2007 Google Inc.
+ Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#include "semisync_master.h"
+
+ReplSemiSyncMaster repl_semisync;
+
+int repl_semi_report_binlog_update(Binlog_storage_param *param,
+ const char *log_file,
+ my_off_t log_pos, uint32 flags)
+{
+ int error= 0;
+
+ if (repl_semisync.getMasterEnabled())
+ {
+ /*
+ Let us store the binlog file name and the position, so that
+ we know how long to wait for the binlog to the replicated to
+ the slave in synchronous replication.
+ */
+ error= repl_semisync.writeTranxInBinlog(log_file,
+ log_pos);
+ }
+
+ return error;
+}
+
+int repl_semi_request_commit(Trans_param *param)
+{
+ return 0;
+}
+
+int repl_semi_report_commit(Trans_param *param)
+{
+
+ bool is_real_trans= param->flags & TRANS_IS_REAL_TRANS;
+
+ if (is_real_trans && param->log_pos)
+ {
+ const char *binlog_name= param->log_file;
+ return repl_semisync.commitTrx(binlog_name, param->log_pos);
+ }
+ return 0;
+}
+
+int repl_semi_report_rollback(Trans_param *param)
+{
+ return repl_semi_report_commit(param);
+}
+
+int repl_semi_binlog_dump_start(Binlog_transmit_param *param,
+ const char *log_file,
+ my_off_t log_pos)
+{
+ bool semi_sync_slave= repl_semisync.is_semi_sync_slave();
+
+ if (semi_sync_slave)
+ /* One more semi-sync slave */
+ repl_semisync.add_slave();
+ sql_print_information("Start %s binlog_dump to slave_server(%d), pos(%s, %lu)",
+ semi_sync_slave ? "semi-sync" : "asynchronous",
+ param->server_id, log_file, (unsigned long)log_pos);
+
+ return 0;
+}
+
+int repl_semi_binlog_dump_end(Binlog_transmit_param *param)
+{
+ if (repl_semisync.is_semi_sync_slave())
+ {
+ /* One less semi-sync slave */
+ repl_semisync.remove_slave();
+ }
+ return 0;
+}
+
+int repl_semi_reserve_header(Binlog_transmit_param *param,
+ unsigned char *header,
+ unsigned long size, unsigned long *len)
+{
+ *len += repl_semisync.reserveSyncHeader(header, size);
+ return 0;
+}
+
+int repl_semi_before_send_event(Binlog_transmit_param *param,
+ unsigned char *packet, unsigned long len,
+ const char *log_file, my_off_t log_pos)
+{
+ return repl_semisync.updateSyncHeader(packet,
+ log_file,
+ log_pos,
+ param->server_id);
+}
+
+int repl_semi_after_send_event(Binlog_transmit_param *param,
+ const char *event_buf, unsigned long len)
+{
+ return 0;
+}
+
+int repl_semi_reset_master(Binlog_transmit_param *param)
+{
+ if (repl_semisync.resetMaster())
+ return 1;
+ return 0;
+}
+
+static char *rpl_semi_sync_master_reply_log_file_pos;
+
+/*
+ semisync system variables
+ */
+static void fix_rpl_semi_sync_master_timeout(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val);
+
+static void fix_rpl_semi_sync_master_trace_level(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val);
+
+static void fix_rpl_semi_sync_master_enabled(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val);
+
+static void fix_rpl_semi_sync_master_reply_log_file_pos(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val);
+
+static MYSQL_SYSVAR_BOOL(enabled, rpl_semi_sync_master_enabled,
+ PLUGIN_VAR_OPCMDARG,
+ "Enable semi-synchronous replication master (disabled by default). ",
+ NULL, // check
+ &fix_rpl_semi_sync_master_enabled, // update
+ 0);
+
+static MYSQL_SYSVAR_ULONG(timeout, rpl_semi_sync_master_timeout,
+ PLUGIN_VAR_OPCMDARG,
+ "The timeout value (in ms) for semi-synchronous replication in the master",
+ NULL, // check
+ fix_rpl_semi_sync_master_timeout, // update
+ 10, 0, ~0L, 1);
+
+static MYSQL_SYSVAR_ULONG(trace_level, rpl_semi_sync_master_trace_level,
+ PLUGIN_VAR_OPCMDARG,
+ "The tracing level for semi-sync replication.",
+ NULL, // check
+ &fix_rpl_semi_sync_master_trace_level, // update
+ 32, 0, ~0L, 1);
+
+static MYSQL_SYSVAR_STR(reply_log_file_pos, rpl_semi_sync_master_reply_log_file_pos,
+ PLUGIN_VAR_NOCMDOPT,
+ "The log filename and position slave has queued to relay log.",
+ NULL, // check
+ &fix_rpl_semi_sync_master_reply_log_file_pos,
+ "");
+
+static SYS_VAR* semi_sync_master_system_vars[]= {
+ MYSQL_SYSVAR(enabled),
+ MYSQL_SYSVAR(timeout),
+ MYSQL_SYSVAR(trace_level),
+ MYSQL_SYSVAR(reply_log_file_pos),
+ NULL,
+};
+
+
+static void fix_rpl_semi_sync_master_timeout(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val)
+{
+ *(unsigned long *)ptr= *(unsigned long *)val;
+ repl_semisync.setWaitTimeout(rpl_semi_sync_master_timeout);
+ return;
+}
+
+static void fix_rpl_semi_sync_master_trace_level(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val)
+{
+ *(unsigned long *)ptr= *(unsigned long *)val;
+ repl_semisync.setTraceLevel(rpl_semi_sync_master_trace_level);
+ return;
+}
+
+static void fix_rpl_semi_sync_master_enabled(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val)
+{
+ *(char *)ptr= *(char *)val;
+ if (rpl_semi_sync_master_enabled)
+ {
+ if (repl_semisync.enableMaster() != 0)
+ rpl_semi_sync_master_enabled = false;
+ }
+ else
+ {
+ if (repl_semisync.disableMaster() != 0)
+ rpl_semi_sync_master_enabled = true;
+ }
+
+ return;
+}
+
+static void fix_rpl_semi_sync_master_reply_log_file_pos(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val)
+{
+ const char *log_file_pos= *(char **)val;
+
+ if (repl_semisync.reportReplyBinlog(log_file_pos))
+ sql_print_error("report slave binlog reply failed.");
+
+ return;
+}
+
+Trans_observer trans_observer = {
+ sizeof(Trans_observer), // len
+
+ repl_semi_report_commit, // after_commit
+ repl_semi_report_rollback, // after_rollback
+};
+
+Binlog_storage_observer storage_observer = {
+ sizeof(Binlog_storage_observer), // len
+
+ repl_semi_report_binlog_update, // report_update
+};
+
+Binlog_transmit_observer transmit_observer = {
+ sizeof(Binlog_transmit_observer), // len
+
+ repl_semi_binlog_dump_start, // start
+ repl_semi_binlog_dump_end, // stop
+ repl_semi_reserve_header, // reserve_header
+ repl_semi_before_send_event, // before_send_event
+ repl_semi_after_send_event, // after_send_event
+ repl_semi_reset_master, // reset
+};
+
+
+#define SHOW_FNAME(name) \
+ rpl_semi_sync_master_show_##name
+
+#define DEF_SHOW_FUNC(name, show_type) \
+ static int SHOW_FNAME(name)(MYSQL_THD thd, SHOW_VAR *var, char *buff) \
+ { \
+ repl_semisync.setExportStats(); \
+ var->type= show_type; \
+ var->value= (char *)&rpl_semi_sync_master_##name; \
+ return 0; \
+ }
+
+DEF_SHOW_FUNC(clients, SHOW_LONG)
+DEF_SHOW_FUNC(net_wait_time, SHOW_LONG)
+DEF_SHOW_FUNC(net_wait_total_time, SHOW_LONGLONG)
+DEF_SHOW_FUNC(net_wait_num, SHOW_LONGLONG)
+DEF_SHOW_FUNC(off_times, SHOW_LONG)
+DEF_SHOW_FUNC(no_transactions, SHOW_LONG)
+DEF_SHOW_FUNC(status, SHOW_BOOL)
+DEF_SHOW_FUNC(timefunc_fails, SHOW_LONG)
+DEF_SHOW_FUNC(trx_wait_time, SHOW_LONG)
+DEF_SHOW_FUNC(trx_wait_total_time, SHOW_LONGLONG)
+DEF_SHOW_FUNC(trx_wait_num, SHOW_LONGLONG)
+DEF_SHOW_FUNC(back_wait_pos, SHOW_LONG)
+DEF_SHOW_FUNC(wait_sessions, SHOW_LONG)
+DEF_SHOW_FUNC(yes_transactions, SHOW_LONG)
+
+
+/* plugin status variables */
+static SHOW_VAR semi_sync_master_status_vars[]= {
+ {"Rpl_semi_sync_master_clients", (char*) &SHOW_FNAME(clients), SHOW_FUNC},
+ {"Rpl_semi_sync_master_net_avg_wait_time",
+ (char*) &SHOW_FNAME(net_wait_time), SHOW_FUNC},
+ {"Rpl_semi_sync_master_net_wait_time",
+ (char*) &SHOW_FNAME(net_wait_total_time), SHOW_FUNC},
+ {"Rpl_semi_sync_master_net_waits", (char*) &SHOW_FNAME(net_wait_num), SHOW_FUNC},
+ {"Rpl_semi_sync_master_no_times", (char*) &SHOW_FNAME(off_times), SHOW_FUNC},
+ {"Rpl_semi_sync_master_no_tx", (char*) &SHOW_FNAME(no_transactions), SHOW_FUNC},
+ {"Rpl_semi_sync_master_status", (char*) &SHOW_FNAME(status), SHOW_FUNC},
+ {"Rpl_semi_sync_master_timefunc_failures",
+ (char*) &SHOW_FNAME(timefunc_fails), SHOW_FUNC},
+ {"Rpl_semi_sync_master_tx_avg_wait_time",
+ (char*) &SHOW_FNAME(trx_wait_time), SHOW_FUNC},
+ {"Rpl_semi_sync_master_tx_wait_time",
+ (char*) &SHOW_FNAME(trx_wait_total_time), SHOW_FUNC},
+ {"Rpl_semi_sync_master_tx_waits", (char*) &SHOW_FNAME(trx_wait_num), SHOW_FUNC},
+ {"Rpl_semi_sync_master_wait_pos_backtraverse",
+ (char*) &SHOW_FNAME(back_wait_pos), SHOW_FUNC},
+ {"Rpl_semi_sync_master_wait_sessions",
+ (char*) &SHOW_FNAME(wait_sessions), SHOW_FUNC},
+ {"Rpl_semi_sync_master_yes_tx", (char*) &SHOW_FNAME(yes_transactions), SHOW_FUNC},
+ {NULL, NULL, SHOW_LONG},
+};
+
+
+static int semi_sync_master_plugin_init(void *p)
+{
+ if (repl_semisync.initObject())
+ return 1;
+ if (register_trans_observer(&trans_observer, p))
+ return 1;
+ if (register_binlog_storage_observer(&storage_observer, p))
+ return 1;
+ if (register_binlog_transmit_observer(&transmit_observer, p))
+ return 1;
+ return 0;
+}
+
+static int semi_sync_master_plugin_deinit(void *p)
+{
+ if (unregister_trans_observer(&trans_observer, p))
+ {
+ sql_print_error("unregister_trans_observer failed");
+ return 1;
+ }
+ if (unregister_binlog_storage_observer(&storage_observer, p))
+ {
+ sql_print_error("unregister_binlog_storage_observer failed");
+ return 1;
+ }
+ if (unregister_binlog_transmit_observer(&transmit_observer, p))
+ {
+ sql_print_error("unregister_binlog_transmit_observer failed");
+ return 1;
+ }
+ sql_print_information("unregister_replicator OK");
+ return 0;
+}
+
+struct Mysql_replication semi_sync_master_plugin= {
+ MYSQL_REPLICATION_INTERFACE_VERSION
+};
+
+/*
+ Plugin library descriptor
+*/
+mysql_declare_plugin(semi_sync_master)
+{
+ MYSQL_REPLICATION_PLUGIN,
+ &semi_sync_master_plugin,
+ "rpl_semi_sync_master",
+ "He Zhenxing",
+ "Semi-synchronous replication master",
+ PLUGIN_LICENSE_GPL,
+ semi_sync_master_plugin_init, /* Plugin Init */
+ semi_sync_master_plugin_deinit, /* Plugin Deinit */
+ 0x0100 /* 1.0 */,
+ semi_sync_master_status_vars, /* status variables */
+ semi_sync_master_system_vars, /* system variables */
+ NULL /* config options */
+}
+mysql_declare_plugin_end;
=== added file 'plugin/semisync/semisync_slave.cc'
--- a/plugin/semisync/semisync_slave.cc 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync_slave.cc 2009-05-31 08:35:06 +0000
@@ -0,0 +1,114 @@
+/* Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#include "semisync_slave.h"
+
+char rpl_semi_sync_slave_enabled;
+unsigned long rpl_semi_sync_slave_status= 0;
+unsigned long rpl_semi_sync_slave_trace_level;
+
+int ReplSemiSyncSlave::initObject()
+{
+ int result= 0;
+ const char *kWho = "ReplSemiSyncSlave::initObject";
+
+ if (init_done_)
+ {
+ fprintf(stderr, "%s called twice\n", kWho);
+ return 1;
+ }
+ init_done_ = true;
+
+ /* References to the parameter works after set_options(). */
+ setSlaveEnabled(rpl_semi_sync_slave_enabled);
+ setTraceLevel(rpl_semi_sync_slave_trace_level);
+
+ return result;
+}
+
+int ReplSemiSyncSlave::slaveReadSyncHeader(const char *header,
+ unsigned long total_len,
+ bool *need_reply,
+ const char **payload,
+ unsigned long *payload_len)
+{
+ const char *kWho = "ReplSemiSyncSlave::slaveReadSyncHeader";
+ int read_res = 0;
+ function_enter(kWho);
+
+ if ((unsigned char)(header[0]) == kPacketMagicNum)
+ {
+ *need_reply = (header[1] & kPacketFlagSync);
+ *payload_len = total_len - 2;
+ *payload = header + 2;
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: reply - %d", kWho, *need_reply);
+ }
+ else
+ {
+ sql_print_error("Missing magic number for semi-sync packet, packet "
+ "len: %lu", total_len);
+ read_res = -1;
+ }
+
+ return function_exit(kWho, read_res);
+}
+
+int ReplSemiSyncSlave::slaveStart(Binlog_relay_IO_param *param)
+{
+ bool semi_sync= getSlaveEnabled();
+
+ sql_print_information("Slave I/O thread: Start %s replication to\
+ master '%s@%s:%d' in log '%s' at position %lu",
+ semi_sync ? "semi-sync" : "asynchronous",
+ param->user, param->host, param->port,
+ param->master_log_name[0] ? param->master_log_name : "FIRST",
+ (unsigned long)param->master_log_pos);
+
+ if (semi_sync && !rpl_semi_sync_slave_status)
+ rpl_semi_sync_slave_status= 1;
+ if (!(mysql= rpl_connect_master(NULL)))
+ return 1;
+ return 0;
+}
+
+int ReplSemiSyncSlave::slaveStop(Binlog_relay_IO_param *param)
+{
+ if (rpl_semi_sync_slave_status)
+ rpl_semi_sync_slave_status= 0;
+ if (mysql)
+ mysql_close(mysql);
+ mysql= 0;
+ return 0;
+}
+
+int ReplSemiSyncSlave::slaveReply(const char *log_name, my_off_t log_pos)
+{
+ char query[FN_REFLEN + 100];
+ sprintf(query, "SET GLOBAL rpl_semi_sync_master_reply_log_file_pos='%llu:%s'",
+ (unsigned long long)log_pos, log_name);
+ if (mysql_real_query(mysql, query, strlen(query)))
+ {
+ sql_print_error("Set 'rpl_semi_sync_master_reply_log_file_pos' on master failed");
+ mysql_free_result(mysql_store_result(mysql));
+ mysql_close(mysql);
+ mysql= 0;
+ return 1;
+ }
+ mysql_free_result(mysql_store_result(mysql));
+ return 0;
+}
=== added file 'plugin/semisync/semisync_slave.h'
--- a/plugin/semisync/semisync_slave.h 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync_slave.h 2009-05-31 08:35:06 +0000
@@ -0,0 +1,98 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#ifndef SEMISYNC_SLAVE_H
+#define SEMISYNC_SLAVE_H
+
+#include "semisync.h"
+
+/**
+ The extension class for the slave of semi-synchronous replication
+*/
+class ReplSemiSyncSlave
+ :public ReplSemiSyncBase {
+public:
+ ReplSemiSyncSlave()
+ :slave_enabled_(false)
+ {}
+ ~ReplSemiSyncSlave() {}
+
+ void setTraceLevel(unsigned long trace_level) {
+ trace_level_ = trace_level;
+ }
+
+ /* Initialize this class after MySQL parameters are initialized. this
+ * function should be called once at bootstrap time.
+ */
+ int initObject();
+
+ bool getSlaveEnabled() {
+ return slave_enabled_;
+ }
+ void setSlaveEnabled(bool enabled) {
+ slave_enabled_ = enabled;
+ }
+
+ /* A slave reads the semi-sync packet header and separate the metadata
+ * from the payload data.
+ *
+ * Input:
+ * header - (IN) packet header pointer
+ * total_len - (IN) total packet length: metadata + payload
+ * need_reply - (IN) whether the master is waiting for the reply
+ * payload - (IN) payload: the replication event
+ * payload_len - (IN) payload length
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int slaveReadSyncHeader(const char *header, unsigned long total_len, bool *need_reply,
+ const char **payload, unsigned long *payload_len);
+
+ /* A slave replies to the master indicating its replication process. It
+ * indicates that the slave has received all events before the specified
+ * binlog position.
+ *
+ * Input:
+ * mysql - (IN) the mysql network connection
+ * binlog_filename - (IN) the reply point's binlog file name
+ * binlog_filepos - (IN) the reply point's binlog file offset
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+/* int slaveReply(MYSQL *mysql, const char *binlog_filename, */
+/* my_off_t binlog_filepos); */
+
+ int slaveReply(const char *log_name, my_off_t log_pos);
+
+ int slaveStart(Binlog_relay_IO_param *param);
+ int slaveStop(Binlog_relay_IO_param *param);
+
+private:
+ /* True when initObject has been called */
+ bool init_done_;
+ bool slave_enabled_; /* semi-sycn is enabled on the slave */
+ MYSQL *mysql; /* connection to send reply */
+};
+
+
+/* System and status variables for the slave component */
+extern char rpl_semi_sync_slave_enabled;
+extern unsigned long rpl_semi_sync_slave_trace_level;
+extern unsigned long rpl_semi_sync_slave_status;
+
+#endif /* SEMISYNC_SLAVE_H */
=== added file 'plugin/semisync/semisync_slave_plugin.cc'
--- a/plugin/semisync/semisync_slave_plugin.cc 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync_slave_plugin.cc 2009-05-31 08:35:06 +0000
@@ -0,0 +1,189 @@
+/* Copyright (C) 2007 Google Inc.
+ Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#include "semisync_slave.h"
+
+ReplSemiSyncSlave repl_semisync;
+
+/*
+ indicate whether or not the slave should send a reply to the master.
+
+ This is set to true in repl_semi_slave_read_event if the current
+ event read is the last event of a transaction. And the value is
+ checked in repl_semi_slave_queue_event.
+*/
+bool semi_sync_need_reply= false;
+
+int repl_semi_reset_slave(Binlog_relay_IO_param *param)
+{
+ // TODO: reset semi-sync slave status here
+ return 0;
+}
+
+int repl_semi_slave_request_dump(Binlog_relay_IO_param *param,
+ uint32 flags)
+{
+ MYSQL *mysql= param->mysql;
+ if (!repl_semisync.getSlaveEnabled())
+ return 0;
+
+ const char query[]= "SET @rpl_semi_sync_slave= 1";
+ if (mysql_real_query(mysql, query, strlen(query)))
+ {
+ sql_print_error("Set 'rpl_semi_sync_slave=1' on master failed");
+ mysql_free_result(mysql_store_result(mysql));
+ return 1;
+ }
+ mysql_free_result(mysql_store_result(mysql));
+ return 0;
+}
+
+int repl_semi_slave_read_event(Binlog_relay_IO_param *param,
+ const char *packet, unsigned long len,
+ const char **event_buf, unsigned long *event_len)
+{
+ if (repl_semisync.getSlaveEnabled())
+ return repl_semisync.slaveReadSyncHeader(packet, len,
+ &semi_sync_need_reply,
+ event_buf, event_len);
+ *event_buf= packet;
+ *event_len= len;
+ return 0;
+}
+
+int repl_semi_slave_queue_event(Binlog_relay_IO_param *param,
+ const char *event_buf,
+ unsigned long event_len,
+ uint32 flags)
+{
+ if (semi_sync_need_reply)
+ return repl_semisync.slaveReply(param->master_log_name,
+ param->master_log_pos);
+ return 0;
+}
+
+int repl_semi_slave_io_start(Binlog_relay_IO_param *param)
+{
+ return repl_semisync.slaveStart(param);
+}
+
+int repl_semi_slave_io_end(Binlog_relay_IO_param *param)
+{
+ return repl_semisync.slaveStop(param);
+}
+
+
+static void fix_rpl_semi_sync_slave_enabled(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val)
+{
+ *(char *)ptr= *(char *)val;
+ repl_semisync.setSlaveEnabled(rpl_semi_sync_slave_enabled != 0);
+ return;
+}
+
+static void fix_rpl_semi_sync_trace_level(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val)
+{
+ *(unsigned long *)ptr= *(unsigned long *)val;
+ repl_semisync.setTraceLevel(rpl_semi_sync_slave_trace_level);
+ return;
+}
+
+/* plugin system variables */
+static MYSQL_SYSVAR_BOOL(enabled, rpl_semi_sync_slave_enabled,
+ PLUGIN_VAR_OPCMDARG,
+ "Enable semi-synchronous replication slave (disabled by default). ",
+ NULL, // check
+ &fix_rpl_semi_sync_slave_enabled, // update
+ 0);
+
+static MYSQL_SYSVAR_ULONG(trace_level, rpl_semi_sync_slave_trace_level,
+ PLUGIN_VAR_OPCMDARG,
+ "The tracing level for semi-sync replication.",
+ NULL, // check
+ &fix_rpl_semi_sync_trace_level, // update
+ 32, 0, ~0L, 1);
+
+static SYS_VAR* semi_sync_slave_system_vars[]= {
+ MYSQL_SYSVAR(enabled),
+ MYSQL_SYSVAR(trace_level),
+ NULL,
+};
+
+
+/* plugin status variables */
+static SHOW_VAR semi_sync_slave_status_vars[]= {
+ {"Rpl_semi_sync_slave_status",
+ (char*) &rpl_semi_sync_slave_status, SHOW_BOOL},
+ {NULL, NULL, SHOW_BOOL},
+};
+
+Binlog_relay_IO_observer relay_io_observer = {
+ sizeof(Binlog_relay_IO_observer), // len
+
+ repl_semi_slave_io_start, // start
+ repl_semi_slave_io_end, // stop
+ repl_semi_slave_request_dump, // request_transmit
+ repl_semi_slave_read_event, // after_read_event
+ repl_semi_slave_queue_event, // after_queue_event
+ repl_semi_reset_slave, // reset
+};
+
+static int semi_sync_slave_plugin_init(void *p)
+{
+ if (repl_semisync.initObject())
+ return 1;
+ if (register_binlog_relay_io_observer(&relay_io_observer, p))
+ return 1;
+ return 0;
+}
+
+static int semi_sync_slave_plugin_deinit(void *p)
+{
+ if (unregister_binlog_relay_io_observer(&relay_io_observer, p))
+ return 1;
+ return 0;
+}
+
+
+struct Mysql_replication semi_sync_slave_plugin= {
+ MYSQL_REPLICATION_INTERFACE_VERSION
+};
+
+/*
+ Plugin library descriptor
+*/
+mysql_declare_plugin(semi_sync_slave)
+{
+ MYSQL_REPLICATION_PLUGIN,
+ &semi_sync_slave_plugin,
+ "rpl_semi_sync_slave",
+ "He Zhenxing",
+ "Semi-synchronous replication slave",
+ PLUGIN_LICENSE_GPL,
+ semi_sync_slave_plugin_init, /* Plugin Init */
+ semi_sync_slave_plugin_deinit, /* Plugin Deinit */
+ 0x0100 /* 1.0 */,
+ semi_sync_slave_status_vars, /* status variables */
+ semi_sync_slave_system_vars, /* system variables */
+ NULL /* config options */
+}
+mysql_declare_plugin_end;
Attachment: [text/bzr-bundle] bzr/zhenxing.he@sun.com-20090531083506-2126zny86o92ea83.bundle