#At file:///media/sda3/work/mysql/bzrwork/semisync/mysql-6.0-semi-sync-1.0/
2638 He Zhenxing 2008-07-14
WL#1720 Semi-synchronous replication
added:
mysql-test/suite/rpl_team/
mysql-test/suite/rpl_team/combinations
mysql-test/suite/rpl_team/r/
mysql-test/suite/rpl_team/r/rpl_semi_sync.result
mysql-test/suite/rpl_team/t/
mysql-test/suite/rpl_team/t/rpl_semi_sync-master.opt
mysql-test/suite/rpl_team/t/rpl_semi_sync-slave.opt
mysql-test/suite/rpl_team/t/rpl_semi_sync.test
plugin/semisync/
plugin/semisync/Makefile.am
plugin/semisync/configure.in
plugin/semisync/plug.in
plugin/semisync/semisync.cc
plugin/semisync/semisync.h
plugin/semisync/semisync_master.cc
plugin/semisync/semisync_master.h
plugin/semisync/semisync_master_plugin.cc
plugin/semisync/semisync_slave.cc
plugin/semisync/semisync_slave.h
plugin/semisync/semisync_slave_plugin.cc
modified:
plugin/Makefile.am
=== added directory 'mysql-test/suite/rpl_team'
=== added file 'mysql-test/suite/rpl_team/combinations'
--- a/mysql-test/suite/rpl_team/combinations 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl_team/combinations 2008-07-14 13:03:54 +0000
@@ -0,0 +1,8 @@
+[row]
+--binlog-format=row
+
+[stmt]
+--binlog-format=statement
+
+[mix]
+--binlog-format=mixed
=== added directory 'mysql-test/suite/rpl_team/r'
=== added file 'mysql-test/suite/rpl_team/r/rpl_semi_sync.result'
--- a/mysql-test/suite/rpl_team/r/rpl_semi_sync.result 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl_team/r/rpl_semi_sync.result 2008-07-14 13:03:54 +0000
@@ -0,0 +1,445 @@
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+show status like 'Rpl_semi_sync_no_tx';
+Variable_name Value
+Rpl_semi_sync_no_tx 0
+show status like 'Rpl_semi_sync_yes_tx';
+Variable_name Value
+Rpl_semi_sync_yes_tx 0
+show variables like 'rpl_semi_sync_enabled';
+Variable_name Value
+rpl_semi_sync_enabled ON
+stop slave;
+show variables like 'rpl_semi_sync_slave_enabled';
+Variable_name Value
+rpl_semi_sync_slave_enabled ON
+set global rpl_semi_sync_slave_enabled = 1;
+start slave;
+show status like 'Rpl_semi_sync_clients';
+Variable_name Value
+Rpl_semi_sync_clients 1
+drop table if exists t1;
+create table t1(n int) engine = InnoDB;
+insert into t1 values (300);
+insert into t1 values (299);
+insert into t1 values (298);
+insert into t1 values (297);
+insert into t1 values (296);
+insert into t1 values (295);
+insert into t1 values (294);
+insert into t1 values (293);
+insert into t1 values (292);
+insert into t1 values (291);
+insert into t1 values (290);
+insert into t1 values (289);
+insert into t1 values (288);
+insert into t1 values (287);
+insert into t1 values (286);
+insert into t1 values (285);
+insert into t1 values (284);
+insert into t1 values (283);
+insert into t1 values (282);
+insert into t1 values (281);
+insert into t1 values (280);
+insert into t1 values (279);
+insert into t1 values (278);
+insert into t1 values (277);
+insert into t1 values (276);
+insert into t1 values (275);
+insert into t1 values (274);
+insert into t1 values (273);
+insert into t1 values (272);
+insert into t1 values (271);
+insert into t1 values (270);
+insert into t1 values (269);
+insert into t1 values (268);
+insert into t1 values (267);
+insert into t1 values (266);
+insert into t1 values (265);
+insert into t1 values (264);
+insert into t1 values (263);
+insert into t1 values (262);
+insert into t1 values (261);
+insert into t1 values (260);
+insert into t1 values (259);
+insert into t1 values (258);
+insert into t1 values (257);
+insert into t1 values (256);
+insert into t1 values (255);
+insert into t1 values (254);
+insert into t1 values (253);
+insert into t1 values (252);
+insert into t1 values (251);
+insert into t1 values (250);
+insert into t1 values (249);
+insert into t1 values (248);
+insert into t1 values (247);
+insert into t1 values (246);
+insert into t1 values (245);
+insert into t1 values (244);
+insert into t1 values (243);
+insert into t1 values (242);
+insert into t1 values (241);
+insert into t1 values (240);
+insert into t1 values (239);
+insert into t1 values (238);
+insert into t1 values (237);
+insert into t1 values (236);
+insert into t1 values (235);
+insert into t1 values (234);
+insert into t1 values (233);
+insert into t1 values (232);
+insert into t1 values (231);
+insert into t1 values (230);
+insert into t1 values (229);
+insert into t1 values (228);
+insert into t1 values (227);
+insert into t1 values (226);
+insert into t1 values (225);
+insert into t1 values (224);
+insert into t1 values (223);
+insert into t1 values (222);
+insert into t1 values (221);
+insert into t1 values (220);
+insert into t1 values (219);
+insert into t1 values (218);
+insert into t1 values (217);
+insert into t1 values (216);
+insert into t1 values (215);
+insert into t1 values (214);
+insert into t1 values (213);
+insert into t1 values (212);
+insert into t1 values (211);
+insert into t1 values (210);
+insert into t1 values (209);
+insert into t1 values (208);
+insert into t1 values (207);
+insert into t1 values (206);
+insert into t1 values (205);
+insert into t1 values (204);
+insert into t1 values (203);
+insert into t1 values (202);
+insert into t1 values (201);
+insert into t1 values (200);
+insert into t1 values (199);
+insert into t1 values (198);
+insert into t1 values (197);
+insert into t1 values (196);
+insert into t1 values (195);
+insert into t1 values (194);
+insert into t1 values (193);
+insert into t1 values (192);
+insert into t1 values (191);
+insert into t1 values (190);
+insert into t1 values (189);
+insert into t1 values (188);
+insert into t1 values (187);
+insert into t1 values (186);
+insert into t1 values (185);
+insert into t1 values (184);
+insert into t1 values (183);
+insert into t1 values (182);
+insert into t1 values (181);
+insert into t1 values (180);
+insert into t1 values (179);
+insert into t1 values (178);
+insert into t1 values (177);
+insert into t1 values (176);
+insert into t1 values (175);
+insert into t1 values (174);
+insert into t1 values (173);
+insert into t1 values (172);
+insert into t1 values (171);
+insert into t1 values (170);
+insert into t1 values (169);
+insert into t1 values (168);
+insert into t1 values (167);
+insert into t1 values (166);
+insert into t1 values (165);
+insert into t1 values (164);
+insert into t1 values (163);
+insert into t1 values (162);
+insert into t1 values (161);
+insert into t1 values (160);
+insert into t1 values (159);
+insert into t1 values (158);
+insert into t1 values (157);
+insert into t1 values (156);
+insert into t1 values (155);
+insert into t1 values (154);
+insert into t1 values (153);
+insert into t1 values (152);
+insert into t1 values (151);
+insert into t1 values (150);
+insert into t1 values (149);
+insert into t1 values (148);
+insert into t1 values (147);
+insert into t1 values (146);
+insert into t1 values (145);
+insert into t1 values (144);
+insert into t1 values (143);
+insert into t1 values (142);
+insert into t1 values (141);
+insert into t1 values (140);
+insert into t1 values (139);
+insert into t1 values (138);
+insert into t1 values (137);
+insert into t1 values (136);
+insert into t1 values (135);
+insert into t1 values (134);
+insert into t1 values (133);
+insert into t1 values (132);
+insert into t1 values (131);
+insert into t1 values (130);
+insert into t1 values (129);
+insert into t1 values (128);
+insert into t1 values (127);
+insert into t1 values (126);
+insert into t1 values (125);
+insert into t1 values (124);
+insert into t1 values (123);
+insert into t1 values (122);
+insert into t1 values (121);
+insert into t1 values (120);
+insert into t1 values (119);
+insert into t1 values (118);
+insert into t1 values (117);
+insert into t1 values (116);
+insert into t1 values (115);
+insert into t1 values (114);
+insert into t1 values (113);
+insert into t1 values (112);
+insert into t1 values (111);
+insert into t1 values (110);
+insert into t1 values (109);
+insert into t1 values (108);
+insert into t1 values (107);
+insert into t1 values (106);
+insert into t1 values (105);
+insert into t1 values (104);
+insert into t1 values (103);
+insert into t1 values (102);
+insert into t1 values (101);
+insert into t1 values (100);
+insert into t1 values (99);
+insert into t1 values (98);
+insert into t1 values (97);
+insert into t1 values (96);
+insert into t1 values (95);
+insert into t1 values (94);
+insert into t1 values (93);
+insert into t1 values (92);
+insert into t1 values (91);
+insert into t1 values (90);
+insert into t1 values (89);
+insert into t1 values (88);
+insert into t1 values (87);
+insert into t1 values (86);
+insert into t1 values (85);
+insert into t1 values (84);
+insert into t1 values (83);
+insert into t1 values (82);
+insert into t1 values (81);
+insert into t1 values (80);
+insert into t1 values (79);
+insert into t1 values (78);
+insert into t1 values (77);
+insert into t1 values (76);
+insert into t1 values (75);
+insert into t1 values (74);
+insert into t1 values (73);
+insert into t1 values (72);
+insert into t1 values (71);
+insert into t1 values (70);
+insert into t1 values (69);
+insert into t1 values (68);
+insert into t1 values (67);
+insert into t1 values (66);
+insert into t1 values (65);
+insert into t1 values (64);
+insert into t1 values (63);
+insert into t1 values (62);
+insert into t1 values (61);
+insert into t1 values (60);
+insert into t1 values (59);
+insert into t1 values (58);
+insert into t1 values (57);
+insert into t1 values (56);
+insert into t1 values (55);
+insert into t1 values (54);
+insert into t1 values (53);
+insert into t1 values (52);
+insert into t1 values (51);
+insert into t1 values (50);
+insert into t1 values (49);
+insert into t1 values (48);
+insert into t1 values (47);
+insert into t1 values (46);
+insert into t1 values (45);
+insert into t1 values (44);
+insert into t1 values (43);
+insert into t1 values (42);
+insert into t1 values (41);
+insert into t1 values (40);
+insert into t1 values (39);
+insert into t1 values (38);
+insert into t1 values (37);
+insert into t1 values (36);
+insert into t1 values (35);
+insert into t1 values (34);
+insert into t1 values (33);
+insert into t1 values (32);
+insert into t1 values (31);
+insert into t1 values (30);
+insert into t1 values (29);
+insert into t1 values (28);
+insert into t1 values (27);
+insert into t1 values (26);
+insert into t1 values (25);
+insert into t1 values (24);
+insert into t1 values (23);
+insert into t1 values (22);
+insert into t1 values (21);
+insert into t1 values (20);
+insert into t1 values (19);
+insert into t1 values (18);
+insert into t1 values (17);
+insert into t1 values (16);
+insert into t1 values (15);
+insert into t1 values (14);
+insert into t1 values (13);
+insert into t1 values (12);
+insert into t1 values (11);
+insert into t1 values (10);
+insert into t1 values (9);
+insert into t1 values (8);
+insert into t1 values (7);
+insert into t1 values (6);
+insert into t1 values (5);
+insert into t1 values (4);
+insert into t1 values (3);
+insert into t1 values (2);
+insert into t1 values (1);
+show master status;
+File Position Binlog_Do_DB Binlog_Ignore_DB
+master-bin.000001 # <Binlog_Do_DB> <Binlog_Ignore_DB>
+show status like 'Rpl_semi_sync_slave_status';
+Variable_name Value
+Rpl_semi_sync_slave_status 1
+select count(distinct n) from t1;
+count(distinct n)
+300
+select min(n) from t1;
+min(n)
+1
+select max(n) from t1;
+max(n)
+300
+stop slave;
+show status like 'Rpl_semi_sync_status';
+Variable_name Value
+Rpl_semi_sync_status 1
+show status like 'Rpl_semi_sync_no_tx';
+Variable_name Value
+Rpl_semi_sync_no_tx 0
+show status like 'Rpl_semi_sync_yes_tx';
+Variable_name Value
+Rpl_semi_sync_yes_tx 302
+show status like 'Rpl_semi_sync_clients';
+Variable_name Value
+Rpl_semi_sync_clients 1
+begin;
+insert into t1 values (500);
+delete from t1 where n < 500;
+commit;
+insert into t1 values (100);
+show status like 'Rpl_semi_sync_status';
+Variable_name Value
+Rpl_semi_sync_status 0
+show status like 'Rpl_semi_sync_no_tx';
+Variable_name Value
+Rpl_semi_sync_no_tx 2
+show status like 'Rpl_semi_sync_yes_tx';
+Variable_name Value
+Rpl_semi_sync_yes_tx 302
+show status like 'Rpl_semi_sync_slave_status';
+Variable_name Value
+Rpl_semi_sync_slave_status 0
+start slave;
+show status like 'Rpl_semi_sync_slave_status';
+Variable_name Value
+Rpl_semi_sync_slave_status 1
+select count(distinct n) from t1;
+count(distinct n)
+2
+select min(n) from t1;
+min(n)
+100
+select max(n) from t1;
+max(n)
+500
+drop table t1;
+show status like 'Rpl_semi_sync_status';
+Variable_name Value
+Rpl_semi_sync_status 1
+show status like 'Rpl_semi_sync_no_tx';
+Variable_name Value
+Rpl_semi_sync_no_tx 2
+show status like 'Rpl_semi_sync_yes_tx';
+Variable_name Value
+Rpl_semi_sync_yes_tx 303
+stop slave;
+show master logs;
+Log_name master-bin.000001
+File_size #
+show variables like 'rpl_semi_sync_enabled';
+Variable_name Value
+rpl_semi_sync_enabled ON
+set global rpl_semi_sync_enabled=0;
+show variables like 'rpl_semi_sync_enabled';
+Variable_name Value
+rpl_semi_sync_enabled OFF
+show status like 'Rpl_semi_sync_status';
+Variable_name Value
+Rpl_semi_sync_status 0
+set global rpl_semi_sync_enabled=1;
+start slave;
+create table t1 (a int) engine = InnoDB;
+drop table t1;
+show status like 'Rpl_relay%';
+Variable_name Value
+reset master;
+show status like 'Rpl_semi_sync_status';
+Variable_name Value
+Rpl_semi_sync_status 1
+show status like 'Rpl_semi_sync_no_tx';
+Variable_name Value
+Rpl_semi_sync_no_tx 0
+show status like 'Rpl_semi_sync_yes_tx';
+Variable_name Value
+Rpl_semi_sync_yes_tx 0
+stop slave;
+reset slave;
+start slave;
+create table t1 (a int) engine = InnoDB;
+insert into t1 values (1);
+insert into t1 values (2), (3);
+select * from t1;
+a
+1
+2
+3
+show status like 'Rpl_semi_sync_status';
+Variable_name Value
+Rpl_semi_sync_status 1
+show status like 'Rpl_semi_sync_no_tx';
+Variable_name Value
+Rpl_semi_sync_no_tx 0
+show status like 'Rpl_semi_sync_yes_tx';
+Variable_name Value
+Rpl_semi_sync_yes_tx 3
+drop table t1;
=== added directory 'mysql-test/suite/rpl_team/t'
=== added file 'mysql-test/suite/rpl_team/t/rpl_semi_sync-master.opt'
--- a/mysql-test/suite/rpl_team/t/rpl_semi_sync-master.opt 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl_team/t/rpl_semi_sync-master.opt 2008-07-14 13:03:54 +0000
@@ -0,0 +1 @@
+--plugin_dir=../plugin/semisync/.libs
\ No newline at end of file
=== added file 'mysql-test/suite/rpl_team/t/rpl_semi_sync-slave.opt'
--- a/mysql-test/suite/rpl_team/t/rpl_semi_sync-slave.opt 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl_team/t/rpl_semi_sync-slave.opt 2008-07-14 13:03:54 +0000
@@ -0,0 +1 @@
+--plugin_dir=../plugin/semisync/.libs
=== added file 'mysql-test/suite/rpl_team/t/rpl_semi_sync.test'
--- a/mysql-test/suite/rpl_team/t/rpl_semi_sync.test 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl_team/t/rpl_semi_sync.test 2008-07-14 13:03:54 +0000
@@ -0,0 +1,182 @@
+source include/have_innodb.inc;
+source include/master-slave.inc;
+
+let $engine_type= InnoDB;
+
+# connection slave;
+# stop slave;
+connection master;
+
+disable_query_log;
+let $value = query_get_value(show variables like 'rpl_semi_sync_enabled', Value, 1);
+if (`select '$value' = 'No such row'`)
+{
+ set sql_log_bin=0;
+ INSTALL PLUGIN rpl_semi_sync SONAME 'libsemisync_master.so';
+ set global rpl_semi_sync_timeout= 2000; /* 2s */
+ set sql_log_bin=1;
+}
+enable_query_log;
+
+# SET global rpl_semi_sync_reply_log_file_pos='123:master-bin.000001';
+
+show status like 'Rpl_semi_sync_no_tx';
+show status like 'Rpl_semi_sync_yes_tx';
+show variables like 'rpl_semi_sync_enabled';
+
+connection slave;
+# Restart I/O thread to make sure that semi-sync is caught up.
+stop slave;
+
+disable_query_log;
+let $value= query_get_value(show variables like 'rpl_semi_sync_slave_enabled', Value, 1);
+if (`select '$value' = 'No such row'`)
+{
+ set sql_log_bin=0;
+ INSTALL PLUGIN rpl_semi_sync_slave SONAME 'libsemisync_slave.so';
+ set sql_log_bin=1;
+}
+enable_query_log;
+
+show variables like 'rpl_semi_sync_slave_enabled';
+set global rpl_semi_sync_slave_enabled = 1;
+start slave;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+show status like 'Rpl_semi_sync_clients';
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+eval create table t1(n int) engine = $engine_type;
+let $i=300;
+while ($i)
+{
+ eval insert into t1 values ($i);
+ dec $i;
+}
+
+source include/show_master_status.inc;
+sync_slave_with_master;
+
+show status like 'Rpl_semi_sync_slave_status';
+
+select count(distinct n) from t1;
+select min(n) from t1;
+select max(n) from t1;
+# stop slave to let semi-sync replication fail.
+stop slave;
+source include/wait_for_slave_to_stop.inc;
+
+connection master;
+
+# The first semi-sync check should be on because after slave stop,
+# there are no transactions on the master.
+show status like 'Rpl_semi_sync_status';
+show status like 'Rpl_semi_sync_no_tx';
+show status like 'Rpl_semi_sync_yes_tx';
+show status like 'Rpl_semi_sync_clients';
+
+begin;
+insert into t1 values (500);
+delete from t1 where n < 500;
+commit;
+insert into t1 values (100);
+
+# The second semi-sync check should be off because one transaction
+# times out during waiting.
+show status like 'Rpl_semi_sync_status';
+show status like 'Rpl_semi_sync_no_tx';
+show status like 'Rpl_semi_sync_yes_tx';
+save_master_pos;
+
+connection slave;
+show status like 'Rpl_semi_sync_slave_status';
+start slave;
+sync_with_master;
+show status like 'Rpl_semi_sync_slave_status';
+
+select count(distinct n) from t1;
+select min(n) from t1;
+select max(n) from t1;
+
+connection master;
+drop table t1;
+
+# The third semi-sync check should be on again.
+show status like 'Rpl_semi_sync_status';
+show status like 'Rpl_semi_sync_no_tx';
+show status like 'Rpl_semi_sync_yes_tx';
+
+sync_slave_with_master;
+stop slave;
+
+connection master;
+source include/show_master_logs.inc;
+show variables like 'rpl_semi_sync_enabled';
+
+# disable semi-sync on the fly
+set global rpl_semi_sync_enabled=0;
+show variables like 'rpl_semi_sync_enabled';
+show status like 'Rpl_semi_sync_status';
+
+# enable semi-sync on the fly
+set global rpl_semi_sync_enabled=1;
+
+connection slave;
+start slave;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+eval create table t1 (a int) engine = $engine_type;
+drop table t1;
+
+##show status like 'Rpl_semi_sync_status';
+
+sync_slave_with_master;
+--replace_column 2 #
+show status like 'Rpl_relay%';
+
+# Test reset master;
+connection master;
+reset master;
+show status like 'Rpl_semi_sync_status';
+show status like 'Rpl_semi_sync_no_tx';
+show status like 'Rpl_semi_sync_yes_tx';
+
+connection slave;
+stop slave;
+reset slave;
+start slave;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+eval create table t1 (a int) engine = $engine_type;
+insert into t1 values (1);
+insert into t1 values (2), (3);
+sync_slave_with_master;
+select * from t1;
+connection master;
+show status like 'Rpl_semi_sync_status';
+show status like 'Rpl_semi_sync_no_tx';
+show status like 'Rpl_semi_sync_yes_tx';
+
+# Cleanup
+drop table t1;
+sync_slave_with_master;
+
+# disable_query_log;
+# stop slave;
+# reset slave;
+# UNINSTALL PLUGIN rpl_semi_sync_slave;
+
+# connection master;
+# reset master;
+# set sql_log_bin=0;
+# disable_warnings;
+# UNINSTALL PLUGIN rpl_semi_sync;
+# enable_warnings;
+# set sql_log_bin=1;
+# enable_query_log;
=== modified file 'plugin/Makefile.am'
--- a/plugin/Makefile.am 2008-03-28 10:53:03 +0000
+++ b/plugin/Makefile.am 2008-07-14 13:03:54 +0000
@@ -22,7 +22,7 @@ AUTOMAKE_OPTIONS = foreign
EXTRA_DIST = fulltext/configure.in
SUBDIRS = @mysql_pg_dirs@
-DIST_SUBDIRS = audit_null daemon_example fulltext
+DIST_SUBDIRS = audit_null daemon_example fulltext semisync
# Don't update the files from bitkeeper
%::SCCS/s.%
=== added directory 'plugin/semisync'
=== added file 'plugin/semisync/Makefile.am'
--- a/plugin/semisync/Makefile.am 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/Makefile.am 2008-07-14 13:03:54 +0000
@@ -0,0 +1,51 @@
+# Copyright (C) 2006 MySQL AB
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#Makefile.am for semi-synchronous replication
+MYSQLDATAdir = $(localstatedir)
+MYSQLSHAREdir = $(pkgdatadir)
+MYSQLBASEdir= $(prefix)
+MYSQLLIBdir= $(pkglibdir)
+INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \
+ -I$(top_srcdir)/regex \
+ -I$(top_srcdir)/sql \
+ -I$(srcdir) @ZLIB_INCLUDES@
+
+EXTRA_LTLIBRARIES = libsemisync_master.la libsemisync_slave.la
+pkglib_LTLIBRARIES = @plugin_semisync_shared_target@
+
+libsemisync_master_la_LDFLAGS = -module -rpath $(MYSQLLIBdir)
+libsemisync_master_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
+libsemisync_master_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
+libsemisync_master_la_SOURCES = semisync.cc semisync_master.cc semisync_master_plugin.cc
+
+libsemisync_slave_la_LDFLAGS = -module -rpath $(MYSQLLIBdir)
+libsemisync_slave_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
+libsemisync_slave_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
+libsemisync_slave_la_SOURCES = semisync.cc semisync_slave.cc semisync_slave_plugin.cc
+
+EXTRA_LIBRARIES = libsemisync_master.a libsemisync_slave.a
+noinst_LIBRARIES = @plugin_semisync_static_target@
+
+libsemisync_master_a_CXXFLAGS = $(AM_CFLAGS)
+libsemisync_master_a_CFLAGS = $(AM_CFLAGS)
+libsemisync_master_a_SOURCES= semisync.cc semisync_master.cc semisync_master_plugin.cc
+
+libsemisync_slave_a_CXXFLAGS = $(AM_CFLAGS)
+libsemisync_slave_a_CFLAGS = $(AM_CFLAGS)
+libsemisync_slave_a_SOURCES= semisync.cc semisync_slave.cc semisync_slave_plugin.cc
+
+# Don't update the files from bitkeeper
+%::SCCS/s.%
=== added file 'plugin/semisync/configure.in'
--- a/plugin/semisync/configure.in 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/configure.in 2008-07-14 13:03:54 +0000
@@ -0,0 +1,9 @@
+# configure.in for semi-synchronous replication
+
+AC_INIT(semisync, 0.1)
+AM_INIT_AUTOMAKE
+AC_DISABLE_STATIC
+AC_PROG_LIBTOOL
+AC_CONFIG_FILES([Makefile])
+AC_OUTPUT
+
=== added file 'plugin/semisync/plug.in'
--- a/plugin/semisync/plug.in 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/plug.in 2008-07-14 13:03:54 +0000
@@ -0,0 +1,3 @@
+MYSQL_PLUGIN(semisync,[Semi-synchronous Replication Plugin],
+ [Semi-synchronous replication plugin.])
+MYSQL_PLUGIN_DYNAMIC(semisync, [libsemisync_master.la libsemisync_slave.la])
=== added file 'plugin/semisync/semisync.cc'
--- a/plugin/semisync/semisync.cc 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync.cc 2008-07-14 13:03:54 +0000
@@ -0,0 +1,30 @@
+/* Copyright (C) 2007 Google Inc.
+ Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#include "semisync.h"
+
+const unsigned char ReplSemiSyncBase::kPacketMagicNum = 0xef;
+const unsigned char ReplSemiSyncBase::kPacketFlagSync = 0x01;
+
+
+const unsigned long Trace::kTraceGeneral = 0x0001;
+const unsigned long Trace::kTraceDetail = 0x0010;
+const unsigned long Trace::kTraceNetWait = 0x0020;
+const unsigned long Trace::kTraceFunction = 0x0040;
+
+const char ReplSemiSyncBase::kSyncHeader[2] =
+ {ReplSemiSyncBase::kPacketMagicNum, 0};
=== added file 'plugin/semisync/semisync.h'
--- a/plugin/semisync/semisync.h 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync.h 2008-07-14 13:03:54 +0000
@@ -0,0 +1,92 @@
+/* Copyright (C) 2007 Google Inc.
+ Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#ifndef SEMISYNC_H
+#define SEMISYNC_H
+
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdio.h>
+#include <pthread.h>
+#include <mysql.h>
+
+typedef uint32_t uint32;
+typedef unsigned long long my_off_t;
+#define FN_REFLEN 512 /* Max length of full path-name */
+void sql_print_error(const char *format, ...);
+void sql_print_warning(const char *format, ...);
+void sql_print_information(const char *format, ...);
+extern unsigned long max_connections;
+
+#define MYSQL_SERVER 1
+#include <mysql/plugin.h>
+#include <replication.h>
+
+typedef struct st_mysql_show_var SHOW_VAR;
+typedef struct st_mysql_sys_var SYS_VAR;
+
+
+/**
+ This class is used to trace function calls and other process
+ information
+*/
+class Trace {
+public:
+ static const unsigned long kTraceFunction;
+ static const unsigned long kTraceGeneral;
+ static const unsigned long kTraceDetail;
+ static const unsigned long kTraceNetWait;
+
+ unsigned long trace_level_; /* the level for tracing */
+
+ inline void function_enter(const char *func_name)
+ {
+ if (trace_level_ & kTraceFunction)
+ sql_print_information("---> %s enter", func_name);
+ }
+ inline int function_exit(const char *func_name, int exit_code)
+ {
+ if (trace_level_ & kTraceFunction)
+ sql_print_information("<--- %s exit (%d)", func_name, exit_code);
+ return exit_code;
+ }
+
+ Trace()
+ :trace_level_(0L)
+ {}
+ Trace(unsigned long trace_level)
+ :trace_level_(trace_level)
+ {}
+};
+
+/**
+ Base class for semi-sync master and slave classes
+*/
+class ReplSemiSyncBase
+ :public Trace {
+public:
+ static const char kSyncHeader[2]; /* three byte packet header */
+
+ /* Constants in network packet header. */
+ static const unsigned char kPacketMagicNum;
+ static const unsigned char kPacketFlagSync;
+};
+
+#endif /* SEMISYNC_H */
=== added file 'plugin/semisync/semisync_master.cc'
--- a/plugin/semisync/semisync_master.cc 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync_master.cc 2008-07-14 13:03:54 +0000
@@ -0,0 +1,1183 @@
+/* Copyright (C) 2007 Google Inc.
+ Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#include "semisync_master.h"
+
+#define TIME_THOUSAND 1000
+#define TIME_MILLION 1000000
+#define TIME_BILLION 1000000000
+
+/* This indicates whether semi-synchronous replication is enabled. */
+char rpl_semi_sync_enabled;
+unsigned long rpl_semi_sync_timeout;
+unsigned long rpl_semi_sync_trace_level;
+unsigned long rpl_semi_sync_status = 0;
+unsigned long rpl_semi_sync_yes_transactions = 0;
+unsigned long rpl_semi_sync_no_transactions = 0;
+unsigned long rpl_semi_sync_off_times = 0;
+unsigned long rpl_semi_sync_timefunc_fails = 0;
+unsigned long rpl_semi_sync_num_timeouts = 0;
+unsigned long rpl_semi_sync_wait_sessions = 0;
+unsigned long rpl_semi_sync_back_wait_pos = 0;
+unsigned long rpl_semi_sync_trx_wait_time = 0;
+unsigned long long rpl_semi_sync_trx_wait_num = 0;
+unsigned long rpl_semi_sync_net_wait_time = 0;
+unsigned long long rpl_semi_sync_net_wait_num = 0;
+unsigned long rpl_semi_sync_clients = 0;
+unsigned long long rpl_semi_sync_net_wait_total_time = 0;
+unsigned long long rpl_semi_sync_trx_wait_total_time = 0;
+
+
+static int getWaitTime(const struct timeval& start_tv);
+
+/*******************************************************************************
+ *
+ * <ActiveTranx> class : manage all active transaction nodes
+ *
+ ******************************************************************************/
+
+ActiveTranx::ActiveTranx(int max_connections,
+ pthread_mutex_t *lock,
+ unsigned long trace_level)
+ : Trace(trace_level), num_transactions_(max_connections),
+ num_entries_(max_connections << 1),
+ lock_(lock)
+{
+ /* Allocate the memory for the array */
+ node_array_ = new TranxNode[num_transactions_];
+ for (int idx = 0; idx < num_transactions_; ++idx)
+ {
+ node_array_[idx].log_pos_ = 0;
+ node_array_[idx].hash_next_ = NULL;
+ node_array_[idx].next_ = node_array_ + idx + 1;
+
+ node_array_[idx].log_name_ = new char[FN_REFLEN];
+ node_array_[idx].log_name_[0] = '\x0';
+ }
+ node_array_[num_transactions_-1].next_ = NULL;
+
+ /* All nodes in the array go to the pool initially. */
+ free_pool_ = node_array_;
+
+ /* No transactions are in the list initially. */
+ trx_front_ = NULL;
+ trx_rear_ = NULL;
+
+ /* Create the hash table to find a transaction's ending event. */
+ trx_htb_ = new TranxNode *[num_entries_];
+ for (int idx = 0; idx < num_entries_; ++idx)
+ trx_htb_[idx] = NULL;
+
+ sql_print_information("Semi-sync replication initialized for %d "
+ "transactions.", num_transactions_);
+}
+
+ActiveTranx::~ActiveTranx()
+{
+ for (int idx = 0; idx < num_transactions_; ++idx)
+ {
+ delete node_array_[idx].log_name_;
+ node_array_[idx].log_name_ = NULL;
+ }
+
+ delete [] node_array_;
+ delete [] trx_htb_;
+
+ node_array_ = NULL;
+ trx_htb_ = NULL;
+ num_transactions_ = 0;
+ num_entries_ = 0;
+}
+
+unsigned int ActiveTranx::calc_hash(const unsigned char *key,
+ unsigned int length)
+{
+ unsigned int nr = 1, nr2 = 4;
+
+ /* The hash implementation comes from calc_hashnr() in mysys/hash.c. */
+ while (length--)
+ {
+ nr ^= (((nr & 63)+nr2)*((unsigned int) (unsigned char) *key++))+ (nr << 8);
+ nr2 += 3;
+ }
+ return((unsigned int) nr);
+}
+
+unsigned int ActiveTranx::get_hash_value(const char *log_file_name,
+ my_off_t log_file_pos)
+{
+ unsigned int hash1 = calc_hash((const unsigned char *)log_file_name,
+ strlen(log_file_name));
+ unsigned int hash2 = calc_hash((const unsigned char *)(&log_file_pos),
+ sizeof(log_file_pos));
+
+ return (hash1 + hash2) % num_entries_;
+}
+
+ActiveTranx::TranxNode* ActiveTranx::alloc_tranx_node()
+{
+ TranxNode *ptr = free_pool_;
+
+ if (free_pool_)
+ {
+ free_pool_ = free_pool_->next_;
+ ptr->next_ = NULL;
+ ptr->hash_next_ = NULL;
+ }
+
+ return ptr;
+}
+
+int ActiveTranx::compare(const char *log_file_name1, my_off_t log_file_pos1,
+ const char *log_file_name2, my_off_t log_file_pos2)
+{
+ int cmp = strcmp(log_file_name1, log_file_name2);
+
+ if (cmp != 0)
+ return cmp;
+
+ if (log_file_pos1 > log_file_pos2)
+ return 1;
+ else if (log_file_pos1 < log_file_pos2)
+ return -1;
+ return 0;
+}
+
+int ActiveTranx::insert_tranx_node(const char *log_file_name,
+ my_off_t log_file_pos)
+{
+ const char *kWho = "ActiveTranx:insert_tranx_node";
+ TranxNode *ins_node;
+ int result = 0;
+ unsigned int hash_val;
+
+ function_enter(kWho);
+
+ ins_node = alloc_tranx_node();
+ if (!ins_node)
+ {
+ sql_print_error("%s: transaction node allocation failed for: (%s, %lu)",
+ kWho, log_file_name, (unsigned long)log_file_pos);
+ result = -1;
+ goto l_end;
+ }
+
+ /* insert the binlog position in the active transaction list. */
+ strcpy(ins_node->log_name_, log_file_name);
+ ins_node->log_pos_ = log_file_pos;
+
+ if (!trx_front_)
+ {
+ /* The list is empty. */
+ trx_front_ = trx_rear_ = ins_node;
+ }
+ else
+ {
+ int cmp = compare(ins_node, trx_rear_);
+ if (cmp > 0)
+ {
+ /* Compare with the tail first. If the transaction happens later in
+ * binlog, then make it the new tail.
+ */
+ trx_rear_->next_ = ins_node;
+ trx_rear_ = ins_node;
+ }
+ else
+ {
+ /* Otherwise, it is an error because the transaction should hold the
+ * mysql_bin_log.LOCK_log when appending events.
+ */
+ sql_print_error("%s: binlog write out-of-order, tail (%s, %lu), "
+ "new node (%s, %lu)", kWho,
+ trx_rear_->log_name_, (unsigned long)trx_rear_->log_pos_,
+ ins_node->log_name_, (unsigned long)ins_node->log_pos_);
+ result = -1;
+ goto l_end;
+ }
+ }
+
+ hash_val = get_hash_value(ins_node->log_name_, ins_node->log_pos_);
+ ins_node->hash_next_ = trx_htb_[hash_val];
+ trx_htb_[hash_val] = ins_node;
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: insert (%s, %lu) in entry(%u)", kWho,
+ ins_node->log_name_, (unsigned long)ins_node->log_pos_,
+ hash_val);
+
+ l_end:
+ return function_exit(kWho, result);
+}
+
+bool ActiveTranx::is_tranx_end_pos(const char *log_file_name,
+ my_off_t log_file_pos)
+{
+ const char *kWho = "ActiveTranx::is_tranx_end_pos";
+ function_enter(kWho);
+
+ unsigned int hash_val = get_hash_value(log_file_name, log_file_pos);
+ TranxNode *entry = trx_htb_[hash_val];
+
+ while (entry != NULL)
+ {
+ if (compare(entry, log_file_name, log_file_pos) == 0)
+ break;
+
+ entry = entry->hash_next_;
+ }
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: probe (%s, %lu) in entry(%u)", kWho,
+ log_file_name, (unsigned long)log_file_pos, hash_val);
+
+ function_exit(kWho, (entry != NULL));
+ return (entry != NULL);
+}
+
+int ActiveTranx::clear_active_tranx_nodes(const char *log_file_name,
+ my_off_t log_file_pos)
+{
+ const char *kWho = "ActiveTranx::::clear_active_tranx_nodes";
+ TranxNode *new_front;
+
+ function_enter(kWho);
+
+ if (log_file_name != NULL)
+ {
+ new_front = trx_front_;
+
+ while (new_front)
+ {
+ if (compare(new_front, log_file_name, log_file_pos) > 0)
+ break;
+ new_front = new_front->next_;
+ }
+ }
+ else
+ {
+ /* If log_file_name is NULL, clear everything. */
+ new_front = NULL;
+ }
+
+ if (new_front == NULL)
+ {
+ /* No active transaction nodes after the call. */
+
+ /* Clear the hash table. */
+ memset(trx_htb_, 0, num_entries_ * sizeof(TranxNode *));
+
+ /* Clear the active transaction list. */
+ if (trx_front_ != NULL)
+ {
+ trx_rear_->next_ = free_pool_;
+ free_pool_ = trx_front_;
+ trx_front_ = NULL;
+ trx_rear_ = NULL;
+ }
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: free all nodes back to free list", kWho);
+ }
+ else if (new_front != trx_front_)
+ {
+ TranxNode *curr_node, *next_node;
+
+ /* Delete all transaction nodes before the confirmation point. */
+ int n_frees = 0;
+ curr_node = trx_front_;
+ while (curr_node != new_front)
+ {
+ next_node = curr_node->next_;
+
+ /* Put the node in the memory pool. */
+ curr_node->next_ = free_pool_;
+ free_pool_ = curr_node;
+ n_frees++;
+
+ /* Remove the node from the hash table. */
+ unsigned int hash_val = get_hash_value(curr_node->log_name_, curr_node->log_pos_);
+ TranxNode **hash_ptr = &(trx_htb_[hash_val]);
+ while ((*hash_ptr) != NULL)
+ {
+ if ((*hash_ptr) == curr_node)
+ {
+ (*hash_ptr) = curr_node->hash_next_;
+ break;
+ }
+ hash_ptr = &((*hash_ptr)->hash_next_);
+ }
+
+ curr_node = next_node;
+ }
+
+ trx_front_ = new_front;
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: free %d nodes back until pos (%s, %lu)",
+ kWho, n_frees,
+ trx_front_->log_name_, (unsigned long)trx_front_->log_pos_);
+ }
+
+ return function_exit(kWho, 0);
+}
+
+
+/*******************************************************************************
+ *
+ * <ReplSemiSyncMaster> class: the basic code layer for sync-replication master.
+ * <ReplSemiSyncSlave> class: the basic code layer for sync-replication slave.
+ *
+ * The most important functions during semi-syn replication listed:
+ *
+ * Master:
+ * . reportReplyBinlog(): called by the binlog dump thread when it receives
+ * the slave's status information.
+ * . updateSyncHeader(): based on transaction waiting information, decide
+ * whether to request the slave to reply.
+ * . readSlaveReply(): read the slave's sync reply and decide how to
+ * resume the waiting transaction threads.
+ * . writeTraxInBinlog(): called by the transaction thread when it finishes
+ * writing all transaction events in binlog.
+ * . commitTrx(): transaction thread wait for the slave reply.
+ *
+ * Slave:
+ * . slaveReadSyncHeader(): read the semi-sync header from the master, get the
+ * sync status and get the payload for events.
+ * . slaveReply(): reply to the master about the replication progress.
+ *
+ ******************************************************************************/
+
+ReplSemiSyncMaster::ReplSemiSyncMaster()
+ : active_tranxs_(NULL),
+ init_done_(false),
+ reply_file_name_inited_(false),
+ reply_file_pos_(0L),
+ wait_file_name_inited_(false),
+ wait_file_pos_(0),
+ master_enabled_(false),
+ wait_timeout_(0L),
+ state_(0),
+ enabled_transactions_(0),
+ disabled_transactions_(0),
+ switched_off_times_(0),
+ timefunc_fails_(0),
+ wait_sessions_(0),
+ wait_backtraverse_(0),
+ total_trx_wait_num_(0),
+ total_trx_wait_time_(0),
+ total_net_wait_num_(0),
+ total_net_wait_time_(0),
+ max_transactions_(0L)
+{
+ strcpy(reply_file_name_, "");
+ strcpy(wait_file_name_, "");
+}
+
+int ReplSemiSyncMaster::initObject()
+{
+ int result;
+ const char *kWho = "ReplSemiSyncMaster::initObject";
+
+ if (init_done_)
+ {
+ fprintf(stderr, "%s called twice\n", kWho);
+ return 1;
+ }
+ init_done_ = true;
+
+ /* References to the parameter works after set_options(). */
+ setWaitTimeout(rpl_semi_sync_timeout);
+ setTraceLevel(rpl_semi_sync_trace_level);
+ max_transactions_ = (int)max_connections;
+
+ /* Mutex initialization can only be done after MY_INIT(). */
+ pthread_mutexattr_t mutexattr;
+ pthread_mutexattr_init(&mutexattr);
+ pthread_mutexattr_settype(&mutexattr,
+ PTHREAD_MUTEX_FAST_NP);
+// pthread_mutex_init(&LOCK_binlog_, MY_MUTEX_INIT_FAST);
+ pthread_mutex_init(&LOCK_binlog_, &mutexattr);
+ pthread_cond_init(&COND_binlog_send_, NULL);
+
+ if (rpl_semi_sync_enabled)
+ result = enableMaster();
+ else
+ result = disableMaster();
+
+ return result;
+}
+
+int ReplSemiSyncMaster::enableMaster()
+{
+ int result = 0;
+
+ /* Must have the lock when we do enable of disable. */
+ lock();
+
+ if (!getMasterEnabled())
+ {
+ active_tranxs_ = new ActiveTranx(max_connections,
+ &LOCK_binlog_,
+ trace_level_);
+ if (active_tranxs_ != NULL)
+ {
+ commit_file_name_inited_ = false;
+ reply_file_name_inited_ = false;
+ wait_file_name_inited_ = false;
+
+ set_master_enabled(true);
+ sql_print_information("Semi-sync replication enabled on the master.");
+ }
+ else
+ {
+ sql_print_information("Semi-sync replication not able to allocate memory.");
+ result = -1;
+ }
+ }
+
+ unlock();
+
+ return result;
+}
+
+int ReplSemiSyncMaster::disableMaster()
+{
+ /* Must have the lock when we do enable of disable. */
+ lock();
+
+ if (getMasterEnabled())
+ {
+ /* Switch off the semi-sync first so that waiting transaction will be
+ * waken up.
+ */
+ switch_off();
+
+ assert(active_tranxs_ != NULL);
+ delete active_tranxs_;
+ active_tranxs_ = NULL;
+
+ reply_file_name_inited_ = false;
+ wait_file_name_inited_ = false;
+ commit_file_name_inited_ = false;
+
+ set_master_enabled(false);
+ sql_print_information("Semi-sync replication disabled on the master.");
+ }
+
+ unlock();
+
+ return 0;
+}
+
+ReplSemiSyncMaster::~ReplSemiSyncMaster()
+{
+ if (init_done_)
+ {
+ pthread_mutex_destroy(&LOCK_binlog_);
+ pthread_cond_destroy(&COND_binlog_send_);
+ }
+
+ delete active_tranxs_;
+}
+
+void ReplSemiSyncMaster::lock()
+{
+ pthread_mutex_lock(&LOCK_binlog_);
+}
+
+void ReplSemiSyncMaster::unlock()
+{
+ pthread_mutex_unlock(&LOCK_binlog_);
+}
+
+void ReplSemiSyncMaster::cond_broadcast()
+{
+ pthread_cond_broadcast(&COND_binlog_send_);
+}
+
+int ReplSemiSyncMaster::cond_timewait(struct timespec *wait_time)
+{
+ const char *kWho = "ReplSemiSyncMaster::cond_timewait()";
+ int wait_res;
+
+ function_enter(kWho);
+ wait_res = pthread_cond_timedwait(&COND_binlog_send_,
+ &LOCK_binlog_, wait_time);
+ return function_exit(kWho, wait_res);
+}
+
+void ReplSemiSyncMaster::add_slave()
+{
+ lock();
+ rpl_semi_sync_clients++;
+ unlock();
+}
+
+void ReplSemiSyncMaster::remove_slave()
+{
+ lock();
+ rpl_semi_sync_clients--;
+ unlock();
+}
+
+bool ReplSemiSyncMaster::is_semi_sync_slave()
+{
+ int null_value;
+ long long val= 0;
+ get_user_var_int("rpl_semi_sync_slave", &val, &null_value);
+ return val;
+}
+
+int ReplSemiSyncMaster::reportReplyBinlog(const char *log_file_pos)
+{
+ char log_name[FN_REFLEN];
+ char *endptr;
+ my_off_t log_pos= strtoull(log_file_pos, &endptr, 10);
+ if (!log_pos || !endptr || *endptr != ':' )
+ return 1;
+ endptr++; // skip the ':' seperator
+ strncpy(log_name, endptr, FN_REFLEN);
+ uint32 server_id= 0;
+ return reportReplyBinlog(server_id, log_name, log_pos);
+}
+
+int ReplSemiSyncMaster::reportReplyBinlog(uint32 server_id,
+ const char *log_file_name,
+ my_off_t log_file_pos)
+{
+ const char *kWho = "ReplSemiSyncMaster::reportReplyBinlog";
+ int cmp;
+ bool can_release_threads = false;
+ bool need_copy_send_pos = true;
+
+ if (!(getMasterEnabled()))
+ return 0;
+
+ function_enter(kWho);
+
+ lock();
+
+ /* This is the real check inside the mutex. */
+ if (!getMasterEnabled())
+ goto l_end;
+
+ if (!is_on())
+ /* We check to see whether we can switch semi-sync ON. */
+ try_switch_on(server_id, log_file_name, log_file_pos);
+
+ /* The position should increase monotonically, if there is only one
+ * thread sending the binlog to the slave.
+ * In reality, to improve the transaction availability, we allow multiple
+ * sync replication slaves. So, if any one of them get the transaction,
+ * the transaction session in the primary can move forward.
+ */
+ if (reply_file_name_inited_)
+ {
+ cmp = ActiveTranx::compare(log_file_name, log_file_pos,
+ reply_file_name_, reply_file_pos_);
+
+ /* If the requested position is behind the sending binlog position,
+ * would not adjust sending binlog position.
+ * We based on the assumption that there are multiple semi-sync slave,
+ * and at least one of them shou/ld be up to date.
+ * If all semi-sync slaves are behind, at least initially, the primary
+ * can find the situation after the waiting timeout. After that, some
+ * slaves should catch up quickly.
+ */
+ if (cmp < 0)
+ {
+ /* If the position is behind, do not copy it. */
+ need_copy_send_pos = false;
+ }
+ }
+
+ if (need_copy_send_pos)
+ {
+ strcpy(reply_file_name_, log_file_name);
+ reply_file_pos_ = log_file_pos;
+ reply_file_name_inited_ = true;
+
+ /* Remove all active transaction nodes before this point. */
+ assert(active_tranxs_ != NULL);
+ active_tranxs_->clear_active_tranx_nodes(log_file_name, log_file_pos);
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: Got reply at (%s, %lu)", kWho,
+ log_file_name, (unsigned long)log_file_pos);
+ }
+
+ if (wait_sessions_ > 0)
+ {
+ /* Let us check if some of the waiting threads doing a trx
+ * commit can now proceed.
+ */
+ cmp = ActiveTranx::compare(reply_file_name_, reply_file_pos_,
+ wait_file_name_, wait_file_pos_);
+ if (cmp >= 0)
+ {
+ /* Yes, at least one waiting thread can now proceed:
+ * let us release all waiting threads with a broadcast
+ */
+ can_release_threads = true;
+ wait_file_name_inited_ = false;
+ }
+ }
+
+ l_end:
+ unlock();
+
+ if (can_release_threads)
+ {
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: signal all waiting threads.", kWho);
+
+ cond_broadcast();
+ }
+
+ return function_exit(kWho, 0);
+}
+
+int ReplSemiSyncMaster::commitTrx(const char* trx_wait_binlog_name,
+ my_off_t trx_wait_binlog_pos)
+{
+ const char *kWho = "ReplSemiSyncMaster::commitTrx";
+
+ function_enter(kWho);
+
+ if (getMasterEnabled() && trx_wait_binlog_name)
+ {
+ struct timeval start_tv;
+ struct timespec abstime;
+ int wait_result, start_time_err;
+
+ start_time_err = gettimeofday(&start_tv, 0);
+
+ /* Acquire the mutex. */
+ lock();
+
+ /* This is the real check inside the mutex. */
+ if (!getMasterEnabled())
+ goto l_end;
+
+ if (trace_level_ & kTraceDetail)
+ {
+ sql_print_information("%s: wait pos (%s, %lu), repl(%d)\n", kWho,
+ trx_wait_binlog_name, (unsigned long)trx_wait_binlog_pos,
+ (int)is_on());
+ }
+
+ while (is_on())
+ {
+ int cmp = ActiveTranx::compare(reply_file_name_, reply_file_pos_,
+ trx_wait_binlog_name, trx_wait_binlog_pos);
+ if (cmp >= 0)
+ {
+ /* We have already sent the relevant binlog to the slave: no need to
+ * wait here.
+ */
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: Binlog reply is ahead (%s, %lu),",
+ kWho, reply_file_name_, (unsigned long)reply_file_pos_);
+ break;
+ }
+
+ /* Let us update the info about the minimum binlog position of waiting
+ * threads.
+ */
+ if (wait_file_name_inited_)
+ {
+ cmp = ActiveTranx::compare(trx_wait_binlog_name, trx_wait_binlog_pos,
+ wait_file_name_, wait_file_pos_);
+ if (cmp <= 0)
+ {
+ /* This thd has a lower position, let's update the minimum info. */
+ strcpy(wait_file_name_, trx_wait_binlog_name);
+ wait_file_pos_ = trx_wait_binlog_pos;
+
+ wait_backtraverse_++;
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: move back wait position (%s, %lu),",
+ kWho, wait_file_name_, (unsigned long)wait_file_pos_);
+ }
+ }
+ else
+ {
+ strcpy(wait_file_name_, trx_wait_binlog_name);
+ wait_file_pos_ = trx_wait_binlog_pos;
+ wait_file_name_inited_ = true;
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: init wait position (%s, %lu),",
+ kWho, wait_file_name_, (unsigned long)wait_file_pos_);
+ }
+
+ if (start_time_err == 0)
+ {
+ int diff_usecs = start_tv.tv_usec + wait_timeout_ * TIME_THOUSAND;
+
+ /* Calcuate the waiting period. */
+ abstime.tv_sec = start_tv.tv_sec;
+ if (diff_usecs < TIME_MILLION)
+ {
+ abstime.tv_nsec = diff_usecs * TIME_THOUSAND;
+ }
+ else
+ {
+ while (diff_usecs >= TIME_MILLION)
+ {
+ abstime.tv_sec++;
+ diff_usecs -= TIME_MILLION;
+ }
+ abstime.tv_nsec = diff_usecs * TIME_THOUSAND;
+ }
+
+ /* In semi-synchronous replication, we wait until the binlog-dump
+ * thread has received the reply on the relevant binlog segment from the
+ * replication slave.
+ *
+ * Let us suspend this thread to wait on the condition;
+ * when replication has progressed far enough, we will release
+ * these waiting threads.
+ */
+ wait_sessions_++;
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: wait %lu ms for binlog sent (%s, %lu)",
+ kWho, wait_timeout_,
+ wait_file_name_, (unsigned long)wait_file_pos_);
+
+ wait_result = cond_timewait(&abstime);
+ wait_sessions_--;
+
+ if (wait_result != 0)
+ {
+ if (trace_level_ & kTraceGeneral)
+ {
+ /* This is a real wait timeout. */
+ sql_print_warning("Replication semi-sync not sent binlog to "
+ "slave within the timeout %lu ms - OFF.",
+ wait_timeout_);
+ sql_print_warning(" semi-sync up to file %s, position %lu",
+ reply_file_name_, (unsigned long)reply_file_pos_);
+ sql_print_warning(" transaction needs file %s, position %lu",
+ trx_wait_binlog_name, (unsigned long)trx_wait_binlog_pos);
+ }
+ total_wait_timeouts_++;
+
+ /* switch semi-sync off */
+ switch_off();
+ }
+ else
+ {
+ int wait_time;
+
+ wait_time = getWaitTime(start_tv);
+ if (wait_time < 0)
+ {
+ if (trace_level_ & kTraceGeneral)
+ {
+ /* This is a time/gettimeofday function call error. */
+ sql_print_error("Replication semi-sync gettimeofday fail1 at "
+ "wait position (%s, %lu)",
+ trx_wait_binlog_name, (unsigned long)trx_wait_binlog_pos);
+ }
+ timefunc_fails_++;
+ }
+ else
+ {
+ total_trx_wait_num_++;
+ total_trx_wait_time_ += wait_time;
+ }
+ }
+ }
+ else
+ {
+ if (trace_level_ & kTraceGeneral)
+ {
+ /* This is a gettimeofday function call error. */
+ sql_print_error("Replication semi-sync gettimeofday fail2 at "
+ "wait position (%s, %lu)",
+ trx_wait_binlog_name, (unsigned long)trx_wait_binlog_pos);
+ }
+ timefunc_fails_++;
+
+ /* switch semi-sync off */
+ switch_off();
+ }
+ }
+
+ l_end:
+ /* Update the status counter. */
+ if (is_on())
+ enabled_transactions_++;
+ else
+ disabled_transactions_++;
+
+ unlock();
+ }
+
+ return function_exit(kWho, 0);
+}
+
+/* Indicate that semi-sync replication is OFF now.
+ *
+ * What should we do when it is disabled? The problem is that we want
+ * the semi-sync replication enabled again when the slave catches up
+ * later. But, it is not that easy to detect that the slave has caught
+ * up. This is caused by the fact that MySQL's replication protocol is
+ * asynchronous, meaning that if the master does not use the semi-sync
+ * protocol, the slave would not send anything to the master.
+ * Still, if the master is sending (N+1)-th event, we assume that it is
+ * an indicator that the slave has received N-th event and earlier ones.
+ *
+ * If semi-sync is disabled, all transactions still update the wait
+ * position with the last position in binlog. But no transactions will
+ * wait for confirmations and the active transaction list would not be
+ * maintained. In binlog dump thread, updateSyncHeader() checks whether
+ * the current sending event catches up with last wait position. If it
+ * does match, semi-sync will be switched on again.
+ */
+int ReplSemiSyncMaster::switch_off()
+{
+ const char *kWho = "ReplSemiSyncMaster::switch_off";
+ int result;
+
+ function_enter(kWho);
+ state_ = false;
+
+ /* Clear the active transaction list. */
+ assert(active_tranxs_ != NULL);
+ result = active_tranxs_->clear_active_tranx_nodes(NULL, 0);
+
+ switched_off_times_++;
+ wait_file_name_inited_ = false;
+ reply_file_name_inited_ = false;
+ commit_file_name_inited_ = false;
+ cond_broadcast(); /* wake up all waiting threads */
+
+ return function_exit(kWho, result);
+}
+
+int ReplSemiSyncMaster::try_switch_on(int server_id,
+ const char *log_file_name,
+ my_off_t log_file_pos)
+{
+ const char *kWho = "ReplSemiSyncMaster::try_switch_on";
+ bool semi_sync_on = false;
+
+ function_enter(kWho);
+
+ /* If the current sending event's position is larger than or equal to the
+ * 'largest' commit transaction binlog position, the slave is already
+ * catching up now and we can switch semi-sync on here.
+ * If commit_file_name_inited_ indicates there are no recent transactions,
+ * we can enable semi-sync immediately.
+ */
+ if (commit_file_name_inited_)
+ {
+ int cmp = ActiveTranx::compare(log_file_name, log_file_pos,
+ commit_file_name_, commit_file_pos_);
+ semi_sync_on = (cmp >= 0);
+ }
+ else
+ {
+ semi_sync_on = true;
+ }
+
+ if (semi_sync_on)
+ {
+ /* Switch semi-sync replication on. */
+ state_ = true;
+
+ if (trace_level_ & kTraceGeneral)
+ sql_print_information("%s switch semi-sync ON with server(%d) "
+ "at (%s, %lu), repl(%d)",
+ kWho, server_id, log_file_name,
+ (unsigned long)log_file_pos, (int)is_on());
+ }
+
+ return function_exit(kWho, 0);
+}
+
+int ReplSemiSyncMaster::reserveSyncHeader(unsigned char *header,
+ unsigned long size)
+{
+ const char *kWho = "ReplSemiSyncMaster::reserveSyncHeader";
+ function_enter(kWho);
+
+ int hlen=0;
+ if (!is_semi_sync_slave())
+ {
+ hlen= 0;
+ }
+ else
+ {
+ /* No enough space for the extra header, disable semi-sync master */
+ if (sizeof(kSyncHeader) > size)
+ {
+ disableMaster();
+ return 0;
+ }
+
+ /* Set the magic number and the sync status. By default, no sync
+ * is required.
+ */
+ memcpy(header, kSyncHeader, sizeof(kSyncHeader));
+ hlen= sizeof(kSyncHeader);
+ }
+ return function_exit(kWho, hlen);
+}
+
+int ReplSemiSyncMaster::updateSyncHeader(unsigned char *packet,
+ const char *log_file_name,
+ my_off_t log_file_pos,
+ uint32 server_id)
+{
+ const char *kWho = "ReplSemiSyncMaster::updateSyncHeader";
+ int cmp = 0;
+ bool sync = false;
+
+ /* If the semi-sync master is not enabled, or the slave is not a semi-sync
+ * target, do not request replies from the slave.
+ */
+ if (!getMasterEnabled() || !is_semi_sync_slave())
+ {
+ sync = false;
+ return 0;
+ }
+
+ function_enter(kWho);
+
+ lock();
+
+ /* This is the real check inside the mutex. */
+ if (!getMasterEnabled())
+ {
+ sync = false;
+ goto l_end;
+ }
+
+ if (is_on())
+ {
+ /* semi-sync is ON */
+ sync = false; /* No sync unless a transaction is involved. */
+
+ if (reply_file_name_inited_)
+ {
+ cmp = ActiveTranx::compare(log_file_name, log_file_pos,
+ reply_file_name_, reply_file_pos_);
+ if (cmp <= 0)
+ {
+ /* If we have already got the reply for the event, then we do
+ * not need to sync the transaction again.
+ */
+ goto l_end;
+ }
+ }
+
+ if (wait_file_name_inited_)
+ {
+ cmp = ActiveTranx::compare(log_file_name, log_file_pos,
+ wait_file_name_, wait_file_pos_);
+ }
+ else
+ {
+ cmp = 1;
+ }
+
+ /* If we are already waiting for some transaction replies which
+ * are later in binlog, do not wait for this one event.
+ */
+ if (cmp >= 0)
+ {
+ /*
+ * We only wait if the event is a transaction's ending event.
+ */
+ assert(active_tranxs_ != NULL);
+ sync = active_tranxs_->is_tranx_end_pos(log_file_name,
+ log_file_pos);
+ }
+ }
+ else
+ {
+ if (commit_file_name_inited_)
+ {
+ int cmp = ActiveTranx::compare(log_file_name, log_file_pos,
+ commit_file_name_, commit_file_pos_);
+ sync = (cmp >= 0);
+ }
+ else
+ {
+ sync = true;
+ }
+ }
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: server(%d), (%s, %lu) sync(%d), repl(%d)",
+ kWho, server_id, log_file_name,
+ (unsigned long)log_file_pos, sync, (int)is_on());
+
+ l_end:
+ unlock();
+
+ /* We do not need to clear sync flag because we set it to 0 when we
+ * reserve the packet header.
+ */
+ if (sync)
+ (packet)[2] = kPacketFlagSync;
+
+ return function_exit(kWho, 0);
+}
+
+int ReplSemiSyncMaster::writeTranxInBinlog(const char* log_file_name,
+ my_off_t log_file_pos)
+{
+ const char *kWho = "ReplSemiSyncMaster::writeTranxInBinlog";
+ int result = 0;
+
+ function_enter(kWho);
+
+ lock();
+
+ /* This is the real check inside the mutex. */
+ if (!getMasterEnabled())
+ goto l_end;
+
+ /* Update the 'largest' transaction commit position seen so far even
+ * though semi-sync is switched off.
+ * It is much better that we update commit_file_* here, instead of
+ * inside commitTrx(). This is mostly because updateSyncHeader()
+ * will watch for commit_file_* to decide whether to switch semi-sync
+ * on. The detailed reason is explained in function updateSyncHeader().
+ */
+ if (commit_file_name_inited_)
+ {
+ int cmp = ActiveTranx::compare(log_file_name, log_file_pos,
+ commit_file_name_, commit_file_pos_);
+ if (cmp > 0)
+ {
+ /* This is a larger position, let's update the maximum info. */
+ strcpy(commit_file_name_, log_file_name);
+ commit_file_pos_ = log_file_pos;
+ }
+ }
+ else
+ {
+ strcpy(commit_file_name_, log_file_name);
+ commit_file_pos_ = log_file_pos;
+ commit_file_name_inited_ = true;
+ }
+
+ if (is_on())
+ {
+ assert(active_tranxs_ != NULL);
+ result = active_tranxs_->insert_tranx_node(log_file_name, log_file_pos);
+ }
+
+ l_end:
+ unlock();
+
+ return function_exit(kWho, result);
+}
+
+int ReplSemiSyncMaster::resetMaster()
+{
+ const char *kWho = "ReplSemiSyncMaster::resetMaster";
+ int result = 0;
+
+ function_enter(kWho);
+
+
+ lock();
+
+ state_ = getMasterEnabled()? 1 : 0;
+
+ wait_file_name_inited_ = false;
+ reply_file_name_inited_ = false;
+ commit_file_name_inited_ = false;
+
+ enabled_transactions_ = 0;
+ disabled_transactions_ = 0;
+ switched_off_times_ = 0;
+ timefunc_fails_ = 0;
+ wait_sessions_ = 0;
+ wait_backtraverse_ = 0;
+ total_trx_wait_num_ = 0;
+ total_trx_wait_time_ = 0;
+ total_net_wait_num_ = 0;
+ total_net_wait_time_ = 0;
+
+ unlock();
+
+ return function_exit(kWho, result);
+}
+
+void ReplSemiSyncMaster::setExportStats()
+{
+ lock();
+
+ rpl_semi_sync_status = state_ ? 1 : 0;
+ rpl_semi_sync_yes_transactions = enabled_transactions_;
+ rpl_semi_sync_no_transactions = disabled_transactions_;
+ rpl_semi_sync_off_times = switched_off_times_;
+ rpl_semi_sync_timefunc_fails = timefunc_fails_;
+ rpl_semi_sync_num_timeouts = total_wait_timeouts_;
+ rpl_semi_sync_wait_sessions = wait_sessions_;
+ rpl_semi_sync_back_wait_pos = wait_backtraverse_;
+ rpl_semi_sync_trx_wait_num = total_trx_wait_num_;
+ rpl_semi_sync_trx_wait_time =
+ ((total_trx_wait_num_) ?
+ (unsigned long)((double)total_trx_wait_time_ /
+ ((double)total_trx_wait_num_)) : 0);
+ rpl_semi_sync_net_wait_num = total_net_wait_num_;
+ rpl_semi_sync_net_wait_time =
+ ((total_net_wait_num_) ?
+ (unsigned long)((double)total_net_wait_time_ /
+ ((double)total_net_wait_num_)) : 0);
+
+ rpl_semi_sync_net_wait_total_time = total_net_wait_time_;
+ rpl_semi_sync_trx_wait_total_time = total_trx_wait_time_;
+
+ unlock();
+}
+
+/* Get the waiting time given the wait's staring time.
+ *
+ * Return:
+ * >= 0: the waiting time in microsecons(us)
+ * < 0: error in gettimeofday or time back traverse
+ */
+static int getWaitTime(const struct timeval& start_tv)
+{
+ unsigned long long start_usecs, end_usecs;
+ struct timeval end_tv;
+ int end_time_err;
+
+ /* Starting time in microseconds(us). */
+ start_usecs = start_tv.tv_sec * TIME_MILLION + start_tv.tv_usec;
+
+ /* Get the wait time interval. */
+ end_time_err = gettimeofday(&end_tv, 0);
+
+ /* Ending time in microseconds(us). */
+ end_usecs = end_tv.tv_sec * TIME_MILLION + end_tv.tv_usec;
+
+ if (end_time_err != 0 || end_usecs < start_usecs)
+ return -1;
+
+ return (int)(end_usecs - start_usecs);
+}
=== added file 'plugin/semisync/semisync_master.h'
--- a/plugin/semisync/semisync_master.h 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync_master.h 2008-07-14 13:03:54 +0000
@@ -0,0 +1,382 @@
+/* Copyright (C) 2007 Google Inc.
+ Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#ifndef SEMISYNC_MASTER_H
+#define SEMISYNC_MASTER_H
+
+#include "semisync.h"
+
+/**
+ This class manages memory for active transaction list.
+
+ We record each active transaction with a TranxNode. Because each
+ session can only have only one open transaction, the total active
+ transaction nodes can not exceed the maximum sessions. Currently
+ in MySQL, sessions are the same as connections.
+*/
+class ActiveTranx
+ :public Trace {
+private:
+ struct TranxNode {
+ char *log_name_;
+ my_off_t log_pos_;
+ struct TranxNode *next_; /* the next node in the sorted list */
+ struct TranxNode *hash_next_; /* the next node during hash collision */
+ };
+
+ /* The following data structure maintains an active transaction list. */
+ TranxNode *node_array_;
+ TranxNode *free_pool_;
+
+ /* These two record the active transaction list in sort order. */
+ TranxNode *trx_front_, *trx_rear_;
+
+ TranxNode **trx_htb_; /* A hash table on active transactions. */
+
+ int num_transactions_; /* maximum transactions */
+ int num_entries_; /* maximum hash table entries */
+ pthread_mutex_t *lock_; /* mutex lock */
+
+ inline void assert_lock_owner();
+
+ inline TranxNode* alloc_tranx_node();
+
+ inline unsigned int calc_hash(const unsigned char *key,unsigned int length);
+ unsigned int get_hash_value(const char *log_file_name, my_off_t log_file_pos);
+
+ int compare(const char *log_file_name1, my_off_t log_file_pos1,
+ const TranxNode *node2) {
+ return compare(log_file_name1, log_file_pos1,
+ node2->log_name_, node2->log_pos_);
+ }
+ int compare(const TranxNode *node1,
+ const char *log_file_name2, my_off_t log_file_pos2) {
+ return compare(node1->log_name_, node1->log_pos_,
+ log_file_name2, log_file_pos2);
+ }
+ int compare(const TranxNode *node1, const TranxNode *node2) {
+ return compare(node1->log_name_, node1->log_pos_,
+ node2->log_name_, node2->log_pos_);
+ }
+
+public:
+ ActiveTranx(int max_connections, pthread_mutex_t *lock,
+ unsigned long trace_level);
+ ~ActiveTranx();
+
+ /* Insert an active transaction node with the specified position.
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int insert_tranx_node(const char *log_file_name, my_off_t log_file_pos);
+
+ /* Clear the active transaction nodes until(inclusive) the specified
+ * position.
+ * If log_file_name is NULL, everything will be cleared: the sorted
+ * list and the hash table will be reset to empty.
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int clear_active_tranx_nodes(const char *log_file_name,
+ my_off_t log_file_pos);
+
+ /* Given a position, check to see whether the position is an active
+ * transaction's ending position by probing the hash table.
+ */
+ bool is_tranx_end_pos(const char *log_file_name, my_off_t log_file_pos);
+
+ /* Given two binlog positions, compare which one is bigger based on
+ * (file_name, file_position).
+ */
+ static int compare(const char *log_file_name1, my_off_t log_file_pos1,
+ const char *log_file_name2, my_off_t log_file_pos2);
+
+};
+
+/**
+ The extension class for the master of semi-synchronous replication
+*/
+class ReplSemiSyncMaster
+ :public ReplSemiSyncBase {
+ private:
+ ActiveTranx *active_tranxs_; /* active transaction list: the list will
+ be cleared when semi-sync switches off. */
+
+ /* True when initObject has been called */
+ bool init_done_;
+
+ /* This cond variable is signaled when enough binlog has been sent to slave,
+ * so that a waiting trx can return the 'ok' to the client for a commit.
+ */
+ pthread_cond_t COND_binlog_send_;
+
+ /* Mutex that protects the following state variables and the active
+ * transaction list.
+ * Under no cirumstances we can acquire mysql_bin_log.LOCK_log if we are
+ * already holding LOCK_binlog_ because it can cause deadlocks.
+ */
+ pthread_mutex_t LOCK_binlog_;
+
+ /* This is set to true when reply_file_name_ contains meaningful data. */
+ bool reply_file_name_inited_;
+
+ /* The binlog name up to which we have received replies from any slaves. */
+ char reply_file_name_[FN_REFLEN];
+
+ /* The position in that file up to which we have the reply from any slaves. */
+ my_off_t reply_file_pos_;
+
+ /* This is set to true when we know the 'smallest' wait position. */
+ bool wait_file_name_inited_;
+
+ /* NULL, or the 'smallest' filename that a transaction is waiting for
+ * slave replies.
+ */
+ char wait_file_name_[FN_REFLEN];
+
+ /* The smallest position in that file that a trx is waiting for: the trx
+ * can proceed and send an 'ok' to the client when the master has got the
+ * reply from the slave indicating that it already got the binlog events.
+ */
+ my_off_t wait_file_pos_;
+
+ /* This is set to true when we know the 'largest' transaction commit
+ * position in the binlog file.
+ * We always maintain the position no matter whether semi-sync is switched
+ * on switched off. When a transaction wait timeout occurs, semi-sync will
+ * switch off. Binlog-dump thread can use the three fields to detect when
+ * slaves catch up on replication so that semi-sync can switch on again.
+ */
+ bool commit_file_name_inited_;
+
+ /* The 'largest' binlog filename that a commit transaction is seeing. */
+ char commit_file_name_[FN_REFLEN];
+
+ /* The 'largest' position in that file that a commit transaction is seeing. */
+ my_off_t commit_file_pos_;
+
+ /* All global variables which can be set by parameters. */
+ bool master_enabled_; /* semi-sync is enabled on the master */
+ unsigned long wait_timeout_; /* timeout period(ms) during tranx wait */
+
+ /* All status variables. */
+ bool state_; /* whether semi-sync is switched */
+ unsigned long enabled_transactions_; /* semi-sync'ed tansactions */
+ unsigned long disabled_transactions_; /* non-semi-sync'ed tansactions */
+ unsigned long switched_off_times_; /* how many times are switched off? */
+ unsigned long timefunc_fails_; /* how many time function fails? */
+ unsigned long total_wait_timeouts_; /* total number of wait timeouts */
+ unsigned long wait_sessions_; /* how many sessions wait for replies? */
+ unsigned long wait_backtraverse_; /* wait position back traverses */
+ unsigned long long total_trx_wait_num_; /* total trx waits: non-timeout ones */
+ unsigned long long total_trx_wait_time_; /* total trx wait time: in us */
+ unsigned long long total_net_wait_num_; /* total network waits */
+ unsigned long long total_net_wait_time_; /* total network wait time */
+
+ /* The number of maximum active transactions. This should be the same as
+ * maximum connections because MySQL does not do connection sharing now.
+ */
+ int max_transactions_;
+
+ void lock();
+ void unlock();
+ void cond_broadcast();
+ int cond_timewait(struct timespec *wait_time);
+
+ /* Is semi-sync replication on? */
+ bool is_on() {
+ return (state_);
+ }
+
+ void set_master_enabled(bool enabled) {
+ master_enabled_ = enabled;
+ }
+
+ /* Switch semi-sync off because of timeout in transaction waiting. */
+ int switch_off();
+
+ /* Switch semi-sync on when slaves catch up. */
+ int try_switch_on(int server_id,
+ const char *log_file_name, my_off_t log_file_pos);
+
+ public:
+ ReplSemiSyncMaster();
+ ~ReplSemiSyncMaster();
+
+ bool getMasterEnabled() {
+ return master_enabled_;
+ }
+ void setTraceLevel(unsigned long trace_level) {
+ trace_level_ = trace_level;
+ if (active_tranxs_)
+ active_tranxs_->trace_level_ = trace_level;
+ }
+
+ /* Set the transaction wait timeout period, in milliseconds. */
+ void setWaitTimeout(unsigned long wait_timeout) {
+ wait_timeout_ = wait_timeout;
+ }
+
+ /* Initialize this class after MySQL parameters are initialized. this
+ * function should be called once at bootstrap time.
+ */
+ int initObject();
+
+ /* Enable the object to enable semi-sync replication inside the master. */
+ int enableMaster();
+
+ /* Enable the object to enable semi-sync replication inside the master. */
+ int disableMaster();
+
+ /* Add a semi-sync replication slave */
+ void add_slave();
+
+ /* Remove a semi-sync replication slave */
+ void remove_slave();
+
+ /* Is the slave servered by the thread requested semi-sync */
+ bool is_semi_sync_slave();
+
+ int reportReplyBinlog(const char *log_file_pos);
+
+ /* In semi-sync replication, reports up to which binlog position we have
+ * received replies from the slave indicating that it already get the events.
+ *
+ * Input:
+ * server_id - (IN) master server id number
+ * log_file_name - (IN) binlog file name
+ * end_offset - (IN) the offset in the binlog file up to which we have
+ * the replies from the slave
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int reportReplyBinlog(uint32 server_id,
+ const char* log_file_name,
+ my_off_t end_offset);
+
+ /* Commit a transaction in the final step. This function is called from
+ * InnoDB before returning from the low commit. If semi-sync is switch on,
+ * the function will wait to see whether binlog-dump thread get the reply for
+ * the events of the transaction. Remember that this is not a direct wait,
+ * instead, it waits to see whether the binlog-dump thread has reached the
+ * point. If the wait times out, semi-sync status will be switched off and
+ * all other transaction would not wait either.
+ *
+ * Input: (the transaction events' ending binlog position)
+ * trx_wait_binlog_name - (IN) ending position's file name
+ * trx_wait_binlog_pos - (IN) ending position's file offset
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int commitTrx(const char* trx_wait_binlog_name,
+ my_off_t trx_wait_binlog_pos);
+
+ /* Reserve space in the replication event packet header:
+ * . slave semi-sync off: 1 byte - (0)
+ * . slave semi-sync on: 3 byte - (0, 0xef, 0/1}
+ *
+ * Input:
+ * header - (IN) the header buffer
+ * size - (IN) size of the header buffer
+ *
+ * Return:
+ * size of the bytes reserved for header
+ */
+ int reserveSyncHeader(unsigned char *header, unsigned long size);
+
+ /* Update the sync bit in the packet header to indicate to the slave whether
+ * the master will wait for the reply of the event. If semi-sync is switched
+ * off and we detect that the slave is catching up, we switch semi-sync on.
+ *
+ * Input:
+ * packet - (IN) the packet containing the replication event
+ * log_file_name - (IN) the event ending position's file name
+ * log_file_pos - (IN) the event ending position's file offset
+ * server_id - (IN) master server id number
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int updateSyncHeader(unsigned char *packet,
+ const char *log_file_name,
+ my_off_t log_file_pos,
+ uint32 server_id);
+
+ /* Read the slave's reply so that we know how much progress the slave makes
+ * on receive replication events.
+ *
+ * Input:
+ * server_id - (IN) master server id number
+ * read_errmsg - (OUT) error message if an error occurs
+ * read_errno - (OUT) error number if an error occurs
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int readSlaveReply(uint32 server_id, const char **read_errmsg,
+ int *read_errno);
+
+ /* Called when a transaction finished writing binlog events.
+ * . update the 'largest' transactions' binlog event position
+ * . insert the ending position in the active transaction list if
+ * semi-sync is on
+ *
+ * Input: (the transaction events' ending binlog position)
+ * log_file_name - (IN) transaction ending position's file name
+ * log_file_pos - (IN) transaction ending position's file offset
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int writeTranxInBinlog(const char* log_file_name, my_off_t log_file_pos);
+
+ /* Export internal statistics for semi-sync replication. */
+ void setExportStats();
+
+ /* 'reset master' command is issued from the user and semi-sync need to
+ * go off for that.
+ */
+ int resetMaster();
+};
+
+extern bool semi_sync_slave;
+extern bool semi_sync_need_sync;
+
+extern char rpl_semi_sync_enabled;
+extern unsigned long rpl_semi_sync_timeout;
+extern unsigned long rpl_semi_sync_trace_level;
+extern unsigned long rpl_semi_sync_status;
+extern unsigned long rpl_semi_sync_yes_transactions;
+extern unsigned long rpl_semi_sync_no_transactions;
+extern unsigned long rpl_semi_sync_off_times;
+extern unsigned long rpl_semi_sync_timefunc_fails;
+extern unsigned long rpl_semi_sync_num_timeouts;
+extern unsigned long rpl_semi_sync_wait_sessions;
+extern unsigned long rpl_semi_sync_back_wait_pos;
+extern unsigned long rpl_semi_sync_trx_wait_time;
+extern unsigned long rpl_semi_sync_net_wait_time;
+extern unsigned long long rpl_semi_sync_net_wait_num;
+extern unsigned long long rpl_semi_sync_trx_wait_num;
+extern unsigned long long rpl_semi_sync_net_wait_total_time;
+extern unsigned long long rpl_semi_sync_trx_wait_total_time;
+extern unsigned long rpl_semi_sync_clients;
+
+#endif /* SEMISYNC_MASTER_H */
=== added file 'plugin/semisync/semisync_master_plugin.cc'
--- a/plugin/semisync/semisync_master_plugin.cc 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync_master_plugin.cc 2008-07-14 13:03:54 +0000
@@ -0,0 +1,373 @@
+/* Copyright (C) 2007 Google Inc.
+ Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#include "semisync_master.h"
+
+ReplSemiSyncMaster repl_semisync;
+
+int repl_semi_report_binlog_update(Binlog_storage_param *param,
+ const char *log_file,
+ my_off_t log_pos, uint32 flags)
+{
+ int error= 0;
+
+ if (repl_semisync.getMasterEnabled())
+ {
+ /*
+ Let us store the binlog file name and the position, so that
+ we know how long to wait for the binlog to the replicated to
+ the slave in synchronous replication.
+ */
+ error= repl_semisync.writeTranxInBinlog(log_file,
+ log_pos);
+ }
+
+ return error;
+}
+
+int repl_semi_request_commit(Trans_param *param)
+{
+ return 0;
+}
+
+int repl_semi_report_commit(Trans_param *param)
+{
+
+ bool is_real_trans= param->flags & TRANS_IS_REAL_TRANS;
+
+ if (is_real_trans && param->log_pos)
+ {
+ const char *binlog_name= param->log_file;
+ return repl_semisync.commitTrx(binlog_name, param->log_pos);
+ }
+ return 0;
+}
+
+int repl_semi_report_rollback(Trans_param *param)
+{
+ return repl_semi_report_commit(param);
+}
+
+int repl_semi_binlog_dump_start(Binlog_transmit_param *param,
+ const char *log_file,
+ my_off_t log_pos)
+{
+ bool semi_sync_slave= repl_semisync.is_semi_sync_slave();
+
+ if (semi_sync_slave)
+ /* One more semi-sync slave */
+ repl_semisync.add_slave();
+ sql_print_information("Start %s binlog_dump to slave_server(%d), pos(%s, %lu)",
+ semi_sync_slave ? "semi-sync" : "asynchronous",
+ param->server_id, log_file, (unsigned long)log_pos);
+
+ return 0;
+}
+
+int repl_semi_binlog_dump_end(Binlog_transmit_param *param)
+{
+ if (repl_semisync.is_semi_sync_slave())
+ {
+ /* One less semi-sync slave */
+ repl_semisync.remove_slave();
+ }
+ return 0;
+}
+
+int repl_semi_reserve_header(Binlog_transmit_param *param,
+ unsigned char *header,
+ unsigned long size, unsigned long *len)
+{
+ *len += repl_semisync.reserveSyncHeader(header, size);
+ return 0;
+}
+
+int repl_semi_before_send_event(Binlog_transmit_param *param,
+ unsigned char *packet, unsigned long len,
+ const char *log_file, my_off_t log_pos)
+{
+ return repl_semisync.updateSyncHeader(packet,
+ log_file,
+ log_pos,
+ param->server_id);
+}
+
+int repl_semi_after_send_event(Binlog_transmit_param *param,
+ const char *event_buf, unsigned long len)
+{
+ return 0;
+}
+
+int repl_semi_reset_master(Binlog_transmit_param *param)
+{
+ if (repl_semisync.resetMaster())
+ return 1;
+ return 0;
+}
+
+static char *rpl_semi_sync_reply_log_file_pos;
+
+/*
+ semisync system variables
+ */
+static void fix_rpl_semi_sync_timeout(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val);
+
+static void fix_rpl_semi_sync_trace_level(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val);
+
+static void fix_rpl_semi_sync_enabled(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val);
+
+static void fix_rpl_semi_sync_reply_log_file_pos(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val);
+
+static MYSQL_SYSVAR_BOOL(enabled, rpl_semi_sync_enabled,
+ PLUGIN_VAR_OPCMDARG,
+ "Enable semi-synchronous replication master (enabled by default). ",
+ NULL, // check
+ &fix_rpl_semi_sync_enabled, // update
+ 1);
+
+static MYSQL_SYSVAR_ULONG(timeout, rpl_semi_sync_timeout,
+ PLUGIN_VAR_OPCMDARG,
+ "The timeout value (in ms) for semi-synchronous replication in the master",
+ NULL, // check
+ fix_rpl_semi_sync_timeout, // update
+ 10, 0, ~0L, 1);
+
+static MYSQL_SYSVAR_ULONG(trace_level, rpl_semi_sync_trace_level,
+ PLUGIN_VAR_OPCMDARG,
+ "The tracing level for semi-sync replication.",
+ NULL, // check
+ &fix_rpl_semi_sync_trace_level, // update
+ 32, 0, ~0L, 1);
+
+static MYSQL_SYSVAR_STR(reply_log_file_pos, rpl_semi_sync_reply_log_file_pos,
+ PLUGIN_VAR_NOCMDOPT,
+ "The log filename and position slave has queued to relay log.",
+ NULL, // check
+ &fix_rpl_semi_sync_reply_log_file_pos,
+ "");
+
+static SYS_VAR* semi_sync_master_system_vars[]= {
+ MYSQL_SYSVAR(enabled),
+ MYSQL_SYSVAR(timeout),
+ MYSQL_SYSVAR(trace_level),
+ MYSQL_SYSVAR(reply_log_file_pos),
+ NULL,
+};
+
+
+static void fix_rpl_semi_sync_timeout(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val)
+{
+ *(unsigned long *)ptr= *(unsigned long *)val;
+ repl_semisync.setWaitTimeout(rpl_semi_sync_timeout);
+ return;
+}
+
+static void fix_rpl_semi_sync_trace_level(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val)
+{
+ *(unsigned long *)ptr= *(unsigned long *)val;
+ repl_semisync.setTraceLevel(rpl_semi_sync_trace_level);
+ return;
+}
+
+static void fix_rpl_semi_sync_enabled(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val)
+{
+ *(char *)ptr= *(char *)val;
+ if (rpl_semi_sync_enabled)
+ {
+ if (repl_semisync.enableMaster() != 0)
+ rpl_semi_sync_enabled = false;
+ }
+ else
+ {
+ if (repl_semisync.disableMaster() != 0)
+ rpl_semi_sync_enabled = true;
+ }
+
+ return;
+}
+
+static void fix_rpl_semi_sync_reply_log_file_pos(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val)
+{
+ const char *log_file_pos= *(char **)val;
+
+ if (repl_semisync.reportReplyBinlog(log_file_pos))
+ sql_print_error("report slave binlog reply failed.");
+
+ return;
+}
+
+Trans_observer trans_observer = {
+ sizeof(Trans_observer), // len
+
+ repl_semi_report_commit, // after_commit
+ repl_semi_report_rollback, // after_rollback
+};
+
+Binlog_storage_observer storage_observer = {
+ sizeof(Binlog_storage_observer), // len
+
+ repl_semi_report_binlog_update, // report_update
+};
+
+Binlog_transmit_observer transmit_observer = {
+ sizeof(Binlog_transmit_observer), // len
+
+ repl_semi_binlog_dump_start, // start
+ repl_semi_binlog_dump_end, // stop
+ repl_semi_reserve_header, // reserve_header
+ repl_semi_before_send_event, // before_send_event
+ repl_semi_after_send_event, // after_send_event
+ repl_semi_reset_master, // reset
+};
+
+
+#define SHOW_FNAME(name) \
+ rpl_semi_sync_show_##name
+
+#define DEF_SHOW_FUNC(name, show_type) \
+ static int SHOW_FNAME(name)(MYSQL_THD thd, SHOW_VAR *var, char *buff) \
+ { \
+ repl_semisync.setExportStats(); \
+ var->type= show_type; \
+ var->value= (char *)&rpl_semi_sync_##name; \
+ return 0; \
+ }
+
+DEF_SHOW_FUNC(clients, SHOW_LONG)
+DEF_SHOW_FUNC(net_wait_time, SHOW_LONG)
+DEF_SHOW_FUNC(net_wait_total_time, SHOW_LONGLONG)
+DEF_SHOW_FUNC(net_wait_num, SHOW_LONGLONG)
+DEF_SHOW_FUNC(off_times, SHOW_LONG)
+DEF_SHOW_FUNC(no_transactions, SHOW_LONG)
+DEF_SHOW_FUNC(status, SHOW_LONG)
+DEF_SHOW_FUNC(timefunc_fails, SHOW_LONG)
+DEF_SHOW_FUNC(trx_wait_time, SHOW_LONG)
+DEF_SHOW_FUNC(trx_wait_total_time, SHOW_LONGLONG)
+DEF_SHOW_FUNC(trx_wait_num, SHOW_LONGLONG)
+DEF_SHOW_FUNC(back_wait_pos, SHOW_LONG)
+DEF_SHOW_FUNC(wait_sessions, SHOW_LONG)
+DEF_SHOW_FUNC(yes_transactions, SHOW_LONG)
+
+
+/* plugin status variables */
+static SHOW_VAR semi_sync_master_status_vars[]= {
+ {"Rpl_semi_sync_clients", (char*) &SHOW_FNAME(clients), SHOW_FUNC},
+ {"Rpl_semi_sync_net_avg_wait_time(us)",
+ (char*) &SHOW_FNAME(net_wait_time), SHOW_FUNC},
+ {"Rpl_semi_sync_net_wait_time",
+ (char*) &SHOW_FNAME(net_wait_total_time), SHOW_FUNC},
+ {"Rpl_semi_sync_net_waits", (char*) &SHOW_FNAME(net_wait_num), SHOW_FUNC},
+ {"Rpl_semi_sync_no_times", (char*) &SHOW_FNAME(off_times), SHOW_FUNC},
+ {"Rpl_semi_sync_no_tx", (char*) &SHOW_FNAME(no_transactions), SHOW_FUNC},
+ {"Rpl_semi_sync_status", (char*) &SHOW_FNAME(status), SHOW_FUNC},
+ {"Rpl_semi_sync_timefunc_failures",
+ (char*) &SHOW_FNAME(timefunc_fails), SHOW_FUNC},
+ {"Rpl_semi_sync_tx_avg_wait_time(us)",
+ (char*) &SHOW_FNAME(trx_wait_time), SHOW_FUNC},
+ {"Rpl_semi_sync_tx_wait_time",
+ (char*) &SHOW_FNAME(trx_wait_total_time), SHOW_FUNC},
+ {"Rpl_semi_sync_tx_waits", (char*) &SHOW_FNAME(trx_wait_num), SHOW_FUNC},
+ {"Rpl_semi_sync_wait_pos_backtraverse",
+ (char*) &SHOW_FNAME(back_wait_pos), SHOW_FUNC},
+ {"Rpl_semi_sync_wait_sessions",
+ (char*) &SHOW_FNAME(wait_sessions), SHOW_FUNC},
+ {"Rpl_semi_sync_yes_tx", (char*) &SHOW_FNAME(yes_transactions), SHOW_FUNC},
+ {NULL, NULL, SHOW_LONG},
+};
+
+
+static int semi_sync_master_plugin_init(void *p)
+{
+ if (repl_semisync.initObject())
+ return 1;
+ if (register_trans_observer(&trans_observer, p))
+ return 1;
+ if (register_binlog_storage_observer(&storage_observer, p))
+ return 1;
+ if (register_binlog_transmit_observer(&transmit_observer, p))
+ return 1;
+ return 0;
+}
+
+static int semi_sync_master_plugin_deinit(void *p)
+{
+ if (unregister_trans_observer(&trans_observer, p))
+ {
+ sql_print_error("unregister_trans_observer failed");
+ return 1;
+ }
+ if (unregister_binlog_storage_observer(&storage_observer, p))
+ {
+ sql_print_error("unregister_binlog_storage_observer failed");
+ return 1;
+ }
+ if (unregister_binlog_transmit_observer(&transmit_observer, p))
+ {
+ sql_print_error("unregister_binlog_transmit_observer failed");
+ return 1;
+ }
+ sql_print_information("unregister_replicator OK");
+ return 0;
+}
+
+struct Mysql_replication semi_sync_master_plugin= {
+ MYSQL_REPLICATION_INTERFACE_VERSION
+};
+
+/*
+ Plugin library descriptor
+*/
+mysql_declare_plugin(semi_sync_master)
+{
+ MYSQL_REPLICATION_PLUGIN,
+ &semi_sync_master_plugin,
+ "rpl_semi_sync",
+ "He Zhenxing",
+ "Semi-synchronous replication master",
+ PLUGIN_LICENSE_GPL,
+ semi_sync_master_plugin_init, /* Plugin Init */
+ semi_sync_master_plugin_deinit, /* Plugin Deinit */
+ 0x0100 /* 1.0 */,
+ semi_sync_master_status_vars, /* status variables */
+ semi_sync_master_system_vars, /* system variables */
+ NULL /* config options */
+}
+mysql_declare_plugin_end;
=== added file 'plugin/semisync/semisync_slave.cc'
--- a/plugin/semisync/semisync_slave.cc 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync_slave.cc 2008-07-14 13:03:54 +0000
@@ -0,0 +1,116 @@
+/* Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#include "semisync_slave.h"
+
+bool semi_sync_need_reply= false;
+
+char rpl_semi_sync_slave_enabled;
+unsigned long rpl_semi_sync_slave_status= 0;
+unsigned long rpl_semi_sync_slave_trace_level;
+
+int ReplSemiSyncSlave::initObject()
+{
+ int result= 0;
+ const char *kWho = "ReplSemiSyncSlave::initObject";
+
+ if (init_done_)
+ {
+ fprintf(stderr, "%s called twice\n", kWho);
+ return 1;
+ }
+ init_done_ = true;
+
+ /* References to the parameter works after set_options(). */
+ setSlaveEnabled(rpl_semi_sync_slave_enabled);
+ setTraceLevel(rpl_semi_sync_slave_trace_level);
+
+ return result;
+}
+
+int ReplSemiSyncSlave::slaveReadSyncHeader(const char *header,
+ unsigned long total_len,
+ bool *need_reply,
+ const char **payload,
+ unsigned long *payload_len)
+{
+ const char *kWho = "ReplSemiSyncSlave::slaveReadSyncHeader";
+ int read_res = 0;
+ function_enter(kWho);
+
+ if ((unsigned char)(header[0]) == kPacketMagicNum)
+ {
+ *need_reply = (header[1] & kPacketFlagSync);
+ *payload_len = total_len - 2;
+ *payload = header + 2;
+
+ if (trace_level_ & kTraceDetail)
+ sql_print_information("%s: reply - %d", kWho, *need_reply);
+ }
+ else
+ {
+ sql_print_error("Missing magic number for semi-sync packet, packet "
+ "len: %lu", total_len);
+ read_res = -1;
+ }
+
+ return function_exit(kWho, read_res);
+}
+
+int ReplSemiSyncSlave::slaveStart(Binlog_relay_IO_param *param)
+{
+ bool semi_sync= getSlaveEnabled();
+
+ sql_print_information("Slave I/O thread: Start %s replication to\
+ master '%s@%s:%d' in log '%s' at position %lu",
+ semi_sync ? "semi-sync" : "asynchronous",
+ param->user, param->host, param->port,
+ param->master_log_name[0] ? param->master_log_name : "FIRST",
+ (unsigned long)param->master_log_pos);
+
+ if (semi_sync && !rpl_semi_sync_slave_status)
+ rpl_semi_sync_slave_status= 1;
+ if (!(mysql= rpl_connect_master(NULL)))
+ return 1;
+ return 0;
+}
+
+int ReplSemiSyncSlave::slaveStop(Binlog_relay_IO_param *param)
+{
+ if (rpl_semi_sync_slave_status)
+ rpl_semi_sync_slave_status= 0;
+ if (mysql)
+ mysql_close(mysql);
+ mysql= 0;
+ return 0;
+}
+
+int ReplSemiSyncSlave::slaveReply(const char *log_name, my_off_t log_pos)
+{
+ char query[FN_REFLEN + 100];
+ sprintf(query, "SET GLOBAL rpl_semi_sync_reply_log_file_pos='%llu:%s'",
+ (unsigned long long)log_pos, log_name);
+ if (mysql_real_query(mysql, query, strlen(query)))
+ {
+ sql_print_error("Set 'rpl_semi_sync_reply_log_file' on master failed");
+ mysql_free_result(mysql_store_result(mysql));
+ mysql_close(mysql);
+ mysql= 0;
+ return 1;
+ }
+ mysql_free_result(mysql_store_result(mysql));
+ return 0;
+}
=== added file 'plugin/semisync/semisync_slave.h'
--- a/plugin/semisync/semisync_slave.h 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync_slave.h 2008-07-14 13:03:54 +0000
@@ -0,0 +1,99 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#ifndef SEMISYNC_SLAVE_H
+#define SEMISYNC_SLAVE_H
+
+#include "semisync.h"
+
+/**
+ The extension class for the slave of semi-synchronous replication
+*/
+class ReplSemiSyncSlave
+ :public ReplSemiSyncBase {
+public:
+ ReplSemiSyncSlave()
+ :slave_enabled_(false)
+ {}
+ ~ReplSemiSyncSlave() {}
+
+ void setTraceLevel(unsigned long trace_level) {
+ trace_level_ = trace_level;
+ }
+
+ /* Initialize this class after MySQL parameters are initialized. this
+ * function should be called once at bootstrap time.
+ */
+ int initObject();
+
+ bool getSlaveEnabled() {
+ return slave_enabled_;
+ }
+ void setSlaveEnabled(bool enabled) {
+ slave_enabled_ = enabled;
+ }
+
+ /* A slave reads the semi-sync packet header and separate the metadata
+ * from the payload data.
+ *
+ * Input:
+ * header - (IN) packet header pointer
+ * total_len - (IN) total packet length: metadata + payload
+ * need_reply - (IN) whether the master is waiting for the reply
+ * payload - (IN) payload: the replication event
+ * payload_len - (IN) payload length
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+ int slaveReadSyncHeader(const char *header, unsigned long total_len, bool *need_reply,
+ const char **payload, unsigned long *payload_len);
+
+ /* A slave replies to the master indicating its replication process. It
+ * indicates that the slave has received all events before the specified
+ * binlog position.
+ *
+ * Input:
+ * mysql - (IN) the mysql network connection
+ * binlog_filename - (IN) the reply point's binlog file name
+ * binlog_filepos - (IN) the reply point's binlog file offset
+ *
+ * Return:
+ * 0: success; -1 or otherwise: error
+ */
+/* int slaveReply(MYSQL *mysql, const char *binlog_filename, */
+/* my_off_t binlog_filepos); */
+
+ int slaveReply(const char *log_name, my_off_t log_pos);
+
+ int slaveStart(Binlog_relay_IO_param *param);
+ int slaveStop(Binlog_relay_IO_param *param);
+
+private:
+ /* True when initObject has been called */
+ bool init_done_;
+ bool slave_enabled_; /* semi-sycn is enabled on the slave */
+ MYSQL *mysql; /* connection to send reply */
+};
+
+
+extern bool semi_sync_need_reply;
+
+extern char rpl_semi_sync_slave_enabled;
+extern unsigned long rpl_semi_sync_slave_trace_level;
+extern unsigned long rpl_semi_sync_slave_status;
+
+#endif /* SEMISYNC_SLAVE_H */
=== added file 'plugin/semisync/semisync_slave_plugin.cc'
--- a/plugin/semisync/semisync_slave_plugin.cc 1970-01-01 00:00:00 +0000
+++ b/plugin/semisync/semisync_slave_plugin.cc 2008-07-14 13:03:54 +0000
@@ -0,0 +1,180 @@
+/* Copyright (C) 2007 Google Inc.
+ Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#include "semisync_slave.h"
+
+ReplSemiSyncSlave repl_semisync;
+
+int repl_semi_reset_slave(Binlog_relay_IO_param *param)
+{
+ // TODO: reset semi-sync slave status here
+ return 0;
+}
+
+int repl_semi_slave_request_dump(Binlog_relay_IO_param *param,
+ uint32 flags)
+{
+ MYSQL *mysql= param->mysql;
+ if (!repl_semisync.getSlaveEnabled())
+ return 0;
+
+ const char query[]= "SET @rpl_semi_sync_slave= 1";
+ if (mysql_real_query(mysql, query, strlen(query)))
+ {
+ sql_print_error("Set 'rpl_semi_sync_slave=1' on master failed");
+ mysql_free_result(mysql_store_result(mysql));
+ return 1;
+ }
+ mysql_free_result(mysql_store_result(mysql));
+ return 0;
+}
+
+int repl_semi_slave_read_event(Binlog_relay_IO_param *param,
+ const char *packet, unsigned long len,
+ const char **event_buf, unsigned long *event_len)
+{
+ if (repl_semisync.getSlaveEnabled())
+ return repl_semisync.slaveReadSyncHeader(packet, len,
+ &semi_sync_need_reply,
+ event_buf, event_len);
+ *event_buf= packet;
+ *event_len= len;
+ return 0;
+}
+
+int repl_semi_slave_queue_event(Binlog_relay_IO_param *param,
+ const char *event_buf,
+ unsigned long event_len,
+ uint32 flags)
+{
+ if (semi_sync_need_reply)
+ return repl_semisync.slaveReply(param->master_log_name,
+ param->master_log_pos);
+ return 0;
+}
+
+int repl_semi_slave_io_start(Binlog_relay_IO_param *param)
+{
+ return repl_semisync.slaveStart(param);
+}
+
+int repl_semi_slave_io_end(Binlog_relay_IO_param *param)
+{
+ return repl_semisync.slaveStop(param);
+}
+
+
+static void fix_rpl_semi_sync_slave_enabled(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val)
+{
+ *(char *)ptr= *(char *)val;
+ repl_semisync.setSlaveEnabled(rpl_semi_sync_slave_enabled != 0);
+ return;
+}
+
+static void fix_rpl_semi_sync_trace_level(MYSQL_THD thd,
+ SYS_VAR *var,
+ void *ptr,
+ const void *val)
+{
+ *(unsigned long *)ptr= *(unsigned long *)val;
+ repl_semisync.setTraceLevel(rpl_semi_sync_slave_trace_level);
+ return;
+}
+
+/* plugin system variables */
+static MYSQL_SYSVAR_BOOL(enabled, rpl_semi_sync_slave_enabled,
+ PLUGIN_VAR_OPCMDARG,
+ "Enable semi-synchronous replication slave (enabled by default). ",
+ NULL, // check
+ &fix_rpl_semi_sync_slave_enabled, // update
+ 1);
+
+static MYSQL_SYSVAR_ULONG(trace_level, rpl_semi_sync_slave_trace_level,
+ PLUGIN_VAR_OPCMDARG,
+ "The tracing level for semi-sync replication.",
+ NULL, // check
+ &fix_rpl_semi_sync_trace_level, // update
+ 32, 0, ~0L, 1);
+
+static SYS_VAR* semi_sync_slave_system_vars[]= {
+ MYSQL_SYSVAR(enabled),
+ MYSQL_SYSVAR(trace_level),
+ NULL,
+};
+
+
+/* plugin status variables */
+static SHOW_VAR semi_sync_slave_status_vars[]= {
+ {"Rpl_semi_sync_slave_status",
+ (char*) &rpl_semi_sync_slave_status, SHOW_LONG},
+ {NULL, NULL, SHOW_LONG},
+};
+
+Binlog_relay_IO_observer relay_io_observer = {
+ sizeof(Binlog_relay_IO_observer), // len
+
+ repl_semi_slave_io_start, // start
+ repl_semi_slave_io_end, // stop
+ repl_semi_slave_request_dump, // request_transmit
+ repl_semi_slave_read_event, // after_read_event
+ repl_semi_slave_queue_event, // after_queue_event
+ repl_semi_reset_slave, // reset
+};
+
+static int semi_sync_slave_plugin_init(void *p)
+{
+ if (repl_semisync.initObject())
+ return 1;
+ if (register_binlog_relay_io_observer(&relay_io_observer, p))
+ return 1;
+ return 0;
+}
+
+static int semi_sync_slave_plugin_deinit(void *p)
+{
+ if (unregister_binlog_relay_io_observer(&relay_io_observer, p))
+ return 1;
+ return 0;
+}
+
+
+struct Mysql_replication semi_sync_slave_plugin= {
+ MYSQL_REPLICATION_INTERFACE_VERSION
+};
+
+/*
+ Plugin library descriptor
+*/
+mysql_declare_plugin(semi_sync_slave)
+{
+ MYSQL_REPLICATION_PLUGIN,
+ &semi_sync_slave_plugin,
+ "rpl_semi_sync_slave",
+ "He Zhenxing",
+ "Semi-synchronous replication slave",
+ PLUGIN_LICENSE_GPL,
+ semi_sync_slave_plugin_init, /* Plugin Init */
+ semi_sync_slave_plugin_deinit, /* Plugin Deinit */
+ 0x0100 /* 1.0 */,
+ semi_sync_slave_status_vars, /* status variables */
+ semi_sync_slave_system_vars, /* system variables */
+ NULL /* config options */
+}
+mysql_declare_plugin_end;