From: Andrei Elkin Date: June 6 2011 10:51am Subject: bzr commit into mysql-next-mr-wl5569 branch (andrei.elkin:3282) WL#5569 List-Archive: http://lists.mysql.com/commits/138688 Message-Id: <201106061051.p56ApZYc002292@mysql1000.dsl.inet.fi> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============0000510007==" --===============0000510007== MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Content-Disposition: inline #At file:///home/andrei/MySQL/BZR/2a-23May/WL/mysql-next-mr-wl5569/ based on revid:andrei.elkin@stripped 3282 Andrei Elkin 2011-06-06 wl#5569 MTS STOP SLAVE now stops consistently w/o gaps; KILL shall be used for an urgent stop, and an error case behaves like the KILL case. For instance, when a Worker errors out, it sends KILL to Coordinator through THD::awake(), and Coordinator kills the rest by setting a special Worker-running status to killed (which breaks the read-exec loop of a Worker). @ sql/log_event.cc Changing style of computing mts-in-group bool arg into mts_async_exec_by_coordinator(). @ sql/rpl_rli.cc Changing style of computing mts-in-group arg of an if in stmt_done(). @ sql/rpl_rli.h Adding more states to Coordinator's MTS-group view. @ sql/rpl_rli_pdb.cc Relocating notification of a Worker's failure by the Worker into the error-branch of a function releasing common resources (entries of APH hash). The failed Worker tries to awaken Coordinator, which is possibly waiting for the signal. The latter's behaviour in its turn is refined to not enter the waiting phase when it has already been killed. @ sql/rpl_slave.cc sql_slave_killed() now has two flavors of the error branches. STOPped MTS coordinator does not give up too early and waits till its MTS-group state allows that. Notification with kill to Coordinator from the errored-out or killed worker is moved into a function releasing common resources (entries of APH hash). This case designates a hard stop. 
In case of the soft (SLAVE-STOPped) MTS, Coordinator is made to wait for the full completion of Workers' assignments before marking their running status for stopping. modified: sql/log_event.cc sql/rpl_rli.cc sql/rpl_rli.h sql/rpl_rli_pdb.cc sql/rpl_slave.cc === modified file 'sql/log_event.cc' --- a/sql/log_event.cc 2011-06-05 17:01:51 +0000 +++ b/sql/log_event.cc 2011-06-06 10:51:19 +0000 @@ -2581,6 +2581,7 @@ Slave_worker *Log_event::get_slave_worke // special marking for T event of {p,g} B-less group if (num_dbs == OVER_MAX_DBS_IN_EVENT_MTS) mts_do_isolate_event(); + rli->mts_group_status= Relay_log_info::MTS_END_GROUP; ptr_g= (Slave_job_group *) dynamic_array_ptr(&rli->gaq->Q, rli->gaq->assigned_group_index); @@ -2850,7 +2851,7 @@ int Log_event::apply_event(Relay_log_inf if (!(parallel= rli->is_parallel_exec()) || (async_event= mts_async_exec_by_coordinator(::server_id, - rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)) || + rli->mts_group_status != Relay_log_info::MTS_NOT_IN_GROUP)) || (seq_event= mts_sequential_exec())) { if (parallel) === modified file 'sql/rpl_rli.cc' --- a/sql/rpl_rli.cc 2011-06-05 17:01:51 +0000 +++ b/sql/rpl_rli.cc 2011-06-06 10:51:19 +0000 @@ -1020,7 +1020,7 @@ void Relay_log_info::stmt_done(my_off_t while the MyISAM table has already been updated. */ if ((!is_parallel_exec() && is_in_group()) || - mts_group_status == MTS_IN_GROUP) + mts_group_status != MTS_NOT_IN_GROUP) { inc_event_relay_log_pos(); } === modified file 'sql/rpl_rli.h' --- a/sql/rpl_rli.h 2011-06-05 17:01:51 +0000 +++ b/sql/rpl_rli.h 2011-06-06 10:51:19 +0000 @@ -488,8 +488,14 @@ public: */ enum { - MTS_NOT_IN_GROUP, /* not in group includes Single-Threaded-Slave */ - MTS_IN_GROUP /* an event was scheduled to a Worker */ + /* + no new events were scheduled after last synchronization, + includes Single-Threaded-Slave case. 
+ */ + MTS_NOT_IN_GROUP, + MTS_IN_GROUP, /* at least one event was scheduled to a Worker */ + MTS_END_GROUP, /* the last scheduled event is a terminal event */ + MTS_KILLED_GROUP /* Coordinator gave out to reach MTS_END_GROUP */ } mts_group_status; /* most of allocation in the coordinator rli is there */ === modified file 'sql/rpl_rli_pdb.cc' --- a/sql/rpl_rli_pdb.cc 2011-06-05 17:01:51 +0000 +++ b/sql/rpl_rli_pdb.cc 2011-06-06 10:51:19 +0000 @@ -816,6 +816,17 @@ void Slave_worker::slave_worker_ends_gro ep->elements= 0; curr_group_seen_begin= FALSE; + + if (error) + { + mysql_mutex_lock(&slave_worker_hash_lock); + mysql_mutex_lock(&c_rli->info_thd->LOCK_thd_data); + + c_rli->info_thd->awake(THD::KILL_QUERY); // notify Crdn + + mysql_mutex_unlock(&c_rli->info_thd->LOCK_thd_data); + mysql_mutex_unlock(&slave_worker_hash_lock); + } } @@ -1114,7 +1125,7 @@ int wait_for_workers_to_finish(Relay_log continue; } - if (entry->usage > 0) + if (entry->usage > 0 && !thd->killed) { sprintf(wait_info, info_format, entry->worker->id, entry->db); entry->worker= NULL; // mark Worker to signal when usage drops to 0 @@ -1125,7 +1136,7 @@ int wait_for_workers_to_finish(Relay_log thd->exit_cond(proc_info); ret++; - DBUG_ASSERT(entry->usage == 0 || thd->killed || rli->abort_slave); + DBUG_ASSERT(entry->usage == 0 || thd->killed); } else { === modified file 'sql/rpl_slave.cc' --- a/sql/rpl_slave.cc 2011-06-05 17:01:51 +0000 +++ b/sql/rpl_slave.cc 2011-06-06 10:51:19 +0000 @@ -1090,9 +1090,10 @@ static bool sql_slave_killed(THD* thd, R as well. Example: OPTION_KEEP_LOG is set if a temporary table is created or dropped. 
*/ - if ((thd->transaction.all.modified_non_trans_table || - (thd->variables.option_bits & OPTION_KEEP_LOG)) - && rli->is_in_group()) + if ((!rli->is_parallel_exec() && + (thd->transaction.all.modified_non_trans_table || + (thd->variables.option_bits & OPTION_KEEP_LOG)) && rli->is_in_group()) + || (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)) { char msg_stopped[]= "... The slave SQL is stopped, leaving the current group " @@ -1101,6 +1102,14 @@ static bool sql_slave_killed(THD* thd, R "restarting the slave with --slave-exec-mode=IDEMPOTENT, which " "ignores duplicate key, key not found, and similar errors (see " "documentation for details)."; + char msg_stopped_mts[]= + "... The slave Coordinator and Worker threads are stopped, possibly " + "leaving data in inconsistent state. The following restart shall " + "restore consistency automatically. There might be exceptional situations " + "in the recovery caused by combination of non-transactional storage for " + "either of Coordinator or Workers info tables and updating non-transactional " + "data tables or DDL queries. In such cases you have to examine your data " + "(see documentation for details)."; if (rli->abort_slave) { @@ -1137,7 +1146,9 @@ static bool sql_slave_killed(THD* thd, R else { rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, - ER(ER_SLAVE_FATAL_ERROR), msg_stopped); + ER(ER_SLAVE_FATAL_ERROR), + rli->mts_group_status == Relay_log_info::MTS_NOT_IN_GROUP ? 
+ msg_stopped : msg_stopped_mts); } } else @@ -1153,7 +1164,13 @@ static bool sql_slave_killed(THD* thd, R } } if (ret) + { rli->last_event_start_time= 0; + if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP) + { + rli->mts_group_status= Relay_log_info::MTS_KILLED_GROUP; + } + } DBUG_RETURN(ret); } @@ -3798,14 +3815,7 @@ pthread_handler_t handle_slave_worker(vo { error= slave_worker_exec_job(w, rli); } - w->cleanup_context(thd, error); - if (error) - { - mysql_mutex_lock(&rli->info_thd->LOCK_thd_data); - rli->info_thd->awake(THD::KILL_QUERY); // notify Crdn - mysql_mutex_unlock(&rli->info_thd->LOCK_thd_data); - } mysql_mutex_lock(&w->jobs_lock); @@ -4293,9 +4303,12 @@ err: /* Ending Worker threads. + Not in case Coordinator is killed itself, it first waits for + Workers have finished their assignements, and then updates checkpoint. Workers are notified with setting KILLED status and waited for their acknowledgment as specified by worker's running_status. + Coordinator finalizes with its MTS running status to reset few objects. */ void slave_stop_workers(Relay_log_info *rli) { @@ -4306,11 +4319,21 @@ void slave_stop_workers(Relay_log_info * return; /* - this is the soft stop. In order for waiting be successful Coordinator - needs (*TODO*) to guarantee Workers were assigned with full groups. + In case of the "soft" graceful stop Coordinator + guaranteed Workers were assigned with full groups so waiting + will be resultful. + "Hard" stop with KILLing Coordinator or erroring out by a Worker + can't wait for Workers' completion because those may not receive + commit-events of last assigned groups. 
*/ - // (void) wait_for_workers_to_finish(rli); + if (rli->mts_group_status != Relay_log_info::MTS_KILLED_GROUP && + thd->killed == THD::NOT_KILLED) + { + DBUG_ASSERT(rli->mts_group_status != Relay_log_info::MTS_IN_GROUP); + (void) wait_for_workers_to_finish(rli); + (void) mts_checkpoint_routine(rli, 0, FALSE, FALSE); // todo: error branch + } for (i= rli->workers.elements - 1; i >= 0; i--) { Slave_worker *w; --===============0000510007== MIME-Version: 1.0 Content-Type: text/bzr-bundle; charset="us-ascii"; name="bzr/andrei.elkin@stripped" Content-Transfer-Encoding: 7bit Content-Disposition: inline # Bazaar merge directive format 2 (Bazaar 0.90) # revision_id: andrei.elkin@stripped # target_branch: file:///home/andrei/MySQL/BZR/2a-23May/WL/mysql-next-\ # mr-wl5569/ # testament_sha1: 573e01c3d85fe5a8059b918a62bf160a98b74de4 # timestamp: 2011-06-06 13:51:35 +0300 # source_branch: file:///home/andrei/MySQL/BZR/2a-23May/mysql-trunk/ # base_revision_id: andrei.elkin@stripped\ # eo9khhhrzf9op05z # # Begin bundle IyBCYXphYXIgcmV2aXNpb24gYnVuZGxlIHY0CiMKQlpoOTFBWSZTWWmH+kQABtZ/gF8QEAB59/// /+//qr////5gDyvs97eGXe82aNCgB093tblVRmW66CkjpqkqhSXZm2hwJJNQnojVR/iMptJsRKP1 TTzSjTagND1AaAPU9GoeUNqDVPIZDU1T8kap+qe1TRpoYjRkDIAAAAAGgDQBEJPU8qHqeo/VA2po AAyGIxBoAAGjQCQogQ0CZMSPUNNqjyaZSabSM9Rk0jEPUGQGJgc0xGRk0yaAZDRkMmQAAAyNMjQM IZAkkAmgTJoahpomAgIMpp6agGTJ6nqAADRFIPQwhb+y/z7N5hj7l5OzH/eMPGncPJvioeqg9nu/ Q7OcZrHhbdruENj75la3RpNTf8dTnUJNymrb6hXCUNfHqxmN/RbRCpx5s9V0yvc40bfDlWPKhewY XPAMLXCCqKNXH2PTLmzQwfB4uTjyR7sJ5Ouvvl7cQBavNBIBTo2MJBewO8FAdlyOB3g+PbVNL63p f8cLm7rJSBrY5tUdF00HUjp7EYobbaTaBsG/e5/T/wR06emGOFelwaxjTRFOTgwH09GlKQDk9eWW YKtritXCiw1Mno7jykjzMM0GcngTUW07L08Lrzj7cl/tQujm6M2W7HTE4Aq8/XvdzSD9jDaMAYwO E7tq7TMGLTq8rYW03HTSStkqyMiSRNlsy7MdUaJEXzKRzkEpBHoYdom8pi9tYKm/NXmRWp3H0IQE 9Z38iYaR3BSN5vcZJF8/zXF+HGDXt0K0R/oEv2wqqLPGEXBLK7lEEzIisrzK0sG0wJobpBWGZc8q H1OOQRbEm1u/waHFmGWFLxDohjBJCOwZtQg3GPu3zQ6OGqGU6AtazmNWp3GuRsei91tK6bn0RdhE 
RFasJi4X9Nssi4tlGpVCtyym4VyWSIZHBzs3K+U1d2zPrwt6KVtsMC66XuzisJ2US+hgT09/eZvP i6cYm+1FMfSj2Yf6g/4HqQeqfveBAGZkZsg4+KezBuDgbVbfJkE2qrk0k/bM7HpZa3d1w6/qtJOr dTCH50/HZJNBiEz/rCp1UXT9M75t90rf11dAVqyOyuBR5o8KdnLF+ONnishXJi/YJHWupFq0oaYD 7h6Xo+VeLyrwXcBpu3m4b9CtRLRQcsdtkRX4uT0VIVJ13Ht4XyhNn2Qn6rpO8ef3Uyx9mt9wMnJ4 M54CteCWWHtDzHX5mbzsK7baiepjysMGHwYGmgESsCdi0YWLwIHzizCkUe0Z6mKAnCQ0QNR/ZBC8 vpThKaqBJdQEBIgoCWRItjFNxGgJsQvZVejAm6jh337BE2FRoYyfCZOIECvKaD3wVjxYoBwkdMFu NIEK4O4SIRp1Bmll7CjyiV+FbqqIvutrr1yXTugYkyHc25KhJbT5Cqb8bJWTNg7YBwTj6rXUImL7 AGz36cJLts7qCUxUMSsB+iUDaX7UXGrgW5aZaUuHWXYJJeE166bpwVifKA9aw2HRP0J6wDjenK0u o/dcYvLypoz23xkBIgWHwWnIxIGV7yi6F4Jo8y/+s64alNCNdJm34WPHXZSRWy0UIRQhoWyjyRQe Z2JoVpW1ExpVkTYlpeXa2xKw33o3Wjw3DIzXBHOtVVc6pMyNN18EjACUDmHnEeORQ0HCcBAt2qeQ laq7dHVNYYq6xk6Jy/Qm5xPJDkOzxweZ/9RxV6viaZta7YrYoHICEMiCfABu3dfBcEOuU8T9vcg2 6x6i7EhuCrAybMWUUGbVHIlI7l4L1qC6BLxW4DXCuF73TDAmkg3OzobK1d+Cv1WZSLlDtvh5LA67 SmqriGeN8xiVqUyhNUXOZSIHNzOKEkpJ+hC2DsdwnAcYEqLnQ+LRJ0maOwyWavYIbh8D2JqFGMDN GBpvr2tFKwsLDaYkd5HbyUViqqOGY0ajqQzIbWiprWGhFpbh1GbIZjcCsMAahyMrxoFRIZhowaVR lAXyDBc89sXXYxQ8FaJMtl+0Gm8epsLRpizIUo7srXLc1hA7hKJws3m42FDccQIANrjTMPxxAYpz QbgIz0QOjEYzzEMZOxSJAThCJSCQ6KCphdFHaVVwAtTnucGepg2tPWL8vX/bvhCSPp4I1rsR54UC NoMiKepEPeXlYEQikj0nW56x/tWJpVLDCZmyPP3kpOCDmTekvhuNR+EzB8W8DuzAcOqUfVJHFwVu JzxQETjSWrZoAlAevVPIwwWw7yvU1SYPADtI+CkIpZ6AbbI5tVoUvA1243querPdvQxLYF7EYkzM XOdH5CT0WAX1FcbCHCtpicqCUnXEZwOFi/2CMUVizDWf8/KREqqQKTW+f8NpcstvMt4DLaRGbdJF rYW5MnV4T6uw7frR7Lfmrfeg/odiKOqGFbwHIP/H3FwFxcDT+SGvUElKqQ5fegtUsF5x2GJnNGTo AbbZ9eIL5w9h9HJfvD+P+PaZFuxz7YkjWqe8YOY+3bEkwmAfY3yAyTKm8uZM2h99lvFoo1uoqywK I+ED1nyCaJ+WUtR5Zx5/dT2ZaoTP68Nmi7a8kmCBMO1zH1UhF3Qiw3udciA+sGi3Ti9JWxBRKvK5 M4Br2dp8Jc6wiB7vu2I6x3LU+JEiJR+J8SZ4oconxNDwJElGpYRi42Aec5LLp9OQaHSqHo1h8rrO nU922YEtrvaDsCsOc0lEOJmjrXsUbvUfZpatK1yZ9p0gimfjnrOGoL8/T0FhyG7RXGhx/RZy++pS mO0V9CiSK7EkX8/NEqsyXXKpMJAbbEaXGMc88WUZ2AO57kM1Hk038TxwsMihtmA/IMM6UDOHk0JY 
bSbRFIqIrC64hcdCXkJ1lZBdigfTepBuq066i6elJTZ0S/agVm2Oae3jpybuHvf6rntBnSiVmKZE JSpiaOEF8BymEIx74dYiK0xqpnnYWZrSHgl1lPKZ5Ls2HoiKqzJkV6QbtJI9KJeCoydgI0XgM51s a8Vgdq3V42k/vaVbeR7m9/Nmrg6jLm3PV5GaidL1WtDy9G2NndvbLkjF0WHALZed8fKtjc/sdp+I C025JIfeSvMpbN4/ndVkMjj0RgvsCDhopeYDSxE7WcMy5tXvPoi2HlHBYMExdONxdPc0PHkQhjfj GLZWMZiZmUnxHYLyRtdIDf2VvCJKZKCRbJGCBmFb1guVvaO7zQ6EbhLrJnkdo0ihVNE7grkhwAm0 9BwuApyqdhDYLihr1I7/gQ0Xi3cXLVZoxXESggjJxPVO5zhHjAkxhYByZDMCZUQupRO5VQEe4pQk zhcEP8wYiHHpXzdiKLl5jW+txOU9GtNVsR3qywqxL7CZjyMzl8N5AD5xtg2cLjDNFU3stp0LtAzg W2W9KkuZYBq1XWhGEUdYIckhkM4cjjRvJbCPEtjr1Gxb+lkk05KmnE1GbvFAZvRgLVZ/MrUI7J4y SHjyr2D5sR6SNl1mmCS/7ZMCVZ6xrUD51L4N8Npufgwb21rUywOb6KyrlGXV08LdCVTMJmYVW8YS on3Mi0qIgUUOkYZuZ20KuXGN73SnLSr0dRqqLoEre5cy86uXku6DJb1US77SaGkIPYBBZhvbzONz 8gFADMYYqsbOMUb9Lhv/cKtIjgFdwGpWVXY7Q7EA2AT7GA6pAdeiOB0cn6JIZJSK+OKhmOZhXrQX aF0CSw2HtsSRko3kIThEE3Ors4eZtmjrX5HgXZFLd6fbthty6nS61kkj0JrlNOc1yLczmFcSnLpP YtiIKoPR64kydgpE0MdK3fGY5bdzxmPDkBQPfWXqD08xszk/maqOmqyVB2OllW6DR0JoiSG2uqQe hFIAcrUOHAvqigHzRJFL6s02ZzFNIJIJDb0+czzKI6qPnsNGFaj6lkid4NYZRNiG6MiaG0JriHAV j9HVFoaseqda5IqBrACTLZ2pQlHF8UlYSQvUecxrTwSRVkGpGYa0DQNrNjzUL3cr/Fn9Sq0DdtmL XeBSC956BvEZKqSjPc1GUGqjn5pIiC9JPVWw/QODQxodFtrtuX7Ydy3H1PM4+V4ge7oNXU1Zq3X7 zwzchsckEW5EB4KyxBk5eiIYKhRFwp5/aMq9qLH4oCZ9Y+BJFYote/wZ3fExWqzR0LYAWLWsUY6B LJlQ2jnaxaoyOMbHncJWFBtsY28JEPrYAWcg8YS8ZVJcrq0g6u0yNbHkG8AExJCZH0tv9rhQN6ud d0YjrfaouEHvJKz+DxmPd9HukkqUVeSwRh7UY9XCvT0gPOgAhw5L4ZFI3wkflEtIjUMJrlN97pTQ MeYDUGujhSyUfthabWszYrEniIgyMlVDWZ7GvFBWlWCDoPowNBehTIArRr6MoHeLJUwDJiLMcrJb 4zf3K6ARDcxqiXZK8fY4KEAMFGaLUt732pj7C9aKw6MRo6HpkkkklcEdtEMHaLYaQakDbNLvWdJp mpREbGrwKxOa1fWcb608e7NhlUbuumGR7oCq45atY7mvKqgLItwlzw4KAQYhhqi+4FDAongpwgr3 c5SawBwjUuCCH3Ls738iA4OTJFzU9qL9z3RWcDG/O4XE2ER3dLbU8S5tuWNNpmDY1WgIkkmERUff yHolcDlzsxawLKEEquwabwXanXGf1roWxHeGnfPj67DkRTKO1xvNqkJYEnCsfVC3mQ+KNwNqQxR4 rzCWi2qsFvmDMgs4Aaphz94lVeZYwLRKYHUPcUaOxSTgIplWso6lz0TBljGN6moVKDita+RXq9Sg 
uZcuF0McXbKl5SRMzKDgPSvrErDkLjYjJRyrWCUEMxnAyWm5ULb4BA8GB//F3JFOFCQaYf6RAA== --===============0000510007==--