Below is the list of changes that have just been committed into a local
5.0 repository of msvensson. When msvensson does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2008-02-07 08:08:43+01:00, msvensson@stripped +1 -0
Bug#32025 ndb_waiter does too many roundtrips to ndb_mgmd
ndb/tools/waiter.cpp@stripped, 2008-02-07 08:08:42+01:00, msvensson@stripped +43 -86
- Only contact ndb_mgmd once per loop
- Program only cares about ndbd nodes -> remove the api and mgm vectors
- Program can not wait for "starting" -> remove that code
- Remove unused includes
- Protect against SIGPIPE (writing to a socket after the mgmsrv was
gone silently killed the program)
- Don't sleep one second if all nodes are in the wanted state
- Use a 100 millisecond sleep between each poll
diff -Nrup a/ndb/tools/waiter.cpp b/ndb/tools/waiter.cpp
--- a/ndb/tools/waiter.cpp 2007-03-09 09:37:03 +01:00
+++ b/ndb/tools/waiter.cpp 2008-02-07 08:08:42 +01:00
@@ -21,13 +21,11 @@
#include <NdbMain.h>
#include <NdbOut.hpp>
#include <NdbSleep.h>
-#include <kernel/ndb_limits.h>
#include <NDBT.hpp>
-int
-waitClusterStatus(const char* _addr, ndb_mgm_node_status _status,
- unsigned int _timeout);
+static int
+waitClusterStatus(const char* _addr, ndb_mgm_node_status _status);
enum ndb_waiter_options {
OPT_WAIT_STATUS_NOT_STARTED = NDB_STD_OPTIONS_LAST,
@@ -55,12 +53,13 @@ static struct my_option my_long_options[
"Wait for cluster to enter single user mode",
(gptr*) &_single_user, (gptr*) &_single_user, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
- { "timeout", 't', "Timeout to wait",
+ { "timeout", 't', "Timeout to wait in seconds",
(gptr*) &_timeout, (gptr*) &_timeout, 0,
GET_INT, REQUIRED_ARG, 120, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
+
static void usage()
{
ndb_std_print_version();
@@ -70,16 +69,18 @@ static void usage()
my_print_variables(my_long_options);
}
+
int main(int argc, char** argv){
NDB_INIT(argv[0]);
load_defaults("my",load_default_groups,&argc,&argv);
const char* _hostName = NULL;
- int ho_error;
+
#ifndef DBUG_OFF
opt_debug= "d:t:O,/tmp/ndb_waiter.trace";
#endif
- if ((ho_error=handle_options(&argc, &argv, my_long_options,
- ndb_std_get_one_option)))
+
+ if (handle_options(&argc, &argv, my_long_options,
+ ndb_std_get_one_option))
return NDBT_ProgramExit(NDBT_WRONGARGS);
_hostName = argv[0];
@@ -105,7 +106,7 @@ int main(int argc, char** argv){
wait_status= NDB_MGM_NODE_STATUS_STARTED;
}
- if (waitClusterStatus(_hostName, wait_status, _timeout) != 0)
+ if (waitClusterStatus(_hostName, wait_status) != 0)
return NDBT_ProgramExit(NDBT_FAILED);
return NDBT_ProgramExit(NDBT_OK);
}
@@ -118,8 +119,6 @@ int main(int argc, char** argv){
NdbMgmHandle handle= NULL;
Vector<ndb_mgm_node_state> ndbNodes;
-Vector<ndb_mgm_node_state> mgmNodes;
-Vector<ndb_mgm_node_state> apiNodes;
int
getStatus(){
@@ -128,8 +127,6 @@ getStatus(){
struct ndb_mgm_node_state * node;
ndbNodes.clear();
- mgmNodes.clear();
- apiNodes.clear();
while(retries < 10){
status = ndb_mgm_get_status(handle);
@@ -153,18 +150,16 @@ getStatus(){
ndbNodes.push_back(*node);
break;
case NDB_MGM_NODE_TYPE_MGM:
- mgmNodes.push_back(*node);
+ /* Don't care about MGM nodes */
break;
case NDB_MGM_NODE_TYPE_API:
- apiNodes.push_back(*node);
+ /* Don't care about API nodes */
break;
default:
if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
retries++;
ndbNodes.clear();
- mgmNodes.clear();
- apiNodes.clear();
free(status);
status = NULL;
count = 0;
@@ -183,24 +178,22 @@ getStatus(){
free(status);
return 0;
}
-
- g_err << "getStatus failed" << endl;
+
return -1;
}
-int
+static int
waitClusterStatus(const char* _addr,
- ndb_mgm_node_status _status,
- unsigned int _timeout)
+ ndb_mgm_node_status _status)
{
int _startphase = -1;
- int _nodes[MAX_NDB_NODES];
- int _num_nodes = 0;
+ /* Ignore SIGPIPE */
+ signal(SIGPIPE, SIG_IGN);
handle = ndb_mgm_create_handle();
if (handle == NULL){
- g_err << "handle == NULL" << endl;
+ g_err << "Could not create ndb_mgm handle" << endl;
return -1;
}
g_info << "Connecting to mgmsrv at " << _addr << endl;
@@ -216,19 +209,11 @@ waitClusterStatus(const char* _addr,
return -1;
}
- if (getStatus() != 0)
- return -1;
-
- // Collect all nodes into nodes
- for (size_t i = 0; i < ndbNodes.size(); i++){
- _nodes[i] = ndbNodes[i].node_id;
- _num_nodes++;
- }
-
- unsigned int attempts = 0;
- unsigned int resetAttempts = 0;
- const unsigned int MAX_RESET_ATTEMPTS = 10;
- bool allInState = false;
+ int attempts = 0;
+ int resetAttempts = 0;
+ const int MAX_RESET_ATTEMPTS = 10;
+ bool allInState = false;
+ int timeout_ms= _timeout * 10; /* In number of 100 milliseconds */
while (allInState == false){
if (_timeout > 0 && attempts > _timeout){
/**
@@ -236,8 +221,8 @@ waitClusterStatus(const char* _addr,
* the state we want
*/
bool waitMore = false;
- /**
- * Make special check if we are waiting for
+ /**
+ * Make special check if we are waiting for
* cluster to become started
*/
if(_status == NDB_MGM_NODE_STATUS_STARTED){
@@ -252,7 +237,7 @@ waitClusterStatus(const char* _addr,
waitMore = false;
}
- }
+ }
if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
g_err << "waitNodeState("
@@ -260,7 +245,7 @@ waitClusterStatus(const char* _addr,
<<", "<<_startphase<<")"
<< " timeout after " << attempts <<" attemps" << endl;
return -1;
- }
+ }
g_err << "waitNodeState("
<< ndb_mgm_get_node_status_string(_status)
@@ -269,62 +254,34 @@ waitClusterStatus(const char* _addr,
<< resetAttempts << endl;
attempts = 0;
resetAttempts++;
-
}
- allInState = true;
if (getStatus() != 0){
- g_err << "getStatus != 0" << endl;
return -1;
}
- // ndbout << "waitNodeState; _num_nodes = " << _num_nodes << endl;
- // for (int i = 0; i < _num_nodes; i++)
- // ndbout << " node["<<i<<"] =" <<_nodes[i] << endl;
-
- for (int i = 0; i < _num_nodes; i++){
- ndb_mgm_node_state* ndbNode = NULL;
- for (size_t n = 0; n < ndbNodes.size(); n++){
- if (ndbNodes[n].node_id == _nodes[i])
- ndbNode = &ndbNodes[n];
- }
-
- if(ndbNode == NULL){
- allInState = false;
- continue;
- }
+ /* Assume all nodes are in state(if there is any) */
+ allInState = (ndbNodes.size() > 0);
- g_info << "State node " << ndbNode->node_id << " "
- << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl;
+ /* Loop through all nodes and check their state */
+ for (size_t n = 0; n < ndbNodes.size(); n++) {
+ ndb_mgm_node_state* ndbNode = &ndbNodes[n];
assert(ndbNode != NULL);
- if(_status == NDB_MGM_NODE_STATUS_STARTING &&
- ((ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTING &&
- ndbNode->start_phase >= _startphase) ||
- (ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTED)))
- continue;
-
- if (_status == NDB_MGM_NODE_STATUS_STARTING){
- g_info << "status = "
- << ndb_mgm_get_node_status_string(ndbNode->node_status)
- <<", start_phase="<<ndbNode->start_phase<<endl;
- if (ndbNode->node_status != _status) {
- if (ndbNode->node_status < _status)
- allInState = false;
- else
- g_info << "node_status(" << (unsigned)ndbNode->node_status
- << ") != _status("<< (unsigned)_status << ")" <<endl;
- } else if (ndbNode->start_phase < _startphase)
- allInState = false;
- } else {
- if (ndbNode->node_status != _status)
+ g_info << "Node " << ndbNode->node_id << ": "
+ << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl;
+
+ if (ndbNode->node_status != _status)
allInState = false;
- }
}
- g_info << "Waiting for cluster enter state "
- << ndb_mgm_get_node_status_string(_status)<< endl;
- NdbSleep_SecSleep(1);
+
+ if (!allInState) {
+ g_info << "Waiting for cluster enter state "
+ << ndb_mgm_get_node_status_string(_status)<< endl;
+ NdbSleep_MilliSleep(100);
+ }
+
attempts++;
}
return 0;
| Thread |
|---|
| • bk commit into 5.0 tree (msvensson:1.2540) BUG#32025 | msvensson | 7 Feb |