Util thread maintains cache in table share, updated every
ndb_cache_check_time. On info call, if count is zero - force a check.
If ndb_cache_check_time is zero, leave 2sec between checks.
For an exact row count, NDB requires round-trips to the data nodes.
For approximate, we can cache.
Old behaviour of disabling ndb_use_exact_count would return 100 as
the row count. This led to horrible execution plans in some cases,
but big respens time boosts in others.
This patch gives us our cake *and* we can eat it too!
I've observed 300% improvement in response time in some tests.
Index: telco/sql/mysqld.cc
===================================================================
--- telco.orig/sql/mysqld.cc 2007-10-12 12:19:43.125872633 +1000
+++ telco/sql/mysqld.cc 2007-10-12 12:26:12.380964272 +1000
@@ -4978,8 +4978,8 @@ enum options_mysqld
OPT_DISCONNECT_SLAVE_EVENT_COUNT, OPT_TC_HEURISTIC_RECOVER,
OPT_ABORT_SLAVE_EVENT_COUNT,
OPT_LOG_BIN_TRUST_FUNCTION_CREATORS,
- OPT_ENGINE_CONDITION_PUSHDOWN, OPT_NDB_CONNECTSTRING,
- OPT_NDB_USE_EXACT_COUNT, OPT_NDB_USE_TRANSACTIONS,
+ OPT_ENGINE_CONDITION_PUSHDOWN, OPT_NDB_CONNECTSTRING,
+ OPT_NDB_USE_TRANSACTIONS,
OPT_NDB_FORCE_SEND, OPT_NDB_AUTOINCREMENT_PREFETCH_SZ,
OPT_NDB_SHM, OPT_NDB_OPTIMIZED_NODE_SELECTION, OPT_NDB_CACHE_CHECK_TIME,
OPT_NDB_WAIT_CONNECTED,
@@ -5577,17 +5577,6 @@ master-ssl",
(uchar**) &opt_ndb_log_orig,
0, GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0},
#endif
- {"ndb-use-exact-count", OPT_NDB_USE_EXACT_COUNT,
- "Use exact records count during query planning and for fast "
- "select count(*), disable for faster queries.",
- (uchar**) &global_system_variables.ndb_use_exact_count,
- (uchar**) &global_system_variables.ndb_use_exact_count,
- 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
- {"ndb_use_exact_count", OPT_NDB_USE_EXACT_COUNT,
- "same as --ndb-use-exact-count.",
- (uchar**) &global_system_variables.ndb_use_exact_count,
- (uchar**) &global_system_variables.ndb_use_exact_count,
- 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
{"ndb-use-transactions", OPT_NDB_USE_TRANSACTIONS,
"Use transactions for large inserts, if enabled then large "
"inserts will be split into several smaller transactions",
Index: telco/sql/set_var.cc
===================================================================
--- telco.orig/sql/set_var.cc 2007-10-12 12:19:43.129872829 +1000
+++ telco/sql/set_var.cc 2007-10-12 12:26:12.388964664 +1000
@@ -503,8 +503,6 @@ static sys_var_bool_ptr
sys_ndb_log_updated_only(&vars, "ndb_log_updated_only",
&opt_ndb_log_updated_only);
#endif
static sys_var_thd_bool
-sys_ndb_use_exact_count(&vars, "ndb_use_exact_count", &SV::ndb_use_exact_count);
-static sys_var_thd_bool
sys_ndb_use_transactions(&vars, "ndb_use_transactions",
&SV::ndb_use_transactions);
static sys_var_long_ptr
sys_ndb_cache_check_time(&vars, "ndb_cache_check_time", &ndb_cache_check_time);
Index: telco/sql/sql_class.h
===================================================================
--- telco.orig/sql/sql_class.h 2007-10-12 12:19:43.149873810 +1000
+++ telco/sql/sql_class.h 2007-10-12 12:26:12.424966430 +1000
@@ -320,7 +320,6 @@ struct system_variables
my_bool keep_files_on_create;
my_bool ndb_force_send;
my_bool ndb_use_copying_alter_table;
- my_bool ndb_use_exact_count;
my_bool ndb_use_transactions;
my_bool ndb_index_stat_enable;
Index: telco/sql/ha_ndbcluster.cc
===================================================================
--- telco.orig/sql/ha_ndbcluster.cc 2007-10-12 12:20:27.172032948 +1000
+++ telco/sql/ha_ndbcluster.cc 2007-10-12 12:28:13.798919410 +1000
@@ -670,8 +670,6 @@ ha_rows ha_ndbcluster::records()
int ha_ndbcluster::records_update()
{
- if (m_ha_not_exact_count)
- return 0;
DBUG_ENTER("ha_ndbcluster::records_update");
int result= 0;
@@ -707,8 +705,7 @@ int ha_ndbcluster::records_update()
void ha_ndbcluster::no_uncommitted_rows_execute_failure()
{
- if (m_ha_not_exact_count)
- return;
+
DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure");
get_thd_ndb(current_thd)->m_error= TRUE;
get_thd_ndb(current_thd)->m_error_code= 0;
@@ -717,8 +714,6 @@ void ha_ndbcluster::no_uncommitted_rows_
void ha_ndbcluster::no_uncommitted_rows_update(int c)
{
- if (m_ha_not_exact_count)
- return;
DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update");
struct Ndb_local_table_statistics *local_info= m_table_info;
local_info->no_uncommitted_rows_count+= c;
@@ -730,8 +725,6 @@ void ha_ndbcluster::no_uncommitted_rows_
void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd)
{
- if (m_ha_not_exact_count)
- return;
DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_reset");
Thd_ndb *thd_ndb= get_thd_ndb(thd);
thd_ndb->count++;
@@ -4873,15 +4866,14 @@ int ha_ndbcluster::info(uint flag)
if (flag & HA_STATUS_VARIABLE)
{
DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
- if (m_table_info)
- {
- if (m_ha_not_exact_count)
- stats.records= 100;
- else
- result= records_update();
- }
- else
+ DBUG_PRINT("info", ("use saved"));
+ ha_rows r;
+ pthread_mutex_lock(&m_share->mutex);
+ r= m_share->approx_records;
+ pthread_mutex_unlock(&m_share->mutex);
+ if(!r)
{
+ DBUG_PRINT("info", ("update"));
if ((my_errno= check_ndb_connection()))
DBUG_RETURN(my_errno);
Ndb *ndb= get_ndb();
@@ -4891,13 +4883,13 @@ int ha_ndbcluster::info(uint flag)
{
DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM);
}
- if (current_thd->variables.ndb_use_exact_count &&
- (result= ndb_get_table_statistics
- (this, TRUE, ndb, m_ndb_statistics_record, &stat)) == 0)
+ if ((result= ndb_get_table_statistics(this, TRUE, ndb, m_ndb_statistics_record,
&stat)) == 0)
{
+ pthread_mutex_lock(&m_share->mutex);
+ stats.records= m_share->approx_records= stat.row_count;
+ pthread_mutex_unlock(&m_share->mutex);
stats.mean_rec_length= stat.row_size;
stats.data_file_length= stat.fragment_memory;
- stats.records= stat.row_count;
}
else
{
@@ -4905,6 +4897,11 @@ int ha_ndbcluster::info(uint flag)
stats.records= 100;
}
}
+ else
+ {
+ DBUG_PRINT("info", ("Used cached value %llu",r));
+ stats.records= r;
+ }
}
if (flag & HA_STATUS_CONST)
{
@@ -5390,7 +5387,6 @@ int ha_ndbcluster::init_handler_for_stat
DBUG_ENTER("ha_ndbcluster::init_handler_for_statement");
// store thread specific data first to set the right context
- m_ha_not_exact_count= !thd->variables.ndb_use_exact_count;
m_autoincrement_prefetch=
(ha_rows) thd->variables.ndb_autoincrement_prefetch_sz;
@@ -7384,20 +7380,6 @@ void ha_ndbcluster::get_auto_increment(u
Other flags are set under certain circumstaces in table_flags()
*/
#define HA_NDBCLUSTER_TABLE_FLAGS \
- HA_REC_NOT_IN_SEQ | \
- HA_NULL_IN_KEY | \
- HA_AUTO_PART_KEY | \
- HA_NO_PREFIX_CHAR_KEYS | \
- HA_NEED_READ_RANGE_BUFFER | \
- HA_CAN_GEOMETRY | \
- HA_CAN_BIT_FIELD | \
- HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | \
- HA_PRIMARY_KEY_REQUIRED_FOR_DELETE | \
- HA_PARTIAL_COLUMN_READ | \
- HA_HAS_OWN_BINLOGGING | \
- HA_BINLOG_ROW_CAPABLE | \
- HA_HAS_RECORDS | \
- HA_ONLINE_ALTER
ha_ndbcluster::ha_ndbcluster(handlerton *hton, TABLE_SHARE *table_arg):
@@ -7406,7 +7388,6 @@ ha_ndbcluster::ha_ndbcluster(handlerton
m_active_cursor(NULL),
m_table(NULL),
m_table_info(NULL),
- m_table_flags(HA_NDBCLUSTER_TABLE_FLAGS),
m_share(0),
m_part_info(NULL),
m_use_partition_pruning(FALSE),
@@ -7435,7 +7416,6 @@ ha_ndbcluster::ha_ndbcluster(handlerton
m_ndb_hidden_key_record(0),
m_ndb_statistics_record(0),
m_dupkey((uint) -1),
- m_ha_not_exact_count(FALSE),
m_autoincrement_prefetch((ha_rows) 32),
m_transaction_on(TRUE),
m_cond(NULL)
@@ -9030,9 +9010,24 @@ ha_ndbcluster::records_in_range(uint inx
ulonglong ha_ndbcluster::table_flags(void) const
{
THD *thd= current_thd;
- ulonglong f= m_table_flags;
- if (m_ha_not_exact_count)
- f= f & ~HA_STATS_RECORDS_IS_EXACT;
+ /*
+ Normal flags for binlogging is that ndb has HA_HAS_OWN_BINLOGGING
+ and preferes HA_BINLOG_ROW_CAPABLE
+ */
+ ulonglong f= (HA_REC_NOT_IN_SEQ |
+ HA_NULL_IN_KEY |
+ HA_AUTO_PART_KEY |
+ HA_NO_PREFIX_CHAR_KEYS |
+ HA_NEED_READ_RANGE_BUFFER |
+ HA_CAN_GEOMETRY |
+ HA_CAN_BIT_FIELD |
+ HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
+ HA_PRIMARY_KEY_REQUIRED_FOR_DELETE |
+ HA_PARTIAL_COLUMN_READ |
+ HA_HAS_OWN_BINLOGGING |
+ HA_BINLOG_ROW_CAPABLE |
+ HA_HAS_RECORDS |
+ HA_ONLINE_ALTER);
/*
To allow for logging of ndb tables during stmt based logging;
flag cabablity, but also turn off flag for OWN_BINLOGGING
@@ -9710,6 +9705,7 @@ NDB_SHARE *ndbcluster_get_share(const ch
}
#endif
*root_ptr= old_root;
+ share->approx_records= 0;
}
else
{
@@ -10651,7 +10647,14 @@ ha_ndbcluster::update_table_comment(
}
-// Utility thread main loop
+/**
+ * Utility thread main loop
+ *
+ * Utility thread has two jobs:
+ * - invalidate query cache (if ndb_cache_check_time is set, else no Qcache)
+ * - keep row count statistics up to date so we can give optimiser decent
+ * approx row count.
+ */
pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused)))
{
THD *thd; /* needs to be first for thread_stack */
@@ -10779,13 +10782,6 @@ pthread_handler_t ndb_util_thread_func(v
ndbcluster_setup_binlog_table_shares(thd);
#endif
- if (ndb_cache_check_time == 0)
- {
- /* Wake up in 1 second to check if value has changed */
- set_timespec(abstime, 1);
- continue;
- }
-
/* Lock mutex and fill list with pointers to all open tables */
NDB_SHARE *share;
pthread_mutex_lock(&ndbcluster_mutex);
@@ -10872,6 +10868,9 @@ pthread_handler_t ndb_util_thread_func(v
share->key,
llstr(stat.commit_count, buff),
llstr(stat.row_count, buff2)));
+ pthread_mutex_lock(&share->mutex);
+ share->approx_records= stat.row_count;
+ pthread_mutex_unlock(&share->mutex);
}
else
{
@@ -10894,8 +10893,14 @@ pthread_handler_t ndb_util_thread_func(v
}
next:
/* Calculate new time to wake up */
+
int secs= 0;
- int msecs= ndb_cache_check_time;
+ int msecs;
+
+ if(ndb_cache_check_time)
+ msecs= ndb_cache_check_time;
+ else
+ msecs= 2000; /* keep statistics up to date */
struct timeval tick_time;
gettimeofday(&tick_time, 0);
Index: telco/sql/ha_ndbcluster.h
===================================================================
--- telco.orig/sql/ha_ndbcluster.h 2007-10-12 12:20:27.188033732 +1000
+++ telco/sql/ha_ndbcluster.h 2007-10-12 12:28:38.132112870 +1000
@@ -161,6 +161,7 @@ typedef struct st_ndbcluster_share {
MY_BITMAP *subscriber_bitmap;
NdbEventOperation *new_op;
#endif
+ ha_rows approx_records;
} NDB_SHARE;
inline
@@ -705,7 +706,6 @@ private:
char m_dbname[FN_HEADLEN];
//char m_schemaname[FN_HEADLEN];
char m_tabname[FN_HEADLEN];
- ulonglong m_table_flags;
THR_LOCK_DATA m_lock;
bool m_lock_tuple;
NDB_SHARE *m_share;
@@ -766,7 +766,6 @@ private:
uint32 m_blobs_buffer_size;
uint m_dupkey;
// set from thread variables at external lock
- bool m_ha_not_exact_count;
ha_rows m_autoincrement_prefetch;
bool m_transaction_on;
Index: telco/mysql-test/suite/ndb/t/ndb_alter_table.test
===================================================================
--- telco.orig/mysql-test/suite/ndb/t/ndb_alter_table.test 2007-10-12 12:19:43.241878323
+1000
+++ telco/mysql-test/suite/ndb/t/ndb_alter_table.test 2007-10-12 12:26:12.580974082 +1000
@@ -55,12 +55,12 @@ col3 varchar (20) not null,
col4 varchar(4) not null,
col5 enum('PENDING', 'ACTIVE', 'DISABLED') not null,
col6 int not null, to_be_deleted int) ENGINE=ndbcluster;
---replace_column 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
+--replace_column 5 # 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
show table status;
SET SQL_MODE=NO_AUTO_VALUE_ON_ZERO;
insert into t1 values
(0,4,3,5,"PENDING",1,7),(NULL,4,3,5,"PENDING",1,7),(31,4,3,5,"PENDING",1,7),
(7,4,3,5,"PENDING",1,7), (NULL,4,3,5,"PENDING",1,7), (100,4,3,5,"PENDING",1,7),
(99,4,3,5,"PENDING",1,7), (8,4,3,5,"PENDING",1,7), (NULL,4,3,5,"PENDING",1,7);
---replace_column 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
+--replace_column 5 # 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
show table status;
select * from t1 order by col1;
alter table t1
@@ -69,11 +69,11 @@ add column col7 varchar(30) not null aft
add column col8 datetime not null, drop column to_be_deleted,
change column col2 fourth varchar(30) not null after col3,
modify column col6 int not null first;
---replace_column 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
+--replace_column 5 # 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
show table status;
select * from t1 order by col1;
insert into t1 values (2, NULL,4,3,5,99,"PENDING","EXTRA",'2004-01-01 00:00:00');
---replace_column 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
+--replace_column 5 # 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
show table status;
select * from t1 order by col1;
delete from t1;
Index: telco/mysql-test/suite/ndb/r/ndb_alter_table.result
===================================================================
--- telco.orig/mysql-test/suite/ndb/r/ndb_alter_table.result 2007-10-12 12:19:43.389885582
+1000
+++ telco/mysql-test/suite/ndb/r/ndb_alter_table.result 2007-10-12 12:26:12.900989777
+1000
@@ -34,13 +34,13 @@ col5 enum('PENDING', 'ACTIVE', 'DISABLED
col6 int not null, to_be_deleted int) ENGINE=ndbcluster;
show table status;
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ndbcluster 10 Dynamic 0 # # # 0 # 1 # # # latin1_swedish_ci NULL #
+t1 ndbcluster 10 Dynamic # # # # 0 # 1 # # # latin1_swedish_ci NULL #
SET SQL_MODE=NO_AUTO_VALUE_ON_ZERO;
insert into t1 values
(0,4,3,5,"PENDING",1,7),(NULL,4,3,5,"PENDING",1,7),(31,4,3,5,"PENDING",1,7),
(7,4,3,5,"PENDING",1,7), (NULL,4,3,5,"PENDING",1,7), (100,4,3,5,"PENDING",1,7),
(99,4,3,5,"PENDING",1,7), (8,4,3,5,"PENDING",1,7), (NULL,4,3,5,"PENDING",1,7);
show table status;
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ndbcluster 10 Dynamic 9 # # # 0 # 102 # # # latin1_swedish_ci NULL #
+t1 ndbcluster 10 Dynamic # # # # 0 # 102 # # # latin1_swedish_ci NULL #
select * from t1 order by col1;
col1 col2 col3 col4 col5 col6 to_be_deleted
0 4 3 5 PENDING 1 7
@@ -60,7 +60,7 @@ change column col2 fourth varchar(30) no
modify column col6 int not null first;
show table status;
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ndbcluster 10 Dynamic 9 # # # 0 # 102 # # # latin1_swedish_ci NULL #
+t1 ndbcluster 10 Dynamic # # # # 0 # 102 # # # latin1_swedish_ci NULL #
select * from t1 order by col1;
col6 col1 col3 fourth col4 col4_5 col5 col7 col8
1 0 3 4 5 PENDING 0000-00-00 00:00:00
@@ -75,7 +75,7 @@ col6 col1 col3 fourth col4 col4_5 col5 c
insert into t1 values (2, NULL,4,3,5,99,"PENDING","EXTRA",'2004-01-01 00:00:00');
show table status;
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ndbcluster 10 Dynamic 10 # # # 0 # 103 # # # latin1_swedish_ci NULL #
+t1 ndbcluster 10 Dynamic # # # # 0 # 103 # # # latin1_swedish_ci NULL #
select * from t1 order by col1;
col6 col1 col3 fourth col4 col4_5 col5 col7 col8
1 0 3 4 5 PENDING 0000-00-00 00:00:00
Index: telco/mysql-test/suite/ndb/r/ndb_basic.result
===================================================================
--- telco.orig/mysql-test/suite/ndb/r/ndb_basic.result 2007-10-12 12:20:27.208034713 +1000
+++ telco/mysql-test/suite/ndb/r/ndb_basic.result 2007-10-12 12:26:12.964992917 +1000
@@ -31,7 +31,6 @@ ndb_optimized_node_selection #
ndb_report_thresh_binlog_epoch_slip #
ndb_report_thresh_binlog_mem_usage #
ndb_use_copying_alter_table #
-ndb_use_exact_count #
ndb_use_transactions #
CREATE TABLE t1 (
pk1 INT NOT NULL PRIMARY KEY,
--
Stewart Smith