List:Commits« Previous MessageNext Message »
From:stewart Date:October 12 2007 4:56am
Subject:[patch 1/1] WL4092 Remove ndb_use_exact_count - use cache of approximate count instead
View as plain text  
Util thread maintains cache in table share, updated every
ndb_cache_check_time. On info call, if count is zero - force a check.

If ndb_cache_check_time is zero, leave 2sec between checks.

For an exact row count, NDB requires round-trips to the data nodes.

For approximate, we can cache.

Old behaviour of disabling ndb_use_exact_count would return 100 as
the row count. This led to horrible execution plans in some cases,
but big respens time boosts in others.

This patch gives us our cake *and* we can eat it too!

I've observed 300% improvement in response time in some tests.

Index: telco/sql/mysqld.cc
===================================================================
--- telco.orig/sql/mysqld.cc	2007-10-12 12:19:43.125872633 +1000
+++ telco/sql/mysqld.cc	2007-10-12 12:26:12.380964272 +1000
@@ -4978,8 +4978,8 @@ enum options_mysqld
   OPT_DISCONNECT_SLAVE_EVENT_COUNT, OPT_TC_HEURISTIC_RECOVER,
   OPT_ABORT_SLAVE_EVENT_COUNT,
   OPT_LOG_BIN_TRUST_FUNCTION_CREATORS,
-  OPT_ENGINE_CONDITION_PUSHDOWN, OPT_NDB_CONNECTSTRING, 
-  OPT_NDB_USE_EXACT_COUNT, OPT_NDB_USE_TRANSACTIONS,
+  OPT_ENGINE_CONDITION_PUSHDOWN, OPT_NDB_CONNECTSTRING,
+  OPT_NDB_USE_TRANSACTIONS,
   OPT_NDB_FORCE_SEND, OPT_NDB_AUTOINCREMENT_PREFETCH_SZ,
   OPT_NDB_SHM, OPT_NDB_OPTIMIZED_NODE_SELECTION, OPT_NDB_CACHE_CHECK_TIME,
   OPT_NDB_WAIT_CONNECTED,
@@ -5577,17 +5577,6 @@ master-ssl",
    (uchar**) &opt_ndb_log_orig,
    0, GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0},
 #endif
-  {"ndb-use-exact-count", OPT_NDB_USE_EXACT_COUNT,
-   "Use exact records count during query planning and for fast "
-   "select count(*), disable for faster queries.",
-   (uchar**) &global_system_variables.ndb_use_exact_count,
-   (uchar**) &global_system_variables.ndb_use_exact_count,
-   0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
-  {"ndb_use_exact_count", OPT_NDB_USE_EXACT_COUNT,
-   "same as --ndb-use-exact-count.",
-   (uchar**) &global_system_variables.ndb_use_exact_count,
-   (uchar**) &global_system_variables.ndb_use_exact_count,
-   0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
   {"ndb-use-transactions", OPT_NDB_USE_TRANSACTIONS,
    "Use transactions for large inserts, if enabled then large "
    "inserts will be split into several smaller transactions",
Index: telco/sql/set_var.cc
===================================================================
--- telco.orig/sql/set_var.cc	2007-10-12 12:19:43.129872829 +1000
+++ telco/sql/set_var.cc	2007-10-12 12:26:12.388964664 +1000
@@ -503,8 +503,6 @@ static sys_var_bool_ptr
 sys_ndb_log_updated_only(&vars, "ndb_log_updated_only",
&opt_ndb_log_updated_only);
 #endif
 static sys_var_thd_bool
-sys_ndb_use_exact_count(&vars, "ndb_use_exact_count", &SV::ndb_use_exact_count);
-static sys_var_thd_bool
 sys_ndb_use_transactions(&vars, "ndb_use_transactions",
&SV::ndb_use_transactions);
 static sys_var_long_ptr
 sys_ndb_cache_check_time(&vars, "ndb_cache_check_time", &ndb_cache_check_time);
Index: telco/sql/sql_class.h
===================================================================
--- telco.orig/sql/sql_class.h	2007-10-12 12:19:43.149873810 +1000
+++ telco/sql/sql_class.h	2007-10-12 12:26:12.424966430 +1000
@@ -320,7 +320,6 @@ struct system_variables
   my_bool keep_files_on_create;
   my_bool ndb_force_send;
   my_bool ndb_use_copying_alter_table;
-  my_bool ndb_use_exact_count;
   my_bool ndb_use_transactions;
   my_bool ndb_index_stat_enable;
 
Index: telco/sql/ha_ndbcluster.cc
===================================================================
--- telco.orig/sql/ha_ndbcluster.cc	2007-10-12 12:20:27.172032948 +1000
+++ telco/sql/ha_ndbcluster.cc	2007-10-12 12:28:13.798919410 +1000
@@ -670,8 +670,6 @@ ha_rows ha_ndbcluster::records()
 
 int ha_ndbcluster::records_update()
 {
-  if (m_ha_not_exact_count)
-    return 0;
   DBUG_ENTER("ha_ndbcluster::records_update");
   int result= 0;
 
@@ -707,8 +705,7 @@ int ha_ndbcluster::records_update()
 
 void ha_ndbcluster::no_uncommitted_rows_execute_failure()
 {
-  if (m_ha_not_exact_count)
-    return;
+
   DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure");
   get_thd_ndb(current_thd)->m_error= TRUE;
   get_thd_ndb(current_thd)->m_error_code= 0;
@@ -717,8 +714,6 @@ void ha_ndbcluster::no_uncommitted_rows_
 
 void ha_ndbcluster::no_uncommitted_rows_update(int c)
 {
-  if (m_ha_not_exact_count)
-    return;
   DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update");
   struct Ndb_local_table_statistics *local_info= m_table_info;
   local_info->no_uncommitted_rows_count+= c;
@@ -730,8 +725,6 @@ void ha_ndbcluster::no_uncommitted_rows_
 
 void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd)
 {
-  if (m_ha_not_exact_count)
-    return;
   DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_reset");
   Thd_ndb *thd_ndb= get_thd_ndb(thd);
   thd_ndb->count++;
@@ -4873,15 +4866,14 @@ int ha_ndbcluster::info(uint flag)
   if (flag & HA_STATUS_VARIABLE)
   {
     DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
-    if (m_table_info)
-    {
-      if (m_ha_not_exact_count)
-        stats.records= 100;
-      else
-	result= records_update();
-    }
-    else
+    DBUG_PRINT("info", ("use saved"));
+    ha_rows r;
+    pthread_mutex_lock(&m_share->mutex);
+    r= m_share->approx_records;
+    pthread_mutex_unlock(&m_share->mutex);
+    if(!r)
     {
+      DBUG_PRINT("info", ("update"));
       if ((my_errno= check_ndb_connection()))
         DBUG_RETURN(my_errno);
       Ndb *ndb= get_ndb();
@@ -4891,13 +4883,13 @@ int ha_ndbcluster::info(uint flag)
       {
         DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM);
       }
-      if (current_thd->variables.ndb_use_exact_count &&
-          (result= ndb_get_table_statistics
-           (this, TRUE, ndb, m_ndb_statistics_record, &stat)) == 0)
+      if ((result= ndb_get_table_statistics(this, TRUE, ndb, m_ndb_statistics_record,
&stat)) == 0)
       {
+        pthread_mutex_lock(&m_share->mutex);
+        stats.records= m_share->approx_records= stat.row_count;
+        pthread_mutex_unlock(&m_share->mutex);
         stats.mean_rec_length= stat.row_size;
         stats.data_file_length= stat.fragment_memory;
-        stats.records= stat.row_count;
       }
       else
       {
@@ -4905,6 +4897,11 @@ int ha_ndbcluster::info(uint flag)
         stats.records= 100;
       }
     }
+    else
+    {
+      DBUG_PRINT("info", ("Used cached value %llu",r));
+      stats.records= r;
+    }
   }
   if (flag & HA_STATUS_CONST)
   {
@@ -5390,7 +5387,6 @@ int ha_ndbcluster::init_handler_for_stat
 
   DBUG_ENTER("ha_ndbcluster::init_handler_for_statement");
   // store thread specific data first to set the right context
-  m_ha_not_exact_count= !thd->variables.ndb_use_exact_count;
   m_autoincrement_prefetch= 
     (ha_rows) thd->variables.ndb_autoincrement_prefetch_sz;
 
@@ -7384,20 +7380,6 @@ void ha_ndbcluster::get_auto_increment(u
   Other flags are set under certain circumstaces in table_flags()
 */
 #define HA_NDBCLUSTER_TABLE_FLAGS \
-                HA_REC_NOT_IN_SEQ | \
-                HA_NULL_IN_KEY | \
-                HA_AUTO_PART_KEY | \
-                HA_NO_PREFIX_CHAR_KEYS | \
-                HA_NEED_READ_RANGE_BUFFER | \
-                HA_CAN_GEOMETRY | \
-                HA_CAN_BIT_FIELD | \
-                HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | \
-                HA_PRIMARY_KEY_REQUIRED_FOR_DELETE | \
-                HA_PARTIAL_COLUMN_READ | \
-                HA_HAS_OWN_BINLOGGING | \
-                HA_BINLOG_ROW_CAPABLE | \
-                HA_HAS_RECORDS | \
-                HA_ONLINE_ALTER
 
 
 ha_ndbcluster::ha_ndbcluster(handlerton *hton, TABLE_SHARE *table_arg):
@@ -7406,7 +7388,6 @@ ha_ndbcluster::ha_ndbcluster(handlerton 
   m_active_cursor(NULL),
   m_table(NULL),
   m_table_info(NULL),
-  m_table_flags(HA_NDBCLUSTER_TABLE_FLAGS),
   m_share(0),
   m_part_info(NULL),
   m_use_partition_pruning(FALSE),
@@ -7435,7 +7416,6 @@ ha_ndbcluster::ha_ndbcluster(handlerton 
   m_ndb_hidden_key_record(0),
   m_ndb_statistics_record(0),
   m_dupkey((uint) -1),
-  m_ha_not_exact_count(FALSE),
   m_autoincrement_prefetch((ha_rows) 32),
   m_transaction_on(TRUE),
   m_cond(NULL)
@@ -9030,9 +9010,24 @@ ha_ndbcluster::records_in_range(uint inx
 ulonglong ha_ndbcluster::table_flags(void) const
 {
   THD *thd= current_thd;
-  ulonglong f= m_table_flags;
-  if (m_ha_not_exact_count)
-    f= f & ~HA_STATS_RECORDS_IS_EXACT;
+  /*
+    Normal flags for binlogging is that ndb has HA_HAS_OWN_BINLOGGING
+    and preferes HA_BINLOG_ROW_CAPABLE
+  */
+  ulonglong f= (HA_REC_NOT_IN_SEQ |
+                HA_NULL_IN_KEY |
+                HA_AUTO_PART_KEY |
+                HA_NO_PREFIX_CHAR_KEYS |
+                HA_NEED_READ_RANGE_BUFFER |
+                HA_CAN_GEOMETRY |
+                HA_CAN_BIT_FIELD |
+                HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
+                HA_PRIMARY_KEY_REQUIRED_FOR_DELETE |
+                HA_PARTIAL_COLUMN_READ |
+                HA_HAS_OWN_BINLOGGING |
+                HA_BINLOG_ROW_CAPABLE |
+                HA_HAS_RECORDS |
+                HA_ONLINE_ALTER);
   /*
     To allow for logging of ndb tables during stmt based logging;
     flag cabablity, but also turn off flag for OWN_BINLOGGING
@@ -9710,6 +9705,7 @@ NDB_SHARE *ndbcluster_get_share(const ch
       }
 #endif
       *root_ptr= old_root;
+      share->approx_records= 0;
     }
     else
     {
@@ -10651,7 +10647,14 @@ ha_ndbcluster::update_table_comment(
 }
 
 
-// Utility thread main loop
+/**
+ * Utility thread main loop
+ *
+ * Utility thread has two jobs:
+ * - invalidate query cache (if ndb_cache_check_time is set, else no Qcache)
+ * - keep row count statistics up to date so we can give optimiser decent
+ *   approx row count.
+ */
 pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused)))
 {
   THD *thd; /* needs to be first for thread_stack */
@@ -10779,13 +10782,6 @@ pthread_handler_t ndb_util_thread_func(v
       ndbcluster_setup_binlog_table_shares(thd);
 #endif
 
-    if (ndb_cache_check_time == 0)
-    {
-      /* Wake up in 1 second to check if value has changed */
-      set_timespec(abstime, 1);
-      continue;
-    }
-
     /* Lock mutex and fill list with pointers to all open tables */
     NDB_SHARE *share;
     pthread_mutex_lock(&ndbcluster_mutex);
@@ -10872,6 +10868,9 @@ pthread_handler_t ndb_util_thread_func(v
                       share->key,
                       llstr(stat.commit_count, buff),
                       llstr(stat.row_count, buff2)));
+          pthread_mutex_lock(&share->mutex);
+          share->approx_records= stat.row_count;
+          pthread_mutex_unlock(&share->mutex);
         }
         else
         {
@@ -10894,8 +10893,14 @@ pthread_handler_t ndb_util_thread_func(v
     }
 next:
     /* Calculate new time to wake up */
+
     int secs= 0;
-    int msecs= ndb_cache_check_time;
+    int msecs;
+
+    if(ndb_cache_check_time)
+      msecs= ndb_cache_check_time;
+    else
+      msecs= 2000; /* keep statistics up to date */
 
     struct timeval tick_time;
     gettimeofday(&tick_time, 0);
Index: telco/sql/ha_ndbcluster.h
===================================================================
--- telco.orig/sql/ha_ndbcluster.h	2007-10-12 12:20:27.188033732 +1000
+++ telco/sql/ha_ndbcluster.h	2007-10-12 12:28:38.132112870 +1000
@@ -161,6 +161,7 @@ typedef struct st_ndbcluster_share {
   MY_BITMAP *subscriber_bitmap;
   NdbEventOperation *new_op;
 #endif
+  ha_rows approx_records;
 } NDB_SHARE;
 
 inline
@@ -705,7 +706,6 @@ private:
   char m_dbname[FN_HEADLEN];
   //char m_schemaname[FN_HEADLEN];
   char m_tabname[FN_HEADLEN];
-  ulonglong m_table_flags;
   THR_LOCK_DATA m_lock;
   bool m_lock_tuple;
   NDB_SHARE *m_share;
@@ -766,7 +766,6 @@ private:
   uint32 m_blobs_buffer_size;
   uint m_dupkey;
   // set from thread variables at external lock
-  bool m_ha_not_exact_count;
   ha_rows m_autoincrement_prefetch;
   bool m_transaction_on;
 
Index: telco/mysql-test/suite/ndb/t/ndb_alter_table.test
===================================================================
--- telco.orig/mysql-test/suite/ndb/t/ndb_alter_table.test	2007-10-12 12:19:43.241878323
+1000
+++ telco/mysql-test/suite/ndb/t/ndb_alter_table.test	2007-10-12 12:26:12.580974082 +1000
@@ -55,12 +55,12 @@ col3 varchar (20) not null,
 col4 varchar(4) not null,
 col5 enum('PENDING', 'ACTIVE', 'DISABLED') not null,
 col6 int not null, to_be_deleted int)  ENGINE=ndbcluster;
---replace_column 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
+--replace_column 5 # 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
 show table status;
 SET SQL_MODE=NO_AUTO_VALUE_ON_ZERO;
 insert into t1 values
 (0,4,3,5,"PENDING",1,7),(NULL,4,3,5,"PENDING",1,7),(31,4,3,5,"PENDING",1,7),
(7,4,3,5,"PENDING",1,7), (NULL,4,3,5,"PENDING",1,7), (100,4,3,5,"PENDING",1,7),
(99,4,3,5,"PENDING",1,7), (8,4,3,5,"PENDING",1,7), (NULL,4,3,5,"PENDING",1,7);
---replace_column 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
+--replace_column 5 # 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
 show table status;
 select * from t1 order by col1;
 alter table t1
@@ -69,11 +69,11 @@ add column col7 varchar(30) not null aft
 add column col8 datetime not null, drop column to_be_deleted,
 change column col2 fourth varchar(30) not null after col3,
 modify column col6 int not null first;
---replace_column 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
+--replace_column 5 # 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
 show table status;
 select * from t1 order by col1;
 insert into t1 values (2, NULL,4,3,5,99,"PENDING","EXTRA",'2004-01-01 00:00:00');
---replace_column 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
+--replace_column 5 # 6 # 7 # 8 # 10 # 12 # 13 # 14 # 18 #
 show table status;
 select * from t1 order by col1;
 delete from t1;
Index: telco/mysql-test/suite/ndb/r/ndb_alter_table.result
===================================================================
--- telco.orig/mysql-test/suite/ndb/r/ndb_alter_table.result	2007-10-12 12:19:43.389885582
+1000
+++ telco/mysql-test/suite/ndb/r/ndb_alter_table.result	2007-10-12 12:26:12.900989777
+1000
@@ -34,13 +34,13 @@ col5 enum('PENDING', 'ACTIVE', 'DISABLED
 col6 int not null, to_be_deleted int)  ENGINE=ndbcluster;
 show table status;
 Name	Engine	Version	Row_format	Rows	Avg_row_length	Data_length	Max_data_length	Index_length	Data_free	Auto_increment	Create_time	Update_time	Check_time	Collation	Checksum	Create_options	Comment
-t1	ndbcluster	10	Dynamic	0	#	#	#	0	#	1	#	#	#	latin1_swedish_ci	NULL		#
+t1	ndbcluster	10	Dynamic	#	#	#	#	0	#	1	#	#	#	latin1_swedish_ci	NULL		#
 SET SQL_MODE=NO_AUTO_VALUE_ON_ZERO;
 insert into t1 values
 (0,4,3,5,"PENDING",1,7),(NULL,4,3,5,"PENDING",1,7),(31,4,3,5,"PENDING",1,7),
(7,4,3,5,"PENDING",1,7), (NULL,4,3,5,"PENDING",1,7), (100,4,3,5,"PENDING",1,7),
(99,4,3,5,"PENDING",1,7), (8,4,3,5,"PENDING",1,7), (NULL,4,3,5,"PENDING",1,7);
 show table status;
 Name	Engine	Version	Row_format	Rows	Avg_row_length	Data_length	Max_data_length	Index_length	Data_free	Auto_increment	Create_time	Update_time	Check_time	Collation	Checksum	Create_options	Comment
-t1	ndbcluster	10	Dynamic	9	#	#	#	0	#	102	#	#	#	latin1_swedish_ci	NULL		#
+t1	ndbcluster	10	Dynamic	#	#	#	#	0	#	102	#	#	#	latin1_swedish_ci	NULL		#
 select * from t1 order by col1;
 col1	col2	col3	col4	col5	col6	to_be_deleted
 0	4	3	5	PENDING	1	7
@@ -60,7 +60,7 @@ change column col2 fourth varchar(30) no
 modify column col6 int not null first;
 show table status;
 Name	Engine	Version	Row_format	Rows	Avg_row_length	Data_length	Max_data_length	Index_length	Data_free	Auto_increment	Create_time	Update_time	Check_time	Collation	Checksum	Create_options	Comment
-t1	ndbcluster	10	Dynamic	9	#	#	#	0	#	102	#	#	#	latin1_swedish_ci	NULL		#
+t1	ndbcluster	10	Dynamic	#	#	#	#	0	#	102	#	#	#	latin1_swedish_ci	NULL		#
 select * from t1 order by col1;
 col6	col1	col3	fourth	col4	col4_5	col5	col7	col8
 1	0	3	4	5		PENDING		0000-00-00 00:00:00
@@ -75,7 +75,7 @@ col6	col1	col3	fourth	col4	col4_5	col5	c
 insert into t1 values (2, NULL,4,3,5,99,"PENDING","EXTRA",'2004-01-01 00:00:00');
 show table status;
 Name	Engine	Version	Row_format	Rows	Avg_row_length	Data_length	Max_data_length	Index_length	Data_free	Auto_increment	Create_time	Update_time	Check_time	Collation	Checksum	Create_options	Comment
-t1	ndbcluster	10	Dynamic	10	#	#	#	0	#	103	#	#	#	latin1_swedish_ci	NULL		#
+t1	ndbcluster	10	Dynamic	#	#	#	#	0	#	103	#	#	#	latin1_swedish_ci	NULL		#
 select * from t1 order by col1;
 col6	col1	col3	fourth	col4	col4_5	col5	col7	col8
 1	0	3	4	5		PENDING		0000-00-00 00:00:00
Index: telco/mysql-test/suite/ndb/r/ndb_basic.result
===================================================================
--- telco.orig/mysql-test/suite/ndb/r/ndb_basic.result	2007-10-12 12:20:27.208034713 +1000
+++ telco/mysql-test/suite/ndb/r/ndb_basic.result	2007-10-12 12:26:12.964992917 +1000
@@ -31,7 +31,6 @@ ndb_optimized_node_selection	#
 ndb_report_thresh_binlog_epoch_slip	#
 ndb_report_thresh_binlog_mem_usage	#
 ndb_use_copying_alter_table	#
-ndb_use_exact_count	#
 ndb_use_transactions	#
 CREATE TABLE t1 (
 pk1 INT NOT NULL PRIMARY KEY,

--
Stewart Smith
Thread
[patch 0/1] For Review: WL4092 remove ndb_use_exact_countstewart12 Oct
  • [patch 1/1] WL4092 Remove ndb_use_exact_count - use cache of approximate count insteadstewart12 Oct
    • Re: [patch 1/1] WL4092 Remove ndb_use_exact_count - use cache ofapproximate count insteadJonas Oreland12 Oct
      • Re: [patch 1/1] WL4092 Remove ndb_use_exact_count - use cache ofapproximate count insteadStewart Smith12 Oct
        • Re: [patch 1/1] WL4092 Remove ndb_use_exact_count - use cache ofapproximate count insteadJonas Oreland12 Oct