List:Internals« Previous MessageNext Message »
From:Sergey Petrunia Date:April 24 2005 11:06pm
Subject:bk commit into 4.1 tree (sergefp:1.2173) BUG#9622
View as plain text  
Below is the list of changes that have just been committed into a local
4.1 repository of psergey. When psergey does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2173 05/04/25 01:06:39 sergefp@stripped +9 -0
  1) Fix for BUG#9622:
   ANALYZE TABLE and ALTER TABLE ENABLE INDEX produce different stats on the 
   same MyISAM table with NULL values. The NULL values were considered to be 
   equal in one case and inequal in the other. Now this is unified using the 
   following:
  2) Changed statistics collection for NULL values - now statistics is calculated 
   this way:
   a) Calculate number of NULL values for each key part
   b) rec_per_key= #records_with_non_null_value / #different_not_null_values.
      (if all records have NULL values then rec_per_key=1)
   
  Cardinality column in SHOW KEYS shows records/rec_per_key[i], as before.

  sql/structs.h
    1.40 05/04/25 01:06:35 sergefp@stripped +2 -2
    Comment corrected.

  mysys/my_handler.c
    1.19 05/04/25 01:06:35 sergefp@stripped +89 -9
    * In ha_key_cmp, removed support for SEARCH_NULL_ARE_NOT_EQUAL as it is no longer
needed.
    * Added ha_key_count_nulls function.

  mysql-test/t/myisam.test
    1.36 05/04/25 01:06:35 sergefp@stripped +56 -0
    Testcase for BUG#9622

  mysql-test/r/myisam.result
    1.48 05/04/25 01:06:35 sergefp@stripped +86 -0
    Testcase for BUG#9622

  myisam/sort.c
    1.43 05/04/25 01:06:35 sergefp@stripped +2 -1
    MyISAM index statistics collection code now uses null_part_records - 
    array of number of null values for each key part.

  myisam/myisamdef.h
    1.78 05/04/25 01:06:35 sergefp@stripped +4 -0
    MyISAM index statistics collection code now uses null_part_records - 
    array of number of null values for each key part.

  myisam/mi_check.c
    1.145 05/04/25 01:06:35 sergefp@stripped +67 -11
    Changed statistics collection to treat NULL values specially

  include/myisam.h
    1.64 05/04/25 01:06:35 sergefp@stripped +5 -1
    MyISAM index statistics collection code now uses null_part_records - 
    array of number of null values for each key part.

  include/my_handler.h
    1.6 05/04/25 01:06:35 sergefp@stripped +3 -0
    Added ha_key_count_nulls function

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	sergefp
# Host:	newbox.mylan
# Root:	/home/psergey/mysql-4.1-bug9622

--- 1.63/include/myisam.h	2004-10-06 16:09:37 +04:00
+++ 1.64/include/myisam.h	2005-04-25 01:06:35 +04:00
@@ -335,7 +335,10 @@
   int tmpfile_createflag;
   myf myf_rw;
   IO_CACHE read_cache;
+  /* Statistics: */
   ulonglong unique_count[MI_MAX_KEY_SEG+1];
+  ulonglong null_part_records[MI_MAX_KEY_SEG+1];
+  
   ha_checksum key_crc[MI_MAX_POSSIBLE_KEY];
   ulong rec_per_key_part[MI_MAX_KEY_SEG*MI_MAX_POSSIBLE_KEY];
   void *thd;
@@ -394,7 +397,8 @@
 			       my_bool repair);
 int update_state_info(MI_CHECK *param, MI_INFO *info,uint update);
 void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
-			     ulonglong *unique, ulonglong records);
+                      ulonglong *unique, ulonglong *null_part_records,
+                      ulonglong records);
 int filecopy(MI_CHECK *param, File to,File from,my_off_t start,
 	     my_off_t length, const char *type);
 int movepoint(MI_INFO *info,byte *record,my_off_t oldpos,

--- 1.144/myisam/mi_check.c	2005-01-03 16:32:24 +03:00
+++ 1.145/myisam/mi_check.c	2005-04-25 01:06:35 +04:00
@@ -391,6 +391,7 @@
 
     param->record_checksum=init_checksum;
     bzero((char*) &param->unique_count,sizeof(param->unique_count));
+    bzero((char*) &param->null_part_records,sizeof(param->null_part_records));
     if ((!(param->testflag & T_SILENT)))
       printf ("- check data record references index: %d\n",key+1);
     if (keyinfo->flag & HA_FULLTEXT)
@@ -495,6 +496,7 @@
 
     if (param->testflag & T_STATISTICS)
       update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
+                       param->null_part_records,
 		       (ulonglong) info->state->records);
   }
   if (param->testflag & T_INFO)
@@ -635,12 +637,16 @@
     {
       if (*keys != 1L)				/* not first_key */
       {
-	uint diff;
-	ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY,
-		   SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
+	int diff;
+	ha_key_cmp(keyinfo->seg,info->lastkey ,key, key_length,
+		   comp_flag,
 		   &diff);
-	param->unique_count[diff-1]++;
+        ha_key_count_nulls(keyinfo->seg, key, param->null_part_records);
+        param->unique_count[diff-1]++; 
       }
+      else
+        ha_key_count_nulls(keyinfo->seg, key, param->null_part_records);
+        
     }
     (*key_checksum)+= mi_byte_checksum((byte*) key,
 				       key_length- info->s->rec_reflength);
@@ -2036,6 +2042,8 @@
     sort_param.max_pos=sort_param.pos=share->pack.header_length;
     keyseg=sort_param.seg;
     bzero((char*) sort_param.unique,sizeof(sort_param.unique));
+    bzero((char*) sort_param.null_part_records,
+          sizeof(sort_param.null_part_records));
     sort_param.key_length=share->rec_reflength;
     for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++)
     {
@@ -2080,7 +2088,8 @@
     sort_info.max_records= (ha_rows) info->state->records;
 
     if (param->testflag & T_STATISTICS)
-      update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique,
+      update_key_parts(sort_param.keyinfo, rec_per_key_part,
+                       sort_param.unique, sort_param.null_part_records,
 		       (ulonglong) info->state->records);
     share->state.key_map|=(ulonglong) 1 << sort_param.key;
 
@@ -3236,14 +3245,18 @@
   int cmp;
 
   if (sort_info->key_block->inited)
-  {
+  { 
     cmp=ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
 		   (uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE,
 		   &diff_pos);
     sort_param->unique[diff_pos-1]++;
+    ha_key_count_nulls(sort_param->seg, (uchar*)a, 
+                       sort_param->null_part_records);
   }
   else
   {
+    ha_key_count_nulls(sort_param->seg, (uchar*)a,
+                       sort_param->null_part_records);
     cmp= -1;
   }
   if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0)
@@ -3952,20 +3965,63 @@
   return;
 }
 
-    /* calculate unique keys for each part key */
+/* 
+  Calculate statistics for each key part
+  
+  SYNOPSIS
+    update_key_parts()
+      keyinfo          IN  Key information (only keyinfo->keysegs used)
+      rec_per_key_part OUT Put statistics here: Array of 
+                           #records/#distinct_values
+      unique           IN  Array of #distinct_values (see below for details)
+      null_values      IN  Array of numbers of NULLs for each key part
+      records          IN  Ttal #of records in table 
+      
+  NOTES
+    unique is array:
+      unique[0]   = #different values for keypart1 - 1
+      ...
+      unique[i+1] = SUM[over each encountered keypart_1 ... keypart_i]
+                       (#different values for keypart_{i+1} - 1)
+    null_values:
+      null_values[0]   = #null values for keypart 1
+      null_values[i+1] = total #null values for keypart i.
+
+    NULL values are now ignored:
+    rec_per_key_part[i] = #{records with non-null values} /
+                          #{different records with non-null values}
+
+    The function assumes that for 2 different values of 1st key part all 
+    values for the next key parts are different (this naturally follows 
+    from requirement that statistics is collected in one sequential pass 
+    over index)
+    The statistics collection pass is performed by either chk_index() or
+    sort_key_write(). (look for calls to ha_key_cmp(... , &non-dummy-var) and
+    ha_key_count_nulls)
+*/
 
 void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
-			     ulonglong *unique, ulonglong records)
+                      ulonglong *unique, ulonglong *null_values,
+                      ulonglong records)
 {
   ulonglong count=0,tmp;
   uint parts;
+  ulonglong rec;
   for (parts=0 ; parts < keyinfo->keysegs  ; parts++)
   {
+    rec= records - null_values[parts]; // total # of records to consider 
+    
+    /* 
+      unique[parts] has #distinct_values - #groups. 
+      set count= #distinct_values.
+    */
     count+=unique[parts];
-    if (count == 0)
-      tmp=records;
+
+    if (count == 0) // handle the 'all values the same' case.
+      tmp=rec;
     else
-      tmp= (records + (count+1)/2) / (count+1);
+      tmp= (rec+ (count+1)/2) / (count+1); // tmp ~= round(rec/count)
+
     /* for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
        let's ensure it is not */
     set_if_bigger(tmp,1);

--- 1.77/myisam/myisamdef.h	2005-03-02 12:34:56 +03:00
+++ 1.78/myisam/myisamdef.h	2005-04-25 01:06:35 +04:00
@@ -295,7 +295,11 @@
   pthread_t  thr;
   IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
   DYNAMIC_ARRAY buffpek;
+  
+  /* Statistics collection */
   ulonglong unique[MI_MAX_KEY_SEG+1];
+  ulonglong null_part_records[MI_MAX_KEY_SEG+1];
+
   my_off_t pos,max_pos,filepos,start_recpos;
   uint key, key_length,real_key_length,sortbuff_size;
   uint maxbuffers, keys, find_length, sort_keys_length;

--- 1.42/myisam/sort.c	2005-01-24 17:48:16 +03:00
+++ 1.43/myisam/sort.c	2005-04-25 01:06:35 +04:00
@@ -482,7 +482,8 @@
       share->state.key_map|=(ulonglong) 1 << sinfo->key;
       if (param->testflag & T_STATISTICS)
         update_key_parts(sinfo->keyinfo, rec_per_key_part,
-                         sinfo->unique, (ulonglong) info->state->records);
+                         sinfo->unique, sinfo->null_part_records, 
+                         (ulonglong) info->state->records);
       if (!sinfo->buffpek.elements)
       {
         if (param->testflag & T_VERBOSE)

--- 1.39/sql/structs.h	2005-03-23 21:19:10 +03:00
+++ 1.40/sql/structs.h	2005-04-25 01:06:35 +04:00
@@ -90,8 +90,8 @@
   KEY_PART_INFO *key_part;
   char	*name;				/* Name of key */
   /*
-    Array of AVG(#records with the same field value) for 1st ... Nth key part.
-    0 means 'not known'.
+    Array of (#records with the same field value) for 1st ... Nth key part,
+    rec_per_key[i]=0 means 'not known'.
     For temporary heap tables this member is NULL.
   */
   ulong *rec_per_key;

--- 1.5/include/my_handler.h	2004-03-25 16:04:58 +03:00
+++ 1.6/include/my_handler.h	2005-04-25 01:06:35 +04:00
@@ -63,4 +63,7 @@
 		      register uchar *b, uint key_length, uint nextflag,
 		      uint *diff_pos);
 
+void ha_key_count_nulls(register HA_KEYSEG *keyseg, register uchar *a,
+                        ulonglong *null_part_records);
+
 #endif /* _my_handler_h */

--- 1.18/mysys/my_handler.c	2005-02-01 17:27:00 +03:00
+++ 1.19/mysys/my_handler.c	2005-04-25 01:06:35 +04:00
@@ -129,15 +129,6 @@
       {
         if (nextflag == (SEARCH_FIND | SEARCH_UPDATE))
           nextflag=SEARCH_SAME;                 /* Allow duplicate keys */
-  	else if (nextflag & SEARCH_NULL_ARE_NOT_EQUAL)
-	{
-	  /*
-	    This is only used from mi_check() to calculate cardinality.
-	    It can't be used when searching for a key as this would cause
-	    compare of (a,b) and (b,a) to return the same value.
-	  */
-	  return -1;
-	}
         next_key_length=key_length;
         continue;                               /* To next key part */
       }
@@ -448,3 +439,92 @@
   }
   return 0;
 } /* ha_key_cmp */
+
+
+/*
+  Count numbers of NULL values for each key part (used in MyISAM index 
+  statistics calculation)
+  SYNOPSIS
+    ha_key_count_nulls()
+      keyseg            Key description
+      a                 Ptr to full index value tuple
+      null_part_records Ptr to array of counters for keypart 0, 1, ...
+                        if the tuple has NULL value for keypart i,
+                        null_part_records[i] will be increased.
+*/
+
+void ha_key_count_nulls(register HA_KEYSEG *keyseg, register uchar *a,
+                        ulonglong *null_part_records)
+{
+  HA_KEYSEG *orig_keyseg= keyseg;
+  for ( ; (enum ha_base_keytype) keyseg->type != HA_KEYTYPE_END; keyseg++)
+  {
+    uchar *end;
+    
+    if (keyseg->null_bit)
+    {
+      if (!*a++)                                /* If key was NULL */
+      {
+        null_part_records[keyseg - orig_keyseg]++;
+        continue;                               /* To next key part */
+      }
+    }
+
+    end= a + keyseg->length;
+
+    switch ((enum ha_base_keytype) keyseg->type) {
+    case HA_KEYTYPE_TEXT:                       /* Ascii; Key is converted */
+    case HA_KEYTYPE_BINARY:
+      if (keyseg->flag & HA_SPACE_PACK)
+      {
+        int a_length;
+        get_key_length(a_length,a);
+        a+=a_length;
+      }
+      else
+        a=end;
+      break;
+    case HA_KEYTYPE_VARTEXT:
+    case HA_KEYTYPE_VARBINARY:
+    {
+      int a_length;
+      get_key_length(a_length,a);
+      a+= a_length;
+      break;
+    }
+    case HA_KEYTYPE_INT8:
+    case HA_KEYTYPE_SHORT_INT:
+    case HA_KEYTYPE_USHORT_INT:
+    case HA_KEYTYPE_LONG_INT:
+    case HA_KEYTYPE_ULONG_INT:
+    case HA_KEYTYPE_INT24:
+    case HA_KEYTYPE_UINT24:
+    case HA_KEYTYPE_FLOAT:
+    case HA_KEYTYPE_DOUBLE:
+      a= end;
+      break;
+    case HA_KEYTYPE_NUM:                                /* Numeric key */
+    {
+      int alength;
+      if (keyseg->flag & HA_SPACE_PACK)
+      {
+        alength= *a++;
+        end= a + alength;
+      }
+      else
+        alength= (int) (end-a);
+      a= a + alength;
+      break;
+    }
+#ifdef HAVE_LONG_LONG
+    case HA_KEYTYPE_LONGLONG:
+    case HA_KEYTYPE_ULONGLONG:
+      a= end;
+      break;
+#endif
+    default: /* Shouldn't happen */
+      break; 
+    }
+  }
+}
+

--- 1.47/mysql-test/r/myisam.result	2005-03-02 12:34:56 +03:00
+++ 1.48/mysql-test/r/myisam.result	2005-04-25 01:06:35 +04:00
@@ -573,3 +573,89 @@
 ERROR HY000: MyISAM table 't1' is in use (most likely by a MERGE table). Try FLUSH
TABLES.
 insert into t1 values (1);
 drop table t1,t2;
+create table t1 (a int, key(a));
+delete from t1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7),(8);
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+show keys from t1;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t1	1	a	1	a	A	8	NULL	NULL	YES	BTREE	
+delete from t1;
+insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+show keys from t1;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t1	1	a	1	a	A	8	NULL	NULL	YES	BTREE	
+delete from t1;
+insert into t1 values (1),(2),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+show keys from t1;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t1	1	a	1	a	A	8	NULL	NULL	YES	BTREE	
+delete from t1;
+insert into t1 values (1),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+show keys from t1;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t1	1	a	1	a	A	8	NULL	NULL	YES	BTREE	
+delete from t1;
+insert into t1 values (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+show keys from t1;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t1	1	a	1	a	A	8	NULL	NULL	YES	BTREE	
+delete from t1;
+insert into t1 values (1),(1),(2),(2),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+show keys from t1;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t1	1	a	1	a	A	8	NULL	NULL	YES	BTREE	
+delete from t1;
+insert into t1 values (1),(1),(1),(1),(2),(2),(2),(2),
+(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+show keys from t1;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t1	1	a	1	a	A	4	NULL	NULL	YES	BTREE	
+insert into t1 values (NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+show keys from t1;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t1	1	a	1	a	A	5	NULL	NULL	YES	BTREE	
+create table t2 (a int, b int, key(a,b));
+insert into t2 values 
+(NULL, 1), (NULL, 2), (NULL, 3), (NULL,4);
+analyze table t2;
+Table	Op	Msg_type	Msg_text
+test.t2	analyze	status	OK
+show keys from t2;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t2	1	a	1	a	A	4	NULL	NULL	YES	BTREE	
+t2	1	a	2	b	A	4	NULL	NULL	YES	BTREE	
+delete from t2;
+insert into t2 values 
+(NULL, 1), (NULL, 1), (NULL, 1), (NULL,1);
+analyze table t2;
+Table	Op	Msg_type	Msg_text
+test.t2	analyze	status	OK
+show keys from t2;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t2	1	a	1	a	A	4	NULL	NULL	YES	BTREE	
+t2	1	a	2	b	A	1	NULL	NULL	YES	BTREE	
+drop table t1,t2;

--- 1.35/mysql-test/t/myisam.test	2005-03-02 12:34:56 +03:00
+++ 1.36/mysql-test/t/myisam.test	2005-04-25 01:06:35 +04:00
@@ -550,3 +550,59 @@
 insert into t1 values (1);
 drop table t1,t2;
 
+# BUG#9622 - ANALYZE TABLE and ALTER TABLE .. ENABLE INDEX produce
+# different statistics on the same table with NULL values.
+create table t1 (a int, key(a));
+
+delete from t1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7),(8);
+analyze table t1;
+show keys from t1;
+
+delete from t1;
+insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+
+delete from t1;
+insert into t1 values (1),(2),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+
+delete from t1;
+insert into t1 values (1),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+
+delete from t1;
+insert into t1 values (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+
+delete from t1;
+insert into t1 values (1),(1),(2),(2),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+
+delete from t1;
+insert into t1 values (1),(1),(1),(1),(2),(2),(2),(2),
+  (NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+insert into t1 values (NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+
+create table t2 (a int, b int, key(a,b));
+insert into t2 values 
+ (NULL, 1), (NULL, 2), (NULL, 3), (NULL,4);
+analyze table t2;
+show keys from t2;
+
+delete from t2;
+insert into t2 values 
+ (NULL, 1), (NULL, 1), (NULL, 1), (NULL,1);
+analyze table t2;
+show keys from t2;
+
+drop table t1,t2;
Thread
bk commit into 4.1 tree (sergefp:1.2173) BUG#9622Sergey Petrunia24 Apr