Below is the list of changes that have just been committed into a local
4.1 repository of psergey. When psergey does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet
1.2173 05/04/25 01:06:39 sergefp@stripped +9 -0
1) Fix for BUG#9622:
ANALYZE TABLE and ALTER TABLE ENABLE INDEX produce different stats on the
same MyISAM table with NULL values. The NULL values were considered to be
equal in one case and inequal in the other. Now this is unified using the
following:
2) Changed statistics collection for NULL values - now statistics is calculated
this way:
a) Calculate number of NULL values for each key part
b) rec_per_key= #records_with_non_null_value / #different_not_null_values.
(if all records have NULL values then rec_per_key=1)
Cardinality column in SHOW KEYS shows records/rec_per_key[i], as before.
sql/structs.h
1.40 05/04/25 01:06:35 sergefp@stripped +2 -2
Comment corrected.
mysys/my_handler.c
1.19 05/04/25 01:06:35 sergefp@stripped +89 -9
* In ha_key_cmp, removed support for SEARCH_NULL_ARE_NOT_EQUAL as it is no longer
needed.
* Added ha_key_count_nulls function.
mysql-test/t/myisam.test
1.36 05/04/25 01:06:35 sergefp@stripped +56 -0
Testcase for BUG#9622
mysql-test/r/myisam.result
1.48 05/04/25 01:06:35 sergefp@stripped +86 -0
Testcase for BUG#9622
myisam/sort.c
1.43 05/04/25 01:06:35 sergefp@stripped +2 -1
MyISAM index statistics collection code now uses null_part_records -
array of number of null values for each key part.
myisam/myisamdef.h
1.78 05/04/25 01:06:35 sergefp@stripped +4 -0
MyISAM index statistics collection code now uses null_part_records -
array of number of null values for each key part.
myisam/mi_check.c
1.145 05/04/25 01:06:35 sergefp@stripped +67 -11
Changed statistics collection to treat NULL values specially
include/myisam.h
1.64 05/04/25 01:06:35 sergefp@stripped +5 -1
MyISAM index statistics collection code now uses null_part_records -
array of number of null values for each key part.
include/my_handler.h
1.6 05/04/25 01:06:35 sergefp@stripped +3 -0
Added ha_key_count_nulls function
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: sergefp
# Host: newbox.mylan
# Root: /home/psergey/mysql-4.1-bug9622
--- 1.63/include/myisam.h 2004-10-06 16:09:37 +04:00
+++ 1.64/include/myisam.h 2005-04-25 01:06:35 +04:00
@@ -335,7 +335,10 @@
int tmpfile_createflag;
myf myf_rw;
IO_CACHE read_cache;
+ /* Statistics: */
ulonglong unique_count[MI_MAX_KEY_SEG+1];
+ ulonglong null_part_records[MI_MAX_KEY_SEG+1];
+
ha_checksum key_crc[MI_MAX_POSSIBLE_KEY];
ulong rec_per_key_part[MI_MAX_KEY_SEG*MI_MAX_POSSIBLE_KEY];
void *thd;
@@ -394,7 +397,8 @@
my_bool repair);
int update_state_info(MI_CHECK *param, MI_INFO *info,uint update);
void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
- ulonglong *unique, ulonglong records);
+ ulonglong *unique, ulonglong *null_part_records,
+ ulonglong records);
int filecopy(MI_CHECK *param, File to,File from,my_off_t start,
my_off_t length, const char *type);
int movepoint(MI_INFO *info,byte *record,my_off_t oldpos,
--- 1.144/myisam/mi_check.c 2005-01-03 16:32:24 +03:00
+++ 1.145/myisam/mi_check.c 2005-04-25 01:06:35 +04:00
@@ -391,6 +391,7 @@
param->record_checksum=init_checksum;
bzero((char*) ¶m->unique_count,sizeof(param->unique_count));
+ bzero((char*) ¶m->null_part_records,sizeof(param->null_part_records));
if ((!(param->testflag & T_SILENT)))
printf ("- check data record references index: %d\n",key+1);
if (keyinfo->flag & HA_FULLTEXT)
@@ -495,6 +496,7 @@
if (param->testflag & T_STATISTICS)
update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
+ param->null_part_records,
(ulonglong) info->state->records);
}
if (param->testflag & T_INFO)
@@ -635,12 +637,16 @@
{
if (*keys != 1L) /* not first_key */
{
- uint diff;
- ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY,
- SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
+ int diff;
+ ha_key_cmp(keyinfo->seg,info->lastkey ,key, key_length,
+ comp_flag,
&diff);
- param->unique_count[diff-1]++;
+ ha_key_count_nulls(keyinfo->seg, key, param->null_part_records);
+ param->unique_count[diff-1]++;
}
+ else
+ ha_key_count_nulls(keyinfo->seg, key, param->null_part_records);
+
}
(*key_checksum)+= mi_byte_checksum((byte*) key,
key_length- info->s->rec_reflength);
@@ -2036,6 +2042,8 @@
sort_param.max_pos=sort_param.pos=share->pack.header_length;
keyseg=sort_param.seg;
bzero((char*) sort_param.unique,sizeof(sort_param.unique));
+ bzero((char*) sort_param.null_part_records,
+ sizeof(sort_param.null_part_records));
sort_param.key_length=share->rec_reflength;
for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++)
{
@@ -2080,7 +2088,8 @@
sort_info.max_records= (ha_rows) info->state->records;
if (param->testflag & T_STATISTICS)
- update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique,
+ update_key_parts(sort_param.keyinfo, rec_per_key_part,
+ sort_param.unique, sort_param.null_part_records,
(ulonglong) info->state->records);
share->state.key_map|=(ulonglong) 1 << sort_param.key;
@@ -3236,14 +3245,18 @@
int cmp;
if (sort_info->key_block->inited)
- {
+ {
cmp=ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
(uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE,
&diff_pos);
sort_param->unique[diff_pos-1]++;
+ ha_key_count_nulls(sort_param->seg, (uchar*)a,
+ sort_param->null_part_records);
}
else
{
+ ha_key_count_nulls(sort_param->seg, (uchar*)a,
+ sort_param->null_part_records);
cmp= -1;
}
if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0)
@@ -3952,20 +3965,63 @@
return;
}
- /* calculate unique keys for each part key */
+/*
+ Calculate statistics for each key part
+
+ SYNOPSIS
+ update_key_parts()
+ keyinfo IN Key information (only keyinfo->keysegs used)
+ rec_per_key_part OUT Put statistics here: Array of
+ #records/#distinct_values
+ unique IN Array of #distinct_values (see below for details)
+ null_values IN Array of numbers of NULLs for each key part
+ records IN Ttal #of records in table
+
+ NOTES
+ unique is array:
+ unique[0] = #different values for keypart1 - 1
+ ...
+ unique[i+1] = SUM[over each encountered keypart_1 ... keypart_i]
+ (#different values for keypart_{i+1} - 1)
+ null_values:
+ null_values[0] = #null values for keypart 1
+ null_values[i+1] = total #null values for keypart i.
+
+ NULL values are now ignored:
+ rec_per_key_part[i] = #{records with non-null values} /
+ #{different records with non-null values}
+
+ The function assumes that for 2 different values of 1st key part all
+ values for the next key parts are different (this naturally follows
+ from requirement that statistics is collected in one sequential pass
+ over index)
+ The statistics collection pass is performed by either chk_index() or
+ sort_key_write(). (look for calls to ha_key_cmp(... , &non-dummy-var) and
+ ha_key_count_nulls)
+*/
void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
- ulonglong *unique, ulonglong records)
+ ulonglong *unique, ulonglong *null_values,
+ ulonglong records)
{
ulonglong count=0,tmp;
uint parts;
+ ulonglong rec;
for (parts=0 ; parts < keyinfo->keysegs ; parts++)
{
+ rec= records - null_values[parts]; // total # of records to consider
+
+ /*
+ unique[parts] has #distinct_values - #groups.
+ set count= #distinct_values.
+ */
count+=unique[parts];
- if (count == 0)
- tmp=records;
+
+ if (count == 0) // handle the 'all values the same' case.
+ tmp=rec;
else
- tmp= (records + (count+1)/2) / (count+1);
+ tmp= (rec+ (count+1)/2) / (count+1); // tmp ~= round(rec/count)
+
/* for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
let's ensure it is not */
set_if_bigger(tmp,1);
--- 1.77/myisam/myisamdef.h 2005-03-02 12:34:56 +03:00
+++ 1.78/myisam/myisamdef.h 2005-04-25 01:06:35 +04:00
@@ -295,7 +295,11 @@
pthread_t thr;
IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
DYNAMIC_ARRAY buffpek;
+
+ /* Statistics collection */
ulonglong unique[MI_MAX_KEY_SEG+1];
+ ulonglong null_part_records[MI_MAX_KEY_SEG+1];
+
my_off_t pos,max_pos,filepos,start_recpos;
uint key, key_length,real_key_length,sortbuff_size;
uint maxbuffers, keys, find_length, sort_keys_length;
--- 1.42/myisam/sort.c 2005-01-24 17:48:16 +03:00
+++ 1.43/myisam/sort.c 2005-04-25 01:06:35 +04:00
@@ -482,7 +482,8 @@
share->state.key_map|=(ulonglong) 1 << sinfo->key;
if (param->testflag & T_STATISTICS)
update_key_parts(sinfo->keyinfo, rec_per_key_part,
- sinfo->unique, (ulonglong) info->state->records);
+ sinfo->unique, sinfo->null_part_records,
+ (ulonglong) info->state->records);
if (!sinfo->buffpek.elements)
{
if (param->testflag & T_VERBOSE)
--- 1.39/sql/structs.h 2005-03-23 21:19:10 +03:00
+++ 1.40/sql/structs.h 2005-04-25 01:06:35 +04:00
@@ -90,8 +90,8 @@
KEY_PART_INFO *key_part;
char *name; /* Name of key */
/*
- Array of AVG(#records with the same field value) for 1st ... Nth key part.
- 0 means 'not known'.
+ Array of (#records with the same field value) for 1st ... Nth key part,
+ rec_per_key[i]=0 means 'not known'.
For temporary heap tables this member is NULL.
*/
ulong *rec_per_key;
--- 1.5/include/my_handler.h 2004-03-25 16:04:58 +03:00
+++ 1.6/include/my_handler.h 2005-04-25 01:06:35 +04:00
@@ -63,4 +63,7 @@
register uchar *b, uint key_length, uint nextflag,
uint *diff_pos);
+void ha_key_count_nulls(register HA_KEYSEG *keyseg, register uchar *a,
+ ulonglong *null_part_records);
+
#endif /* _my_handler_h */
--- 1.18/mysys/my_handler.c 2005-02-01 17:27:00 +03:00
+++ 1.19/mysys/my_handler.c 2005-04-25 01:06:35 +04:00
@@ -129,15 +129,6 @@
{
if (nextflag == (SEARCH_FIND | SEARCH_UPDATE))
nextflag=SEARCH_SAME; /* Allow duplicate keys */
- else if (nextflag & SEARCH_NULL_ARE_NOT_EQUAL)
- {
- /*
- This is only used from mi_check() to calculate cardinality.
- It can't be used when searching for a key as this would cause
- compare of (a,b) and (b,a) to return the same value.
- */
- return -1;
- }
next_key_length=key_length;
continue; /* To next key part */
}
@@ -448,3 +439,92 @@
}
return 0;
} /* ha_key_cmp */
+
+
+/*
+ Count numbers of NULL values for each key part (used in MyISAM index
+ statistics calculation)
+ SYNOPSIS
+ ha_key_count_nulls()
+ keyseg Key description
+ a Ptr to full index value tuple
+ null_part_records Ptr to array of counters for keypart 0, 1, ...
+ if the tuple has NULL value for keypart i,
+ null_part_records[i] will be increased.
+*/
+
+void ha_key_count_nulls(register HA_KEYSEG *keyseg, register uchar *a,
+ ulonglong *null_part_records)
+{
+ HA_KEYSEG *orig_keyseg= keyseg;
+ for ( ; (enum ha_base_keytype) keyseg->type != HA_KEYTYPE_END; keyseg++)
+ {
+ uchar *end;
+
+ if (keyseg->null_bit)
+ {
+ if (!*a++) /* If key was NULL */
+ {
+ null_part_records[keyseg - orig_keyseg]++;
+ continue; /* To next key part */
+ }
+ }
+
+ end= a + keyseg->length;
+
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_TEXT: /* Ascii; Key is converted */
+ case HA_KEYTYPE_BINARY:
+ if (keyseg->flag & HA_SPACE_PACK)
+ {
+ int a_length;
+ get_key_length(a_length,a);
+ a+=a_length;
+ }
+ else
+ a=end;
+ break;
+ case HA_KEYTYPE_VARTEXT:
+ case HA_KEYTYPE_VARBINARY:
+ {
+ int a_length;
+ get_key_length(a_length,a);
+ a+= a_length;
+ break;
+ }
+ case HA_KEYTYPE_INT8:
+ case HA_KEYTYPE_SHORT_INT:
+ case HA_KEYTYPE_USHORT_INT:
+ case HA_KEYTYPE_LONG_INT:
+ case HA_KEYTYPE_ULONG_INT:
+ case HA_KEYTYPE_INT24:
+ case HA_KEYTYPE_UINT24:
+ case HA_KEYTYPE_FLOAT:
+ case HA_KEYTYPE_DOUBLE:
+ a= end;
+ break;
+ case HA_KEYTYPE_NUM: /* Numeric key */
+ {
+ int alength;
+ if (keyseg->flag & HA_SPACE_PACK)
+ {
+ alength= *a++;
+ end= a + alength;
+ }
+ else
+ alength= (int) (end-a);
+ a= a + alength;
+ break;
+ }
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ case HA_KEYTYPE_ULONGLONG:
+ a= end;
+ break;
+#endif
+ default: /* Shouldn't happen */
+ break;
+ }
+ }
+}
+
--- 1.47/mysql-test/r/myisam.result 2005-03-02 12:34:56 +03:00
+++ 1.48/mysql-test/r/myisam.result 2005-04-25 01:06:35 +04:00
@@ -573,3 +573,89 @@
ERROR HY000: MyISAM table 't1' is in use (most likely by a MERGE table). Try FLUSH
TABLES.
insert into t1 values (1);
drop table t1,t2;
+create table t1 (a int, key(a));
+delete from t1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7),(8);
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 8 NULL NULL YES BTREE
+delete from t1;
+insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 8 NULL NULL YES BTREE
+delete from t1;
+insert into t1 values (1),(2),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 8 NULL NULL YES BTREE
+delete from t1;
+insert into t1 values (1),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 8 NULL NULL YES BTREE
+delete from t1;
+insert into t1 values (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 8 NULL NULL YES BTREE
+delete from t1;
+insert into t1 values (1),(1),(2),(2),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 8 NULL NULL YES BTREE
+delete from t1;
+insert into t1 values (1),(1),(1),(1),(2),(2),(2),(2),
+(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 4 NULL NULL YES BTREE
+insert into t1 values (NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 5 NULL NULL YES BTREE
+create table t2 (a int, b int, key(a,b));
+insert into t2 values
+(NULL, 1), (NULL, 2), (NULL, 3), (NULL,4);
+analyze table t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+show keys from t2;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t2 1 a 1 a A 4 NULL NULL YES BTREE
+t2 1 a 2 b A 4 NULL NULL YES BTREE
+delete from t2;
+insert into t2 values
+(NULL, 1), (NULL, 1), (NULL, 1), (NULL,1);
+analyze table t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+show keys from t2;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t2 1 a 1 a A 4 NULL NULL YES BTREE
+t2 1 a 2 b A 1 NULL NULL YES BTREE
+drop table t1,t2;
--- 1.35/mysql-test/t/myisam.test 2005-03-02 12:34:56 +03:00
+++ 1.36/mysql-test/t/myisam.test 2005-04-25 01:06:35 +04:00
@@ -550,3 +550,59 @@
insert into t1 values (1);
drop table t1,t2;
+# BUG#9622 - ANALYZE TABLE and ALTER TABLE .. ENABLE INDEX produce
+# different statistics on the same table with NULL values.
+create table t1 (a int, key(a));
+
+delete from t1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7),(8);
+analyze table t1;
+show keys from t1;
+
+delete from t1;
+insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+
+delete from t1;
+insert into t1 values (1),(2),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+
+delete from t1;
+insert into t1 values (1),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+
+delete from t1;
+insert into t1 values (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+
+delete from t1;
+insert into t1 values (1),(1),(2),(2),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+
+delete from t1;
+insert into t1 values (1),(1),(1),(1),(2),(2),(2),(2),
+ (NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+insert into t1 values (NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+
+create table t2 (a int, b int, key(a,b));
+insert into t2 values
+ (NULL, 1), (NULL, 2), (NULL, 3), (NULL,4);
+analyze table t2;
+show keys from t2;
+
+delete from t2;
+insert into t2 values
+ (NULL, 1), (NULL, 1), (NULL, 1), (NULL,1);
+analyze table t2;
+show keys from t2;
+
+drop table t1,t2;
| Thread |
|---|
| • bk commit into 4.1 tree (sergefp:1.2173) BUG#9622 | Sergey Petrunia | 24 Apr |