List:General Discussion« Previous MessageNext Message »
From:Michael Widenius Date:July 29 1999 8:34am
Subject:'select' duplicates rows!!!
View as plain text  
>>>>> "Michael" == Michael V Samanov <mike@stripped> writes:

Michael> Create the table:
Michael> =========================================================
Michael> # MySQL dump 5.13
Michael> #
Michael> # Host: localhost    Database: publish
Michael> #--------------------------------------------------------
Michael> # Server version        3.22.22

Michael> #
Michael> # Table structure for table 'ISSUES'
Michael> #
Michael> CREATE TABLE ISSUES (
Michael>   PAPER_ID smallint(6) DEFAULT '0' NOT NULL,
Michael>   YEAR smallint(6) DEFAULT '0' NOT NULL,
Michael>   ISSUE smallint(6) DEFAULT '0' NOT NULL,
Michael>   CLOSED tinyint(4) DEFAULT '0' NOT NULL,
Michael>   ISS_DATE date DEFAULT '0000-00-00' NOT NULL,
Michael>   PRIMARY KEY (PAPER_ID,YEAR,ISSUE)
Michael> );

Michael> #
Michael> # Dumping data for table 'ISSUES'
Michael> #

Michael> INSERT INTO ISSUES VALUES (3,1999,34,0,'1999-07-12');
Michael> INSERT INTO ISSUES VALUES (1,1999,111,0,'1999-03-23');
Michael> INSERT INTO ISSUES VALUES (1,1999,222,0,'1999-03-23');
Michael> INSERT INTO ISSUES VALUES (3,1999,33,0,'1999-07-12');
Michael> INSERT INTO ISSUES VALUES (3,1999,32,0,'1999-07-12');
Michael> INSERT INTO ISSUES VALUES (3,1999,31,0,'1999-07-12');
Michael> INSERT INTO ISSUES VALUES (3,1999,30,0,'1999-07-12');
Michael> INSERT INTO ISSUES VALUES (3,1999,29,0,'1999-07-12');
Michael> INSERT INTO ISSUES VALUES (3,1999,28,0,'1999-07-12');
Michael> INSERT INTO ISSUES VALUES (1,1999,40,1,'1999-05-01');
Michael> INSERT INTO ISSUES VALUES (1,1999,41,1,'1999-05-01');
Michael> INSERT INTO ISSUES VALUES (1,1999,42,1,'1999-05-01');
Michael> INSERT INTO ISSUES VALUES (1,1999,46,1,'1999-05-01');
Michael> INSERT INTO ISSUES VALUES (1,1999,47,1,'1999-05-01');
Michael> INSERT INTO ISSUES VALUES (1,1999,48,1,'1999-05-01');
Michael> INSERT INTO ISSUES VALUES (1,1999,49,1,'1999-05-01');
Michael> INSERT INTO ISSUES VALUES (1,1999,50,0,'1999-05-01');
Michael> INSERT INTO ISSUES VALUES (1,1999,51,0,'1999-05-01');
Michael> INSERT INTO ISSUES VALUES (1,1999,200,0,'1999-06-28');
Michael> INSERT INTO ISSUES VALUES (1,1999,52,0,'1999-06-28');
Michael> INSERT INTO ISSUES VALUES (1,1999,53,0,'1999-06-28');
Michael> INSERT INTO ISSUES VALUES (1,1999,54,0,'1999-06-28');
Michael> INSERT INTO ISSUES VALUES (1,1999,55,0,'1999-06-28');
Michael> INSERT INTO ISSUES VALUES (1,1999,56,0,'1999-07-01');
Michael> INSERT INTO ISSUES VALUES (1,1999,57,0,'1999-07-01');
Michael> INSERT INTO ISSUES VALUES (1,1999,58,0,'1999-07-01');
Michael> INSERT INTO ISSUES VALUES (1,1999,59,0,'1999-07-01');
Michael> INSERT INTO ISSUES VALUES (1,1999,60,0,'1999-07-01');
Michael> INSERT INTO ISSUES VALUES (3,1999,35,0,'1999-07-12');
Michael> =====================================================

Michael> then do the query:
Michael>   select YEAR,ISSUE from ISSUES
Michael>   where PAPER_ID=3 and (YEAR>1999 or (YEAR=1999 and ISSUE>28))
Michael>   order by YEAR,ISSUE

Michael> you will get the rows
Michael>   YEAR    ISSUE
Michael>   1999    29
Michael>   1999    29
Michael>   1999    30
Michael>   1999    30
Michael>   1999    31
Michael>   1999    31
Michael>   1999    32
Michael>   1999    32
Michael>   1999    33
Michael>   1999    33
Michael>   1999    34
Michael>   1999    34
Michael>   1999    35
Michael>   1999    35

Michael> while the similar query
Michael>   select * from ISSUES
Michael>   where PAPER_ID=3 and (YEAR>1999 or (YEAR=1999 and ISSUE>28))
Michael>   order by YEAR,ISSUE

Michael> works  fine.  It  looks like if there are fields in the query that are
Michael> not  in  primary  key,  then output's good, and when key fields only -
Michael> result's wrong.

<cut>

Hi!

This was a similar case than the range optimizer bug for which I
posted a patch tonight.  Here is the same patch + a small fix to also
fix this problem.

*** /my/monty/master/mysql-3.22.24/sql/opt_range.cc	Tue Oct 27 04:04:01 1998
--- ./opt_range.cc	Thu Jul 29 11:31:07 1999
***************
*** 30,36 ****
  
  static int sel_cmp(Field *f,char *a,char *b,uint8 a_flag,uint8 b_flag);
  
! static const uint NO_MIN_RANGE=1,NO_MAX_RANGE=2,NEAR_MIN=4,NEAR_MAX=8;
  
  class SEL_ARG :public Sql_alloc
  {
--- 30,37 ----
  
  static int sel_cmp(Field *f,char *a,char *b,uint8 a_flag,uint8 b_flag);
  
! static const uint NO_MIN_RANGE=1,NO_MAX_RANGE=2,NEAR_MIN=4,NEAR_MAX=8,
!   GET_MIN_MAX_KEY=16;
  
  class SEL_ARG :public Sql_alloc
  {
***************
*** 170,175 ****
--- 171,203 ----
        (*max_key)+= length;
      }
    }
+ 
+   void store_min_key(KEY_PART *key,char **range_key, uint *range_key_flag)
+   {
+     SEL_ARG *key_tree= first();
+     key_tree->store(key[key_tree->part].length,
+ 		    range_key,*range_key_flag,range_key,NO_MAX_RANGE);
+     *range_key_flag|= key_tree->min_flag;
+     if (key_tree->next_key_part &&
+ 	key_tree->next_key_part->part == key_tree->part+1 &&
+ 	!(*range_key_flag & (NO_MIN_RANGE | NEAR_MIN)) &&
+ 	key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
+       key_tree->next_key_part->store_min_key(key,range_key, range_key_flag);
+   }
+ 
+   void store_max_key(KEY_PART *key,char **range_key, uint *range_key_flag)
+   {
+     SEL_ARG *key_tree= last();
+     key_tree->store(key[key_tree->part].length,
+ 		    range_key, NO_MIN_RANGE, range_key,*range_key_flag);
+     (*range_key_flag)|= key_tree->max_flag;
+     if (key_tree->next_key_part &&
+ 	key_tree->next_key_part->part == key_tree->part+1 &&
+ 	!(*range_key_flag & (NO_MAX_RANGE | NEAR_MAX)) &&
+ 	key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
+       key_tree->next_key_part->store_max_key(key,range_key, range_key_flag);
+   }
+ 
    SEL_ARG *insert(SEL_ARG *key);
    SEL_ARG *tree_delete(SEL_ARG *key);
    SEL_ARG *find_range(SEL_ARG *key);
***************
*** 180,185 ****
--- 208,214 ----
    void test_use_count(SEL_ARG *root);
  #endif
    SEL_ARG *first();
+   SEL_ARG *last();
    void make_root();
    inline bool simple_key()
    {
***************
*** 263,269 ****
  static SEL_ARG *key_or(SEL_ARG *key1,SEL_ARG *key2);
  static SEL_ARG *key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag);
  static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
! static bool get_quick_keys(PARAM *param,QUICK_SELECT *quick,uint index,
  			   SEL_ARG *key_tree,char *min_key,uint min_key_flag,
  			   char *max_key,uint max_key_flag);
  static bool eq_tree(SEL_ARG* a,SEL_ARG *b);
--- 292,298 ----
  static SEL_ARG *key_or(SEL_ARG *key1,SEL_ARG *key2);
  static SEL_ARG *key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag);
  static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
! static bool get_quick_keys(PARAM *param,QUICK_SELECT *quick,KEY_PART *key,
  			   SEL_ARG *key_tree,char *min_key,uint min_key_flag,
  			   char *max_key,uint max_key_flag);
  static bool eq_tree(SEL_ARG* a,SEL_ARG *b);
***************
*** 398,403 ****
--- 427,441 ----
    return next_arg;
  }
  
+ SEL_ARG *SEL_ARG::last()
+ {
+   SEL_ARG *next_arg=this;
+   if (!next_arg->right)
+     return 0;					// MAYBE_KEY
+   while (next_arg->right != &null_element)
+     next_arg=next_arg->right;
+   return next_arg;
+ }
  
  /*
    Check if a compare is ok, when one takes ranges in account
***************
*** 1891,1896 ****
--- 1931,1937 ----
  		 uint max_key_flag)
  {
    ha_rows records=0,tmp;
+ 
    if (key_tree->left != &null_element)
    {
      records=check_quick_keys(param,index,key_tree->left,min_key,min_key_flag,
***************
*** 1898,1946 ****
      if (records == HA_POS_ERROR)			// Impossible
        return records;
    }
    if (key_tree->next_key_part &&
        key_tree->next_key_part->part == key_tree->part+1 &&
-       !key_tree->min_flag &&
        key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
    {						// const key as prefix
!     char *tmp_min_key=min_key,*tmp_max_key=max_key;
!     key_tree->store(param->key[index][key_tree->part].length,
! 		    &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag);
!     tmp=check_quick_keys(param,index,key_tree->next_key_part,
! 			 tmp_min_key, min_key_flag | key_tree->min_flag,
! 			 tmp_max_key, max_key_flag | key_tree->max_flag);
    }
    else
    {
!     char *tmp_min_key=min_key,*tmp_max_key=max_key;
!     key_tree->store(param->key[index][key_tree->part].length,
! 		    &tmp_min_key,min_key_flag, &tmp_max_key,max_key_flag);
! 
!     uint keynr=param->real_keynr[index];
!     uint min_key_length= (uint) (tmp_min_key- param->min_key);
!     uint max_key_length= (uint) (tmp_max_key- param->max_key);
!     if (!key_tree->min_flag && ! key_tree->max_flag &&
! 	! min_key_flag && ! max_key_flag &&
! 	(uint) key_tree->part+1 == param->table->key_info[keynr].key_parts &&
! 	!param->table->key_info[keynr].dupp_key &&
! 	min_key_length == (uint) (tmp_max_key- param->max_key) &&
! 	!memcmp(param->min_key,param->max_key,min_key_length))
!       tmp=1;					// Max one record
!     else
        tmp=ni_records_in_range((N_INFO*) param->table->file,
  			      (int) keynr,
  			      (byte*) (!min_key_length ? NullS :
  				       param->min_key),
  			      min_key_length,
! 			      ((key_tree->min_flag | min_key_flag) & NEAR_MIN ?
  			       HA_READ_AFTER_KEY : HA_READ_KEY_EXACT),
  			      (byte*) (!max_key_length ? NullS :
  				       param->max_key),
  			      max_key_length,
! 			      ((key_tree->max_flag | max_key_flag) & NEAR_MAX ?
  			       HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY));
!   }
!   if (tmp == HA_POS_ERROR)			// Impossible
      return tmp;
    records+=tmp;
    if (key_tree->right != &null_element)
--- 1939,2004 ----
      if (records == HA_POS_ERROR)			// Impossible
        return records;
    }
+ 
+   uint tmp_min_flag,tmp_max_flag,keynr;
+   char *tmp_min_key=min_key,*tmp_max_key=max_key;
+ 
+   key_tree->store(param->key[index][key_tree->part].length,
+ 		  &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag);
+   uint min_key_length= (uint) (tmp_min_key- param->min_key);
+   uint max_key_length= (uint) (tmp_max_key- param->max_key);
+ 
    if (key_tree->next_key_part &&
        key_tree->next_key_part->part == key_tree->part+1 &&
        key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
    {						// const key as prefix
!     if (min_key_length == max_key_length &&
! 	!memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
! 	!key_tree->min_flag && !key_tree->max_flag)
!     {      
!       tmp=check_quick_keys(param,index,key_tree->next_key_part,
! 			   tmp_min_key, min_key_flag | key_tree->min_flag,
! 			   tmp_max_key, max_key_flag | key_tree->max_flag);
!       goto end;					// Ugly, but efficient
!     }
!     tmp_min_flag=key_tree->min_flag;
!     tmp_max_flag=key_tree->max_flag;
!     key_tree->next_key_part->store_min_key(param->key[index], &tmp_min_key,
! 					   &tmp_min_flag);
!     key_tree->next_key_part->store_max_key(param->key[index], &tmp_max_key,
! 					   &tmp_max_flag);
!     min_key_length= (uint) (tmp_min_key- param->min_key);
!     max_key_length= (uint) (tmp_max_key- param->max_key);
    }
    else
    {
!     tmp_min_flag=min_key_flag | key_tree->min_flag;
!     tmp_max_flag=max_key_flag | key_tree->max_flag;
!   }
!     
!   keynr=param->real_keynr[index];
!   if (!tmp_min_flag && ! tmp_max_flag &&
!       (uint) key_tree->part+1 == param->table->key_info[keynr].key_parts
&&
!       !param->table->key_info[keynr].dupp_key &&
!       min_key_length == max_key_length &&
!       !memcmp(param->min_key,param->max_key,min_key_length))
!     tmp=1;					// Max one record
!   else
        tmp=ni_records_in_range((N_INFO*) param->table->file,
  			      (int) keynr,
  			      (byte*) (!min_key_length ? NullS :
  				       param->min_key),
  			      min_key_length,
! 			      (tmp_min_flag & NEAR_MIN ?
  			       HA_READ_AFTER_KEY : HA_READ_KEY_EXACT),
  			      (byte*) (!max_key_length ? NullS :
  				       param->max_key),
  			      max_key_length,
! 			      (tmp_max_flag & NEAR_MAX ?
  			       HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY));
! 
!  end:
!   if (tmp == HA_POS_ERROR)			// Impossible range
      return tmp;
    records+=tmp;
    if (key_tree->right != &null_element)
***************
*** 1967,1973 ****
    DBUG_ENTER("get_quick_select");
    if ((quick=new QUICK_SELECT(param->table,param->real_keynr[index])))
    {
!     if (get_quick_keys(param,quick,index,key_tree,param->min_key,0,
  		       param->max_key,0))
      {
        delete quick;
--- 2025,2031 ----
    DBUG_ENTER("get_quick_select");
    if ((quick=new QUICK_SELECT(param->table,param->real_keynr[index])))
    {
!     if (get_quick_keys(param,quick,param->key[index],key_tree,param->min_key,0,
  		       param->max_key,0))
      {
        delete quick;
***************
*** 1990,2049 ****
  */
  
  static bool
! get_quick_keys(PARAM *param,QUICK_SELECT *quick,uint index,
  	       SEL_ARG *key_tree,char *min_key,uint min_key_flag,
  	       char *max_key, uint max_key_flag)
  {
    if (key_tree->left != &null_element)
    {
!     if (get_quick_keys(param,quick,index,key_tree->left,
  		       min_key,min_key_flag, max_key, max_key_flag))
        return 1;
    }
    if (key_tree->next_key_part &&
        key_tree->next_key_part->part == key_tree->part+1 &&
-       !key_tree->min_flag &&
        key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
    {						  // const key as prefix
!     char *tmp_min_key=min_key,*tmp_max_key=max_key;
!     key_tree->store(param->key[index][key_tree->part].length,
! 		    &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag);
!     if (get_quick_keys(param,quick,index,key_tree->next_key_part,
! 		       tmp_min_key,min_key_flag | key_tree->min_flag,
  			 tmp_max_key, max_key_flag | key_tree->max_flag))
!       return 1;
    }
    else
!   {
!     char *tmp_min_key=min_key,*tmp_max_key=max_key;
!     key_tree->store(param->key[index][key_tree->part].length,
! 		    &tmp_min_key,min_key_flag, &tmp_max_key, max_key_flag);
!     uint flag=key_tree->min_flag | key_tree->max_flag;
!     if (tmp_min_key != param->min_key)
!       flag&= ~NO_MIN_RANGE;
!     else
!       flag|= NO_MIN_RANGE;
!     if (tmp_max_key != param->max_key)
!       flag&= ~NO_MAX_RANGE;
!     else
!       flag|= NO_MAX_RANGE;
!     QUICK_RANGE *range= new QUICK_RANGE(param->min_key,
! 					(uint) (tmp_min_key - param->min_key),
! 					param->max_key,
! 					(uint) (tmp_max_key - param->max_key),
! 					flag);
!     if (!range)					// Not enough memory
!       return 1;
!     quick->ranges.push_back(range);
!   }
    if (key_tree->right != &null_element)
!     return get_quick_keys(param,quick,index,key_tree->right,
  			  min_key,min_key_flag,
  			  max_key,max_key_flag);
    return 0;
  }
  
- 
  	/* get next possible record using quick-struct */
  
  int QUICK_SELECT::get_next()
--- 2048,2119 ----
  */
  
  static bool
! get_quick_keys(PARAM *param,QUICK_SELECT *quick,KEY_PART *key,
  	       SEL_ARG *key_tree,char *min_key,uint min_key_flag,
  	       char *max_key, uint max_key_flag)
  {
+   QUICK_RANGE *range;
+   uint flag;
+ 
    if (key_tree->left != &null_element)
    {
!     if (get_quick_keys(param,quick,key,key_tree->left,
  		       min_key,min_key_flag, max_key, max_key_flag))
        return 1;
    }
+   char *tmp_min_key=min_key,*tmp_max_key=max_key;
+   key_tree->store(key[key_tree->part].length,
+ 		  &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag);
+ 
    if (key_tree->next_key_part &&
        key_tree->next_key_part->part == key_tree->part+1 &&
        key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
    {						  // const key as prefix
!     if (!((tmp_min_key - min_key) != (tmp_max_key - max_key) ||
! 	  memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) ||
! 	  key_tree->min_flag || key_tree->max_flag))
!     {      
!       if (get_quick_keys(param,quick,key,key_tree->next_key_part,
! 			 tmp_min_key, min_key_flag | key_tree->min_flag,
  			 tmp_max_key, max_key_flag | key_tree->max_flag))
! 	return 1;
!       goto end;					// Ugly, but efficient
!     }
!     {
!       uint tmp_min_flag=key_tree->min_flag,tmp_max_flag=key_tree->max_flag;
!       key_tree->next_key_part->store_min_key(key, &tmp_min_key,
&tmp_min_flag);
!       key_tree->next_key_part->store_max_key(key, &tmp_max_key,
&tmp_max_flag);
!       flag=tmp_min_flag | tmp_max_flag;
!     }
    }
    else
!     flag=key_tree->min_flag | key_tree->max_flag;
! 
!   if (tmp_min_key != param->min_key)
!     flag&= ~NO_MIN_RANGE;
!   else
!     flag|= NO_MIN_RANGE;
!   if (tmp_max_key != param->max_key)
!     flag&= ~NO_MAX_RANGE;
!   else
!     flag|= NO_MAX_RANGE;
!   range= new QUICK_RANGE(param->min_key,
! 			 (uint) (tmp_min_key - param->min_key),
! 			 param->max_key,
! 			 (uint) (tmp_max_key - param->max_key),
! 			 flag);
!   if (!range)					// Not enough memory
!     return 1;
!   quick->ranges.push_back(range);
! 
!  end:
    if (key_tree->right != &null_element)
!     return get_quick_keys(param,quick,key,key_tree->right,
  			  min_key,min_key_flag,
  			  max_key,max_key_flag);
    return 0;
  }
  
  	/* get next possible record using quick-struct */
  
  int QUICK_SELECT::get_next()
***************
*** 2076,2082 ****
--- 2146,2155 ----
  		range->min_length,
  		(!(range->flag & NEAR_MIN) ? HA_READ_KEY_OR_NEXT :
  		 HA_READ_AFTER_KEY)))
+     {
+       next=0;					// Not found, to next range
        continue;
+     }
      if (cmp_next(range) == 0)
        DBUG_RETURN(0);				// Found key is in range
      next=0;					// To next range

Regards,
Monty
Thread
'select' duplicates rows!!!Michael V. Samanov14 Jul
  • 'select' duplicates rows!!!sinisa14 Jul
    • Re: 'select' duplicates rows!!!Benjamin Pflugmann14 Jul
    • Re: 'select' duplicates rows!!!Pete Harlan17 Jul
  • 'select' duplicates rows!!!Michael Widenius29 Jul