From: Tor Didriksen Date: June 1 2011 2:18pm Subject: Re: review for BUG#12590211 - ADD BOUNDS CHECK ON REF_POINTER_ARRAY AND FRIENDS List-Archive: http://lists.mysql.com/commits/138558 Message-Id: <4DE64A1B.5020003@oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 8bit On 2011-06-01 15:36, Guilhem Bichot wrote: > Hello Tor, > > Tor Didriksen a écrit, Le 30.05.2011 13:01: > > Here is the review of the latest patch for trunk: > http://lists.mysql.com/commits/138414 > > sql_base.h, sql_do.cc, sql_update.cc, item_subselect.cc need a > copyright 2011. will do > >> === modified file 'sql/sql_base.cc' >> --- sql/sql_base.cc 2011-05-26 15:20:09 +0000 >> +++ sql/sql_base.cc 2011-06-01 09:07:59 +0000 >> @@ -8085,10 +8088,13 @@ >> DBUG_RETURN(TRUE); /* purecov: inspected */ >> } >> if (ref) >> - *(ref++)= item; >> + { >> + ref[0]= item; >> + ref.pop_front(); >> + } >> if (item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM && >> sum_func_list) >> - item->split_sum_func(thd, ref_pointer_array, *sum_func_list); >> + item->split_sum_func(thd, ref_pointer_array.array(), >> *sum_func_list); > > here, our checked array becomes a non-checked array passed to > split_sum_func(). But inside split_sum_func(), we may come to > Item::split_sum_func2() which does: > ref_pointer_array[el]=...; > which would be worth some bound-checking. So I would change the > signature of all split_sum_func* functions, to take a bound-checked > array. Yes, I deliberately left that issue open to the reviewers: whether to propagate this to the Item hierarchy, or not. I'm fine with changing the split_sum functions as well. > >> === modified file 'sql/sql_lex.cc' >> --- sql/sql_lex.cc 2011-05-26 15:20:09 +0000 >> +++ sql/sql_lex.cc 2011-06-01 09:07:59 +0000 >> @@ -1732,6 +1732,8 @@ >> >> void st_select_lex::init_query() >> { >> + DBUG_ENTER("st_select_lex::init_query"); >> + DBUG_PRINT("info", ("this %p", this)); >> st_select_lex_node::init_query(); >> table_list.empty(); >> top_join_list.empty(); >> @@ -1757,7 +1759,7 @@ >> parent_lex->push_context(&context); >> cond_count= between_count= with_wild= 0; >> max_equal_elems= 0; >> - ref_pointer_array= 0; >> + ref_pointer_array.reset(); >> select_n_where_fields= 0; >> select_n_having_items= 0; >> subquery_in_having= explicit_limit= 0; >> @@ -1769,10 +1771,13 @@ >> exclude_from_table_unique_test= no_wrap_view_item= FALSE; >> nest_level= 0; >> link_next= 0; >> + DBUG_VOID_RETURN; >> } >> >> void st_select_lex::init_select() >> { >> + DBUG_ENTER("st_select_lex::init_select"); >> + DBUG_PRINT("info", ("this %p", this)); >> st_select_lex_node::init_select(); >> sj_nests.empty(); >> group_list.empty(); >> @@ -1800,6 +1805,7 @@ >> cond_value= having_value= Item::COND_UNDEF; >> inner_refs_list.empty(); >> full_group_by_flag= 0; >> + DBUG_VOID_RETURN; > > For the DBUG_ statements in sql_lex.cc init functions: keep them if you > think they are useful - I'm not sure they are. I left a couple of extra DBUG things which should go away, yes. > >> @@ -2132,20 +2138,38 @@ >> >> bool st_select_lex::setup_ref_array(THD *thd, uint order_group_num) >> { >> +#ifdef DBUG_OFF >> if (ref_pointer_array) >> return 0; >> +#endif >> + >> + DBUG_ENTER("st_select_lex::setup_ref_array"); >> >> /* >> We have to create array in prepared statement memory if it is >> prepared statement >> */ >> Query_arena *arena= thd->stmt_arena; >> - return (ref_pointer_array= >> - (Item **)arena->alloc(sizeof(Item*) * (n_child_sum_items + >> - item_list.elements + >> - >> select_n_having_items + >> - >> select_n_where_fields + >> - >> order_group_num)*5)) == 0; >> + uint n_elems= (n_child_sum_items + >> + item_list.elements + >> + select_n_having_items + >> + select_n_where_fields + >> + order_group_num) * 5; >> + DBUG_PRINT("info", ("setup_ref_array %4u : %4u %4u %4u %4u %4u", >> + n_elems, >> + n_child_sum_items, >> + item_list.elements, >> + select_n_having_items, >> + select_n_where_fields, >> + order_group_num)); >> + if (ref_pointer_array) { >> + DBUG_ASSERT(ref_pointer_array.size() == n_elems); >> + DBUG_RETURN(0); > > '{' should be on its own line > Do we want to check ==, or only >= ? '{' -- yes of course we want == check > >> + } >> + Item **array= (Item **)arena->alloc(sizeof(Item*) * n_elems); > > or a C++ cast indeed > >> + ref_pointer_array= Ref_ptr_array(array, n_elems); > > Does ref_pointer_array really have n_elems elements? I'd rather say > that it has n_elems/5, and the other elements belong to items0, items1 > etc. > In ordinary code (I'm excluding the code which sets items0 by a > calculation based on ref_pointer_array), I think that doing > ref_pointer_array[something greater than n_elems/5] > is a bug and should fire the assertion. > In other words: I believe that the 5-times allocation is just a quick > way to allocate 5 arrays in one go; there is no intention that > ref_pointer_array should span on items0, items1 (which it does now, > because it is said to contain n_elems). > You would have stricter bound-checking this way. > But maybe I missed something about the usage of ref_pointer_array. The entire array needs to know that it contains n_elems, so that we can do items2= Ref_ptr_array(&ref_pointer_array[3 * all_fields.elements], all_fields.elements); without asserting. but you are right. I will see if I can find a better way of maintaining the distinction between the entire array, and the first slice (which does not have a name) > >> + >> + DBUG_RETURN(array == NULL); >> } >> >> >> >> === modified file 'sql/sql_lex.h' >> --- sql/sql_lex.h 2011-05-12 17:29:19 +0000 >> +++ sql/sql_lex.h 2011-06-01 09:07:59 +0000 >> @@ -655,8 +657,9 @@ >> SQL_I_List order_list; /* ORDER clause */ >> SQL_I_List *gorder_list; >> Item *select_limit, *offset_limit; /* LIMIT clause parameters */ >> - // Arrays of pointers to top elements of all_fields list >> - Item **ref_pointer_array; >> + >> + // Array of pointers to top elements of all_fields list > > can change // to /// for doxygen will do > >> + Ref_ptr_array ref_pointer_array; >> >> /* >> number of items in select_list and HAVING clause used to get number > >> === modified file 'sql/sql_select.cc' >> --- sql/sql_select.cc 2011-05-26 15:20:09 +0000 >> +++ sql/sql_select.cc 2011-06-01 09:07:59 +0000 >> @@ -600,7 +600,7 @@ >> 0 on success >> */ >> int >> -JOIN::prepare(Item ***rref_pointer_array, >> +JOIN::prepare(Ref_ptr_array *rref_pointer_array, >> TABLE_LIST *tables_init, >> uint wild_num, Item *conds_init, uint og_num, >> ORDER *order_init, ORDER *group_init, >> @@ -647,15 +647,27 @@ >> table_ptr= table_ptr->next_leaf) >> tables++; >> >> - if (setup_wild(thd, tables_list, fields_list, &all_fields, >> wild_num) || >> - select_lex->setup_ref_array(thd, og_num) || >> - setup_fields(thd, (*rref_pointer_array), fields_list, >> MARK_COLUMNS_READ, >> - &all_fields, 1) || >> - setup_without_group(thd, (*rref_pointer_array), tables_list, >> + if (setup_wild(thd, tables_list, fields_list, &all_fields, wild_num)) >> + DBUG_RETURN(-1); >> + // find_order_in_list() may need some extra space, so multiply >> og_num by two. >> + if (select_lex->setup_ref_array(thd, og_num * 2)) >> + DBUG_RETURN(-1); >> + /* >> + Item and Item_field CTORs will both increment some counters >> + in current_select, based on the current parsing context. >> + We are not parsing anymore: any new Items created now are due to >> + query rewriting, so stop incrementing counters. >> + */ >> + select_lex->parsing_place= NO_MATTER; > > this NO_MATTER looks unrelated to the bug? See the comment. For this query: prepare stmt from 'show databases'; execute stmt; execute stmt; we will continue to increment counters in some items, and it seems that the array has to grow for each execute. There are also other cases where 'parsing_place' is left in some random state, and we will continue to increment counters. > >> + if (setup_fields(thd, *rref_pointer_array, fields_list, >> MARK_COLUMNS_READ, >> + &all_fields, 1)) >> + DBUG_RETURN(-1); >> + if (setup_without_group(thd, (*rref_pointer_array), tables_list, > > () around (*rref_pointer_array) is not needed. OK > >> select_lex->leaf_tables, fields_list, >> all_fields, &conds, order, group_list, >> &hidden_group_fields)) >> - DBUG_RETURN(-1); /* purecov: inspected */ >> + DBUG_RETURN(-1); > > We have strange code (it's not due to your changes): we set up > select_lex::ref_pointer_array here: > > > + // find_order_in_list() may need some extra space, so multiply > og_num by two. > > + if (select_lex->setup_ref_array(thd, og_num * 2)) > > which sets it up large enough for the find_order_in_list() call. > > But find_order_in_list() (called by setup_without_group()) operates on > the rref_pointer_array passed in argument to JOIN::prepare(). > So "select_lex->setup_ref_array(thd, og_num * 2)" is ok only because > select_lex->ref_pointer_array and rref_pointer_array are the same. I'd > add an assertion to check this equality. OR, even better, I'd delete > the rref_pointer_array parameter of JOIN::prepare(), replacing it with > select_lex->ref_pointer_array inside the code of JOIN::prepare(). I > checked all calls to JOIN::prepare() and they are always passing > select_lex->ref_pointer_array as "rref_pointer_array" argument. You > could also do the same parameter simplification for mysql_select() > (eliminating rref_pointer_array again). I vote for this, because it > eliminates one possibility for out-of-sync parameters. Thanks for the tip, I will investigate. > >> ref_pointer_array= *rref_pointer_array; > >> @@ -3040,7 +3051,8 @@ >> curr_join->all_fields= *curr_all_fields; >> if (!items1) >> { >> - items1= items0 + all_fields.elements; >> + items1= Ref_ptr_array(&ref_pointer_array[2 * >> all_fields.elements], >> + all_fields.elements); > > All around code, we have the assumption that ref_pointer_array is made > of 5 chunks, each fitting all_fields.element at least. > But this is not how ref_pointer_array was allocated; it's first > allocated with that number of elements: > uint n_elems= (n_child_sum_items + > item_list.elements + > select_n_having_items + > select_n_where_fields + > order_group_num) * 5; > and then it's expanded (I see that find_order_in_list() expands it and > fortunately expands all_fields as well). > I would have a check somewhere, that all_fields.elements is not more > than one fifth of the size of the array. makes sense > >> if (sort_and_group || curr_tmp_table->group || >> tmp_table_param.precomputed_group_by) >> { > >> @@ -22587,15 +22602,20 @@ >> */ >> tmp_table_param.group_parts= send_group_parts; >> >> - if (!(rollup.null_items= (Item_null_result**) >> thd->alloc((sizeof(Item*) + >> - sizeof(Item**) + >> - sizeof(List) + >> - ref_pointer_array_size) >> - * send_group_parts ))) >> - return 1; >> - - rollup.fields= (List*) (rollup.null_items + >> send_group_parts); >> - rollup.ref_pointer_arrays= (Item***) (rollup.fields + >> send_group_parts); >> + Item_null_result **null_items= >> + >> static_cast(thd->alloc(sizeof(Item*)*send_group_parts)); >> + >> + rollup.null_items=Item_null_array(null_items, send_group_parts); >> + rollup.ref_pointer_arrays= >> + static_cast >> + (thd->alloc((sizeof(Ref_ptr_array) + >> + all_fields.elements * sizeof(Item*)) * >> send_group_parts)); >> + rollup.fields= >> + static_cast*>(thd->alloc(sizeof(List) * >> send_group_parts)); >> + >> + if (!rollup.null_items || !rollup.ref_pointer_arrays || >> !rollup.fields) >> + return true; >> + > > Ok with using Item_null_array, but why not keep one single > thd->alloc() call and a single test-for-NULL-and-return-true? To me it > does not make code more complicated. > Or maybe you did this to solve an alignment problem? well, I find the new code much more readable. > >> ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts); >> >> /* > thanks! -- didrik