From: Ole John Aske Date: May 4 2011 11:49am Subject: bzr push into mysql-5.1-telco-7.0-spj-scan-vs-scan branch (ole.john.aske:3488 to 3489) List-Archive: http://lists.mysql.com/commits/136642 Message-Id: <20110504114910.42C24222@fimafeng09.norway.sun.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 3489 Ole John Aske 2011-05-04 [merge] merge telco-7.0 -> spj-scan-scan modified: storage/ndb/include/kernel/signaldata/QueryTree.hpp storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp storage/ndb/src/ndbapi/NdbQueryBuilder.cpp storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp storage/ndb/src/ndbapi/Ndbinit.cpp storage/ndb/src/ndbapi/ObjectMap.cpp storage/ndb/src/ndbapi/ObjectMap.hpp storage/ndb/src/ndbapi/ndberror.c 3488 jonas oreland 2011-05-04 [merge] ndb - merge 70 to 70-spj removed: mysql-test/suite/rpl_ndb/combinations renamed: mysql-test/suite/rpl_ndb/r/rpl_ndb_extraCol.result => mysql-test/suite/rpl_ndb/r/rpl_ndb_extra_col_slave.result mysql-test/suite/rpl_ndb/r/rpl_ndb_extraColMaster.result => mysql-test/suite/rpl_ndb/r/rpl_ndb_extra_col_master.result mysql-test/suite/rpl_ndb/t/rpl_ndb_extraCol.test => mysql-test/suite/rpl_ndb/t/rpl_ndb_extra_col_slave.test mysql-test/suite/rpl_ndb/t/rpl_ndb_extraColMaster.test => mysql-test/suite/rpl_ndb/t/rpl_ndb_extra_col_master.test modified: mysql-test/mysql-test-run.pl mysql-test/suite/rpl_ndb/r/rpl_ndb_slave_lsu.result mysql-test/suite/rpl_ndb/r/rpl_ndb_slave_lsu_anyval.result mysql-test/suite/rpl_ndb/r/rpl_ndb_ui2.result mysql-test/suite/rpl_ndb/t/rpl_ndb_insert_ignore.test mysql-test/suite/rpl_ndb/t/rpl_ndb_multi_binlog_update.cnf mysql-test/suite/rpl_ndb/t/rpl_ndb_multi_binlog_update.inc mysql-test/suite/rpl_ndb/t/rpl_ndb_slave_lsu.test mysql-test/suite/rpl_ndb/t/rpl_ndb_ui2.test sql/ha_ndbcluster.cc sql/ha_ndbcluster.h storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp storage/ndb/test/run-test/daily-basic-tests.txt mysql-test/suite/rpl_ndb/t/rpl_ndb_extra_col_slave.test === modified file 'storage/ndb/include/kernel/signaldata/QueryTree.hpp' --- a/storage/ndb/include/kernel/signaldata/QueryTree.hpp 2011-02-23 19:28:26 +0000 +++ b/storage/ndb/include/kernel/signaldata/QueryTree.hpp 2011-05-04 11:45:33 +0000 @@ -99,6 +99,17 @@ struct DABits */ NI_LINKED_DISK = 0x100, + /** + * If REPEAT_SCAN_RESULT is set, multiple star-joined (or bushy, or X) + * scan results are handled by repeating the other scans result + * when we advance to the next batch chunk for the current 'active' + * result set. + * This removes the requirement for the API client to being able + * buffer an (possible huge) amount of scan result relating to + * the same parent scan. + */ + NI_REPEAT_SCAN_RESULT = 0x200, + NI_END = 0 }; === modified file 'storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp' --- a/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp 2011-02-23 19:28:26 +0000 +++ b/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp 2011-05-04 11:45:33 +0000 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "../dbtup/tuppage.hpp" @@ -104,6 +105,7 @@ public: typedef LocalDataBuffer2<14, LocalArenaPoolImpl> Local_dependency_map; typedef DataBuffer2<14, LocalArenaPoolImpl> PatternStore; typedef LocalDataBuffer2<14, LocalArenaPoolImpl> Local_pattern_store; + typedef Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32> TreeNodeBitMask; struct RowRef { @@ -296,6 +298,8 @@ public: Signal* m_start_signal; // Argument to first node in tree SegmentedSectionPtr m_keyPtr; + TreeNodeBitMask m_scans; // TreeNodes doing scans + // Used for resolving dependencies Ptr m_node_list[NDB_SPJ_MAX_TREE_NODES]; }; @@ -415,6 +419,18 @@ public: void (Dbspj::*m_parent_batch_complete)(Signal*,Ptr,Ptr); /** + * This function is called on the *child* by the *parent* when this + * child should prepare to resend results related to parents current batch + */ + void (Dbspj::*m_parent_batch_repeat)(Signal*,Ptr,Ptr); + + /** + * This function is called on the *child* by the *parent* when + * child should release buffers related to parents current batch + */ + void (Dbspj::*m_parent_batch_cleanup)(Ptr,Ptr); + + /** * This function is called when getting a SCAN_NEXTREQ */ void (Dbspj::*m_execSCAN_NEXTREQ)(Signal*, Ptr,Ptr); @@ -441,7 +457,7 @@ public: * should only do local cleanup(s) */ void (Dbspj::*m_cleanup)(Ptr, Ptr); - }; + }; //struct OpInfo struct LookupData { @@ -520,6 +536,7 @@ public: Uint16 m_frags_outstanding; Uint32 m_rows_received; // #execTRANSID_AI Uint32 m_rows_expecting; // Sum(ScanFragConf) + Uint32 m_batch_chunks; // #SCAN_FRAGREQ + #SCAN_NEXTREQ to retrieve batch Uint32 m_scanCookie; Uint32 m_fragCount; ScanFragHandle_list::HeadPOD m_fragments; // ScanFrag states @@ -547,7 +564,8 @@ public: TreeNode() : m_magic(MAGIC), m_state(TN_END), - m_parentPtrI(RNIL), m_requestPtrI(0) + m_parentPtrI(RNIL), m_requestPtrI(0), + m_ancestors() { } @@ -555,6 +573,7 @@ public: : m_magic(MAGIC), m_info(0), m_bits(T_LEAF), m_state(TN_BUILDING), m_parentPtrI(RNIL), m_requestPtrI(request), + m_ancestors(), nextList(RNIL), prevList(RNIL) { // m_send.m_ref = 0; @@ -658,7 +677,7 @@ public: T_REPORT_BATCH_COMPLETE = 0x200, /** - * Do I need to know when parent batch is cimpleted + * Do I need to know when parent batch is completed */ T_NEED_REPORT_BATCH_COMPLETED = 0x400, @@ -677,6 +696,11 @@ public: */ T_SCAN_PARALLEL = 0x2000, + /** + * Possible requesting resultset for this index scan to be repeated + */ + T_SCAN_REPEATABLE = 0x4000, + // End marker... T_END = 0 }; @@ -689,6 +713,7 @@ public: Uint32 m_batch_size; Uint32 m_parentPtrI; const Uint32 m_requestPtrI; + TreeNodeBitMask m_ancestors; Dependency_map::Head m_dependent_nodes; PatternStore::Head m_keyPattern; PatternStore::Head m_attrParamPattern; @@ -725,7 +750,7 @@ public: Uint32 nextPool; }; Uint32 prevList; - }; + }; //struct TreeNode static const Ptr NullTreeNodePtr; @@ -745,12 +770,13 @@ public: { enum RequestBits { - RT_SCAN = 0x1 // unbounded result set, scan interface - ,RT_ROW_BUFFERS = 0x2 // Do any of the node use row-buffering - ,RT_MULTI_SCAN = 0x4 // Is there several scans in request - ,RT_VAR_ALLOC = 0x8 // Is var-allocation used for row-buffer - ,RT_NEED_PREPARE = 0x10 // Does any node need m_prepare hook - ,RT_NEED_COMPLETE = 0x20 // Does any node need m_complete hook + RT_SCAN = 0x1 // unbounded result set, scan interface + ,RT_ROW_BUFFERS = 0x2 // Do any of the node use row-buffering + ,RT_MULTI_SCAN = 0x4 // Is there several scans in request + ,RT_VAR_ALLOC = 0x8 // Is var-allocation used for row-buffer + ,RT_NEED_PREPARE = 0x10 // Does any node need m_prepare hook + ,RT_NEED_COMPLETE = 0x20 // Does any node need m_complete hook + ,RT_REPEAT_SCAN_RESULT = 0x40 // Repeat bushy scan result when required }; enum RequestState @@ -765,7 +791,7 @@ public: RS_ABORTED = 0x2008, // Aborted and waiting for SCAN_NEXTREQ RS_END = 0 - }; + }; //struct Request Request() {} Request(const ArenaHead & arena) : m_arena(arena) {} @@ -781,7 +807,8 @@ public: TreeNode_list::Head m_nodes; TreeNodeCursor_list::Head m_cursor_nodes; Uint32 m_cnt_active; // No of "running" nodes - Bitmask<1> m_active_nodes; // Nodes which will return more data + TreeNodeBitMask + m_active_nodes; // Nodes which will return more data in NEXTREQ Uint32 m_rows; // Rows accumulated in current batch Uint32 m_outstanding; // Outstanding signals, when 0, batch is done Uint16 m_lookup_node_data[MAX_NDB_NODES]; @@ -976,6 +1003,7 @@ private: void start(Signal*, Ptr); void checkBatchComplete(Signal*, Ptr, Uint32 cnt); void batchComplete(Signal*, Ptr); + void prepareNextBatch(Signal*, Ptr); void sendConf(Signal*, Ptr, bool is_complete); void complete(Signal*, Ptr); void cleanup(Ptr); @@ -988,12 +1016,11 @@ private: void releaseRequestBuffers(Ptr requestPtr, bool reset); void releaseNodeRows(Ptr requestPtr, Ptr); void releaseRow(Ptr, RowRef ref); - Uint32 releaseScanBuffers(Ptr requestPtr, Ptr); - void registerCursor(Ptr, Ptr); + void registerActiveCursor(Ptr, Ptr); void nodeFail_checkRequests(Signal*); + void cleanupChildBranch(Ptr, Ptr); void cleanup_common(Ptr, Ptr); - void mark_active(Ptr, Ptr, bool value); /** * Row buffering @@ -1141,13 +1168,17 @@ private: Uint32 scanIndex_findFrag(Local_ScanFragHandle_list &, Ptr&, Uint32 fragId); void scanIndex_parent_batch_complete(Signal*, Ptr, Ptr); + void scanIndex_parent_batch_repeat(Signal*, Ptr, Ptr); void scanIndex_execSCAN_NEXTREQ(Signal*, Ptr,Ptr); void scanIndex_complete(Signal*, Ptr, Ptr); void scanIndex_abort(Signal*, Ptr, Ptr); Uint32 scanIndex_execNODE_FAILREP(Signal*signal, Ptr, Ptr, NdbNodeBitmask); + void scanIndex_parent_batch_cleanup(Ptr, Ptr); void scanIndex_cleanup(Ptr, Ptr); + void scanIndex_release_rangekeys(Ptr, Ptr); + /** * Page manager */ === modified file 'storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp' --- a/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2011-04-29 09:11:12 +0000 +++ b/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2011-05-04 11:45:33 +0000 @@ -973,19 +973,6 @@ Dbspj::build(Build_context& ctx, jam(); requestPtr.p->m_bits |= Request::RT_VAR_ALLOC; } - - { - /** - * If multi scan, then cursors are determined when one batch is complete - * hence clear list here... - * But if it's single scan...the list will already contain the - * only scan in the tree - */ - Local_TreeNodeCursor_list list(m_treenode_pool, - requestPtr.p->m_cursor_nodes); - ndbassert(list.noOfElements() > 1); - list.remove(); - } } return 0; @@ -1119,6 +1106,8 @@ Dbspj::batchComplete(Signal* signal, Ptr { ndbassert(is_complete); } + + prepareNextBatch(signal, requestPtr); sendConf(signal, requestPtr, is_complete); } else if (is_complete && need_complete_phase) @@ -1158,6 +1147,132 @@ Dbspj::batchComplete(Signal* signal, Ptr } } +/** + * Locate next TreeNode(s) to retrieve more rows from. + * + * Calcule set of 'm_active_nodes' we will receive from in NEXTREQ. + * Add these TreeNodes to the cursor list to be iterated. + */ +void +Dbspj::prepareNextBatch(Signal* signal, Ptr requestPtr) +{ + requestPtr.p->m_cursor_nodes.init(); + requestPtr.p->m_active_nodes.clear(); + + if (requestPtr.p->m_cnt_active == 0) + { + jam(); + return; + } + + if (requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT) + { + /** + * If REPEAT_SCAN_RESULT we handle byshy scans by return more *new* rows + * from only one of the active child scans. If there are multiple + * bushy scans not being able to return their current result set in + * a single batch, result sets from the other child scans are repeated + * until all rows has been returned to the API client. + * + * Hence, the cross joined results from the bushy scans are partly + * produced within the SPJ block on a 'batchsize granularity', + * and partly is the responsibility of the API-client by iterating + * the result rows within the current result batches. + * (Opposed to non-REPEAT_SCAN_RESULT, the client only have to care about + * the current batched rows - no buffering is required) + */ + jam(); + Ptr nodePtr; + Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes); + + /** + * Locate last 'TN_ACTIVE' TreeNode which is the only one choosen + * to return more *new* rows. + */ + for (list.last(nodePtr); !nodePtr.isNull(); list.prev(nodePtr)) + { + if (nodePtr.p->m_state == TreeNode::TN_ACTIVE) + { + jam(); + DEBUG("Will fetch more from 'active' m_node_no: " << nodePtr.p->m_node_no); + /** + * A later NEXTREQ will request a *new* batch of rows from this TreeNode. + */ + registerActiveCursor(requestPtr, nodePtr); + break; + } + } + + /** + * Restart/repeat other (index scan) child batches which: + * - Being 'after' nodePtr located above. + * - Not being an ancestor of (depends on) any 'active' TreeNode. + * (As these scans are started when rows from these parent nodes + * arrives.) + */ + if (!nodePtr.isNull()) + { + jam(); + DEBUG("Calculate 'active', w/ cursor on m_node_no: " << nodePtr.p->m_node_no); + + /* Restart any partial index-scans after this 'TN_ACTIVE' TreeNode */ + for (list.next(nodePtr); !nodePtr.isNull(); list.next(nodePtr)) + { + jam(); + if (!nodePtr.p->m_ancestors.overlaps (requestPtr.p->m_active_nodes)) + { + jam(); + ndbrequire(nodePtr.p->m_state != TreeNode::TN_ACTIVE); + ndbrequire(nodePtr.p->m_info != 0); + if (nodePtr.p->m_info->m_parent_batch_repeat != 0) + { + jam(); + (this->*(nodePtr.p->m_info->m_parent_batch_repeat))(signal, + requestPtr, + nodePtr); + } + } + } + } // if (!nodePtr.isNull() + } + else // not 'RT_REPEAT_SCAN_RESULT' + { + /** + * If not REPEAT_SCAN_RESULT multiple active TreeNodes may return their + * remaining result simultaneously. In case of byshy-scans, these + * concurrent result streams are cross joins of each other + * in SQL terms. In order to produce the cross joined result, it is + * the responsibility of the API-client to buffer these streams and + * iterate them to produce the cross join. + */ + jam(); + Ptr nodePtr; + Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes); + TreeNodeBitMask ancestors_of_active; + + for (list.last(nodePtr); !nodePtr.isNull(); list.prev(nodePtr)) + { + /** + * If we are active (i.e not consumed all rows originating + * from parent rows) and we are not in the set of parents + * for any active child: + * + * Then, this is a position that execSCAN_NEXTREQ should continue + */ + if (nodePtr.p->m_state == TreeNode::TN_ACTIVE && + !ancestors_of_active.get (nodePtr.p->m_node_no)) + { + jam(); + DEBUG("Add 'active' m_node_no: " << nodePtr.p->m_node_no); + registerActiveCursor(requestPtr, nodePtr); + ancestors_of_active.bitOR(nodePtr.p->m_ancestors); + } + } + } // if (RT_REPEAT_SCAN_RESULT) + + DEBUG("Calculated 'm_active_nodes': " << requestPtr.p->m_active_nodes.rep.data[0]); +} + void Dbspj::sendConf(Signal* signal, Ptr requestPtr, bool is_complete) { @@ -1283,51 +1398,52 @@ void Dbspj::releaseScanBuffers(Ptr requestPtr) { Ptr treeNodePtr; - { - Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes); - list.first(treeNodePtr); - } - - /** - * This is calling recursive function...buh! - * but i can't figure out how to do it someother way... - */ + Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes); + TreeNodeBitMask ancestors_of_active; - /** - * This recursive function will register nodes to be notified - * about SCAN_NEXTREQ. - * - * Clear it first, so that nodes won't end up in it several times... - */ - requestPtr.p->m_cursor_nodes.init(); + for (list.last(treeNodePtr); !treeNodePtr.isNull(); list.prev(treeNodePtr)) + { + /** + * If there are no active children, + * then we can cleanup in our sub-branch + */ + if (!ancestors_of_active.get(treeNodePtr.p->m_node_no)) + { + if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER) + { + jam(); + releaseNodeRows(requestPtr, treeNodePtr); + } + + if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE) + { + jam(); + cleanupChildBranch(requestPtr, treeNodePtr); + } + } + /** + * Build Bitmask of all nodes having TN_ACTIVE childs + */ + if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE) + { + ancestors_of_active.bitOR(treeNodePtr.p->m_ancestors); + } + } /** * Needs to be atleast 1 active otherwise we should have * taken the cleanup "path" in batchComplete */ - ndbrequire(releaseScanBuffers(requestPtr, treeNodePtr) > 0); + ndbrequire(requestPtr.p->m_cnt_active >= 1); } void -Dbspj::mark_active(Ptr requestPtr, - Ptr treeNodePtr, - bool value) +Dbspj::registerActiveCursor(Ptr requestPtr, Ptr treeNodePtr) { Uint32 bit = treeNodePtr.p->m_node_no; - if (value) - { - ndbassert(requestPtr.p->m_active_nodes.get(bit) == false); - } - else - { - ndbassert(requestPtr.p->m_active_nodes.get(bit) == true); - } - requestPtr.p->m_active_nodes.set(bit, value); -} + ndbrequire(!requestPtr.p->m_active_nodes.get(bit)); + requestPtr.p->m_active_nodes.set(bit); -void -Dbspj::registerCursor(Ptr requestPtr, Ptr treeNodePtr) -{ Local_TreeNodeCursor_list list(m_treenode_pool, requestPtr.p->m_cursor_nodes); #ifdef VM_TRACE { @@ -1341,12 +1457,9 @@ Dbspj::registerCursor(Ptr reque list.add(treeNodePtr); } -Uint32 -Dbspj::releaseScanBuffers(Ptr requestPtr, - Ptr treeNodePtr) +void +Dbspj::cleanupChildBranch(Ptr requestPtr, Ptr treeNodePtr) { - Uint32 active_child = 0; - LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool); Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes); Dependency_map::ConstDataBufferIterator it; @@ -1354,40 +1467,15 @@ Dbspj::releaseScanBuffers(Ptr r { jam(); Ptr childPtr; - m_treenode_pool.getPtr(childPtr, * it.data); - active_child += releaseScanBuffers(requestPtr, childPtr); - } - - const bool active = treeNodePtr.p->m_state == TreeNode::TN_ACTIVE; - if (active_child == 0) - { - jam(); - - /** - * If there is no active children, - * then we can release our own (optionally) buffered rows - */ - if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER) - { - jam(); - releaseNodeRows(requestPtr, treeNodePtr); - } - - /** - * If we have no active children, - * and we ourself is active (i.e not consumed all rows originating - * from parent rows) - * - * Then, this is a position that execSCAN_NEXTREQ should continue - */ - if (active) + m_treenode_pool.getPtr(childPtr, *it.data); + if (childPtr.p->m_info->m_parent_batch_cleanup != 0) { jam(); - registerCursor(requestPtr, treeNodePtr); + (this->*(childPtr.p->m_info->m_parent_batch_cleanup))(requestPtr, + childPtr); } + cleanupChildBranch(requestPtr,childPtr); } - - return active_child + (active ? 1 : 0); } void @@ -1672,7 +1760,7 @@ Dbspj::complete(Signal* signal, Ptr requestPtr) { - ndbrequire(requestPtr.p->m_active_nodes.isclear()); + ndbrequire(requestPtr.p->m_cnt_active == 0); { Ptr nodePtr; Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes); @@ -1949,16 +2037,46 @@ Dbspj::execSCAN_NEXTREQ(Signal* signal) Ptr treeNodePtr; Local_TreeNodeCursor_list list(m_treenode_pool, requestPtr.p->m_cursor_nodes); + Uint32 cnt_active = 0; + for (list.first(treeNodePtr); !treeNodePtr.isNull(); list.next(treeNodePtr)) { - jam(); - ndbrequire(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE); - ndbrequire(treeNodePtr.p->m_info != 0 && - treeNodePtr.p->m_info->m_execSCAN_NEXTREQ != 0); - (this->*(treeNodePtr.p->m_info->m_execSCAN_NEXTREQ))(signal, - requestPtr, - treeNodePtr); + if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE) + { + jam(); + DEBUG("SCAN_NEXTREQ on TreeNode: " << treeNodePtr.i + << ", m_node_no: " << treeNodePtr.p->m_node_no + << ", w/ m_parentPtrI: " << treeNodePtr.p->m_parentPtrI); + + ndbrequire(treeNodePtr.p->m_info != 0 && + treeNodePtr.p->m_info->m_execSCAN_NEXTREQ != 0); + (this->*(treeNodePtr.p->m_info->m_execSCAN_NEXTREQ))(signal, + requestPtr, + treeNodePtr); + cnt_active++; + } + else + { + /** + * Restart any other scans not being 'TN_ACTIVE' + * (Only effective if 'RT_REPEAT_SCAN_RESULT') + */ + jam(); + ndbrequire(requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT); + DEBUG(" Restart TreeNode: " << treeNodePtr.i + << ", m_node_no: " << treeNodePtr.p->m_node_no + << ", w/ m_parentPtrI: " << treeNodePtr.p->m_parentPtrI); + + ndbrequire(treeNodePtr.p->m_info != 0 && + treeNodePtr.p->m_info->m_parent_batch_complete !=0 ); + (this->*(treeNodePtr.p->m_info->m_parent_batch_complete))(signal, + requestPtr, + treeNodePtr); + } } + /* Expected only a single ACTIVE TreeNode among the cursors */ + ndbrequire(cnt_active == 1 || + !(requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT)); } } @@ -2559,6 +2677,8 @@ Dbspj::g_LookupOpInfo = 0, // execSCAN_FRAGCONF &Dbspj::lookup_parent_row, &Dbspj::lookup_parent_batch_complete, + 0, // Dbspj::lookup_parent_batch_repeat, + 0, // Dbspj::lookup_parent_batch_cleanup, 0, // Dbspj::lookup_execSCAN_NEXTREQ 0, // Dbspj::lookup_complete &Dbspj::lookup_abort, @@ -3613,6 +3733,8 @@ Dbspj::g_ScanFragOpInfo = &Dbspj::scanFrag_execSCAN_FRAGCONF, 0, // parent row 0, // parent batch complete + 0, // parent batch repeat + 0, // Dbspj::scanFrag_parent_batch_cleanup, &Dbspj::scanFrag_execSCAN_NEXTREQ, 0, // Dbspj::scanFrag_complete &Dbspj::scanFrag_abort, @@ -3716,13 +3838,7 @@ Dbspj::scanFrag_build(Build_context& ctx } ctx.m_scan_cnt++; - /** - * In the scenario with only 1 scan in tree, - * register cursor here, so we don't need to search for in after build - * If m_scan_cnt > 1, - * then this list will simply be cleared after build - */ - registerCursor(requestPtr, treeNodePtr); + ctx.m_scans.set(treeNodePtr.p->m_node_no); if (ctx.m_start_signal) { @@ -3833,7 +3949,6 @@ Dbspj::scanFrag_send(Signal* signal, requestPtr.p->m_outstanding++; requestPtr.p->m_cnt_active++; - mark_active(requestPtr, treeNodePtr, true); treeNodePtr.p->m_state = TreeNode::TN_ACTIVE; Ptr scanFragHandlePtr; m_scanfraghandle_pool.getPtr(scanFragHandlePtr, treeNodePtr.p-> @@ -3966,7 +4081,6 @@ Dbspj::scanFrag_execSCAN_FRAGREF(Signal* ndbrequire(requestPtr.p->m_outstanding); requestPtr.p->m_outstanding--; treeNodePtr.p->m_state = TreeNode::TN_INACTIVE; - mark_active(requestPtr, treeNodePtr, false); abort(signal, requestPtr, errCode); } @@ -4014,7 +4128,6 @@ Dbspj::scanFrag_execSCAN_FRAGCONF(Signal ndbrequire(requestPtr.p->m_cnt_active); requestPtr.p->m_cnt_active--; treeNodePtr.p->m_state = TreeNode::TN_INACTIVE; - mark_active(requestPtr, treeNodePtr, false); scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_COMPLETE; } else @@ -4170,6 +4283,8 @@ Dbspj::g_ScanIndexOpInfo = &Dbspj::scanIndex_execSCAN_FRAGCONF, &Dbspj::scanIndex_parent_row, &Dbspj::scanIndex_parent_batch_complete, + &Dbspj::scanIndex_parent_batch_repeat, + &Dbspj::scanIndex_parent_batch_cleanup, &Dbspj::scanIndex_execSCAN_NEXTREQ, &Dbspj::scanIndex_complete, &Dbspj::scanIndex_abort, @@ -4282,14 +4397,23 @@ Dbspj::scanIndex_build(Build_context& ct nodePtr.i = nodePtr.p->m_parentPtrI; } - ctx.m_scan_cnt++; /** - * In the scenario with only 1 scan in tree, - * register cursor here, so we don't need to search for in after build - * If m_scan_cnt > 1, - * then this list will simply be cleared after build + * If there exists other scan TreeNodes not being among + * my ancestors, results from this scanIndex may be repeated + * as part of an X-scan. + * + * NOTE: The scan nodes being along the left deep ancestor chain + * are not 'repeatable' as they are driving the + * repeated X-scan and are thus not repeated themself. */ - registerCursor(requestPtr, treeNodePtr); + if (requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT && + !treeNodePtr.p->m_ancestors.contains(ctx.m_scans)) + { + nodePtr.p->m_bits |= TreeNode::T_SCAN_REPEATABLE; + } + + ctx.m_scan_cnt++; + ctx.m_scans.set(treeNodePtr.p->m_node_no); return 0; } while (0); @@ -4317,6 +4441,7 @@ Dbspj::parseScanIndex(Build_context& ctx data.m_fragments.init(); data.m_frags_outstanding = 0; data.m_frags_not_complete = 0; + data.m_batch_chunks = 0; err = parseDA(ctx, requestPtr, treeNodePtr, tree, treeBits, param, paramBits); @@ -4860,6 +4985,32 @@ Dbspj::scanIndex_parent_batch_complete(S } void +Dbspj::scanIndex_parent_batch_repeat(Signal* signal, + Ptr requestPtr, + Ptr treeNodePtr) +{ + jam(); + ScanIndexData& data = treeNodePtr.p->m_scanindex_data; + + DEBUG("scanIndex_parent_batch_repeat(), m_node_no: " << treeNodePtr.p->m_node_no + << ", m_batch_chunks: " << data.m_batch_chunks); + + /** + * Register index-scans to be restarted if we didn't get all + * previously fetched parent related child rows in a single batch. + */ + if (data.m_batch_chunks > 1) + { + jam(); + DEBUG("Register TreeNode for restart, m_node_no: " << treeNodePtr.p->m_node_no); + ndbrequire(treeNodePtr.p->m_state != TreeNode::TN_ACTIVE); + registerActiveCursor(requestPtr, treeNodePtr); + data.m_frags_not_complete = 1; + data.m_batch_chunks = 0; + } +} + +void Dbspj::scanIndex_send(Signal* signal, Ptr requestPtr, Ptr treeNodePtr) @@ -4884,12 +5035,24 @@ Dbspj::scanIndex_send(Signal* signal, } /** - * keys, - * - sliced out to each ScanFragHandle => release = true - * - all kept on first ScanFragHandle => release = false + * if (m_bits & prunemask): + * - Range keys sliced out to each ScanFragHandle + * - Else, range keys kept on first (and only) ScanFragHandle */ Uint32 prunemask = TreeNode::T_PRUNE_PATTERN | TreeNode::T_CONST_PRUNE; - bool release = (treeNodePtr.p->m_bits & prunemask) != 0; + + /** + * Don't release keyInfo if it may be sent multiple times, eiter: + * - Not pruned -> same keyInfo goes to all datanodes. + * - Result handling is REPEAT_SCAN_RESULT and same batch may be + * repeated multiple times due to incomplete bushy X-scans. + * (by ::scanIndex_parent_batch_repeat()) + * + * When not released, ::scanIndex_parent_batch_cleanup() will + * eventually release them when preparing arrival of a new parent batch. + */ + const bool release = ((treeNodePtr.p->m_bits & prunemask) != 0 && + (treeNodePtr.p->m_bits & TreeNode::T_SCAN_REPEATABLE) == 0); ScanFragReq* req = reinterpret_cast(signal->getDataPtrSend()); memcpy(req, org, sizeof(data.m_scanFragReq)); @@ -4901,18 +5064,16 @@ Dbspj::scanIndex_send(Signal* signal, Ptr fragPtr; Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments); - Uint32 keyInfoPtrI; - if (release == false) + Uint32 keyInfoPtrI = RNIL; + list.first(fragPtr); + if ((treeNodePtr.p->m_bits & prunemask) == 0) { jam(); - list.first(fragPtr); keyInfoPtrI = fragPtr.p->m_rangePtrI; ndbrequire(keyInfoPtrI != RNIL); - fragPtr.p->m_rangePtrI = RNIL; } Uint32 batchRange = 0; - list.first(fragPtr); for (Uint32 i = 0; i < cnt && !fragPtr.isNull(); list.next(fragPtr)) { jam(); @@ -4928,7 +5089,7 @@ Dbspj::scanIndex_send(Signal* signal, req->senderData = fragPtr.i; req->fragmentNoKeyLen = fragPtr.p->m_fragId; - if (release) + if ((treeNodePtr.p->m_bits & prunemask)) { jam(); keyInfoPtrI = fragPtr.p->m_rangePtrI; @@ -4938,8 +5099,9 @@ Dbspj::scanIndex_send(Signal* signal, fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE; continue; } - fragPtr.p->m_rangePtrI = RNIL; - + } + if (release) + { /** * If we'll use sendSignal() and we need to send the attrInfo several * times, we need to copy them @@ -4948,7 +5110,6 @@ Dbspj::scanIndex_send(Signal* signal, ndbrequire(dupSection(tmp, attrInfoPtrI)); // TODO handle error attrInfoPtrI = tmp; } - fragPtr.p->reset_ranges(); req->variableData[0] = batchRange; getSection(handle.m_ptr[0], attrInfoPtrI); @@ -4982,6 +5143,8 @@ Dbspj::scanIndex_send(Signal* signal, jam(); sendSignal(ref, GSN_SCAN_FRAGREQ, signal, NDB_ARRAY_SIZE(data.m_scanFragReq), JBB, &handle); + fragPtr.p->m_rangePtrI = RNIL; + fragPtr.p->reset_ranges(); } else { @@ -4997,14 +5160,6 @@ Dbspj::scanIndex_send(Signal* signal, batchRange += bs_rows; } - if (release == false) - { - jam(); - // only supported for now... - ndbrequire(treeNodePtr.p->m_bits & TreeNode::T_SCAN_PARALLEL); - releaseSection(keyInfoPtrI); - } - if (treeNodePtr.p->m_bits & TreeNode::T_SCAN_PARALLEL) { ndbrequire(data.m_frags_outstanding == data.m_frags_not_complete); @@ -5014,9 +5169,9 @@ Dbspj::scanIndex_send(Signal* signal, ndbrequire(data.m_frags_outstanding == 1); } + data.m_batch_chunks = 1; requestPtr.p->m_cnt_active++; requestPtr.p->m_outstanding++; - mark_active(requestPtr, treeNodePtr, true); treeNodePtr.p->m_state = TreeNode::TN_ACTIVE; } @@ -5117,7 +5272,6 @@ Dbspj::scanIndex_execSCAN_FRAGCONF(Signa ndbrequire(requestPtr.p->m_cnt_active); requestPtr.p->m_cnt_active--; treeNodePtr.p->m_state = TreeNode::TN_INACTIVE; - mark_active(requestPtr, treeNodePtr, false); } } @@ -5180,7 +5334,6 @@ Dbspj::scanIndex_execSCAN_FRAGREF(Signal ndbrequire(requestPtr.p->m_cnt_active); requestPtr.p->m_cnt_active--; treeNodePtr.p->m_state = TreeNode::TN_INACTIVE; - mark_active(requestPtr, treeNodePtr, false); } if (data.m_frags_outstanding == 0) @@ -5265,6 +5418,8 @@ Dbspj::scanIndex_execSCAN_NEXTREQ(Signal * so require that we did actually send something */ ndbrequire(data.m_frags_outstanding > 0); + ndbrequire(data.m_batch_chunks > 0); + data.m_batch_chunks++; requestPtr.p->m_outstanding++; ndbassert(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE); @@ -5452,26 +5607,25 @@ Dbspj::scanIndex_execNODE_FAILREP(Signal ndbrequire(requestPtr.p->m_cnt_active); requestPtr.p->m_cnt_active--; treeNodePtr.p->m_state = TreeNode::TN_INACTIVE; - mark_active(requestPtr, treeNodePtr, false); } return sum; } void -Dbspj::scanIndex_cleanup(Ptr requestPtr, - Ptr treeNodePtr) +Dbspj::scanIndex_release_rangekeys(Ptr requestPtr, + Ptr treeNodePtr) { + jam(); + DEBUG("scanIndex_release_rangekeys(), m_node_no: " << treeNodePtr.p->m_node_no); + ScanIndexData& data = treeNodePtr.p->m_scanindex_data; Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments); - if (requestPtr.p->m_state & Request::RS_ABORTING) + Ptr fragPtr; + + if (treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN) { - /** - * If we're aborting...there can be keys attached...that has not - * (and will not) be sent...release them to avoid memleak - */ jam(); - Ptr fragPtr; for (list.first(fragPtr); !fragPtr.isNull(); list.next(fragPtr)) { if (fragPtr.p->m_rangePtrI != RNIL) @@ -5479,20 +5633,52 @@ Dbspj::scanIndex_cleanup(Ptr re releaseSection(fragPtr.p->m_rangePtrI); fragPtr.p->m_rangePtrI = RNIL; } + fragPtr.p->reset_ranges(); } } else { -#ifdef VM_TRACE - Ptr fragPtr; - for (list.first(fragPtr); !fragPtr.isNull(); list.next(fragPtr)) + jam(); + list.first(fragPtr); + if (fragPtr.p->m_rangePtrI != RNIL) { - ndbrequire(fragPtr.p->m_rangePtrI == RNIL); + releaseSection(fragPtr.p->m_rangePtrI); + fragPtr.p->m_rangePtrI = RNIL; } -#endif + fragPtr.p->reset_ranges(); } - list.remove(); +} + +/** + * Parent batch has completed, and will not refetch (X-joined) results + * from its childs. Release & reset range keys which are unsent or we + * have kept for possible resubmits. + */ +void +Dbspj::scanIndex_parent_batch_cleanup(Ptr requestPtr, + Ptr treeNodePtr) +{ + DEBUG("scanIndex_parent_batch_cleanup"); + scanIndex_release_rangekeys(requestPtr,treeNodePtr); +} +void +Dbspj::scanIndex_cleanup(Ptr requestPtr, + Ptr treeNodePtr) +{ + ScanIndexData& data = treeNodePtr.p->m_scanindex_data; + DEBUG("scanIndex_cleanup"); + + /** + * Range keys has been collected wherever there are uncompleted + * parent batches...release them to avoid memleak. + */ + scanIndex_release_rangekeys(requestPtr,treeNodePtr); + + { + Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments); + list.remove(); + } if (treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN) { jam(); @@ -6085,7 +6271,7 @@ Dbspj::expandS(Uint32 & _dst, Local_patt case QueryPattern::P_PARENT: jam(); // P_PARENT is a prefix to another pattern token - // that permits code to access rows from earlier than imediate parent. + // that permits code to access rows from earlier than immediate parent. // val is no of levels to move up the tree err = appendFromParent(dst, pattern, it, val, row, hasNull); break; @@ -6150,7 +6336,7 @@ Dbspj::expandL(Uint32 & _dst, Local_patt case QueryPattern::P_PARENT: jam(); // P_PARENT is a prefix to another pattern token - // that permits code to access rows from earlier than imediate parent + // that permits code to access rows from earlier than immediate parent // val is no of levels to move up the tree err = appendFromParent(dst, pattern, it, val, row, hasNull); break; @@ -6360,6 +6546,13 @@ Dbspj::parseDA(Build_context& ctx, do { + if (treeBits & DABits::NI_REPEAT_SCAN_RESULT) + { + jam(); + DEBUG("use REPEAT_SCAN_RESULT when returning results"); + requestPtr.p->m_bits |= Request::RT_REPEAT_SCAN_RESULT; + } // DABits::NI_HAS_PARENT + if (treeBits & DABits::NI_HAS_PARENT) { jam(); @@ -6405,6 +6598,10 @@ Dbspj::parseDA(Build_context& ctx, } parentPtr.p->m_bits &= ~(Uint32)TreeNode::T_LEAF; treeNodePtr.p->m_parentPtrI = parentPtr.i; + + // Build Bitmask of all ancestors to treeNode + treeNodePtr.p->m_ancestors = parentPtr.p->m_ancestors; + treeNodePtr.p->m_ancestors.set(parentPtr.p->m_node_no); } if (unlikely(err != 0)) === modified file 'storage/ndb/src/ndbapi/NdbQueryBuilder.cpp' --- a/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2011-04-14 09:21:18 +0000 +++ b/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2011-05-04 11:48:48 +0000 @@ -2060,14 +2060,6 @@ int NdbQueryOperationDefImpl::markScanAn NdbQueryOperationDefImpl* operation = getParentOperation(); while (operation != NULL) { - if (operation->m_hasScanDescendant) - { - /* Remove this line if you want to allow bushy scans. Result sets will - * probably be wrong, but 'explain' output etc. may be useful for - * debugging. - */ - return QRY_MULTIPLE_SCAN_BRANCHES; - } operation->m_hasScanDescendant = true; if (operation->isScanOperation()) { @@ -2829,7 +2821,8 @@ NdbQueryScanOperationDefImpl::serialize( } node->tableId = tableOrIndex.getObjectId(); node->tableVersion = tableOrIndex.getObjectVersion(); - node->requestInfo = requestInfo; + // Need NI_REPEAT_SCAN_RESULT if there are star-joined scans + node->requestInfo = requestInfo | DABits::NI_REPEAT_SCAN_RESULT; QueryNode::setOpLen(node->len, QueryNode::QN_SCAN_INDEX, length); } === modified file 'storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp' --- a/storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp 2011-04-14 08:59:45 +0000 +++ b/storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp 2011-05-04 11:48:48 +0000 @@ -42,9 +42,8 @@ #define QRY_SCAN_ORDER_ALREADY_SET 4821 #define QRY_PARAMETER_HAS_WRONG_TYPE 4822 #define QRY_CHAR_PARAMETER_TRUNCATED 4823 -#define QRY_MULTIPLE_SCAN_BRANCHES 4824 -#define QRY_MULTIPLE_SCAN_SORTED 4825 -#define QRY_BATCH_SIZE_TOO_SMALL 4826 +#define QRY_MULTIPLE_SCAN_SORTED 4824 +#define QRY_BATCH_SIZE_TOO_SMALL 4825 #ifdef __cplusplus #include === modified file 'storage/ndb/src/ndbapi/Ndbinit.cpp' --- a/storage/ndb/src/ndbapi/Ndbinit.cpp 2011-04-15 06:29:59 +0000 +++ b/storage/ndb/src/ndbapi/Ndbinit.cpp 2011-05-04 09:35:08 +0000 @@ -201,8 +201,7 @@ NdbImpl::NdbImpl(Ndb_cluster_connection m_transporter_facade(ndb_cluster_connection->m_impl.m_transporter_facade), m_dictionary(ndb), theCurrentConnectIndex(0), - theNdbObjectIdMap(m_transporter_facade->theMutexPtr, - 1024,1024), + theNdbObjectIdMap(1024,1024), theNoOfDBnodes(0), theWaiter(this), m_ev_op(0), === modified file 'storage/ndb/src/ndbapi/ObjectMap.cpp' --- a/storage/ndb/src/ndbapi/ObjectMap.cpp 2011-04-07 14:02:50 +0000 +++ b/storage/ndb/src/ndbapi/ObjectMap.cpp 2011-05-04 09:35:08 +0000 @@ -18,13 +18,13 @@ #include "ObjectMap.hpp" -NdbObjectIdMap::NdbObjectIdMap(NdbMutex* mutex, Uint32 sz, Uint32 eSz) +NdbObjectIdMap::NdbObjectIdMap(Uint32 sz, Uint32 eSz): + m_expandSize(eSz), + m_size(0), + m_firstFree(InvalidId), + m_lastFree(InvalidId), + m_map(0) { - m_size = 0; - m_firstFree = InvalidId; - m_map = 0; - m_mutex = mutex; - m_expandSize = eSz; expand(sz); #ifdef DEBUG_OBJECTMAP ndbout_c("NdbObjectIdMap:::NdbObjectIdMap(%u)", sz); @@ -33,12 +33,14 @@ NdbObjectIdMap::NdbObjectIdMap(NdbMutex* NdbObjectIdMap::~NdbObjectIdMap() { + assert(checkConsistency()); free(m_map); + m_map = NULL; } int NdbObjectIdMap::expand(Uint32 incSize) { - NdbMutex_Lock(m_mutex); + assert(checkConsistency()); Uint32 newSize = m_size + incSize; MapEntry * tmp = (MapEntry*)realloc(m_map, newSize * sizeof(MapEntry)); @@ -46,20 +48,45 @@ int NdbObjectIdMap::expand(Uint32 incSiz { m_map = tmp; - for(Uint32 i = m_size; i < newSize; i++){ - m_map[i].m_next = 2 * (i + 1) + 1; + for(Uint32 i = m_size; i < newSize-1; i++) + { + m_map[i].setNext(i+1); } - m_firstFree = (2 * m_size) + 1; - m_map[newSize-1].m_next = Uint32(InvalidId); + m_firstFree = m_size; + m_lastFree = newSize - 1; + m_map[newSize-1].setNext(InvalidId); m_size = newSize; + assert(checkConsistency()); } else { - NdbMutex_Unlock(m_mutex); g_eventLogger->error("NdbObjectIdMap::expand: realloc(%u*%lu) failed", newSize, sizeof(MapEntry)); return -1; } - NdbMutex_Unlock(m_mutex); return 0; } + +bool NdbObjectIdMap::checkConsistency() +{ + if (m_firstFree == InvalidId) + { + for (Uint32 i = 0; i -//#include #include #include @@ -31,11 +30,11 @@ extern EventLogger * g_eventLogger; /** * Global ObjectMap */ -class NdbObjectIdMap //: NdbLockable +class NdbObjectIdMap { public: - STATIC_CONST( InvalidId = ~(Uint32)0 ); - NdbObjectIdMap(NdbMutex*, Uint32 initalSize = 128, Uint32 expandSize = 10); + STATIC_CONST( InvalidId = 0x7fffffff ); + NdbObjectIdMap(Uint32 initalSize, Uint32 expandSize); ~NdbObjectIdMap(); Uint32 map(void * object); @@ -43,34 +42,75 @@ public: void * getObject(Uint32 id); private: + const Uint32 m_expandSize; Uint32 m_size; - Uint32 m_expandSize; Uint32 m_firstFree; - union MapEntry { - UintPtr m_next; - void * m_obj; - } * m_map; + /** + * We put released entries at the end of the free list. That way, we delay + * re-use of an object id as long as possible. This minimizes the chance + * of sending an incoming message to the wrong object because the recipient + * object id was reused. + */ + Uint32 m_lastFree; + + class MapEntry + { + public: + bool isFree() const + { + return (m_val & 1) == 1; + } + + Uint32 getNext() const + { + assert(isFree()); + return static_cast(m_val >> 1); + } + + void setNext(Uint32 next) + { + m_val = (next << 1) | 1; + } + + void* getObj() const + { + assert((m_val & 3) == 0); + return reinterpret_cast(m_val); + } + + void setObj(void* obj) + { + m_val = reinterpret_cast(obj); + assert((m_val & 3) == 0); + } + + private: + /** + * This holds either a pointer to a mapped object *or* the index of the + * next entry in the free list. If it is a pointer, then the two least + * significant bits should be zero (requiring all mapped objects to be + * four-byte aligned). If it is an index, then bit 0 should be set. + */ + UintPtr m_val; + }; + + MapEntry* m_map; - NdbMutex * m_mutex; int expand(Uint32 newSize); + // For debugging purposes. + bool checkConsistency(); }; inline Uint32 -NdbObjectIdMap::map(void * object){ - - // lock(); - assert((UintPtr(object) & 3) == 0); - - if(m_firstFree == Uint32(InvalidId) && expand(m_expandSize)) +NdbObjectIdMap::map(void * object) +{ + if(m_firstFree == InvalidId && expand(m_expandSize)) return InvalidId; - Uint32 ff = m_firstFree >> 1; - assert(UintPtr(m_map[ff].m_next) == Uint32(m_map[ff].m_next)); - m_firstFree = Uint32(m_map[ff].m_next); - m_map[ff].m_obj = object; - - // unlock(); + const Uint32 ff = m_firstFree; + m_firstFree = m_map[ff].getNext(); + m_map[ff].setObj(object); DBUG_PRINT("info",("NdbObjectIdMap::map(0x%lx) %u", (long) object, ff<<2)); @@ -79,26 +119,37 @@ NdbObjectIdMap::map(void * object){ inline void * -NdbObjectIdMap::unmap(Uint32 id, void *object){ - - Uint32 i = id>>2; +NdbObjectIdMap::unmap(Uint32 id, void *object) +{ + const Uint32 i = id>>2; - // lock(); - if(i < m_size){ - void * obj = m_map[i].m_obj; - if (object == obj) { - m_map[i].m_next = m_firstFree; - m_firstFree = (2 * i) + 1; - } else { + assert(i < m_size); + if(i < m_size) + { + void * const obj = m_map[i].getObj(); + if (object == obj) + { + m_map[i].setNext(InvalidId); + if (m_firstFree == InvalidId) + { + m_firstFree = i; + } + else + { + m_map[m_lastFree].setNext(i); + } + m_lastFree = i; + } + else + { g_eventLogger->error("NdbObjectIdMap::unmap(%u, 0x%lx) obj=0x%lx", id, (long) object, (long) obj); DBUG_PRINT("error",("NdbObjectIdMap::unmap(%u, 0x%lx) obj=0x%lx", id, (long) object, (long) obj)); + assert(false); return 0; } - // unlock(); - DBUG_PRINT("info",("NdbObjectIdMap::unmap(%u) obj=0x%lx", id, (long) obj)); return obj; @@ -107,12 +158,21 @@ NdbObjectIdMap::unmap(Uint32 id, void *o } inline void * -NdbObjectIdMap::getObject(Uint32 id){ +NdbObjectIdMap::getObject(Uint32 id) +{ // DBUG_PRINT("info",("NdbObjectIdMap::getObject(%u) obj=0x%x", id, m_map[id>>2].m_obj)); id >>= 2; - if(id < m_size){ - if ((m_map[id].m_next & 3) == 0) - return m_map[id].m_obj; + assert(id < m_size); + if(id < m_size) + { + if(m_map[id].isFree()) + { + return 0; + } + else + { + return m_map[id].getObj(); + } } return 0; } === modified file 'storage/ndb/src/ndbapi/ndberror.c' --- a/storage/ndb/src/ndbapi/ndberror.c 2011-04-29 09:23:56 +0000 +++ b/storage/ndb/src/ndbapi/ndberror.c 2011-05-04 11:48:48 +0000 @@ -797,8 +797,6 @@ ErrorBundle ErrorCodes[] = { "Parameter value has an incompatible datatype" }, { QRY_CHAR_PARAMETER_TRUNCATED, DMEC, AE, "Character Parameter was right truncated" }, - { QRY_MULTIPLE_SCAN_BRANCHES, DMEC, AE, - "Query has scans that are not descendants/ancestors of each other." }, { QRY_MULTIPLE_SCAN_SORTED, DMEC, AE, "Query with multiple scans may not be sorted." }, { QRY_SEQUENTIAL_SCAN_SORTED, DMEC, AE, No bundle (reason: useless for push emails).