List:Commits« Previous MessageNext Message »
From:ingo Date:April 5 2006 1:00pm
Subject:bk commit into 5.1 tree (ingo:1.2286)
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of mydev. When mydev does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2286 06/04/05 15:00:28 ingo@stripped +9 -0
  Merge mysql.com:/home/mydev/mysql-5.0-bug5390
  into  mysql.com:/home/mydev/mysql-5.1-bug5390

  sql/lock.cc
    1.90 06/04/05 15:00:24 ingo@stripped +25 -109
    BUG#5390 - problems with merge tables
    Manual merge from 5.0.

  storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
    1.108 06/04/05 14:58:26 ingo@stripped +0 -0
    Auto merged

  storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
    1.73.20.2 06/04/05 14:58:25 ingo@stripped +0 -0
    Merge rename: ndb/src/kernel/blocks/dbtc/DbtcMain.cpp -> storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp

  storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
    1.58 06/04/05 14:58:25 ingo@stripped +0 -0
    Auto merged

  sql/table.h
    1.137 06/04/05 14:58:25 ingo@stripped +0 -0
    Auto merged

  sql/sql_insert.cc
    1.195 06/04/05 14:58:25 ingo@stripped +0 -0
    Auto merged

  sql/share/charsets/Index.xml
    1.40 06/04/05 14:58:25 ingo@stripped +0 -0
    Auto merged

  sql/item.cc
    1.181 06/04/05 14:58:25 ingo@stripped +0 -0
    Auto merged

  mysql-test/t/information_schema.test
    1.73 06/04/05 14:58:25 ingo@stripped +0 -0
    Auto merged

  mysql-test/r/information_schema.result
    1.113 06/04/05 14:58:25 ingo@stripped +0 -0
    Auto merged

  storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
    1.24.21.2 06/04/05 14:58:24 ingo@stripped +0 -0
    Merge rename: ndb/src/kernel/blocks/dbdih/DbdihMain.cpp -> storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	ingo
# Host:	chilla.local
# Root:	/home/mydev/mysql-5.1-bug5390/RESYNC

--- 1.89/sql/lock.cc	2006-03-29 13:27:30 +02:00
+++ 1.90/sql/lock.cc	2006-04-05 15:00:24 +02:00
@@ -68,20 +68,20 @@
 
 #include "mysql_priv.h"
 #include <hash.h>
-#include "ha_myisammrg.h"
-#ifndef MASTER
-#include "../srclib/myisammrg/myrg_def.h"
-#else
-#include "../storage/myisammrg/myrg_def.h"
-#endif
+#include <assert.h>
+
+extern HASH open_cache;
+
+/* flags for get_lock_data */
+#define GET_LOCK_UNLOCK         1
+#define GET_LOCK_STORE_LOCKS    2
 
 static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table,uint count,
-				 bool unlock, TABLE **write_locked);
+				 uint flags, TABLE **write_locked);
 static int lock_external(THD *thd, TABLE **table,uint count);
 static int unlock_external(THD *thd, TABLE **table,uint count);
 static void print_lock_error(int error, const char *);
 
-
 /*
   Lock tables.
 
@@ -122,7 +122,8 @@
 
   for (;;)
   {
-    if (!(sql_lock = get_lock_data(thd,tables,count, 0,&write_lock_used)))
+    if (! (sql_lock= get_lock_data(thd, tables, count, GET_LOCK_STORE_LOCKS,
+                                   &write_lock_used)))
       break;
 
     if (global_read_lock && write_lock_used &&
@@ -156,7 +157,12 @@
     thd->proc_info="Table lock";
     DBUG_PRINT("info", ("thd->proc_info %s", thd->proc_info));
     thd->locked=1;
-    rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks,
+    /* Copy the lock data array. thr_multi_lock() reorders its contents. */
+    memcpy(sql_lock->locks + sql_lock->lock_count, sql_lock->locks,
+           sql_lock->lock_count * sizeof(*sql_lock->locks));
+    /* Lock on the copied half of the lock data array. */
+    rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks +
+                                                     sql_lock->lock_count,
                                                      sql_lock->lock_count,
                                                      thd->lock_id)];
     if (rc > 1)                                 /* a timeout or a deadlock */
@@ -269,7 +275,8 @@
 {
   MYSQL_LOCK *sql_lock;
   TABLE *write_lock_used;
-  if ((sql_lock = get_lock_data(thd, table, count, 1, &write_lock_used)))
+  if ((sql_lock= get_lock_data(thd, table, count, GET_LOCK_UNLOCK,
+                               &write_lock_used)))
     mysql_unlock_tables(thd, sql_lock);
 }
 
@@ -306,6 +313,7 @@
   TABLE **table=sql_lock->table;
   for (i=found=0 ; i < sql_lock->table_count ; i++)
   {
+    DBUG_ASSERT(sql_lock->table[i]->lock_position == i);
     if ((uint) sql_lock->table[i]->reginfo.lock_type >= TL_WRITE_ALLOW_READ)
     {
       swap_variables(TABLE *, *table, sql_lock->table[i]);
@@ -319,6 +327,17 @@
     VOID(unlock_external(thd,table,i-found));
     sql_lock->table_count=found;
   }
+  /* Fix the lock positions in TABLE */
+  table= sql_lock->table;
+  found= 0;
+  for (i= 0; i < sql_lock->table_count; i++)
+  {
+    TABLE *tbl= *table;
+    tbl->lock_position= table - sql_lock->table;
+    tbl->lock_data_start= found;
+    found+= tbl->lock_count;
+    table++;
+  }
   DBUG_VOID_RETURN;
 }
 
@@ -334,20 +353,51 @@
     {
       if (locked->table[i] == table)
       {
-	locked->table_count--;
+        uint  j, removed_locks, old_tables;
+        TABLE *tbl;
+        uint lock_data_end;
+
+        DBUG_ASSERT(table->lock_position == i);
+
+        /* Decrement table_count in advance, making below expressions easier */
+        old_tables= --locked->table_count;
+
+        /* The table has 'removed_locks' lock data elements in locked->locks */
+        removed_locks= table->lock_count;
+
+        /* Move down all table pointers above 'i'. */
 	bmove((char*) (locked->table+i),
 	      (char*) (locked->table+i+1),
-	      (locked->table_count-i)* sizeof(TABLE*));
+	      (old_tables - i) * sizeof(TABLE*));
+
+        lock_data_end= table->lock_data_start + table->lock_count;
+        /* Move down all lock data pointers above 'lock_data_end - 1' */
+        bmove((char*) (locked->locks + table->lock_data_start),
+              (char*) (locked->locks + lock_data_end),
+              (locked->lock_count - lock_data_end) *
+              sizeof(THR_LOCK_DATA*));
+
+        /*
+          Fix moved table elements.
+          lock_position is the index in the 'locked->table' array;
+          it must be decremented by one.
+          lock_data_start is the start index of a table's lock data
+          in the 'locked->locks' array; it must be reduced by
+          'removed_locks', the lock data count of the removed table.
+        */
+        for (j= i ; j < old_tables; j++)
+        {
+          tbl= locked->table[j];
+          tbl->lock_position--;
+          DBUG_ASSERT(tbl->lock_position == j);
+          tbl->lock_data_start-= removed_locks;
+        }
+
+        /* Finally adjust lock_count. */
+        locked->lock_count-= removed_locks;
 	break;
       }
     }
-    THR_LOCK_DATA **prev=locked->locks;
-    for (i=0 ; i < locked->lock_count ; i++)
-    {
-      if (locked->locks[i]->type != TL_UNLOCK)
-	*prev++ = locked->locks[i];
-    }
-    locked->lock_count=(uint) (prev - locked->locks);
   }
 }
 
@@ -375,7 +425,8 @@
   TABLE *write_lock_used;
   DBUG_ENTER("mysql_lock_abort");
 
-  if ((locked = get_lock_data(thd,&table,1,1,&write_lock_used)))
+  if ((locked= get_lock_data(thd, &table, 1, GET_LOCK_UNLOCK,
+                             &write_lock_used)))
   {
     for (uint i=0; i < locked->lock_count; i++)
       thr_abort_locks(locked->locks[i]->lock, upgrade_lock);
@@ -405,7 +456,8 @@
   bool result= FALSE;
   DBUG_ENTER("mysql_lock_abort_for_thread");
 
-  if ((locked = get_lock_data(thd,&table,1,1,&write_lock_used)))
+  if ((locked= get_lock_data(thd, &table, 1, GET_LOCK_UNLOCK,
+                             &write_lock_used)))
   {
     for (uint i=0; i < locked->lock_count; i++)
     {
@@ -422,7 +474,9 @@
 MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b)
 {
   MYSQL_LOCK *sql_lock;
+  TABLE **table, **end_table;
   DBUG_ENTER("mysql_lock_merge");
+
   if (!(sql_lock= (MYSQL_LOCK*)
 	my_malloc(sizeof(*sql_lock)+
 		  sizeof(THR_LOCK_DATA*)*(a->lock_count+b->lock_count)+
@@ -438,6 +492,21 @@
   memcpy(sql_lock->table,a->table,a->table_count*sizeof(*a->table));
   memcpy(sql_lock->table+a->table_count,b->table,
 	 b->table_count*sizeof(*b->table));
+
+  /*
+    Now adjust lock_position and lock_data_start for all objects that were
+    moved from 'b' (as all objects of 'a' are now placed before them).
+  */
+  for (table= sql_lock->table + a->table_count,
+         end_table= table + b->table_count;
+       table < end_table;
+       table++)
+  {
+    (*table)->lock_position+=   a->table_count;
+    (*table)->lock_data_start+= a->lock_count;
+  }
+
+  /* Delete old, not needed locks */
   my_free((gptr) a,MYF(0));
   my_free((gptr) b,MYF(0));
   DBUG_RETURN(sql_lock);
@@ -456,112 +525,96 @@
   NOTE
     This is mainly meant for MERGE tables in INSERT ... SELECT
     situations. The 'real', underlying tables can be found only after
-    the table is opened. The easier way is to check this after the
-    tables are locked.
+    the MERGE tables are opened. This function assumes that the tables are
+    already locked.
+
+    Temporary tables are ignored here like they are ignored in
+    get_lock_data(). If we allow two opens on temporary tables later,
+    both functions should be checked.
 
   RETURN
-    1           A table from 'tables' matches a lock on 'table'.
-    0           No duplicate lock is present.
-    -1          Error.
+    NULL        No duplicate lock found.
+    ! NULL      First table from 'haystack' that matches a lock on 'needle'.
 */
 
 TABLE_LIST *mysql_lock_have_duplicate(THD *thd, TABLE_LIST *needle,
                                       TABLE_LIST *haystack)
 {
-  uint                  count;
-  uint                  dup_pos;
-  TABLE                 *write_lock_used; /* dummy */
-  TABLE                 **tables1;
-  TABLE                 **tables2;
-  TABLE                 **table_ptr;
-  TABLE_LIST            *tlist_ptr;
-  MYSQL_LOCK            *sql_lock1;
-  MYSQL_LOCK            *sql_lock2;
-  THR_LOCK_DATA         **lock_data1;
-  THR_LOCK_DATA         **end_data1;
+  MYSQL_LOCK            *mylock;
+  TABLE                 **lock_tables;
+  TABLE                 *table;
+  TABLE                 *table2;
+  THR_LOCK_DATA         **lock_locks;
+  THR_LOCK_DATA         **table_lock_data;
+  THR_LOCK_DATA         **end_data;
   THR_LOCK_DATA         **lock_data2;
   THR_LOCK_DATA         **end_data2;
-  THR_LOCK              *lock1;
   DBUG_ENTER("mysql_lock_have_duplicate");
 
-  /* Table may not be defined for derived or view tables. */
-  if (! needle->table)
-    DBUG_RETURN(NULL);
-
-  /* Get lock(s) for needle. */
-  tables1= &needle->table;
-  if (! (sql_lock1= get_lock_data(thd, tables1, 1, 1, &write_lock_used)))
-    goto err0;
-
-  /* Count real tables in list. */
-  count=0;
-  for (tlist_ptr = haystack; tlist_ptr; tlist_ptr= tlist_ptr->next_global)
-    if (! tlist_ptr->placeholder() && ! tlist_ptr->schema_table)
-      count++;
-  /* Allocate a table array. */
-  if (! (tables2= (TABLE**) sql_alloc(sizeof(TABLE*) * count)))
-    goto err1;
-  table_ptr= tables2;
-  /* Assign table pointers. */
-  for (tlist_ptr = haystack; tlist_ptr; tlist_ptr= tlist_ptr->next_global)
-    if (! tlist_ptr->placeholder() && ! tlist_ptr->schema_table)
-      *(table_ptr++)= tlist_ptr->table;
-  /* Get lock(s) for haystack. */
-  if (! (sql_lock2= get_lock_data(thd, tables2, count, 1, &write_lock_used)))
-    goto err1;
-
-  /* Initialize duplicate position to an impossible value. */
-  dup_pos= UINT_MAX;
   /*
-    Find a duplicate lock.
-    In case of merge tables, sql_lock1 can have more than 1 lock.
+    Table may not be defined for derived or view tables.
+    Table may not be part of a lock for delayed operations.
   */
-  for (lock_data1= sql_lock1->locks,
-         end_data1= lock_data1 + sql_lock1->lock_count;
-       lock_data1 < end_data1;
-       lock_data1++)
-  {
-    lock1= (*lock_data1)->lock;
-    for (lock_data2= sql_lock2->locks,
-           end_data2= lock_data2 + sql_lock2->lock_count;
+  if (! (table= needle->table) || ! table->lock_count)
+    goto end;
+
+  /* A temporary table does not have locks. */
+  if (table->s->tmp_table == TMP_TABLE)
+    goto end;
+
+  /* Get command lock or LOCK TABLES lock. Maybe empty for INSERT DELAYED. */
+  if (! (mylock= thd->lock ? thd->lock : thd->locked_tables))
+    goto end;
+
+  /* If we have less than two tables, we cannot have duplicates. */
+  if (mylock->table_count < 2)
+    goto end;
+
+  lock_locks=  mylock->locks;
+  lock_tables= mylock->table;
+
+  /* Prepare table related variables that don't change in loop. */
+  DBUG_ASSERT((table->lock_position < mylock->table_count) &&
+              (table == lock_tables[table->lock_position]));
+  table_lock_data= lock_locks + table->lock_data_start;
+  end_data= table_lock_data + table->lock_count;
+
+  for (; haystack; haystack= haystack->next_global)
+  {
+    if (haystack->placeholder() || haystack->schema_table)
+      continue;
+    table2= haystack->table;
+    if (table2->s->tmp_table == TMP_TABLE)
+      continue;
+
+    /* All tables in list must be in lock. */
+    DBUG_ASSERT((table2->lock_position < mylock->table_count) &&
+                (table2 == lock_tables[table2->lock_position]));
+
+    for (lock_data2=  lock_locks + table2->lock_data_start,
+           end_data2= lock_data2 + table2->lock_count;
          lock_data2 < end_data2;
          lock_data2++)
     {
-      if ((*lock_data2)->lock == lock1)
+      THR_LOCK_DATA **lock_data;
+      THR_LOCK *lock2= (*lock_data2)->lock;
+
+      for (lock_data= table_lock_data;
+           lock_data < end_data;
+           lock_data++)
       {
-        DBUG_PRINT("ingo", ("duplicate lock found"));
-        /* Change duplicate position to the real value. */
-        dup_pos= lock_data2 - sql_lock2->locks;
-        goto end;
+        if ((*lock_data)->lock == lock2)
+        {
+          DBUG_PRINT("info", ("haystack match: '%s'", haystack->table_name));
+          DBUG_RETURN(haystack);
+        }
       }
     }
   }
 
  end:
-  tlist_ptr= NULL; /* In case that no duplicate was found. */
-  if (dup_pos != UINT_MAX)
-  {
-    /* Duplicate found. Search the matching TABLE_LIST object. */
-    count= 0;
-    for (tlist_ptr = haystack; tlist_ptr; tlist_ptr= tlist_ptr->next_global)
-    {
-      if (! tlist_ptr->placeholder() && ! tlist_ptr->schema_table)
-      {
-        count+= tlist_ptr->table->file->lock_count();
-        if (count > dup_pos)
-          break;
-      }
-    }
-  }
-  my_free((gptr) sql_lock2, MYF(0));
-  my_free((gptr) sql_lock1, MYF(0));
-  DBUG_RETURN(tlist_ptr);
-
- err1:
-  my_free((gptr) sql_lock1, MYF(0));
- err0:
-  /* This non-null but special value indicates error, if caller cares. */
-  DBUG_RETURN(needle);
+  DBUG_PRINT("info", ("no duplicate found"));
+  DBUG_RETURN(NULL);
 }
 
 
@@ -591,17 +644,27 @@
 
 
 /*
-** Get lock structures from table structs and initialize locks
+  Get lock structures from table structs and initialize locks
+
+  SYNOPSIS
+    get_lock_data()
+    thd			Thread handler
+    table_ptr		Pointer to tables that should be locked
+    flags		One of:
+			GET_LOCK_UNLOCK:      If we should send TL_IGNORE to
+                        		      store lock
+			GET_LOCK_STORE_LOCKS: Store lock info in TABLE
+    write_lock_used	Store pointer to last table with WRITE_ALLOW_WRITE
 */
 
 
 static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
-				 bool get_old_locks, TABLE **write_lock_used)
+				 uint flags, TABLE **write_lock_used)
 {
   uint i,tables,lock_count;
   MYSQL_LOCK *sql_lock;
-  THR_LOCK_DATA **locks;
-  TABLE **to;
+  THR_LOCK_DATA **locks, **locks_buf, **locks_start;
+  TABLE **to, **table_buf;
   DBUG_ENTER("get_lock_data");
 
   DBUG_PRINT("info", ("count %d", count));
@@ -625,13 +688,20 @@
       DBUG_RETURN(0);
   }
 
+  /*
+    Allocating twice the number of pointers for lock data for use in
+    thr_multi_lock(). This function reorders the lock data, but cannot
+    update the table values. So the second part of the array is copied
+    from the first part immediately before calling thr_multi_lock().
+  */
   if (!(sql_lock= (MYSQL_LOCK*)
-	my_malloc(sizeof(*sql_lock)+
-		  sizeof(THR_LOCK_DATA*)*tables+sizeof(table_ptr)*lock_count,
+	my_malloc(sizeof(*sql_lock) +
+		  sizeof(THR_LOCK_DATA*) * tables * 2 +
+                  sizeof(table_ptr) * lock_count,
 		  MYF(0))))
     DBUG_RETURN(0);
-  locks=sql_lock->locks=(THR_LOCK_DATA**) (sql_lock+1);
-  to=sql_lock->table=(TABLE**) (locks+tables);
+  locks= locks_buf= sql_lock->locks= (THR_LOCK_DATA**) (sql_lock + 1);
+  to= table_buf= sql_lock->table= (TABLE**) (locks + tables * 2);
   sql_lock->table_count=lock_count;
   sql_lock->lock_count=tables;
   DBUG_PRINT("info", ("sql_lock->table_count %d sql_lock->lock_count %d",
@@ -640,10 +710,11 @@
   for (i=0 ; i < count ; i++)
   {
     TABLE *table;
+    enum thr_lock_type lock_type;
+
     if ((table=table_ptr[i])->s->tmp_table == TMP_TABLE)
       continue;
-    *to++=table;
-    enum thr_lock_type lock_type= table->reginfo.lock_type;
+    lock_type= table->reginfo.lock_type;
     if (lock_type >= TL_WRITE_ALLOW_WRITE)
     {
       *write_lock_used=table;
@@ -655,8 +726,17 @@
       }
     }
     THR_LOCK_DATA **org_locks = locks;
-    locks=table->file->store_lock(thd, locks, get_old_locks ? TL_IGNORE :
-				  lock_type);
+    locks_start= locks;
+    locks= table->file->store_lock(thd, locks,
+                                   (flags & GET_LOCK_UNLOCK) ? TL_IGNORE :
+                                   lock_type);
+    if (flags & GET_LOCK_STORE_LOCKS)
+    {
+      table->lock_position=   (uint) (to - table_buf);
+      table->lock_data_start= (uint) (locks_start - locks_buf);
+      table->lock_count=      (uint) (locks - locks_start);
+    }
+    *to++= table;
     if (locks)
       for ( ; org_locks != locks ; org_locks++)
 	(*org_locks)->debug_print_param= (void *) table;

--- 1.194/sql/sql_insert.cc	2006-03-30 17:07:57 +02:00
+++ 1.195/sql/sql_insert.cc	2006-04-05 14:58:25 +02:00
@@ -1571,7 +1571,10 @@
 
   /* Adjust in_use for pointing to client thread */
   copy->in_use= client_thd;
-  
+
+  /* Adjust lock_count. This table object is not part of a lock. */
+  copy->lock_count= 0;
+
   return copy;
 
   /* Got fatal error */

--- 1.136/sql/table.h	2006-03-17 18:10:59 +01:00
+++ 1.137/sql/table.h	2006-04-05 14:58:25 +02:00
@@ -277,7 +277,10 @@
   */
   timestamp_auto_set_type timestamp_field_type;
   table_map	map;                    /* ID bit of table (1,2,4,8,16...) */
-  
+
+  uint          lock_position;          /* Position in MYSQL_LOCK.table */
+  uint          lock_data_start;        /* Start pos. in MYSQL_LOCK.locks */
+  uint          lock_count;             /* Number of locks */
   uint		tablenr,used_fields;
   uint          temp_pool_slot;		/* Used by intern temp tables */
   uint		status;                 /* What's in record[0] */
@@ -286,8 +289,8 @@
   uint          derived_select_number;
   int		current_lock;           /* Type of lock on table */
   my_bool copy_blobs;			/* copy_blobs when storing */
-  
-  /* 
+
+  /*
     0 or JOIN_TYPE_{LEFT|RIGHT}. Currently this is only compared to 0.
     If maybe_null !=0, this table is inner w.r.t. some outer join operation,
     and null_row may be true.

--- 1.39/sql/share/charsets/Index.xml	2005-12-06 13:18:38 +01:00
+++ 1.40/sql/share/charsets/Index.xml	2006-04-05 14:58:25 +02:00
@@ -115,7 +115,10 @@
   <alias>l1</alias>
   <alias>latin1</alias>
   <collation name="latin1_german1_ci"	id="5"	order="German Duden"/>
-  <collation name="latin1_swedish_ci"	id="8"	order="Finnish, Swedish"	flag="primary"/>
+  <collation name="latin1_swedish_ci"	id="8"	order="Finnish, Swedish">
+    <flag>primary</flag>
+    <flag>compiled</flag>
+  </collation>
   <collation name="latin1_danish_ci"	id="15"	order="Danish"/>
   <collation name="latin1_german2_ci"	id="31"	order="German Phonebook"	flag="compiled"/>
   <collation name="latin1_spanish_ci"	id="94"	order="Spanish"/>

--- 1.112/mysql-test/r/information_schema.result	2006-03-21 13:10:08 +01:00
+++ 1.113/mysql-test/r/information_schema.result	2006-04-05 14:58:25 +02:00
@@ -214,34 +214,34 @@
 select * from information_schema.COLLATIONS
 where COLLATION_NAME like 'latin1%';
 COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
-latin1_german1_ci	latin1	5			0
-latin1_swedish_ci	latin1	8	Yes	Yes	1
-latin1_danish_ci	latin1	15			0
-latin1_german2_ci	latin1	31		Yes	2
-latin1_bin	latin1	47		Yes	1
-latin1_general_ci	latin1	48			0
-latin1_general_cs	latin1	49			0
-latin1_spanish_ci	latin1	94			0
+latin1_german1_ci	latin1	5		#	1
+latin1_swedish_ci	latin1	8	Yes	#	1
+latin1_danish_ci	latin1	15		#	1
+latin1_german2_ci	latin1	31		#	2
+latin1_bin	latin1	47		#	1
+latin1_general_ci	latin1	48		#	1
+latin1_general_cs	latin1	49		#	1
+latin1_spanish_ci	latin1	94		#	1
 SHOW COLLATION LIKE 'latin1%';
 Collation	Charset	Id	Default	Compiled	Sortlen
-latin1_german1_ci	latin1	5			0
-latin1_swedish_ci	latin1	8	Yes	Yes	1
-latin1_danish_ci	latin1	15			0
-latin1_german2_ci	latin1	31		Yes	2
-latin1_bin	latin1	47		Yes	1
-latin1_general_ci	latin1	48			0
-latin1_general_cs	latin1	49			0
-latin1_spanish_ci	latin1	94			0
+latin1_german1_ci	latin1	5		#	1
+latin1_swedish_ci	latin1	8	Yes	#	1
+latin1_danish_ci	latin1	15		#	1
+latin1_german2_ci	latin1	31		#	2
+latin1_bin	latin1	47		#	1
+latin1_general_ci	latin1	48		#	1
+latin1_general_cs	latin1	49		#	1
+latin1_spanish_ci	latin1	94		#	1
 SHOW COLLATION WHERE collation like 'latin1%';
 Collation	Charset	Id	Default	Compiled	Sortlen
-latin1_german1_ci	latin1	5			0
-latin1_swedish_ci	latin1	8	Yes	Yes	1
-latin1_danish_ci	latin1	15			0
-latin1_german2_ci	latin1	31		Yes	2
-latin1_bin	latin1	47		Yes	1
-latin1_general_ci	latin1	48			0
-latin1_general_cs	latin1	49			0
-latin1_spanish_ci	latin1	94			0
+latin1_german1_ci	latin1	5		#	1
+latin1_swedish_ci	latin1	8	Yes	#	1
+latin1_danish_ci	latin1	15		#	1
+latin1_german2_ci	latin1	31		#	2
+latin1_bin	latin1	47		#	1
+latin1_general_ci	latin1	48		#	1
+latin1_general_cs	latin1	49		#	1
+latin1_spanish_ci	latin1	94		#	1
 select * from information_schema.COLLATION_CHARACTER_SET_APPLICABILITY
 where COLLATION_NAME like 'latin1%';
 COLLATION_NAME	CHARACTER_SET_NAME

--- 1.72/mysql-test/t/information_schema.test	2006-03-21 13:10:08 +01:00
+++ 1.73/mysql-test/t/information_schema.test	2006-04-05 14:58:25 +02:00
@@ -97,9 +97,12 @@
 # Test for information_schema.COLLATIONS &
 # SHOW COLLATION
 
+--replace_column 5 #
 select * from information_schema.COLLATIONS
 where COLLATION_NAME like 'latin1%';
+--replace_column 5 #
 SHOW COLLATION LIKE 'latin1%';
+--replace_column 5 #
 SHOW COLLATION WHERE collation like 'latin1%';
 
 select * from information_schema.COLLATION_CHARACTER_SET_APPLICABILITY

--- 1.24.21.1/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2006-04-04 11:50:43 +02:00
+++ 1.58/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2006-04-05 14:58:25 +02:00
@@ -67,6 +67,7 @@
 #include <signaldata/CreateFragmentation.hpp>
 #include <signaldata/LqhFrag.hpp>
 #include <signaldata/FsOpenReq.hpp>
+#include <signaldata/DihFragCount.hpp>
 #include <DebuggerNames.hpp>
 
 #include <EventLogger.hpp>
@@ -609,6 +610,14 @@
     checkWaitDropTabFailedLqh(signal, nodeId, tableId);
     return;
   }
+  case DihContinueB::ZTO_START_FRAGMENTS:
+  {
+    TakeOverRecordPtr takeOverPtr;
+    takeOverPtr.i = signal->theData[1];
+    ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
+    nr_start_fragments(signal, takeOverPtr);
+    return;
+  }
   }//switch
   
   ndbrequire(false);
@@ -639,9 +648,11 @@
     c_copyGCISlave.m_expectedNextWord += CopyGCIReq::DATA_SIZE;
     return;
   }//if
-  
+
+  Uint32 tmp= SYSFILE->m_restart_seq;
   memcpy(sysfileData, cdata, sizeof(sysfileData));
-  
+  SYSFILE->m_restart_seq = tmp;
+
   c_copyGCISlave.m_copyReason = reason;
   c_copyGCISlave.m_senderRef  = signal->senderBlockRef();
   c_copyGCISlave.m_senderData = copyGCI->anyData;
@@ -1052,7 +1063,7 @@
   jamEntry();
 
   const ndb_mgm_configuration_iterator * p = 
-    theConfiguration.getOwnConfigIterator();
+    m_ctx.m_config.getOwnConfigIterator();
   ndbrequireErr(p != 0, NDBD_EXIT_INVALID_CONFIG);
 
   initData();
@@ -1138,7 +1149,7 @@
 {
   jamEntry();
   cntrlblockref = signal->theData[0];
-  if(theConfiguration.getInitialStart()){
+  if(m_ctx.m_config.getInitialStart()){
     sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
   } else {
     readGciFileLab(signal);
@@ -1656,12 +1667,15 @@
    *
    * But dont copy lastCompletedGCI:s
    */
+  Uint32 key = SYSFILE->m_restart_seq;
   Uint32 tempGCP[MAX_NDB_NODES];
   for(i = 0; i < MAX_NDB_NODES; i++)
     tempGCP[i] = SYSFILE->lastCompletedGCI[i];
 
   for(i = 0; i < Sysfile::SYSFILE_SIZE32; i++)
     sysfileData[i] = cdata[i];
+
+  SYSFILE->m_restart_seq = key;
   for(i = 0; i < MAX_NDB_NODES; i++)
     SYSFILE->lastCompletedGCI[i] = tempGCP[i];
 
@@ -1819,11 +1833,6 @@
   ndbrequire(c_nodeStartMaster.startNode == Tnodeid);
   ndbrequire(getNodeStatus(Tnodeid) == NodeRecord::STARTING);
   
-  sendSTART_RECREQ(signal, Tnodeid);
-}//Dbdih::execSTART_MEREQ()
-
-void Dbdih::nodeRestartStartRecConfLab(Signal* signal) 
-{
   c_nodeStartMaster.blockLcp = true;
   if ((c_lcpState.lcpStatus != LCP_STATUS_IDLE) &&
       (c_lcpState.lcpStatus != LCP_TCGET)) {
@@ -2634,13 +2643,14 @@
     return;
   }//if
   c_startToLock = takeOverPtrI;
+
+  takeOverPtr.p->toMasterStatus = TakeOverRecord::STARTING;
   StartToReq * const req = (StartToReq *)&signal->theData[0];
   req->userPtr = takeOverPtr.i;
   req->userRef = reference();
   req->startingNodeId = takeOverPtr.p->toStartingNode;
   req->nodeTakenOver = takeOverPtr.p->toFailedNode;
   req->nodeRestart = takeOverPtr.p->toNodeRestart;
-  takeOverPtr.p->toMasterStatus = TakeOverRecord::STARTING;
   sendLoopMacro(START_TOREQ, sendSTART_TOREQ);
 }//Dbdih::sendStartTo()
 
@@ -2684,9 +2694,153 @@
   CRASH_INSERTION(7134);
   c_startToLock = RNIL;
 
+  if (takeOverPtr.p->toNodeRestart)
+  {
+    jam();
+    takeOverPtr.p->toMasterStatus = TakeOverRecord::STARTING_LOCAL_FRAGMENTS;
+    nr_start_fragments(signal, takeOverPtr);
+    return;
+  }
+
   startNextCopyFragment(signal, takeOverPtr.i);
 }//Dbdih::execSTART_TOCONF()
 
+void
+Dbdih::nr_start_fragments(Signal* signal, 
+			  TakeOverRecordPtr takeOverPtr)
+{
+  Uint32 loopCount = 0 ;
+  TabRecordPtr tabPtr;
+  while (loopCount++ < 100) {
+    tabPtr.i = takeOverPtr.p->toCurrentTabref;
+    if (tabPtr.i >= ctabFileSize) {
+      jam();
+      nr_run_redo(signal, takeOverPtr);
+      return;
+    }//if
+    ptrAss(tabPtr, tabRecord);
+    if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE){
+      jam();
+      takeOverPtr.p->toCurrentFragid = 0;
+      takeOverPtr.p->toCurrentTabref++;
+      continue;
+    }//if
+    Uint32 fragId = takeOverPtr.p->toCurrentFragid;
+    if (fragId >= tabPtr.p->totalfragments) {
+      jam();
+      takeOverPtr.p->toCurrentFragid = 0;
+      takeOverPtr.p->toCurrentTabref++;
+      continue;
+    }//if
+    FragmentstorePtr fragPtr;
+    getFragstore(tabPtr.p, fragId, fragPtr);
+    ReplicaRecordPtr loopReplicaPtr;
+    loopReplicaPtr.i = fragPtr.p->oldStoredReplicas;
+    while (loopReplicaPtr.i != RNIL) {
+      ptrCheckGuard(loopReplicaPtr, creplicaFileSize, replicaRecord);
+      if (loopReplicaPtr.p->procNode == takeOverPtr.p->toStartingNode) {
+        jam();
+	nr_start_fragment(signal, takeOverPtr, loopReplicaPtr);
+	break;
+      } else {
+        jam();
+        loopReplicaPtr.i = loopReplicaPtr.p->nextReplica;
+      }//if
+    }//while
+    takeOverPtr.p->toCurrentFragid++;
+  }//while
+  signal->theData[0] = DihContinueB::ZTO_START_FRAGMENTS;
+  signal->theData[1] = takeOverPtr.i;
+  sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
+}
+
+void
+Dbdih::nr_start_fragment(Signal* signal, 
+			 TakeOverRecordPtr takeOverPtr,
+			 ReplicaRecordPtr replicaPtr)
+{
+  Uint32 i, j = 0;
+  Uint32 maxLcpId = 0;
+  Uint32 maxLcpIndex = ~0;
+  
+  Uint32 restorableGCI = 0;
+  
+  ndbout_c("tab: %d frag: %d replicaP->nextLcp: %d",
+	   takeOverPtr.p->toCurrentTabref,
+	   takeOverPtr.p->toCurrentFragid,
+	   replicaPtr.p->nextLcp);
+  
+  Uint32 idx = replicaPtr.p->nextLcp;
+  for(i = 0; i<MAX_LCP_STORED; i++, idx = nextLcpNo(idx))
+  {
+    ndbout_c("scanning idx: %d lcpId: %d", idx, replicaPtr.p->lcpId[idx]);
+    if (replicaPtr.p->lcpStatus[idx] == ZVALID) 
+    {
+      ndbrequire(replicaPtr.p->lcpId[idx] > maxLcpId);
+      Uint32 startGci = replicaPtr.p->maxGciCompleted[idx];
+      Uint32 stopGci = replicaPtr.p->maxGciStarted[idx];
+      for (;j < replicaPtr.p->noCrashedReplicas; j++)
+      {
+	ndbout_c("crashed replica: %d(%d) replicaLastGci: %d",
+		 j, 
+		 replicaPtr.p->noCrashedReplicas,
+		 replicaPtr.p->replicaLastGci[j]);
+	if (replicaPtr.p->replicaLastGci[j] > stopGci)
+	{
+	  maxLcpId = replicaPtr.p->lcpId[idx];
+	  maxLcpIndex = idx;
+	  restorableGCI = replicaPtr.p->replicaLastGci[j];
+	  break;
+	}
+      }
+    }
+  }
+  
+  if (maxLcpIndex == ~0)
+  {
+    ndbout_c("Didnt find any LCP for node: %d tab: %d frag: %d",
+	     takeOverPtr.p->toStartingNode,
+	     takeOverPtr.p->toCurrentTabref,
+	     takeOverPtr.p->toCurrentFragid);
+    replicaPtr.p->lcpIdStarted = 0;
+  }
+  else
+  {
+    ndbout_c("Found LCP: %d(%d) maxGciStarted: %d maxGciCompleted: %d restorable: %d(%d) newestRestorableGCI: %d",
+	     maxLcpId,
+	     maxLcpIndex,
+	     replicaPtr.p->maxGciStarted[maxLcpIndex],
+	     replicaPtr.p->maxGciCompleted[maxLcpIndex],	     
+	     restorableGCI,
+	     SYSFILE->lastCompletedGCI[takeOverPtr.p->toStartingNode],
+	     SYSFILE->newestRestorableGCI);
+
+    replicaPtr.p->lcpIdStarted = restorableGCI;
+    BlockReference ref = calcLqhBlockRef(takeOverPtr.p->toStartingNode);
+    StartFragReq *req = (StartFragReq *)signal->getDataPtrSend();
+    req->userPtr = 0;
+    req->userRef = reference();
+    req->lcpNo = maxLcpIndex;
+    req->lcpId = maxLcpId;
+    req->tableId = takeOverPtr.p->toCurrentTabref;
+    req->fragId = takeOverPtr.p->toCurrentFragid;
+    req->noOfLogNodes = 1;
+    req->lqhLogNode[0] = takeOverPtr.p->toStartingNode;
+    req->startGci[0] = replicaPtr.p->maxGciCompleted[maxLcpIndex];
+    req->lastGci[0] = restorableGCI;
+    sendSignal(ref, GSN_START_FRAGREQ, signal, 
+	       StartFragReq::SignalLength, JBB);
+  }
+}
+
+void
+Dbdih::nr_run_redo(Signal* signal, TakeOverRecordPtr takeOverPtr)
+{
+  takeOverPtr.p->toCurrentTabref = 0;
+  takeOverPtr.p->toCurrentFragid = 0;
+  sendSTART_RECREQ(signal, takeOverPtr.p->toStartingNode);
+}
+
 void Dbdih::initStartTakeOver(const StartToReq * req, 
 			      TakeOverRecordPtr takeOverPtr)
 {
@@ -3019,6 +3173,14 @@
     /*---------------------------------------------------------------------- */
     FragmentstorePtr fragPtr;
     getFragstore(tabPtr.p, fragId, fragPtr);
+    Uint32 gci = 0;
+    if (takeOverPtr.p->toNodeRestart)
+    {
+      ReplicaRecordPtr replicaPtr;
+      findReplica(replicaPtr, fragPtr.p, takeOverPtr.p->toStartingNode, true);
+      gci = replicaPtr.p->lcpIdStarted;
+      replicaPtr.p->lcpIdStarted = 0;
+    }
     takeOverPtr.p->toMasterStatus = TakeOverRecord::COPY_FRAG;
     BlockReference ref = calcLqhBlockRef(takeOverPtr.p->toCopyNode);
     CopyFragReq * const copyFragReq = (CopyFragReq *)&signal->theData[0];
@@ -3029,6 +3191,7 @@
     copyFragReq->nodeId = takeOverPtr.p->toStartingNode;
     copyFragReq->schemaVersion = tabPtr.p->schemaVersion;
     copyFragReq->distributionKey = fragPtr.p->distributionKey;
+    copyFragReq->gci = gci;
     sendSignal(ref, GSN_COPY_FRAGREQ, signal, CopyFragReq::SignalLength, JBB);
   } else {
     ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::COMMIT_CREATE);
@@ -3550,6 +3713,7 @@
   /*     WE ALSO COPY TO OUR OWN NODE. TO ENABLE US TO DO THIS PROPERLY WE   */
   /*     START BY CLOSING THIS FILE.                                         */
   /* ----------------------------------------------------------------------- */
+  globalData.m_restart_seq = ++SYSFILE->m_restart_seq;
   closeFile(signal, filePtr);
   filePtr.p->reqStatus = FileRecord::CLOSING_GCP;
 }//Dbdih::readingGcpLab()
@@ -4068,6 +4232,8 @@
 						  Uint32 takeOverPtrI)
 {
   jam();
+  ndbout_c("checkTakeOverInMasterStartNodeFailure %x",
+	   takeOverPtrI);
   if (takeOverPtrI == RNIL) {
     jam();
     return;
@@ -4081,6 +4247,9 @@
   takeOverPtr.i = takeOverPtrI;
   ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
 
+  ndbout_c("takeOverPtr.p->toMasterStatus: %x", 
+	   takeOverPtr.p->toMasterStatus);
+  
   bool ok = false;
   switch (takeOverPtr.p->toMasterStatus) {
   case TakeOverRecord::IDLE:
@@ -4189,6 +4358,13 @@
     //-----------------------------------------------------------------------
     endTakeOver(takeOverPtr.i);
     break;
+
+  case TakeOverRecord::STARTING_LOCAL_FRAGMENTS:
+    ok = true;
+    jam();
+    endTakeOver(takeOverPtr.i);
+    break;
+    
     /**
      * The following are states that it should not be possible to "be" in
      */
@@ -5601,11 +5777,9 @@
 #endif
   }
 
-  bool ok = false;
   MasterLCPConf::State lcpState;
   switch (c_lcpState.lcpStatus) {
   case LCP_STATUS_IDLE:
-    ok = true;
     jam();
     /*------------------------------------------------*/
     /*       LOCAL CHECKPOINT IS CURRENTLY NOT ACTIVE */
@@ -5616,7 +5790,6 @@
     lcpState = MasterLCPConf::LCP_STATUS_IDLE;
     break;
   case LCP_STATUS_ACTIVE:
-    ok = true;
     jam();
     /*--------------------------------------------------*/
     /*       COPY OF RESTART INFORMATION HAS BEEN       */
@@ -5625,7 +5798,6 @@
     lcpState = MasterLCPConf::LCP_STATUS_ACTIVE;
     break;
   case LCP_TAB_COMPLETED:
-    ok = true;
     jam();
     /*--------------------------------------------------------*/
     /*       ALL LCP_REPORT'S HAVE BEEN COMPLETED FOR         */
@@ -5635,7 +5807,6 @@
     lcpState = MasterLCPConf::LCP_TAB_COMPLETED;
     break;
   case LCP_TAB_SAVED:
-    ok = true;
     jam();
     /*--------------------------------------------------------*/
     /*       ALL LCP_REPORT'S HAVE BEEN COMPLETED FOR         */
@@ -5659,15 +5830,15 @@
     break;
   case LCP_COPY_GCI:
   case LCP_INIT_TABLES:
-    ok = true;
     /**
      * These two states are handled by if statements above
      */
     ndbrequire(false);
     lcpState= MasterLCPConf::LCP_STATUS_IDLE; // remove warning
     break;
+  default:
+    ndbrequire(false);
   }//switch
-  ndbrequire(ok);
 
   Uint32 failedNodeId = c_lcpState.m_MASTER_LCPREQ_FailedNodeId;
   MasterLCPConf * const conf = (MasterLCPConf *)&signal->theData[0];
@@ -6265,96 +6436,146 @@
   3.7.1   A D D   T A B L E   M A I N L Y
   ***************************************
   */
-void Dbdih::execCREATE_FRAGMENTATION_REQ(Signal * signal){
+
+static inline void inc_node_or_group(Uint32 &node, Uint32 max_node) // Advance node (updated in place) by one, cyclically.
+{
+  Uint32 next = node + 1;
+  node = (next == max_node ? 0 : next); // wraps to 0 only when the successor is exactly max_node
+}
+
+/*
+  Spread fragments in backwards compatible mode: fill the node group array carried in the signal with defaults.
+*/
+static void set_default_node_groups(Signal *signal, Uint32 noFrags)
+{
+  Uint16 *node_group_array = (Uint16*)&signal->theData[25]; // Uint16 view starting at word 25 of the signal data
+  Uint32 i;
+  node_group_array[0] = 0; // written unconditionally, even when noFrags is 0
+  for (i = 1; i < noFrags; i++)
+    node_group_array[i] = UNDEF_NODEGROUP; // entries 1..noFrags-1 carry no explicit node group
+}
+void Dbdih::execCREATE_FRAGMENTATION_REQ(Signal * signal)
+{
+  Uint16 node_group_id[MAX_NDB_PARTITIONS];
   jamEntry();
   CreateFragmentationReq * const req = 
     (CreateFragmentationReq*)signal->getDataPtr();
   
   const Uint32 senderRef = req->senderRef;
   const Uint32 senderData = req->senderData;
-  const Uint32 fragmentNode = req->fragmentNode;
-  const Uint32 fragmentType = req->fragmentationType;
-  //const Uint32 fragmentCount = req->noOfFragments;
+  Uint32 noOfFragments = req->noOfFragments;
+  const Uint32 fragType = req->fragmentationType;
   const Uint32 primaryTableId = req->primaryTableId;
 
   Uint32 err = 0;
   
   do {
-    Uint32 noOfFragments = 0;
-    Uint32 noOfReplicas = cnoReplicas;
-    switch(fragmentType){
-    case DictTabInfo::AllNodesSmallTable:
-      jam();
-      noOfFragments = csystemnodes;
-      break;
-    case DictTabInfo::AllNodesMediumTable:
-      jam();
-      noOfFragments = 2 * csystemnodes;
-      break;
-    case DictTabInfo::AllNodesLargeTable:
-      jam();
-      noOfFragments = 4 * csystemnodes;
-      break;
-    case DictTabInfo::SingleFragment:
-      jam();
-      noOfFragments = 1;
-      break;
-#if 0
-    case DictTabInfo::SpecifiedFragmentCount:
-      noOfFragments = (fragmentCount == 0 ? 1 : (fragmentCount + 1)/ 2);
-      break;
-#endif
-    default:
-      jam();
-      err = CreateFragmentationRef::InvalidFragmentationType;
-      break;
-    }
-    if(err)
-      break;
-   
     NodeGroupRecordPtr NGPtr;
     TabRecordPtr primTabPtr;
+    Uint32 count = 2;
+    Uint16 noOfReplicas = cnoReplicas;
+    Uint16 *fragments = (Uint16*)(signal->theData+25);
     if (primaryTableId == RNIL) {
-      if(fragmentNode == 0){
-        jam();
-        NGPtr.i = 0; 
-	if(noOfFragments < csystemnodes)
-	{
-	  NGPtr.i = c_nextNodeGroup; 
-	  c_nextNodeGroup = (NGPtr.i + 1 == cnoOfNodeGroups ? 0 : NGPtr.i + 1);
-	}
-      } else if(! (fragmentNode < MAX_NDB_NODES)) {
-        jam();
-        err = CreateFragmentationRef::InvalidNodeId;
-      } else {
-        jam();
-        const Uint32 stat = Sysfile::getNodeStatus(fragmentNode,
-                                                   SYSFILE->nodeStatus);
-        switch (stat) {
-        case Sysfile::NS_Active:
-        case Sysfile::NS_ActiveMissed_1:
-        case Sysfile::NS_ActiveMissed_2:
-        case Sysfile::NS_TakeOver:
+      jam();
+      switch ((DictTabInfo::FragmentType)fragType)
+      {
+        /*
+          Backward compatibility and for all places in code not changed.
+        */
+        case DictTabInfo::AllNodesSmallTable:
           jam();
+          noOfFragments = csystemnodes;
+          set_default_node_groups(signal, noOfFragments);
           break;
-        case Sysfile::NS_NotActive_NotTakenOver:
+        case DictTabInfo::AllNodesMediumTable:
           jam();
+          noOfFragments = 2 * csystemnodes;
+          set_default_node_groups(signal, noOfFragments);
           break;
-        case Sysfile::NS_HotSpare:
+        case DictTabInfo::AllNodesLargeTable:
           jam();
-        case Sysfile::NS_NotDefined:
+          noOfFragments = 4 * csystemnodes;
+          set_default_node_groups(signal, noOfFragments);
+          break;
+        case DictTabInfo::SingleFragment:
+          jam();
+          noOfFragments = 1;
+          set_default_node_groups(signal, noOfFragments);
+          break;
+        case DictTabInfo::DistrKeyHash:
           jam();
+        case DictTabInfo::DistrKeyLin:
+          jam();
+          if (noOfFragments == 0)
+          {
+            jam();
+            noOfFragments = csystemnodes;
+            set_default_node_groups(signal, noOfFragments);
+          }
+          break;
         default:
           jam();
-          err = CreateFragmentationRef::InvalidNodeType;
+          if (noOfFragments == 0)
+          {
+            jam();
+            err = CreateFragmentationRef::InvalidFragmentationType;
+          }
           break;
+      }
+      if (err)
+        break;
+      /*
+        When we come here the exact partition is specified
+        and there is an array of node groups sent along as well.
+      */
+      memcpy(&node_group_id[0], &signal->theData[25], 2 * noOfFragments);
+      Uint16 next_replica_node[MAX_NDB_NODES];
+      memset(next_replica_node,0,sizeof(next_replica_node));
+      Uint32 default_node_group= c_nextNodeGroup;
+      for(Uint32 fragNo = 0; fragNo < noOfFragments; fragNo++)
+      {
+        jam();
+        NGPtr.i = node_group_id[fragNo];
+        if (NGPtr.i == UNDEF_NODEGROUP)
+        {
+          jam();
+	  NGPtr.i = default_node_group; 
         }
-        if(err)
+        if (NGPtr.i >= cnoOfNodeGroups) // valid node group ids are 0..cnoOfNodeGroups-1
+        {
+          jam();
+          err = CreateFragmentationRef::InvalidNodeGroup; // reject before the record is looked up and used
+          break;
+        }
-                                        SYSFILE->nodeGroups);
+        }
+        ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
+        const Uint32 max = NGPtr.p->nodeCount;
+	
+	Uint32 tmp= next_replica_node[NGPtr.i];
+        for(Uint32 replicaNo = 0; replicaNo < noOfReplicas; replicaNo++)
+        {
+          jam();
+          const Uint16 nodeId = NGPtr.p->nodesInGroup[tmp];
+          fragments[count++]= nodeId;
+          inc_node_or_group(tmp, max);
+        }
+        inc_node_or_group(tmp, max);
+	next_replica_node[NGPtr.i]= tmp;
+	
+        /**
+         * Next node group for next fragment
+         */
+        inc_node_or_group(default_node_group, cnoOfNodeGroups);
+      }
+      if (err)
+      {
+        jam();
         break;
       }
+      else
+      {
+        jam();
+        c_nextNodeGroup = default_node_group;
+      }
     } else {
       if (primaryTableId >= ctabFileSize) {
         jam();
@@ -6368,49 +6589,14 @@
         err = CreateFragmentationRef::InvalidPrimaryTable;
         break;
       }
-      if (noOfFragments != primTabPtr.p->totalfragments) {
-        jam();
-        err = CreateFragmentationRef::InvalidFragmentationType;
-        break;
-      }
-    }
-    
-    Uint32 count = 2;
-    Uint16 *fragments = (Uint16*)(signal->theData+25);
-    if (primaryTableId == RNIL) {
-      jam();
-      Uint8 next_replica_node[MAX_NDB_NODES];
-      memset(next_replica_node,0,sizeof(next_replica_node));
-      for(Uint32 fragNo = 0; fragNo<noOfFragments; fragNo++){
-        jam();
-        ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);      
-        const Uint32 max = NGPtr.p->nodeCount;
-	
-	Uint32 tmp= next_replica_node[NGPtr.i];
-        for(Uint32 replicaNo = 0; replicaNo<noOfReplicas; replicaNo++)
-        {
-          jam();
-          const Uint32 nodeId = NGPtr.p->nodesInGroup[tmp++];
-          fragments[count++] = nodeId;
-          tmp = (tmp >= max ? 0 : tmp);
-        }
-	tmp++;
-	next_replica_node[NGPtr.i]= (tmp >= max ? 0 : tmp);
-	
-        /**
-         * Next node group for next fragment
-         */
-        NGPtr.i++;
-        NGPtr.i = (NGPtr.i == cnoOfNodeGroups ? 0 : NGPtr.i);
-      }
-    } else {
+      noOfFragments= primTabPtr.p->totalfragments;
       for (Uint32 fragNo = 0;
-           fragNo < primTabPtr.p->totalfragments; fragNo++) {
+           fragNo < noOfFragments; fragNo++) {
         jam();
         FragmentstorePtr fragPtr;
         ReplicaRecordPtr replicaPtr;
         getFragstore(primTabPtr.p, fragNo, fragPtr);
-        fragments[count++] = fragPtr.p->preferredPrimary;
+        fragments[count++]= fragPtr.p->preferredPrimary;
         for (replicaPtr.i = fragPtr.p->storedReplicas;
              replicaPtr.i != RNIL;
              replicaPtr.i = replicaPtr.p->nextReplica) {
@@ -6418,9 +6604,9 @@
           ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
           if (replicaPtr.p->procNode != fragPtr.p->preferredPrimary) {
             jam();
-            fragments[count++] = replicaPtr.p->procNode;
-          }//if
-        }//for
+            fragments[count++]= replicaPtr.p->procNode;
+          }
+        }
         for (replicaPtr.i = fragPtr.p->oldStoredReplicas;
              replicaPtr.i != RNIL;
              replicaPtr.i = replicaPtr.p->nextReplica) {
@@ -6428,25 +6614,26 @@
           ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
           if (replicaPtr.p->procNode != fragPtr.p->preferredPrimary) {
             jam();
-            fragments[count++] = replicaPtr.p->procNode;
-          }//if
-        }//for
+            fragments[count++]= replicaPtr.p->procNode;
+          }
+        }
       }
     }
-    ndbrequire(count == (2 + noOfReplicas * noOfFragments)); 
+    ndbrequire(count == (2U + noOfReplicas * noOfFragments)); 
     
     CreateFragmentationConf * const conf = 
       (CreateFragmentationConf*)signal->getDataPtrSend();
     conf->senderRef = reference();
     conf->senderData = senderData;
-    conf->noOfReplicas = noOfReplicas;
-    conf->noOfFragments = noOfFragments;
+    conf->noOfReplicas = (Uint32)noOfReplicas;
+    conf->noOfFragments = (Uint32)noOfFragments;
 
-    fragments[0] = noOfReplicas;
-    fragments[1] = noOfFragments;
+    fragments[0]= noOfReplicas;
+    fragments[1]= noOfFragments;
     
     if(senderRef != 0)
     {
+      jam();
       LinearSectionPtr ptr[3];
       ptr[0].p = (Uint32*)&fragments[0];
       ptr[0].sz = (count + 1) / 2;
@@ -6458,33 +6645,17 @@
 		 ptr,
 		 1);
     }
-    else
-    {
-      // Execute direct
-      signal->theData[0] = 0;
-    }
+    // Always ACK/NACK (here ACK)
+    signal->theData[0] = 0;
     return;
   } while(false);
-
-  if(senderRef != 0)
-  {
-    CreateFragmentationRef * const ref = 
-      (CreateFragmentationRef*)signal->getDataPtrSend();
-    ref->senderRef = reference();
-    ref->senderData = senderData;
-    ref->errorCode = err;
-    sendSignal(senderRef, GSN_CREATE_FRAGMENTATION_REF, signal, 
-	       CreateFragmentationRef::SignalLength, JBB);
-  }
-  else
-  {
-    // Execute direct
-    signal->theData[0] = err;
-  }
+  // Always ACK/NACK (here NACK)
+  signal->theData[0] = err;
 }
 
 void Dbdih::execDIADDTABREQ(Signal* signal) 
 {
+  Uint32 fragType;
   jamEntry();
 
   DiAddTabReq * const req = (DiAddTabReq*)signal->getDataPtr();
@@ -6509,6 +6680,7 @@
   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
   tabPtr.p->connectrec = connectPtr.i;
   tabPtr.p->tableType = req->tableType;
+  fragType= req->fragType;
   tabPtr.p->schemaVersion = req->schemaVersion;
   tabPtr.p->primaryTableId = req->primaryTableId;
 
@@ -6545,9 +6717,33 @@
   /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
   tabPtr.p->tabStatus = TabRecord::TS_CREATING;
   tabPtr.p->storedTable = req->storedTable;
-  tabPtr.p->method = TabRecord::HASH;
   tabPtr.p->kvalue = req->kValue;
 
+  switch ((DictTabInfo::FragmentType)fragType)
+  {
+    case DictTabInfo::AllNodesSmallTable:
+    case DictTabInfo::AllNodesMediumTable:
+    case DictTabInfo::AllNodesLargeTable:
+    case DictTabInfo::SingleFragment:
+      jam();
+    case DictTabInfo::DistrKeyLin:
+      jam();
+      tabPtr.p->method= TabRecord::LINEAR_HASH;
+      break;
+    case DictTabInfo::DistrKeyHash:
+    case DictTabInfo::DistrKeyUniqueHashIndex:
+    case DictTabInfo::DistrKeyOrderedIndex:
+      jam();
+      tabPtr.p->method= TabRecord::NORMAL_HASH;
+      break;
+    case DictTabInfo::UserDefined:
+      jam();
+      tabPtr.p->method= TabRecord::USER_DEFINED;
+      break;
+    default:
+      ndbrequire(false);
+  }
+
   union {
     Uint16 fragments[2 + MAX_FRAG_PER_NODE*MAX_REPLICAS*MAX_NDB_NODES];
     Uint32 align;
@@ -6597,6 +6793,8 @@
     Uint32 activeIndex = 0;
     getFragstore(tabPtr.p, fragId, fragPtr);
     fragPtr.p->preferredPrimary = fragments[index];
+    fragPtr.p->m_log_part_id = c_nextLogPart++;
+    
     for (Uint32 i = 0; i<noReplicas; i++) {
       const Uint32 nodeId = fragments[index++];
       ReplicaRecordPtr replicaPtr;
@@ -6641,9 +6839,9 @@
   jam();
   const Uint32 fragCount = tabPtr.p->totalfragments;
   ReplicaRecordPtr replicaPtr; replicaPtr.i = RNIL;
+  FragmentstorePtr fragPtr;
   for(; fragId<fragCount; fragId++){
     jam();
-    FragmentstorePtr fragPtr;
     getFragstore(tabPtr.p, fragId, fragPtr);    
     
     replicaPtr.i = fragPtr.p->storedReplicas;
@@ -6701,6 +6899,7 @@
     req->nodeId = getOwnNodeId();
     req->totalFragments = fragCount;
     req->startGci = SYSFILE->newestRestorableGCI;
+    req->logPartId = fragPtr.p->m_log_part_id;
     sendSignal(DBDICT_REF, GSN_ADD_FRAGREQ, signal, 
 	       AddFragReq::SignalLength, JBB);
     return;
@@ -6982,17 +7181,40 @@
   tabPtr.i = req->tableId;
   Uint32 hashValue = req->hashValue;
   Uint32 ttabFileSize = ctabFileSize;
+  Uint32 fragId;
+  DiGetNodesConf * const conf = (DiGetNodesConf *)&signal->theData[0];
   TabRecord* regTabDesc = tabRecord;
   jamEntry();
   ptrCheckGuard(tabPtr, ttabFileSize, regTabDesc);
-  Uint32 fragId = hashValue & tabPtr.p->mask;
-  ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE);
-  if (fragId < tabPtr.p->hashpointer) {
+  if (tabPtr.p->method == TabRecord::LINEAR_HASH)
+  {
     jam();
-    fragId = hashValue & ((tabPtr.p->mask << 1) + 1);
-  }//if
+    fragId = hashValue & tabPtr.p->mask;
+    ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE);
+    if (fragId < tabPtr.p->hashpointer) {
+      jam();
+      fragId = hashValue & ((tabPtr.p->mask << 1) + 1);
+    }//if
+  }
+  else if (tabPtr.p->method == TabRecord::NORMAL_HASH)
+  {
+    jam();
+    fragId= hashValue % tabPtr.p->totalfragments;
+  }
+  else
+  {
+    jam();
+    ndbassert(tabPtr.p->method == TabRecord::USER_DEFINED);
+    fragId= hashValue;
+    if (fragId >= tabPtr.p->totalfragments)
+    {
+      jam();
+      conf->zero= 1; //Indicate error;
+      signal->theData[1]= ZUNDEFINED_FRAGMENT_ERROR;
+      return;
+    }
+  }
   getFragstore(tabPtr.p, fragId, fragPtr);
-  DiGetNodesConf * const conf = (DiGetNodesConf *)&signal->theData[0];
   Uint32 nodeCount = extractNodeInfo(fragPtr.p, conf->nodes);
   Uint32 sig2 = (nodeCount - 1) + 
     (fragPtr.p->distributionKey << 16);
@@ -7159,39 +7381,79 @@
 
+/**
+ * DI_FCOUNTREQ: report the fragment count and noOfBackups of a table.
+ *
+ * - table not TS_ACTIVE: DI_FCOUNTREF (with the table status) to the sender
+ * - connect record given and INUSE: DI_FCOUNTCONF to its userblockref
+ * - connect record given, not INUSE: DI_FCOUNTREF to its userblockref
+ * - no connect record (m_connectionData == RNIL): DI_FCOUNTCONF to the sender
+ */
 void Dbdih::execDI_FCOUNTREQ(Signal* signal) 
 {
+  DihFragCountReq * const req = (DihFragCountReq*)signal->getDataPtr();
   ConnectRecordPtr connectPtr;
   TabRecordPtr tabPtr;
+  const BlockReference senderRef = signal->senderBlockRef();
+  const Uint32 senderData = req->m_senderData;
   jamEntry();
-  connectPtr.i = signal->theData[0];
-  tabPtr.i = signal->theData[1];
+  connectPtr.i = req->m_connectionData;
+  tabPtr.i = req->m_tableRef;
   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
 
-  ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE);
+  if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
+  {
+    DihFragCountRef* ref = (DihFragCountRef*)signal->getDataPtrSend();
+    //connectPtr.i == RNIL -> question without connect record
+    if(connectPtr.i == RNIL)
+      ref->m_connectionData = RNIL;
+    else
+    {
+      jam();
+      // connectPtr.p has not been set on this path; guard .i and resolve .p first
+      ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
+      ref->m_connectionData = connectPtr.p->userpointer;
+    }
+    ref->m_tableRef = tabPtr.i;
+    ref->m_senderData = senderData;
+    ref->m_error = DihFragCountRef::ErroneousTableState;
+    ref->m_tableStatus = tabPtr.p->tabStatus;
+    sendSignal(senderRef, GSN_DI_FCOUNTREF, signal, 
+               DihFragCountRef::SignalLength, JBB);
+    return;
+  }
 
   if(connectPtr.i != RNIL){
     ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
     if (connectPtr.p->connectState == ConnectRecord::INUSE) {
       jam();
-      signal->theData[0] = connectPtr.p->userpointer;
-      signal->theData[1] = tabPtr.p->totalfragments;
-      sendSignal(connectPtr.p->userblockref, GSN_DI_FCOUNTCONF, signal,2, JBB);
-      return;
-    }//if
-    signal->theData[0] = connectPtr.p->userpointer;
-    signal->theData[1] = ZERRONOUSSTATE;
-    sendSignal(connectPtr.p->userblockref, GSN_DI_FCOUNTREF, signal, 2, JBB);
+      DihFragCountConf* conf = (DihFragCountConf*)signal->getDataPtrSend();
+      conf->m_connectionData = connectPtr.p->userpointer;
+      conf->m_tableRef = tabPtr.i;
+      conf->m_senderData = senderData;
+      conf->m_fragmentCount = tabPtr.p->totalfragments;
+      conf->m_noOfBackups = tabPtr.p->noOfBackups;
+      sendSignal(connectPtr.p->userblockref, GSN_DI_FCOUNTCONF, signal,
+                 DihFragCountConf::SignalLength, JBB);
+      return;
+    }//if
+    DihFragCountRef* ref = (DihFragCountRef*)signal->getDataPtrSend();
+    ref->m_connectionData = connectPtr.p->userpointer;
+    ref->m_tableRef = tabPtr.i;
+    ref->m_senderData = senderData;
+    ref->m_error = DihFragCountRef::ErroneousTableState;
+    ref->m_tableStatus = tabPtr.p->tabStatus;
+    sendSignal(connectPtr.p->userblockref, GSN_DI_FCOUNTREF, signal, 
+               DihFragCountRef::SignalLength, JBB);
     return;
   }//if
-
+  DihFragCountConf* conf = (DihFragCountConf*)signal->getDataPtrSend();
   //connectPtr.i == RNIL -> question without connect record
-  const Uint32 senderData = signal->theData[2];
-  const BlockReference senderRef = signal->senderBlockRef();
-  signal->theData[0] = RNIL;
-  signal->theData[1] = tabPtr.p->totalfragments;
-  signal->theData[2] = tabPtr.i;
-  signal->theData[3] = senderData;
-  signal->theData[4] = tabPtr.p->noOfBackups;
-  sendSignal(senderRef, GSN_DI_FCOUNTCONF, signal, 5, JBB);
+  conf->m_connectionData = RNIL;
+  conf->m_tableRef = tabPtr.i;
+  conf->m_senderData = senderData;
+  conf->m_fragmentCount = tabPtr.p->totalfragments;
+  conf->m_noOfBackups = tabPtr.p->noOfBackups;
+  sendSignal(senderRef, GSN_DI_FCOUNTCONF, signal, 
+             DihFragCountConf::SignalLength, JBB);
 }//Dbdih::execDI_FCOUNTREQ()
 
 void Dbdih::execDIGETPRIMREQ(Signal* signal) 
@@ -7950,9 +8199,12 @@
     
     SubGcpCompleteRep * const rep = (SubGcpCompleteRep*)signal->getDataPtr();
     rep->gci = coldgcp;
-    rep->senderData = 0;
     sendSignal(SUMA_REF, GSN_SUB_GCP_COMPLETE_REP, signal, 
 	       SubGcpCompleteRep::SignalLength, JBB);
+
+    EXECUTE_DIRECT(LGMAN, GSN_SUB_GCP_COMPLETE_REP, signal, 
+		   SubGcpCompleteRep::SignalLength);
+    jamEntry();
   }
   
   jam();
@@ -8519,8 +8771,7 @@
   rf.rwfTabPtr.p->hashpointer = readPageWord(&rf);
   rf.rwfTabPtr.p->kvalue = readPageWord(&rf);
   rf.rwfTabPtr.p->mask = readPageWord(&rf);
-  ndbrequire(readPageWord(&rf) == TabRecord::HASH);
-  rf.rwfTabPtr.p->method = TabRecord::HASH;
+  rf.rwfTabPtr.p->method = (TabRecord::Method)readPageWord(&rf);
   /* ---------------------------------- */
   /* Type of table, 2 = temporary table */
   /* ---------------------------------- */
@@ -8614,7 +8865,7 @@
   writePageWord(&wf, tabPtr.p->hashpointer);
   writePageWord(&wf, tabPtr.p->kvalue);
   writePageWord(&wf, tabPtr.p->mask);
-  writePageWord(&wf, TabRecord::HASH);
+  writePageWord(&wf, tabPtr.p->method);
   writePageWord(&wf, tabPtr.p->storedTable);
 
   signal->theData[0] = DihContinueB::ZPACK_FRAG_INTO_PAGES;
@@ -8715,6 +8966,80 @@
 /*****************************************************************************/
 /* **********     START FRAGMENT MODULE                          *************/
 /*****************************************************************************/
+void // Debug aid: prints, via ndbout_c, the LCP and crashed-replica bookkeeping of every replica of every active table.
+Dbdih::dump_replica_info()
+{
+  TabRecordPtr tabPtr;
+  FragmentstorePtr fragPtr;
+
+  for(tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++)
+  {
+    ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
+    if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
+      continue; // only tables in TS_ACTIVE are dumped
+    
+    for(Uint32 fid = 0; fid<tabPtr.p->totalfragments; fid++)
+    {
+      getFragstore(tabPtr.p, fid, fragPtr);
+      ndbout_c("tab: %d frag: %d gci: %d\n  -- storedReplicas:", 
+	       tabPtr.i, fid, SYSFILE->newestRestorableGCI);
+      
+      Uint32 i;
+      ReplicaRecordPtr replicaPtr;
+      replicaPtr.i = fragPtr.p->storedReplicas;
+      for(; replicaPtr.i != RNIL; replicaPtr.i = replicaPtr.p->nextReplica) // walk the stored-replica list until RNIL
+      {
+	ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
+	ndbout_c("  node: %d initialGci: %d nextLcp: %d noCrashedReplicas: %d",
+		 replicaPtr.p->procNode,
+		 replicaPtr.p->initialGci,
+		 replicaPtr.p->nextLcp,
+		 replicaPtr.p->noCrashedReplicas);
+	for(i = 0; i<MAX_LCP_STORED; i++) // one line per stored LCP slot
+	{
+	  ndbout_c("    i: %d %s : lcpId: %d maxGci Completed: %d Started: %d",
+		   i, 
+		   (replicaPtr.p->lcpStatus[i] == ZVALID ?"VALID":"INVALID"),
+		   replicaPtr.p->lcpId[i],
+		   replicaPtr.p->maxGciCompleted[i],
+		   replicaPtr.p->maxGciStarted[i]);
+	}
+	
+	for (i = 0; i < 8; i++) // NOTE(review): 8 is presumably the size of replicaLastGci[]/createGci[] -- confirm
+	{
+	  ndbout_c("    crashed replica: %d replicaLastGci: %d createGci: %d",
+		   i, 
+		   replicaPtr.p->replicaLastGci[i],
+		   replicaPtr.p->createGci[i]);
+	}
+      }
+      ndbout_c("  -- oldStoredReplicas");
+      replicaPtr.i = fragPtr.p->oldStoredReplicas;
+      for(; replicaPtr.i != RNIL; replicaPtr.i = replicaPtr.p->nextReplica) // same dump for the old-stored list, without the "node:" summary line
+      {
+	ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
+	for(i = 0; i<MAX_LCP_STORED; i++)
+	{
+	  ndbout_c("    i: %d %s : lcpId: %d maxGci Completed: %d Started: %d",
+		   i, 
+		   (replicaPtr.p->lcpStatus[i] == ZVALID ?"VALID":"INVALID"),
+		   replicaPtr.p->lcpId[i],
+		   replicaPtr.p->maxGciCompleted[i],
+		   replicaPtr.p->maxGciStarted[i]);
+	}
+	
+	for (i = 0; i < 8; i++)
+	{
+	  ndbout_c("    crashed replica: %d replicaLastGci: %d createGci: %d",
+		   i, 
+		   replicaPtr.p->replicaLastGci[i],
+		   replicaPtr.p->createGci[i]);
+	}
+      }
+    }
+  }
+}
+
 void Dbdih::startFragment(Signal* signal, Uint32 tableId, Uint32 fragId) 
 {
   Uint32 TloopCount = 0;
@@ -8776,6 +9101,7 @@
   /*     SEARCH FOR STORED REPLICAS THAT CAN BE USED TO RESTART THE SYSTEM.  */
   /* ----------------------------------------------------------------------- */
   searchStoredReplicas(fragPtr);
+
   if (cnoOfCreateReplicas == 0) {
     /* --------------------------------------------------------------------- */
     /*   THERE WERE NO STORED REPLICAS AVAILABLE THAT CAN SERVE AS REPLICA TO*/
@@ -8788,6 +9114,10 @@
     char buf[64];
     BaseString::snprintf(buf, sizeof(buf), "table: %d fragment: %d gci: %d",
 			 tableId, fragId, SYSFILE->newestRestorableGCI);
+
+    ndbout_c(buf);
+    dump_replica_info();
+    
     progError(__LINE__, NDBD_EXIT_NO_RESTORABLE_REPLICA, buf);
     ndbrequire(false);
     return;
@@ -8864,8 +9194,8 @@
     // otherwise we have a problem.
     /* --------------------------------------------------------------------- */
     jam();
-    ndbrequire(senderNodeId == c_nodeStartMaster.startNode);
-    nodeRestartStartRecConfLab(signal);
+    ndbout_c("startNextCopyFragment");
+    startNextCopyFragment(signal, findTakeOver(senderNodeId));
     return;
   } else {
     /* --------------------------------------------------------------------- */
@@ -9883,9 +10213,11 @@
 }
 
 void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr, 
-			Fragmentstore* fragPtrP, Uint32 nodeId)
+			Fragmentstore* fragPtrP, 
+			Uint32 nodeId,
+			bool old)
 {
-  replicaPtr.i = fragPtrP->storedReplicas;
+  replicaPtr.i = old ? fragPtrP->oldStoredReplicas : fragPtrP->storedReplicas;
   while(replicaPtr.i != RNIL){
     ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
     if (replicaPtr.p->procNode == nodeId) {
@@ -11138,6 +11470,7 @@
   cnoHotSpare = 0;
   cnoOfActiveTables = 0;
   cnoOfNodeGroups = 0;
+  c_nextNodeGroup = 0;
   cnoReplicas = 0;
   coldgcp = 0;
   coldGcpId = 0;
@@ -11157,6 +11490,7 @@
   c_newest_restorable_gci = 0;
   cverifyQueueCounter = 0;
   cwaitLcpSr = false;
+  c_nextLogPart = 0;
 
   nodeResetStart();
   c_nodeStartMaster.wait = ZFALSE;
@@ -11164,7 +11498,7 @@
   memset(&sysfileData[0], 0, sizeof(sysfileData));
 
   const ndb_mgm_configuration_iterator * p = 
-    theConfiguration.getOwnConfigIterator();
+    m_ctx.m_config.getOwnConfigIterator();
   ndbrequire(p != 0);
   
   c_lcpState.clcpDelay = 20;
@@ -11243,6 +11577,8 @@
     SYSFILE->takeOver[i] = 0;
   }//for
   Sysfile::setInitialStartOngoing(SYSFILE->systemRestartBits);
+  srand(time(0));
+  globalData.m_restart_seq = SYSFILE->m_restart_seq = 0;
 }//Dbdih::initRestartInfo()
 
 /*--------------------------------------------------------------------*/
@@ -11961,6 +12297,8 @@
     jam();
     fragPtr.p->distributionKey = TdistKey;
   }//if
+
+  fragPtr.p->m_log_part_id = readPageWord(rf);
 }//Dbdih::readFragment()
 
 Uint32 Dbdih::readPageWord(RWFragment* rf) 
@@ -13060,6 +13398,7 @@
   writePageWord(wf, fragPtr.p->noStoredReplicas);
   writePageWord(wf, fragPtr.p->noOldStoredReplicas);
   writePageWord(wf, fragPtr.p->distributionKey);
+  writePageWord(wf, fragPtr.p->m_log_part_id);
 }//Dbdih::writeFragment()
 
 void Dbdih::writePageWord(RWFragment* wf, Uint32 dataWord)
@@ -13126,7 +13465,7 @@
   signal->theData[0] = filePtr.p->fileRef;
   signal->theData[1] = reference();
   signal->theData[2] = filePtr.i;
-  signal->theData[3] = ZLIST_OF_PAIRS;
+  signal->theData[3] = ZLIST_OF_PAIRS_SYNCH;
   signal->theData[4] = ZVAR_NO_WORD;
   signal->theData[5] = tab->noPages;
   for (Uint32 i = 0; i < tab->noPages; i++) {
@@ -13489,7 +13828,7 @@
     if (signal->getLength() == 1)
     {
       const ndb_mgm_configuration_iterator * p = 
-	theConfiguration.getOwnConfigIterator();
+	m_ctx.m_config.getOwnConfigIterator();
       ndbrequire(p != 0);
       ndb_mgm_get_int_parameter(p, CFG_DB_GCP_INTERVAL, &cgcpDelay);
     }

--- 1.73.20.1/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2006-04-04 11:50:44 +02:00
+++ 1.108/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2006-04-05 14:58:26 +02:00
@@ -38,6 +38,7 @@
 #include <signaldata/TcContinueB.hpp>
 #include <signaldata/TcKeyFailConf.hpp>
 #include <signaldata/AbortAll.hpp>
+#include <signaldata/DihFragCount.hpp>
 #include <signaldata/ScanFrag.hpp>
 #include <signaldata/ScanTab.hpp>
 #include <signaldata/PrepDropTab.hpp>
@@ -344,7 +345,7 @@
   tabptr.p->noOfKeyAttr = desc->noOfKeyAttr;
   tabptr.p->hasCharAttr = desc->hasCharAttr;
   tabptr.p->noOfDistrKeys = desc->noOfDistrKeys;
-  
+  tabptr.p->hasVarKeys = desc->noOfVarKeys > 0;
   signal->theData[0] = tabptr.i;
   signal->theData[1] = retPtr;
   sendSignal(retRef, GSN_TC_SCHVERCONF, signal, 2, JBB);
@@ -610,7 +611,7 @@
   jamEntry();
   
   const ndb_mgm_configuration_iterator * p = 
-    theConfiguration.getOwnConfigIterator();
+    m_ctx.m_config.getOwnConfigIterator();
   ndbrequire(p != 0);
   
   initData();
@@ -2306,14 +2307,15 @@
 {
   Uint64 Tmp[MAX_KEY_SIZE_IN_WORDS * MAX_XFRM_MULTIPLY];
   const TableRecord* tabPtrP = &tableRecord[tabPtrI];
+  const bool hasVarKeys = tabPtrP->hasVarKeys;
   const bool hasCharAttr = tabPtrP->hasCharAttr;
-  const bool hasDistKeys = tabPtrP->noOfDistrKeys > 0;
+  const bool compute_distkey = distr && (tabPtrP->noOfDistrKeys > 0);
   
   Uint32 *dst = (Uint32*)Tmp;
   Uint32 dstPos = 0;
   Uint32 keyPartLen[MAX_ATTRIBUTES_IN_INDEX];
   Uint32 * keyPartLenPtr;
-  if(hasCharAttr)
+  if(hasCharAttr || (compute_distkey && hasVarKeys))
   {
     keyPartLenPtr = keyPartLen;
     dstPos = xfrm_key(tabPtrI, src, dst, sizeof(Tmp) >> 2, keyPartLenPtr);
@@ -2331,7 +2333,7 @@
   
   md5_hash(dstHash, (Uint64*)dst, dstPos);
   
-  if(distr && hasDistKeys)
+  if(compute_distkey)
   {
     jam();
     
@@ -2740,12 +2742,14 @@
   Uint8 TDirtyFlag          = tcKeyReq->getDirtyFlag(Treqinfo);
   Uint8 TInterpretedFlag    = tcKeyReq->getInterpretedFlag(Treqinfo);
   Uint8 TDistrKeyFlag       = tcKeyReq->getDistributionKeyFlag(Treqinfo);
+  Uint8 TNoDiskFlag         = TcKeyReq::getNoDiskFlag(Treqinfo);
   Uint8 TexecuteFlag        = TexecFlag;
   
   regCachePtr->opSimple = TSimpleFlag;
   regCachePtr->opExec   = TInterpretedFlag;
   regTcPtr->dirtyOp  = TDirtyFlag;
   regCachePtr->distributionKeyIndicator = TDistrKeyFlag;
+  regCachePtr->m_no_disk_flag = TNoDiskFlag;
 
   //-------------------------------------------------------------
   // The next step is to read the upto three conditional words.
@@ -2809,17 +2813,9 @@
   regCachePtr->attrinfo15[2] = Tdata4;
   regCachePtr->attrinfo15[3] = Tdata5;
 
-  if (TOperationType == ZREAD) {
-    Uint32 TreadCount = c_counters.creadCount;
-    jam();
-    regCachePtr->opLock = 0;
-    c_counters.creadCount = TreadCount + 1;
-  } else if(TOperationType == ZREAD_EX){
+  if (TOperationType == ZREAD || TOperationType == ZREAD_EX) {
     Uint32 TreadCount = c_counters.creadCount;
     jam();
-    TOperationType = ZREAD;
-    regTcPtr->operation = ZREAD;
-    regCachePtr->opLock = ZUPDATE;
     c_counters.creadCount = TreadCount + 1;
   } else {
     if(regApiPtr->commitAckMarker == RNIL){
@@ -2853,24 +2849,10 @@
     c_counters.cwriteCount = TwriteCount + 1;
     switch (TOperationType) {
     case ZUPDATE:
-      jam();
-      if (TattrLen == 0) {
-        //TCKEY_abort(signal, 5);
-        //return;
-      }//if
-      /*---------------------------------------------------------------------*/
-      // The missing break is intentional since we also want to set the opLock 
-      // variable also for updates
-      /*---------------------------------------------------------------------*/
     case ZINSERT:
     case ZDELETE:
-      jam();      
-      regCachePtr->opLock = TOperationType;
-      break;
     case ZWRITE:
       jam();
-      // A write operation is originally an insert operation.
-      regCachePtr->opLock = ZINSERT;  
       break;
     default:
       TCKEY_abort(signal, 9);
@@ -3045,7 +3027,7 @@
   tnoOfStandby = (tnodeinfo >> 8) & 3;
  
   regCachePtr->fragmentDistributionKey = (tnodeinfo >> 16) & 255;
-  if (Toperation == ZREAD) {
+  if (Toperation == ZREAD || Toperation == ZREAD_EX) {
     if (Tdirty == 1) {
       jam();
       /*-------------------------------------------------------------*/
@@ -3174,6 +3156,7 @@
   TcConnectRecord * const regTcPtr = tcConnectptr.p;
   ApiConnectRecord * const regApiPtr = apiConnectptr.p;
   CacheRecord * const regCachePtr = cachePtr.p;
+  Uint32 version = getNodeInfo(refToNode(TBRef)).m_version;
   UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6;
 #ifdef ERROR_INSERT
   if (ERROR_INSERTED(8002)) {
@@ -3217,7 +3200,12 @@
   bool simpleRead = (sig1 == ZREAD && sig0 == ZTRUE);
   LqhKeyReq::setKeyLen(Tdata10, regCachePtr->keylen);
   LqhKeyReq::setLastReplicaNo(Tdata10, regTcPtr->lastReplicaNo);
-  LqhKeyReq::setLockType(Tdata10, regCachePtr->opLock);
+  if (unlikely(version < NDBD_ROWID_VERSION))
+  {
+    Uint32 op = regTcPtr->operation;
+    Uint32 lock = op == ZREAD_EX ? ZUPDATE : op == ZWRITE ? ZINSERT : op;
+    LqhKeyReq::setLockType(Tdata10, lock);
+  }
   /* ---------------------------------------------------------------------- */
   // Indicate Application Reference is present in bit 15
   /* ---------------------------------------------------------------------- */
@@ -3226,6 +3214,8 @@
   LqhKeyReq::setInterpretedFlag(Tdata10, regCachePtr->opExec);
   LqhKeyReq::setSimpleFlag(Tdata10, sig0);
   LqhKeyReq::setOperation(Tdata10, sig1);
+  LqhKeyReq::setNoDiskFlag(Tdata10, regCachePtr->m_no_disk_flag);
+
   /* ----------------------------------------------------------------------- 
    * Sequential Number of first LQH = 0, bit 22-23                           
    * IF ATTRIBUTE INFORMATION IS SENT IN TCKEYREQ,
@@ -3943,7 +3933,7 @@
   const UintR TopWords = (UintR)regApiPtr->tckeyrec;
   localHostptr.i = refToNode(regApiPtr->ndbapiBlockref);
   const Uint32 type = getNodeInfo(localHostptr.i).m_type;
-  const bool is_api = (type >= NodeInfo::API && type <= NodeInfo::REP);
+  const bool is_api = (type >= NodeInfo::API && type <= NodeInfo::MGM);
   const BlockNumber TblockNum = refToBlock(regApiPtr->ndbapiBlockref);
   const Uint32 Tmarker = (regApiPtr->commitAckMarker == RNIL) ? 0 : 1;
   ptrAss(localHostptr, hostRecord);
@@ -4625,7 +4615,8 @@
     commitConf->transId1 = regApiPtr->transid[0];
     commitConf->transId2 = regApiPtr->transid[1];
     commitConf->gci = regApiPtr->globalcheckpointid;
-    sendSignal(regApiPtr->ndbapiBlockref, GSN_TC_COMMITCONF, signal, 
+
+    sendSignal(regApiPtr->ndbapiBlockref, GSN_TC_COMMITCONF, signal,
 	       TcCommitConf::SignalLength, JBB);
   } else if (regApiPtr->returnsignal == RS_NO_RETURN) {
     jam();
@@ -4815,13 +4806,14 @@
   key.transid2 = signal->theData[1];
 
   CommitAckMarkerPtr removedMarker;
-  m_commitAckMarkerHash.release(removedMarker, key);
+  m_commitAckMarkerHash.remove(removedMarker, key);
   if (removedMarker.i == RNIL) {
     jam();
     warningHandlerLab(signal, __LINE__);
     return;
   }//if
   sendRemoveMarkers(signal, removedMarker.p);
+  m_commitAckMarkerPool.release(removedMarker);
 }
 
 void
@@ -5166,6 +5158,19 @@
 	return;
       }
       
+      /* Only ref in certain situations */
+      {
+	const Uint32 opType = regTcPtr->operation;
+	if (   (opType == ZDELETE && errCode != ZNOT_FOUND)
+	    || (opType == ZINSERT && errCode != ZALREADYEXIST)
+	    || (opType == ZUPDATE && errCode != ZNOT_FOUND)
+	    || (opType == ZWRITE  && errCode != 839 && errCode != 840))
+	{
+	  TCKEY_abort(signal, 49);
+	  return;
+	}
+      }
+
       /* *************** */
       /*    TCKEYREF   < */
       /* *************** */
@@ -8761,6 +8766,7 @@
   ScanFragReq::setDescendingFlag(tmp, ScanTabReq::getDescendingFlag(ri));
   ScanFragReq::setTupScanFlag(tmp, ScanTabReq::getTupScanFlag(ri));
   ScanFragReq::setAttrLen(tmp, scanTabReq->attrLenKeyLen & 0xFFFF);
+  ScanFragReq::setNoDiskFlag(tmp, ScanTabReq::getNoDiskFlag(ri));
   
   scanptr.p->scanRequestInfo = tmp;
   scanptr.p->scanStoredProcId = scanTabReq->storedProcId;
@@ -8880,9 +8886,11 @@
      * THE FIRST STEP TO RECEIVE IS SUCCESSFULLY COMPLETED. 
      * WE MUST FIRST GET THE NUMBER OF  FRAGMENTS IN THE TABLE.
      ***************************************************/
-    signal->theData[0] = tcConnectptr.p->dihConnectptr;
-    signal->theData[1] = scanptr.p->scanTableref;
-    sendSignal(cdihblockref, GSN_DI_FCOUNTREQ, signal, 2, JBB);
+    DihFragCountReq * const req = (DihFragCountReq*)signal->getDataPtrSend();
+    req->m_connectionData = tcConnectptr.p->dihConnectptr;
+    req->m_tableRef = scanptr.p->scanTableref;
+    sendSignal(cdihblockref, GSN_DI_FCOUNTREQ, signal, 
+               DihFragCountReq::SignalLength, JBB);
   }
   else 
   {
@@ -8893,17 +8901,18 @@
     UintR TerrorIndicator = signal->theData[0];
     jamEntry();
     if (TerrorIndicator != 0) {
-      signal->theData[0] = tcConnectptr.i;
-      //signal->theData[1] Contains error
+      DihFragCountRef * const ref = (DihFragCountRef*)signal->getDataPtr();
+      ref->m_connectionData = tcConnectptr.i;
+      ref->m_error = signal->theData[1];
       execDI_FCOUNTREF(signal);
       return;
     }
     
     UintR Tdata1 = signal->theData[1];
     scanptr.p->scanNextFragId = Tdata1;
-
-    signal->theData[0] = tcConnectptr.i;
-    signal->theData[1] = 1; // Frag count
+    DihFragCountConf * const conf = (DihFragCountConf*)signal->getDataPtr();
+    conf->m_connectionData = tcConnectptr.i;
+    conf->m_fragmentCount = 1; // Frag count
     execDI_FCOUNTCONF(signal);
   }
   return;
@@ -8921,8 +8930,9 @@
 void Dbtc::execDI_FCOUNTCONF(Signal* signal) 
 {
   jamEntry();
-  tcConnectptr.i = signal->theData[0];
-  Uint32 tfragCount = signal->theData[1];
+  DihFragCountConf * const conf = (DihFragCountConf*)signal->getDataPtr();
+  tcConnectptr.i = conf->m_connectionData;
+  Uint32 tfragCount = conf->m_fragmentCount;
   ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord);
   apiConnectptr.i = tcConnectptr.p->apiConnect;
   ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@@ -9005,9 +9015,10 @@
 void Dbtc::execDI_FCOUNTREF(Signal* signal) 
 {
   jamEntry();
-  tcConnectptr.i = signal->theData[0];
+  DihFragCountRef * const ref = (DihFragCountRef*)signal->getDataPtr();
+  tcConnectptr.i = ref->m_connectionData;
   ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord);
-  const Uint32 errCode = signal->theData[1];
+  const Uint32 errCode = ref->m_error;
   apiConnectptr.i = tcConnectptr.p->apiConnect;
   ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
   ScanRecordPtr scanptr;
@@ -10121,6 +10132,7 @@
     tabptr.p->noOfKeyAttr = 0;
     tabptr.p->hasCharAttr = 0;
     tabptr.p->noOfDistrKeys = 0;
+    tabptr.p->hasVarKeys = 0;
   }//for
 }//Dbtc::initTable()
 
@@ -11185,7 +11197,6 @@
   ApiConnectRecordPtr transPtr;
   TcConnectRecord *localTcConnectRecord = tcConnectRecord;
   TcConnectRecordPtr opPtr;
-
   /**
    * TODO
    * Check transid,
@@ -11199,6 +11210,7 @@
     
     c_firedTriggerHash.remove(trigPtr);
 
+    trigPtr.p->fragId= fireOrd->fragId;
     bool ok = trigPtr.p->keyValues.getSize() == fireOrd->m_noPrimKeyWords;
     ok &= trigPtr.p->afterValues.getSize() == fireOrd->m_noAfterValueWords;
     ok &= trigPtr.p->beforeValues.getSize() == fireOrd->m_noBeforeValueWords;
@@ -11410,7 +11422,7 @@
   const UintR TopWords = (UintR)regApiPtr->tcindxrec;
   localHostptr.i = refToNode(regApiPtr->ndbapiBlockref);
   const Uint32 type = getNodeInfo(localHostptr.i).m_type;
-  const bool is_api = (type >= NodeInfo::API && type <= NodeInfo::REP);
+  const bool is_api = (type >= NodeInfo::API && type <= NodeInfo::MGM);
   const BlockNumber TblockNum = refToBlock(regApiPtr->ndbapiBlockref);
   const Uint32 Tmarker = (regApiPtr->commitAckMarker == RNIL ? 0 : 1);
   ptrAss(localHostptr, hostRecord);
@@ -12098,7 +12110,11 @@
   Uint32 dataPos = 0;
   TcKeyReq * const tcIndxReq = &indexOp->tcIndxReq;
   TcKeyReq * const tcKeyReq = (TcKeyReq *)signal->getDataPtrSend();
-  Uint32 * dataPtr = &tcKeyReq->scanInfo;
+  /*
+    Data points to distrGroupHashValue since scanInfo is used to send
+    fragment id of receiving fragment
+  */
+  Uint32 * dataPtr = &tcKeyReq->distrGroupHashValue;
   Uint32 tcKeyLength = TcKeyReq::StaticLength;
   Uint32 tcKeyRequestInfo = tcIndxReq->requestInfo;
   TcIndexData* indexData;
@@ -12137,11 +12153,16 @@
   regApiPtr->executingIndexOp = indexOp->indexOpId;;
   regApiPtr->noIndexOp++; // Increase count
 
-  // Filter out AttributeHeader:s since this should not be in key
+  /*
+    Filter out AttributeHeader:s since this should not be in key.
+    Also filter out fragment id from primary key and handle that
+    separately by setting it as Distribution Key and set indicator.
+  */
+
   AttributeHeader* attrHeader = (AttributeHeader *) aiIter.data;
     
   Uint32 headerSize = attrHeader->getHeaderSize();
-  Uint32 keySize = attrHeader->getDataSize();
+  Uint32 keySize = attrHeader->getDataSize() - 1;
   TcKeyReq::setKeyLength(tcKeyRequestInfo, keySize);
   // Skip header
   if (headerSize == 1) {
@@ -12151,6 +12172,9 @@
     jam();
     moreKeyData = indexOp->transIdAI.next(aiIter, headerSize - 1);
   }//if
+  tcKeyReq->scanInfo = *aiIter.data; //Fragment Id
+  moreKeyData = indexOp->transIdAI.next(aiIter);
+  TcKeyReq::setDistributionKeyFlag(tcKeyRequestInfo, 1U);
   while(// If we have not read complete key
 	(keySize != 0) &&
 	(dataPos < keyBufSize)) {
@@ -12389,7 +12413,7 @@
 	  tmp2.release();
 	  LocalDataBuffer<11> tmp3(pool, trigPtr.p->afterValues);
 	  tmp3.release();
-          regApiPtr->theFiredTriggers.release(trigPtr.i);
+          regApiPtr->theFiredTriggers.release(trigPtr);
         }
 	trigPtr = nextTrigPtr;
       }
@@ -12506,7 +12530,7 @@
   AttributeBuffer::DataBufferIterator iter;
   Uint32 attrId = 0;
   Uint32 keyLength = 0;
-  Uint32 totalPrimaryKeyLength = 0;
+  Uint32 totalPrimaryKeyLength = 1; // fragment length
   Uint32 hops;
 
   indexTabPtr.i = indexData->indexId;
@@ -12559,11 +12583,12 @@
     hops = attrHeader->getHeaderSize() + attrHeader->getDataSize();
     moreAttrData = keyValues.next(iter, hops);
   }
-  AttributeHeader pkAttrHeader(attrId, totalPrimaryKeyLength);
+  AttributeHeader pkAttrHeader(attrId, totalPrimaryKeyLength << 2);
+  Uint32 attributesLength = afterValues.getSize() + 
+    pkAttrHeader.getHeaderSize() + pkAttrHeader.getDataSize();
   
   TcKeyReq::setKeyLength(tcKeyRequestInfo, keyLength);
-  tcKeyReq->attrLen = afterValues.getSize() + 
-    pkAttrHeader.getHeaderSize() + pkAttrHeader.getDataSize();
+  tcKeyReq->attrLen = attributesLength;
   tcKeyReq->tableId = indexData->indexId;
   TcKeyReq::setOperationType(tcKeyRequestInfo, ZINSERT);
   TcKeyReq::setExecutingTrigger(tcKeyRequestInfo, true);
@@ -12613,8 +12638,11 @@
   }
 
   tcKeyLength += dataPos;
-  Uint32 attributesLength = afterValues.getSize() + 
-    pkAttrHeader.getHeaderSize() + pkAttrHeader.getDataSize();
+  /*
+    Size of attrinfo is unique index attributes one by one, header for each
+    of them (all contained in the afterValues data structure), plus a header,
+    the primary key (compacted) and the fragment id before the primary key
+  */
   if (attributesLength <= attrBufSize) {
     jam();
     // ATTRINFO fits in TCKEYREQ
@@ -12631,6 +12659,10 @@
     // as one attribute
     pkAttrHeader.insertHeader(dataPtr);
     dataPtr += pkAttrHeader.getHeaderSize();
+    /*
+      Insert fragment id before primary key as part of reference to tuple
+    */
+    *dataPtr++ = firedTriggerData->fragId;
     moreAttrData = keyValues.first(iter);
     while(moreAttrData) {
       jam();
@@ -12795,6 +12827,29 @@
     pkAttrHeader.insertHeader(dataPtr);
     dataPtr += pkAttrHeader.getHeaderSize();
     attrInfoPos += pkAttrHeader.getHeaderSize();
+    /*
+      Add fragment id before primary key
+      TODO: This code really needs to be made into a long signal
+      to remove this messy code.
+    */
+    if (attrInfoPos == AttrInfo::DataLength)
+    {
+      jam();
+      // Flush ATTRINFO
+#if INTERNAL_TRIGGER_TCKEYREQ_JBA
+      sendSignal(reference(), GSN_ATTRINFO, signal, 
+                 AttrInfo::HeaderLength + AttrInfo::DataLength, JBA);
+#else
+      EXECUTE_DIRECT(DBTC, GSN_ATTRINFO, signal,
+                     AttrInfo::HeaderLength + AttrInfo::DataLength);
+      jamEntry();
+#endif
+      dataPtr = (Uint32 *) &attrInfo->attrData;	  
+      attrInfoPos = 0;
+    }
+    attrInfoPos++;
+    *dataPtr++ = firedTriggerData->fragId;
+
     moreAttrData = keyValues.first(iter);
     while(moreAttrData) {
       jam();
Thread
bk commit into 5.1 tree (ingo:1.2286) ingo 5 Apr