List:Internals« Previous MessageNext Message »
From:ingo Date:April 29 2005 8:27pm
Subject:bk commit into 5.0 tree (ingo:1.1919) BUG#8321
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of mydev. When mydev does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.1919 05/04/29 20:27:26 ingo@stripped +1 -0
  Bug#8321 - myisampack bug in compression algorithm
  This is the second version of the third of three changesets. 
  It does not contain the bug fix, but is built on top of it.
  The bug revealed that a data amount of less than 3GB could require
  Huffman codes of 32 bits in length. So we can expect even longer
  codes in the near to mid future. Therefore we should use 64-bit types
  to store the Huffman codes. This changeset suggests that change.

  myisam/myisampack.c
    1.42 05/04/29 20:26:18 ingo@stripped +55 -64
    Bug#8321 - myisampack bug in compression algorithm
    This is the second version of the third of three changesets. 
    It does not contain the bug fix, but is built on top of it.
    The bug revealed that a data amount of less than 3GB could require
    Huffman codes of 32 bits in length. So we can expect even longer
    codes in the near to mid future. Therefore we should use 64-bit types
    to store the Huffman codes. This changeset suggests that change.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	ingo
# Host:	chilla.local
# Root:	/home/mydev/mysql-5.0-bug8321

--- 1.41/myisam/myisampack.c	Thu Apr 28 19:33:59 2005
+++ 1.42/myisam/myisampack.c	Fri Apr 29 20:26:18 2005
@@ -33,10 +33,10 @@
 #include <my_getopt.h>
 #include <assert.h>
 
-#if INT_MAX > 32767
-#define BITS_SAVED 32
+#if SIZEOF_LONG_LONG > 4
+#define BITS_SAVED 64
 #else
-#define BITS_SAVED 16
+#define BITS_SAVED 32
 #endif
 
 #define IS_OFFSET ((uint) 32768)	/* Bit if offset or char in tree */
@@ -52,7 +52,7 @@
   char *buffer,*pos,*end;
   my_off_t pos_in_file;
   int bits;
-  uint current_byte;
+  ulonglong bitbucket;
 };
 
 struct st_huff_tree;
@@ -98,7 +98,7 @@
   my_off_t bytes_packed;
   uint tree_pack_length;
   uint min_chr,max_chr,char_bits,offset_bits,max_offset,height;
-  ulong *code;
+  ulonglong *code;
   uchar *code_len;
 } HUFF_TREE;
 
@@ -146,7 +146,7 @@
 static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees);
 static void make_traverse_code_tree(HUFF_TREE *huff_tree,
 				    HUFF_ELEMENT *element,uint size,
-				    ulong code);
+				    ulonglong code);
 static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees,
 			my_off_t tot_elements,my_off_t filelength);
 static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees);
@@ -161,7 +161,7 @@
 static void init_file_buffer(File file,pbool read_buffer);
 static int flush_buffer(ulong neaded_length);
 static void end_file_buffer(void);
-static void write_bits(ulong value,uint bits);
+static void write_bits(ulonglong value, uint bits);
 static void flush_bits(void);
 static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length,
 		      ha_checksum crc);
@@ -1434,12 +1434,13 @@
     {
       elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256;
       if (!(huff_tree->code =
-	    (ulong*) my_malloc(elements*
-			       (sizeof(ulong)+sizeof(uchar)),
-			       MYF(MY_WME | MY_ZEROFILL))))
+            (ulonglong*) my_malloc(elements*
+                                   (sizeof(ulonglong) + sizeof(uchar)),
+                                   MYF(MY_WME | MY_ZEROFILL))))
 	return 1;
       huff_tree->code_len=(uchar*) (huff_tree->code+elements);
-      make_traverse_code_tree(huff_tree,huff_tree->root,32,0);
+      make_traverse_code_tree(huff_tree, huff_tree->root,
+                              8 * sizeof(ulonglong), LL(0));
     }
   }
   return 0;
@@ -1448,23 +1449,23 @@
 
 static void make_traverse_code_tree(HUFF_TREE *huff_tree,
 				    HUFF_ELEMENT *element,
-				    uint size, ulong code)
+				    uint size, ulonglong code)
 {
   uint chr;
   if (!element->a.leaf.null)
   {
     chr=element->a.leaf.element_nr;
-    huff_tree->code_len[chr]=(uchar) (32-size);
-    huff_tree->code[chr]=    (code >> size);
-    if (huff_tree->height < 32-size)
-      huff_tree->height= 32-size;
+    huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size);
+    huff_tree->code[chr]= (code >> size);
+    if (huff_tree->height < 8 * sizeof(ulonglong) - size)
+        huff_tree->height= 8 * sizeof(ulonglong) - size;
   }
   else
   {
     size--;
     make_traverse_code_tree(huff_tree,element->a.nod.left,size,code);
-    make_traverse_code_tree(huff_tree,element->a.nod.right,size,
-			    code+((ulong) 1L << size));
+    make_traverse_code_tree(huff_tree, element->a.nod.right, size,
+			    code + (((ulonglong) 1) << size));
   }
   return;
 }
@@ -1505,13 +1506,13 @@
 
   for (i=0 ; i++ < fields ; counts++)
   {
-    write_bits((ulong) (int) counts->field_type,5);
+    write_bits((ulonglong) (int) counts->field_type, 5);
     write_bits(counts->pack_type,6);
     if (counts->pack_type & PACK_TYPE_ZERO_FILL)
       write_bits(counts->max_zero_fill,5);
     else
       write_bits(counts->length_bits,5);
-    write_bits((ulong) counts->tree->tree_number-1,huff_tree_bits);
+    write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits);
   }
   flush_bits();
   return;
@@ -1598,7 +1599,7 @@
     if (huff_tree->counts->tree_buff)
     {
       for (i=0 ; i < int_length ; i++)
-	write_bits((uint) (uchar) huff_tree->counts->tree_buff[i],8);
+ 	write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8);
     }
     flush_bits();
   }
@@ -1687,7 +1688,7 @@
 	(huff_counts[i].field_length - huff_counts[i].max_zero_fill)*
 	  huff_counts[i].tree->height+huff_counts[i].length_bits;
   }
-  max_calc_length/=8;
+  max_calc_length= (max_calc_length + 7) / 8;
   if (max_calc_length < 254)
     pack_ref_length=1;
   else if (max_calc_length <= 65535)
@@ -1753,7 +1754,7 @@
 	  break;
 	case FIELD_SKIP_ENDSPACE:
 	  for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ;
-	  length=(uint) (end_pos-pos);
+	  length= (ulong) (end_pos - pos);
 	  if (count->pack_type & PACK_TYPE_SELECTED)
 	  {
 	    if (length > count->min_space)
@@ -1776,7 +1777,7 @@
 	  break;
 	case FIELD_SKIP_PRESPACE:
 	  for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ;
-	  length=(uint) (pos-start_pos);
+          length= (ulong) (pos - start_pos);
 	  if (count->pack_type & PACK_TYPE_SELECTED)
 	  {
 	    if (length > count->min_space)
@@ -1929,7 +1930,7 @@
     file_buffer.pos=file_buffer.buffer;
     file_buffer.bits=BITS_SAVED;
   }
-  file_buffer.current_byte=0;
+  file_buffer.bitbucket= 0;
 }
 
 
@@ -1987,68 +1988,58 @@
 
 	/* output `bits` low bits of `value' */
 
-static void write_bits (register ulong value, register uint bits)
+static void write_bits(register ulonglong value, register uint bits)
 {
-  if ((file_buffer.bits-=(int) bits) >= 0)
+  DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) ||
+              (bits == 8 * sizeof(value)));
+  if ((file_buffer.bits-= (int) bits) >= 0)
   {
-    file_buffer.current_byte|=value << file_buffer.bits;
+    file_buffer.bitbucket|= value << file_buffer.bits;
   }
   else
   {
-    reg3 uint byte_buff;
+    reg3 ulonglong bit_buffer;
     bits= (uint) -file_buffer.bits;
-    DBUG_ASSERT(bits <= 8 * sizeof(value));
-    byte_buff= (file_buffer.current_byte |
-               ((bits != 8 * sizeof(value)) ? (uint) (value >> bits) : 0));
-#if BITS_SAVED == 32
-    *file_buffer.pos++= (byte) (byte_buff >> 24) ;
-    *file_buffer.pos++= (byte) (byte_buff >> 16) ;
+    bit_buffer= (file_buffer.bitbucket |
+                 ((bits != 8 * sizeof(value)) ? (uint) (value >> bits) : 0));
+#if BITS_SAVED == 64
+    *file_buffer.pos++= (byte) (bit_buffer >> 56);
+    *file_buffer.pos++= (byte) (bit_buffer >> 48);
+    *file_buffer.pos++= (byte) (bit_buffer >> 40);
+    *file_buffer.pos++= (byte) (bit_buffer >> 32);
 #endif
-    *file_buffer.pos++= (byte) (byte_buff >> 8) ;
-    *file_buffer.pos++= (byte) byte_buff;
+    *file_buffer.pos++= (byte) (bit_buffer >> 24) ;
+    *file_buffer.pos++= (byte) (bit_buffer >> 16) ;
+    *file_buffer.pos++= (byte) (bit_buffer >> 8) ;
+    *file_buffer.pos++= (byte) bit_buffer;
 
-    DBUG_ASSERT(bits <= 8 * sizeof(ulong));
     if (bits != 8 * sizeof(value))
-      value&= (((ulong) 1) << bits) - 1;
-#if BITS_SAVED == 16
-    if (bits >= sizeof(uint))
-    {
-      bits-=8;
-      *file_buffer.pos++= (uchar) (value >> bits);
-      value&= (1 << bits)-1;
-      if (bits >= sizeof(uint))
-      {
-	bits-=8;
-	*file_buffer.pos++= (uchar) (value >> bits);
-	value&= (1 << bits)-1;
-      }
-    }
-#endif
+      value&= (((ulonglong) 1) << bits) - 1;
     if (file_buffer.pos >= file_buffer.end)
       VOID(flush_buffer(~ (ulong) 0));
     file_buffer.bits=(int) (BITS_SAVED - bits);
-    file_buffer.current_byte=(uint) (value << (BITS_SAVED - bits));
+    file_buffer.bitbucket= value << (BITS_SAVED - bits);
   }
   return;
 }
 
 	/* Flush bits in bit_buffer to buffer */
 
-static void flush_bits (void)
+static void flush_bits(void)
 {
-  uint bits,byte_buff;
+  int bits;
+  ulonglong bit_buffer;
 
-  bits=(file_buffer.bits) & ~7;
-  byte_buff = file_buffer.current_byte >> bits;
-  bits=BITS_SAVED - bits;
+  bits= file_buffer.bits & ~7;
+  bit_buffer= file_buffer.bitbucket >> bits;
+  bits= BITS_SAVED - bits;
   while (bits > 0)
   {
-    bits-=8;
-    *file_buffer.pos++= (byte) (uchar) (byte_buff >> bits) ;
+    bits-= 8;
+    *file_buffer.pos++= (byte) (bit_buffer >> bits) ;
   }
-  file_buffer.bits=BITS_SAVED;
-  file_buffer.current_byte=0;
-  return;
+  file_buffer.bits= BITS_SAVED;
+  file_buffer.bitbucket= 0;
 }
 
 
Thread
bk commit into 5.0 tree (ingo:1.1919) BUG#8321ingo29 Apr