From: Date: March 18 2005 6:47pm Subject: bk commit into 5.0 tree (ingo:1.1813) BUG#8321 List-Archive: http://lists.mysql.com/internals/23186 X-Bug: 8321 Message-Id: Below is the list of changes that have just been committed into a local 5.0 repository of mydev. When mydev does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet 1.1813 05/03/18 18:47:25 ingo@stripped +1 -0 Bug#8321 - myisampack bug in compression algorithm This is the third of three changesets. It does not contain the bug fix. The bug revealed that a data amount of less than 3GB could require Huffman codes of 32 bits in length. So we can expect even longer codes in the near to mid future. Therefore we should use 64-bit types to store the Huffman codes. This changeset suggests that change. myisam/myisampack.c 1.40 05/03/18 18:46:14 ingo@stripped +30 -26 Bug#8321 - myisampack bug in compression algorithm This is the third of three changesets. It does not contain the bug fix. The bug revealed that a data amount of less than 3GB could require Huffman codes of 32 bits in length. So we can expect even longer codes in the near to mid future. Therefore we should use 64-bit types to store the Huffman codes. This changeset suggests that change. # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. # User: ingo # Host: chilla.local # Root: /home/mydev/mysql-5.0-bug8321 --- 1.39/myisam/myisampack.c Sat Dec 18 04:19:15 2004 +++ 1.40/myisam/myisampack.c Fri Mar 18 18:46:14 2005 @@ -31,6 +31,7 @@ #define __GNU_LIBRARY__ /* Skip warnings in getopt.h */ #endif #include +#include #if INT_MAX > 32767 #define BITS_SAVED 32 @@ -97,7 +98,7 @@ my_off_t bytes_packed; uint tree_pack_length; uint min_chr,max_chr,char_bits,offset_bits,max_offset,height; - ulong *code; + ulonglong *code; uchar *code_len; } HUFF_TREE; @@ -145,7 +146,7 @@ static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees); static void make_traverse_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element,uint size, - ulong code); + ulonglong code); static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees, my_off_t tot_elements,my_off_t filelength); static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees); @@ -160,7 +161,7 @@ static void init_file_buffer(File file,pbool read_buffer); static int flush_buffer(ulong neaded_length); static void end_file_buffer(void); -static void write_bits(ulong value,uint bits); +static void write_bits(ulonglong value,uint bits); static void flush_bits(void); static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length, ha_checksum crc); @@ -1433,12 +1434,13 @@ { elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256; if (!(huff_tree->code = - (ulong*) my_malloc(elements* - (sizeof(ulong)+sizeof(uchar)), - MYF(MY_WME | MY_ZEROFILL)))) + (ulonglong*) my_malloc(elements* + (sizeof(ulonglong)+sizeof(uchar)), + MYF(MY_WME | MY_ZEROFILL)))) return 1; huff_tree->code_len=(uchar*) (huff_tree->code+elements); - make_traverse_code_tree(huff_tree,huff_tree->root,32,0); + make_traverse_code_tree(huff_tree, huff_tree->root, + 8 * sizeof(ulonglong), (ulonglong) 0L); } } return 0; @@ -1447,23 +1449,23 @@ static void make_traverse_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element, - uint size, ulong code) + uint size, ulonglong code) { uint chr; if (!element->a.leaf.null) { chr=element->a.leaf.element_nr; - huff_tree->code_len[chr]=(uchar) (32-size); - huff_tree->code[chr]= (code >> size); - if (huff_tree->height < 32-size) - huff_tree->height= 32-size; + huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size); + huff_tree->code[chr]= (code >> size); + if (huff_tree->height < 8 * sizeof(ulonglong) - size) + huff_tree->height= 8 * sizeof(ulonglong) - size; } else { size--; make_traverse_code_tree(huff_tree,element->a.nod.left,size,code); - make_traverse_code_tree(huff_tree,element->a.nod.right,size, - code+((ulong) 1L << size)); + make_traverse_code_tree(huff_tree, element->a.nod.right, size, + code + (((ulonglong) 1) << size)); } return; } @@ -1504,13 +1506,13 @@ for (i=0 ; i++ < fields ; counts++) { - write_bits((ulong) (int) counts->field_type,5); + write_bits((ulonglong) (int) counts->field_type,5); write_bits(counts->pack_type,6); if (counts->pack_type & PACK_TYPE_ZERO_FILL) write_bits(counts->max_zero_fill,5); else write_bits(counts->length_bits,5); - write_bits((ulong) counts->tree->tree_number-1,huff_tree_bits); + write_bits((ulonglong) counts->tree->tree_number-1,huff_tree_bits); } flush_bits(); return; @@ -1597,7 +1599,7 @@ if (huff_tree->counts->tree_buff) { for (i=0 ; i < int_length ; i++) - write_bits((uint) (uchar) huff_tree->counts->tree_buff[i],8); + write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i],8); } flush_bits(); } @@ -1686,7 +1688,7 @@ (huff_counts[i].field_length - huff_counts[i].max_zero_fill)* huff_counts[i].tree->height+huff_counts[i].length_bits; } - max_calc_length/=8; + max_calc_length= (max_calc_length + 7) / 8; if (max_calc_length < 254) pack_ref_length=1; else if (max_calc_length <= 65535) @@ -1752,7 +1754,7 @@ break; case FIELD_SKIP_ENDSPACE: for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ; - length=(uint) (end_pos-pos); + length= (ulong) (end_pos - pos); if (count->pack_type & PACK_TYPE_SELECTED) { if (length > count->min_space) @@ -1775,7 +1777,7 @@ break; case FIELD_SKIP_PRESPACE: for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ; - length=(uint) (pos-start_pos); + length= (ulong) (pos - start_pos); if (count->pack_type & PACK_TYPE_SELECTED) { if (length > count->min_space) @@ -1973,11 +1975,13 @@ /* output `bits` low bits of `value' */ -static void write_bits (register ulong value, register uint bits) +static void write_bits(register ulonglong value, register uint bits) { - if ((file_buffer.bits-=(int) bits) >= 0) + DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) || + (bits == 8 * sizeof(value))); + if ((file_buffer.bits-= (int) bits) >= 0) { - file_buffer.current_byte|=value << file_buffer.bits; + file_buffer.current_byte|= value << file_buffer.bits; } else { @@ -1991,18 +1995,18 @@ *file_buffer.pos++= (byte) (byte_buff >> 8) ; *file_buffer.pos++= (byte) byte_buff; - value&=(1 << bits)-1; + value&= (((ulonglong) 1) << bits) - 1; #if BITS_SAVED == 16 if (bits >= sizeof(uint)) { bits-=8; *file_buffer.pos++= (uchar) (value >> bits); - value&= (1 << bits)-1; + value&= (((ulonglong) 1) << bits) - 1; if (bits >= sizeof(uint)) { bits-=8; *file_buffer.pos++= (uchar) (value >> bits); - value&= (1 << bits)-1; + value&= (((ulonglong) 1) << bits) - 1; } } #endif