mirror of https://github.com/mirror/busybox.git
decompress_bunzip2: code shrink
function old new delta get_next_block 1828 1827 -1 get_bits 164 156 -8 read_bunzip 304 261 -43 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-52) Total: -52 bytes Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>1_18_stable
parent
fb132e4737
commit
36ef0a677e
|
@ -16,7 +16,7 @@
|
||||||
function, and various other tweaks. In (limited) tests, approximately
|
function, and various other tweaks. In (limited) tests, approximately
|
||||||
20% faster than bzcat on x86 and about 10% faster on arm.
|
20% faster than bzcat on x86 and about 10% faster on arm.
|
||||||
|
|
||||||
Note that about 2/3 of the time is spent in read_unzip() reversing
|
Note that about 2/3 of the time is spent in read_bunzip() reversing
|
||||||
the Burrows-Wheeler transformation. Much of that time is delay
|
the Burrows-Wheeler transformation. Much of that time is delay
|
||||||
resulting from cache misses.
|
resulting from cache misses.
|
||||||
|
|
||||||
|
@ -70,23 +70,25 @@ struct bunzip_data {
|
||||||
/* I/O tracking data (file handles, buffers, positions, etc.) */
|
/* I/O tracking data (file handles, buffers, positions, etc.) */
|
||||||
unsigned inbufBitCount, inbufBits;
|
unsigned inbufBitCount, inbufBits;
|
||||||
int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/;
|
int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/;
|
||||||
unsigned char *inbuf /*,*outbuf*/;
|
uint8_t *inbuf /*,*outbuf*/;
|
||||||
|
|
||||||
/* State for interrupting output loop */
|
/* State for interrupting output loop */
|
||||||
int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
|
int writeCopies, writePos, writeRunCountdown, writeCount;
|
||||||
|
int writeCurrent; /* actually a uint8_t */
|
||||||
|
|
||||||
/* The CRC values stored in the block header and calculated from the data */
|
/* The CRC values stored in the block header and calculated from the data */
|
||||||
uint32_t headerCRC, totalCRC, writeCRC;
|
uint32_t headerCRC, totalCRC, writeCRC;
|
||||||
|
|
||||||
/* Intermediate buffer and its size (in bytes) */
|
/* Intermediate buffer and its size (in bytes) */
|
||||||
unsigned *dbuf, dbufSize;
|
uint32_t *dbuf;
|
||||||
|
unsigned dbufSize;
|
||||||
|
|
||||||
/* For I/O error handling */
|
/* For I/O error handling */
|
||||||
jmp_buf jmpbuf;
|
jmp_buf jmpbuf;
|
||||||
|
|
||||||
/* Big things go last (register-relative addressing can be larger for big offsets) */
|
/* Big things go last (register-relative addressing can be larger for big offsets) */
|
||||||
uint32_t crc32Table[256];
|
uint32_t crc32Table[256];
|
||||||
unsigned char selectors[32768]; /* nSelectors=15 bits */
|
uint8_t selectors[32768]; /* nSelectors=15 bits */
|
||||||
struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
|
struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
|
||||||
};
|
};
|
||||||
/* typedef struct bunzip_data bunzip_data; -- done in .h file */
|
/* typedef struct bunzip_data bunzip_data; -- done in .h file */
|
||||||
|
@ -94,14 +96,15 @@ struct bunzip_data {
|
||||||
|
|
||||||
/* Return the next nnn bits of input. All reads from the compressed input
|
/* Return the next nnn bits of input. All reads from the compressed input
|
||||||
are done through this function. All reads are big endian */
|
are done through this function. All reads are big endian */
|
||||||
|
|
||||||
static unsigned get_bits(bunzip_data *bd, int bits_wanted)
|
static unsigned get_bits(bunzip_data *bd, int bits_wanted)
|
||||||
{
|
{
|
||||||
unsigned bits = 0;
|
unsigned bits = 0;
|
||||||
|
/* Cache bd->inbufBitCount in a CPU register (hopefully): */
|
||||||
|
int bit_count = bd->inbufBitCount;
|
||||||
|
|
||||||
/* If we need to get more data from the byte buffer, do so. (Loop getting
|
/* If we need to get more data from the byte buffer, do so. (Loop getting
|
||||||
one byte at a time to enforce endianness and avoid unaligned access.) */
|
one byte at a time to enforce endianness and avoid unaligned access.) */
|
||||||
while ((int)(bd->inbufBitCount) < bits_wanted) {
|
while (bit_count < bits_wanted) {
|
||||||
|
|
||||||
/* If we need to read more data from file into byte buffer, do so */
|
/* If we need to read more data from file into byte buffer, do so */
|
||||||
if (bd->inbufPos == bd->inbufCount) {
|
if (bd->inbufPos == bd->inbufCount) {
|
||||||
|
@ -113,33 +116,35 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Avoid 32-bit overflow (dump bit buffer to top of output) */
|
/* Avoid 32-bit overflow (dump bit buffer to top of output) */
|
||||||
if (bd->inbufBitCount >= 24) {
|
if (bit_count >= 24) {
|
||||||
bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1);
|
bits = bd->inbufBits & ((1 << bit_count) - 1);
|
||||||
bits_wanted -= bd->inbufBitCount;
|
bits_wanted -= bit_count;
|
||||||
bits <<= bits_wanted;
|
bits <<= bits_wanted;
|
||||||
bd->inbufBitCount = 0;
|
bit_count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Grab next 8 bits of input from buffer. */
|
/* Grab next 8 bits of input from buffer. */
|
||||||
bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
|
bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
|
||||||
bd->inbufBitCount += 8;
|
bit_count += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Calculate result */
|
/* Calculate result */
|
||||||
bd->inbufBitCount -= bits_wanted;
|
bit_count -= bits_wanted;
|
||||||
bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1);
|
bd->inbufBitCount = bit_count;
|
||||||
|
bits |= (bd->inbufBits >> bit_count) & ((1 << bits_wanted) - 1);
|
||||||
|
|
||||||
return bits;
|
return bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
|
/* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */
|
||||||
static int get_next_block(bunzip_data *bd)
|
static int get_next_block(bunzip_data *bd)
|
||||||
{
|
{
|
||||||
struct group_data *hufGroup;
|
struct group_data *hufGroup;
|
||||||
int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector,
|
int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector,
|
||||||
i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256];
|
i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256];
|
||||||
unsigned char uc, symToByte[256], mtfSymbol[256], *selectors;
|
uint8_t uc, symToByte[256], mtfSymbol[256], *selectors;
|
||||||
unsigned *dbuf, origPtr;
|
uint32_t *dbuf;
|
||||||
|
unsigned origPtr;
|
||||||
|
|
||||||
dbuf = bd->dbuf;
|
dbuf = bd->dbuf;
|
||||||
dbufSize = bd->dbufSize;
|
dbufSize = bd->dbufSize;
|
||||||
|
@ -200,7 +205,8 @@ static int get_next_block(bunzip_data *bd)
|
||||||
|
|
||||||
/* Decode MTF to get the next selector */
|
/* Decode MTF to get the next selector */
|
||||||
uc = mtfSymbol[j];
|
uc = mtfSymbol[j];
|
||||||
for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1];
|
for (; j; j--)
|
||||||
|
mtfSymbol[j] = mtfSymbol[j-1];
|
||||||
mtfSymbol[0] = selectors[i] = uc;
|
mtfSymbol[0] = selectors[i] = uc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -208,7 +214,7 @@ static int get_next_block(bunzip_data *bd)
|
||||||
literal symbols, plus two run symbols (RUNA, RUNB) */
|
literal symbols, plus two run symbols (RUNA, RUNB) */
|
||||||
symCount = symTotal + 2;
|
symCount = symTotal + 2;
|
||||||
for (j = 0; j < groupCount; j++) {
|
for (j = 0; j < groupCount; j++) {
|
||||||
unsigned char length[MAX_SYMBOLS];
|
uint8_t length[MAX_SYMBOLS];
|
||||||
/* 8 bits is ALMOST enough for temp[], see below */
|
/* 8 bits is ALMOST enough for temp[], see below */
|
||||||
unsigned temp[MAX_HUFCODE_BITS+1];
|
unsigned temp[MAX_HUFCODE_BITS+1];
|
||||||
int minLen, maxLen, pp;
|
int minLen, maxLen, pp;
|
||||||
|
@ -315,7 +321,7 @@ static int get_next_block(bunzip_data *bd)
|
||||||
memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */
|
memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */
|
||||||
for (i = 0; i < 256; i++) {
|
for (i = 0; i < 256; i++) {
|
||||||
//byteCount[i] = 0;
|
//byteCount[i] = 0;
|
||||||
mtfSymbol[i] = (unsigned char)i;
|
mtfSymbol[i] = (uint8_t)i;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Loop through compressed symbols. */
|
/* Loop through compressed symbols. */
|
||||||
|
@ -456,7 +462,7 @@ static int get_next_block(bunzip_data *bd)
|
||||||
|
|
||||||
/* Figure out what order dbuf would be in if we sorted it. */
|
/* Figure out what order dbuf would be in if we sorted it. */
|
||||||
for (i = 0; i < dbufCount; i++) {
|
for (i = 0; i < dbufCount; i++) {
|
||||||
uc = (unsigned char)(dbuf[i] & 0xff);
|
uc = (uint8_t)dbuf[i];
|
||||||
dbuf[byteCount[uc]] |= (i << 8);
|
dbuf[byteCount[uc]] |= (i << 8);
|
||||||
byteCount[uc]++;
|
byteCount[uc]++;
|
||||||
}
|
}
|
||||||
|
@ -465,10 +471,11 @@ static int get_next_block(bunzip_data *bd)
|
||||||
doesn't get output, and if the first three characters are identical
|
doesn't get output, and if the first three characters are identical
|
||||||
it doesn't qualify as a run (hence writeRunCountdown=5). */
|
it doesn't qualify as a run (hence writeRunCountdown=5). */
|
||||||
if (dbufCount) {
|
if (dbufCount) {
|
||||||
|
uint32_t tmp;
|
||||||
if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR;
|
if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR;
|
||||||
bd->writePos = dbuf[origPtr];
|
tmp = dbuf[origPtr];
|
||||||
bd->writeCurrent = (unsigned char)(bd->writePos & 0xff);
|
bd->writeCurrent = (uint8_t)tmp;
|
||||||
bd->writePos >>= 8;
|
bd->writePos = (tmp >> 8);
|
||||||
bd->writeRunCountdown = 5;
|
bd->writeRunCountdown = 5;
|
||||||
}
|
}
|
||||||
bd->writeCount = dbufCount;
|
bd->writeCount = dbufCount;
|
||||||
|
@ -476,7 +483,7 @@ static int get_next_block(bunzip_data *bd)
|
||||||
return RETVAL_OK;
|
return RETVAL_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Undo burrows-wheeler transform on intermediate buffer to produce output.
|
/* Undo Burrows-Wheeler transform on intermediate buffer to produce output.
|
||||||
If start_bunzip was initialized with out_fd=-1, then up to len bytes of
|
If start_bunzip was initialized with out_fd=-1, then up to len bytes of
|
||||||
data are written to outbuf. Return value is number of bytes written or
|
data are written to outbuf. Return value is number of bytes written or
|
||||||
error (all errors are negative numbers). If out_fd!=-1, outbuf and len
|
error (all errors are negative numbers). If out_fd!=-1, outbuf and len
|
||||||
|
@ -484,7 +491,7 @@ static int get_next_block(bunzip_data *bd)
|
||||||
*/
|
*/
|
||||||
int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
||||||
{
|
{
|
||||||
const unsigned *dbuf;
|
const uint32_t *dbuf;
|
||||||
int pos, current, previous, gotcount;
|
int pos, current, previous, gotcount;
|
||||||
|
|
||||||
/* If last read was short due to end of file, return last block now */
|
/* If last read was short due to end of file, return last block now */
|
||||||
|
@ -500,6 +507,7 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
||||||
Huffman-decoded a block into the intermediate buffer yet). */
|
Huffman-decoded a block into the intermediate buffer yet). */
|
||||||
if (bd->writeCopies) {
|
if (bd->writeCopies) {
|
||||||
|
|
||||||
|
dec_writeCopies:
|
||||||
/* Inside the loop, writeCopies means extra copies (beyond 1) */
|
/* Inside the loop, writeCopies means extra copies (beyond 1) */
|
||||||
--bd->writeCopies;
|
--bd->writeCopies;
|
||||||
|
|
||||||
|
@ -508,10 +516,10 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
||||||
|
|
||||||
/* If the output buffer is full, snapshot state and return */
|
/* If the output buffer is full, snapshot state and return */
|
||||||
if (gotcount >= len) {
|
if (gotcount >= len) {
|
||||||
bd->writePos = pos;
|
/* Unlikely branch.
|
||||||
bd->writeCurrent = current;
|
* Use of "goto" instead of keeping code here
|
||||||
bd->writeCopies++;
|
* helps compiler to realize this. */
|
||||||
return len;
|
goto outbuf_full;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Write next byte into output buffer, updating CRC */
|
/* Write next byte into output buffer, updating CRC */
|
||||||
|
@ -521,15 +529,21 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
||||||
|
|
||||||
/* Loop now if we're outputting multiple copies of this byte */
|
/* Loop now if we're outputting multiple copies of this byte */
|
||||||
if (bd->writeCopies) {
|
if (bd->writeCopies) {
|
||||||
--bd->writeCopies;
|
/* Unlikely branch */
|
||||||
continue;
|
/*--bd->writeCopies;*/
|
||||||
|
/*continue;*/
|
||||||
|
/* Same, but (ab)using other existing --writeCopies operation.
|
||||||
|
* Luckily, this also compiles into just one branch insn: */
|
||||||
|
goto dec_writeCopies;
|
||||||
}
|
}
|
||||||
decode_next_byte:
|
decode_next_byte:
|
||||||
if (!bd->writeCount--) break;
|
if (--bd->writeCount < 0)
|
||||||
|
break; /* input block is fully consumed, need next one */
|
||||||
|
|
||||||
/* Follow sequence vector to undo Burrows-Wheeler transform */
|
/* Follow sequence vector to undo Burrows-Wheeler transform */
|
||||||
previous = current;
|
previous = current;
|
||||||
pos = dbuf[pos];
|
pos = dbuf[pos];
|
||||||
current = pos & 0xff;
|
current = (uint8_t)pos;
|
||||||
pos >>= 8;
|
pos >>= 8;
|
||||||
|
|
||||||
/* After 3 consecutive copies of the same byte, the 4th
|
/* After 3 consecutive copies of the same byte, the 4th
|
||||||
|
@ -539,7 +553,7 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
||||||
if (current != previous)
|
if (current != previous)
|
||||||
bd->writeRunCountdown = 4;
|
bd->writeRunCountdown = 4;
|
||||||
} else {
|
} else {
|
||||||
|
/* Unlikely branch */
|
||||||
/* We have a repeated run, this byte indicates the count */
|
/* We have a repeated run, this byte indicates the count */
|
||||||
bd->writeCopies = current;
|
bd->writeCopies = current;
|
||||||
current = previous;
|
current = previous;
|
||||||
|
@ -551,9 +565,9 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
||||||
/* Subtract the 1 copy we'd output anyway to get extras */
|
/* Subtract the 1 copy we'd output anyway to get extras */
|
||||||
--bd->writeCopies;
|
--bd->writeCopies;
|
||||||
}
|
}
|
||||||
}
|
} /* for(;;) */
|
||||||
|
|
||||||
/* Decompression of this block completed successfully */
|
/* Decompression of this input block completed successfully */
|
||||||
bd->writeCRC = ~bd->writeCRC;
|
bd->writeCRC = ~bd->writeCRC;
|
||||||
bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC;
|
bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC;
|
||||||
|
|
||||||
|
@ -565,16 +579,25 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Refill the intermediate buffer by Huffman-decoding next block of input */
|
/* Refill the intermediate buffer by Huffman-decoding next block of input */
|
||||||
/* (previous is just a convenient unused temp variable here) */
|
{
|
||||||
previous = get_next_block(bd);
|
int r = get_next_block(bd);
|
||||||
if (previous) {
|
if (r) {
|
||||||
bd->writeCount = previous;
|
bd->writeCount = r;
|
||||||
return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
|
return (r != RETVAL_LAST_BLOCK) ? r : gotcount;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bd->writeCRC = ~0;
|
bd->writeCRC = ~0;
|
||||||
pos = bd->writePos;
|
pos = bd->writePos;
|
||||||
current = bd->writeCurrent;
|
current = bd->writeCurrent;
|
||||||
goto decode_next_byte;
|
goto decode_next_byte;
|
||||||
|
|
||||||
|
outbuf_full:
|
||||||
|
/* Output buffer is full, snapshot state and return */
|
||||||
|
bd->writePos = pos;
|
||||||
|
bd->writeCurrent = current;
|
||||||
|
bd->writeCopies++;
|
||||||
|
return gotcount;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain
|
/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain
|
||||||
|
@ -607,7 +630,7 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
|
||||||
/* in this case, bd->inbuf is read-only */
|
/* in this case, bd->inbuf is read-only */
|
||||||
bd->inbuf = (void*)inbuf; /* cast away const-ness */
|
bd->inbuf = (void*)inbuf; /* cast away const-ness */
|
||||||
} else {
|
} else {
|
||||||
bd->inbuf = (unsigned char *)(bd + 1);
|
bd->inbuf = (uint8_t*)(bd + 1);
|
||||||
memcpy(bd->inbuf, inbuf, len);
|
memcpy(bd->inbuf, inbuf, len);
|
||||||
}
|
}
|
||||||
bd->inbufCount = len;
|
bd->inbufCount = len;
|
||||||
|
@ -634,7 +657,7 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
|
||||||
bd->dbufSize = 100000 * (i - h0);
|
bd->dbufSize = 100000 * (i - h0);
|
||||||
|
|
||||||
/* Cannot use xmalloc - may leak bd in NOFORK case! */
|
/* Cannot use xmalloc - may leak bd in NOFORK case! */
|
||||||
bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(int));
|
bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(bd->dbuf[0]));
|
||||||
if (!bd->dbuf) {
|
if (!bd->dbuf) {
|
||||||
free(bd);
|
free(bd);
|
||||||
xfunc_die();
|
xfunc_die();
|
||||||
|
|
Loading…
Reference in New Issue