mirror of https://github.com/mirror/busybox.git
unzip: properly use CDF to find compressed files. Closes 9536
function old new delta
unzip_main 2437 2350 -87
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
pull/2/head
parent
9c083f599a
commit
e3c4db8b39
213
archival/unzip.c
213
archival/unzip.c
|
@ -16,7 +16,6 @@
|
|||
* TODO
|
||||
* Zip64 + other methods
|
||||
*/
|
||||
|
||||
//config:config UNZIP
|
||||
//config: bool "unzip"
|
||||
//config: default y
|
||||
|
@ -24,8 +23,17 @@
|
|||
//config: unzip will list or extract files from a ZIP archive,
|
||||
//config: commonly found on DOS/WIN systems. The default behavior
|
||||
//config: (with no options) is to extract the archive into the
|
||||
//config: current directory. Use the `-d' option to extract to a
|
||||
//config: directory of your choice.
|
||||
//config: current directory.
|
||||
//config:
|
||||
//config:config FEATURE_UNZIP_CDF
|
||||
//config: bool "Read and use Central Directory data"
|
||||
//config: default y
|
||||
//config: depends on UNZIP
|
||||
//config: help
|
||||
//config: If you know that you only need to deal with simple
|
||||
//config: ZIP files without deleted/updated files, SFX archives etc,
|
||||
//config: you can reduce code size by unselecting this option.
|
||||
//config: To support less trivial ZIPs, say Y.
|
||||
|
||||
//applet:IF_UNZIP(APPLET(unzip, BB_DIR_USR_BIN, BB_SUID_DROP))
|
||||
//kbuild:lib-$(CONFIG_UNZIP) += unzip.o
|
||||
|
@ -80,30 +88,20 @@ typedef union {
|
|||
uint32_t ucmpsize PACKED; /* 18-21 */
|
||||
uint16_t filename_len; /* 22-23 */
|
||||
uint16_t extra_len; /* 24-25 */
|
||||
/* filename follows (not NUL terminated) */
|
||||
/* extra field follows */
|
||||
/* data follows */
|
||||
} formatted PACKED;
|
||||
} zip_header_t; /* PACKED - gcc 4.2.1 doesn't like it (spews warning) */
|
||||
|
||||
/* Check the offset of the last element, not the length. This leniency
|
||||
* allows for poor packing, whereby the overall struct may be too long,
|
||||
* even though the elements are all in the right place.
|
||||
*/
|
||||
struct BUG_zip_header_must_be_26_bytes {
|
||||
char BUG_zip_header_must_be_26_bytes[
|
||||
offsetof(zip_header_t, formatted.extra_len) + 2
|
||||
== ZIP_HEADER_LEN ? 1 : -1];
|
||||
};
|
||||
|
||||
#define FIX_ENDIANNESS_ZIP(zip_header) do { \
|
||||
(zip_header).formatted.version = SWAP_LE16((zip_header).formatted.version ); \
|
||||
(zip_header).formatted.method = SWAP_LE16((zip_header).formatted.method ); \
|
||||
(zip_header).formatted.modtime = SWAP_LE16((zip_header).formatted.modtime ); \
|
||||
(zip_header).formatted.moddate = SWAP_LE16((zip_header).formatted.moddate ); \
|
||||
#define FIX_ENDIANNESS_ZIP(zip_header) \
|
||||
do { if (BB_BIG_ENDIAN) { \
|
||||
(zip_header).formatted.crc32 = SWAP_LE32((zip_header).formatted.crc32 ); \
|
||||
(zip_header).formatted.cmpsize = SWAP_LE32((zip_header).formatted.cmpsize ); \
|
||||
(zip_header).formatted.ucmpsize = SWAP_LE32((zip_header).formatted.ucmpsize ); \
|
||||
(zip_header).formatted.filename_len = SWAP_LE16((zip_header).formatted.filename_len); \
|
||||
(zip_header).formatted.extra_len = SWAP_LE16((zip_header).formatted.extra_len ); \
|
||||
} while (0)
|
||||
}} while (0)
|
||||
|
||||
#define CDF_HEADER_LEN 42
|
||||
|
||||
|
@ -115,8 +113,8 @@ typedef union {
|
|||
uint16_t version_needed; /* 2-3 */
|
||||
uint16_t cdf_flags; /* 4-5 */
|
||||
uint16_t method; /* 6-7 */
|
||||
uint16_t mtime; /* 8-9 */
|
||||
uint16_t mdate; /* 10-11 */
|
||||
uint16_t modtime; /* 8-9 */
|
||||
uint16_t moddate; /* 10-11 */
|
||||
uint32_t crc32; /* 12-15 */
|
||||
uint32_t cmpsize; /* 16-19 */
|
||||
uint32_t ucmpsize; /* 20-23 */
|
||||
|
@ -127,27 +125,27 @@ typedef union {
|
|||
uint16_t internal_file_attributes; /* 32-33 */
|
||||
uint32_t external_file_attributes PACKED; /* 34-37 */
|
||||
uint32_t relative_offset_of_local_header PACKED; /* 38-41 */
|
||||
/* filename follows (not NUL terminated) */
|
||||
/* extra field follows */
|
||||
/* comment follows */
|
||||
} formatted PACKED;
|
||||
} cdf_header_t;
|
||||
|
||||
struct BUG_cdf_header_must_be_42_bytes {
|
||||
char BUG_cdf_header_must_be_42_bytes[
|
||||
offsetof(cdf_header_t, formatted.relative_offset_of_local_header) + 4
|
||||
== CDF_HEADER_LEN ? 1 : -1];
|
||||
};
|
||||
|
||||
#define FIX_ENDIANNESS_CDF(cdf_header) do { \
|
||||
#define FIX_ENDIANNESS_CDF(cdf_header) \
|
||||
do { if (BB_BIG_ENDIAN) { \
|
||||
(cdf_header).formatted.version_made_by = SWAP_LE16((cdf_header).formatted.version_made_by); \
|
||||
(cdf_header).formatted.version_needed = SWAP_LE16((cdf_header).formatted.version_needed); \
|
||||
(cdf_header).formatted.method = SWAP_LE16((cdf_header).formatted.method ); \
|
||||
(cdf_header).formatted.modtime = SWAP_LE16((cdf_header).formatted.modtime ); \
|
||||
(cdf_header).formatted.moddate = SWAP_LE16((cdf_header).formatted.moddate ); \
|
||||
(cdf_header).formatted.crc32 = SWAP_LE32((cdf_header).formatted.crc32 ); \
|
||||
(cdf_header).formatted.cmpsize = SWAP_LE32((cdf_header).formatted.cmpsize ); \
|
||||
(cdf_header).formatted.ucmpsize = SWAP_LE32((cdf_header).formatted.ucmpsize ); \
|
||||
(cdf_header).formatted.file_name_length = SWAP_LE16((cdf_header).formatted.file_name_length); \
|
||||
(cdf_header).formatted.extra_field_length = SWAP_LE16((cdf_header).formatted.extra_field_length); \
|
||||
(cdf_header).formatted.file_comment_length = SWAP_LE16((cdf_header).formatted.file_comment_length); \
|
||||
IF_DESKTOP( \
|
||||
(cdf_header).formatted.version_made_by = SWAP_LE16((cdf_header).formatted.version_made_by); \
|
||||
(cdf_header).formatted.external_file_attributes = SWAP_LE32((cdf_header).formatted.external_file_attributes); \
|
||||
) \
|
||||
} while (0)
|
||||
}} while (0)
|
||||
|
||||
#define CDE_HEADER_LEN 16
|
||||
|
||||
|
@ -166,20 +164,38 @@ typedef union {
|
|||
} formatted PACKED;
|
||||
} cde_header_t;
|
||||
|
||||
struct BUG_cde_header_must_be_16_bytes {
|
||||
#define FIX_ENDIANNESS_CDE(cde_header) \
|
||||
do { if (BB_BIG_ENDIAN) { \
|
||||
(cde_header).formatted.cdf_offset = SWAP_LE32((cde_header).formatted.cdf_offset); \
|
||||
}} while (0)
|
||||
|
||||
struct BUG {
|
||||
/* Check the offset of the last element, not the length. This leniency
|
||||
* allows for poor packing, whereby the overall struct may be too long,
|
||||
* even though the elements are all in the right place.
|
||||
*/
|
||||
char BUG_zip_header_must_be_26_bytes[
|
||||
offsetof(zip_header_t, formatted.extra_len) + 2
|
||||
== ZIP_HEADER_LEN ? 1 : -1];
|
||||
char BUG_cdf_header_must_be_42_bytes[
|
||||
offsetof(cdf_header_t, formatted.relative_offset_of_local_header) + 4
|
||||
== CDF_HEADER_LEN ? 1 : -1];
|
||||
char BUG_cde_header_must_be_16_bytes[
|
||||
sizeof(cde_header_t) == CDE_HEADER_LEN ? 1 : -1];
|
||||
};
|
||||
|
||||
#define FIX_ENDIANNESS_CDE(cde_header) do { \
|
||||
(cde_header).formatted.cdf_offset = SWAP_LE32((cde_header).formatted.cdf_offset); \
|
||||
} while (0)
|
||||
|
||||
enum { zip_fd = 3 };
|
||||
|
||||
|
||||
#if ENABLE_DESKTOP
|
||||
/* This value means that we failed to find CDF */
|
||||
#define BAD_CDF_OFFSET ((uint32_t)0xffffffff)
|
||||
|
||||
#if !ENABLE_FEATURE_UNZIP_CDF
|
||||
|
||||
# define find_cdf_offset() BAD_CDF_OFFSET
|
||||
|
||||
#else
|
||||
/* Seen in the wild:
|
||||
* Self-extracting PRO2K3XP_32.exe contains 19078464 byte zip archive,
|
||||
* where CDE was nearly 48 kbytes before EOF.
|
||||
|
@ -188,25 +204,26 @@ enum { zip_fd = 3 };
|
|||
* To make extraction work, bumped PEEK_FROM_END from 16k to 64k.
|
||||
*/
|
||||
#define PEEK_FROM_END (64*1024)
|
||||
|
||||
/* This value means that we failed to find CDF */
|
||||
#define BAD_CDF_OFFSET ((uint32_t)0xffffffff)
|
||||
|
||||
/* NB: does not preserve file position! */
|
||||
static uint32_t find_cdf_offset(void)
|
||||
{
|
||||
cde_header_t cde_header;
|
||||
unsigned char *buf;
|
||||
unsigned char *p;
|
||||
off_t end;
|
||||
unsigned char *buf = xzalloc(PEEK_FROM_END);
|
||||
uint32_t found;
|
||||
|
||||
end = xlseek(zip_fd, 0, SEEK_END);
|
||||
end = lseek(zip_fd, 0, SEEK_END);
|
||||
if (end == (off_t) -1)
|
||||
return BAD_CDF_OFFSET;
|
||||
|
||||
end -= PEEK_FROM_END;
|
||||
if (end < 0)
|
||||
end = 0;
|
||||
|
||||
dbg("Looking for cdf_offset starting from 0x%"OFF_FMT"x", end);
|
||||
xlseek(zip_fd, end, SEEK_SET);
|
||||
buf = xzalloc(PEEK_FROM_END);
|
||||
full_read(zip_fd, buf, PEEK_FROM_END);
|
||||
|
||||
found = BAD_CDF_OFFSET;
|
||||
|
@ -252,16 +269,25 @@ static uint32_t find_cdf_offset(void)
|
|||
static uint32_t read_next_cdf(uint32_t cdf_offset, cdf_header_t *cdf_ptr)
|
||||
{
|
||||
off_t org;
|
||||
uint32_t magic;
|
||||
|
||||
if (cdf_offset == BAD_CDF_OFFSET)
|
||||
return cdf_offset;
|
||||
|
||||
org = xlseek(zip_fd, 0, SEEK_CUR);
|
||||
|
||||
if (!cdf_offset)
|
||||
cdf_offset = find_cdf_offset();
|
||||
|
||||
if (cdf_offset != BAD_CDF_OFFSET) {
|
||||
dbg("Reading CDF at 0x%x", (unsigned)cdf_offset);
|
||||
xlseek(zip_fd, cdf_offset + 4, SEEK_SET);
|
||||
xlseek(zip_fd, cdf_offset, SEEK_SET);
|
||||
xread(zip_fd, &magic, 4);
|
||||
/* Central Directory End? */
|
||||
if (magic == ZIP_CDE_MAGIC) {
|
||||
dbg("got ZIP_CDE_MAGIC");
|
||||
return 0; /* EOF */
|
||||
}
|
||||
xread(zip_fd, cdf_ptr->raw, CDF_HEADER_LEN);
|
||||
/* Caller doesn't need this: */
|
||||
/* dbg("Returning file position to 0x%"OFF_FMT"x", org); */
|
||||
/* xlseek(zip_fd, org, SEEK_SET); */
|
||||
|
||||
FIX_ENDIANNESS_CDF(*cdf_ptr);
|
||||
dbg(" file_name_length:%u extra_field_length:%u file_comment_length:%u",
|
||||
(unsigned)cdf_ptr->formatted.file_name_length,
|
||||
|
@ -272,10 +298,7 @@ static uint32_t read_next_cdf(uint32_t cdf_offset, cdf_header_t *cdf_ptr)
|
|||
+ cdf_ptr->formatted.file_name_length
|
||||
+ cdf_ptr->formatted.extra_field_length
|
||||
+ cdf_ptr->formatted.file_comment_length;
|
||||
}
|
||||
|
||||
dbg("Returning file position to 0x%"OFF_FMT"x", org);
|
||||
xlseek(zip_fd, org, SEEK_SET);
|
||||
return cdf_offset;
|
||||
};
|
||||
#endif
|
||||
|
@ -324,6 +347,7 @@ static void unzip_extract(zip_header_t *zip_header, int dst_fd)
|
|||
bb_error_msg("bad length");
|
||||
}
|
||||
}
|
||||
/* TODO? method 12: bzip2, method 14: LZMA */
|
||||
}
|
||||
|
||||
static void my_fgets80(char *buf80)
|
||||
|
@ -339,15 +363,12 @@ int unzip_main(int argc, char **argv)
|
|||
{
|
||||
enum { O_PROMPT, O_NEVER, O_ALWAYS };
|
||||
|
||||
zip_header_t zip_header;
|
||||
smallint quiet = 0;
|
||||
IF_NOT_DESKTOP(const) smallint verbose = 0;
|
||||
IF_NOT_FEATURE_UNZIP_CDF(const) smallint verbose = 0;
|
||||
smallint listing = 0;
|
||||
smallint overwrite = O_PROMPT;
|
||||
smallint x_opt_seen;
|
||||
#if ENABLE_DESKTOP
|
||||
uint32_t cdf_offset;
|
||||
#endif
|
||||
unsigned long total_usize;
|
||||
unsigned long total_size;
|
||||
unsigned total_entries;
|
||||
|
@ -430,7 +451,7 @@ int unzip_main(int argc, char **argv)
|
|||
break;
|
||||
|
||||
case 'v': /* Verbose list */
|
||||
IF_DESKTOP(verbose++;)
|
||||
IF_FEATURE_UNZIP_CDF(verbose++;)
|
||||
listing = 1;
|
||||
break;
|
||||
|
||||
|
@ -545,16 +566,32 @@ int unzip_main(int argc, char **argv)
|
|||
total_usize = 0;
|
||||
total_size = 0;
|
||||
total_entries = 0;
|
||||
#if ENABLE_DESKTOP
|
||||
cdf_offset = 0;
|
||||
#endif
|
||||
cdf_offset = find_cdf_offset(); /* try to seek to the end, find CDE and CDF start */
|
||||
while (1) {
|
||||
uint32_t magic;
|
||||
zip_header_t zip_header;
|
||||
mode_t dir_mode = 0777;
|
||||
#if ENABLE_DESKTOP
|
||||
#if ENABLE_FEATURE_UNZIP_CDF
|
||||
mode_t file_mode = 0666;
|
||||
#endif
|
||||
|
||||
if (!ENABLE_FEATURE_UNZIP_CDF || cdf_offset == BAD_CDF_OFFSET) {
|
||||
/* Normally happens when input is unseekable.
|
||||
*
|
||||
* Valid ZIP file has Central Directory at the end
|
||||
* with central directory file headers (CDFs).
|
||||
* After it, there is a Central Directory End structure.
|
||||
* CDFs identify what files are in the ZIP and where
|
||||
* they are located. This allows ZIP readers to load
|
||||
* the list of files without reading the entire ZIP archive.
|
||||
* ZIP files may be appended to; only files specified in
|
||||
* the CD are valid. Scanning for local file headers is
|
||||
* not a correct algorithm.
|
||||
*
|
||||
* We try to do the above, and resort to "linear" reading
|
||||
* of ZIP file only if seek failed or CDE wasn't found.
|
||||
*/
|
||||
uint32_t magic;
|
||||
|
||||
/* Check magic number */
|
||||
xread(zip_fd, &magic, 4);
|
||||
/* Central directory? It's at the end, so exit */
|
||||
|
@ -562,7 +599,6 @@ int unzip_main(int argc, char **argv)
|
|||
dbg("got ZIP_CDF_MAGIC");
|
||||
break;
|
||||
}
|
||||
#if ENABLE_DESKTOP
|
||||
/* Data descriptor? It was a streaming file, go on */
|
||||
if (magic == ZIP_DD_MAGIC) {
|
||||
dbg("got ZIP_DD_MAGIC");
|
||||
|
@ -570,53 +606,62 @@ int unzip_main(int argc, char **argv)
|
|||
unzip_skip(3 * 4);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
if (magic != ZIP_FILEHEADER_MAGIC)
|
||||
bb_error_msg_and_die("invalid zip magic %08X", (int)magic);
|
||||
dbg("got ZIP_FILEHEADER_MAGIC");
|
||||
|
||||
/* Read the file header */
|
||||
xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN);
|
||||
FIX_ENDIANNESS_ZIP(zip_header);
|
||||
if ((zip_header.formatted.method != 0) && (zip_header.formatted.method != 8)) {
|
||||
if ((zip_header.formatted.method != 0)
|
||||
&& (zip_header.formatted.method != 8)
|
||||
) {
|
||||
/* TODO? method 12: bzip2, method 14: LZMA */
|
||||
bb_error_msg_and_die("unsupported method %d", zip_header.formatted.method);
|
||||
}
|
||||
#if !ENABLE_DESKTOP
|
||||
if (zip_header.formatted.zip_flags & SWAP_LE16(0x0009)) {
|
||||
bb_error_msg_and_die("zip flags 1 and 8 are not supported");
|
||||
}
|
||||
#else
|
||||
if (zip_header.formatted.zip_flags & SWAP_LE16(0x0001)) {
|
||||
/* 0x0001 - encrypted */
|
||||
bb_error_msg_and_die("zip flag 1 (encryption) is not supported");
|
||||
}
|
||||
|
||||
if (cdf_offset != BAD_CDF_OFFSET) {
|
||||
#if ENABLE_FEATURE_UNZIP_CDF
|
||||
else {
|
||||
/* cdf_offset is valid (and we know the file is seekable) */
|
||||
cdf_header_t cdf_header;
|
||||
cdf_offset = read_next_cdf(cdf_offset, &cdf_header);
|
||||
/*
|
||||
* Note: cdf_offset can become BAD_CDF_OFFSET after the above call.
|
||||
*/
|
||||
if (cdf_offset == 0) /* EOF? */
|
||||
break;
|
||||
# if 0
|
||||
xlseek(zip_fd,
|
||||
SWAP_LE32(cdf_header.formatted.relative_offset_of_local_header) + 4,
|
||||
SEEK_SET);
|
||||
xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN);
|
||||
FIX_ENDIANNESS_ZIP(zip_header);
|
||||
if (zip_header.formatted.zip_flags & SWAP_LE16(0x0008)) {
|
||||
/* 0x0008 - streaming. [u]cmpsize can be reliably gotten
|
||||
* only from Central Directory. See unzip_doc.txt
|
||||
* only from Central Directory.
|
||||
*/
|
||||
zip_header.formatted.crc32 = cdf_header.formatted.crc32;
|
||||
zip_header.formatted.cmpsize = cdf_header.formatted.cmpsize;
|
||||
zip_header.formatted.ucmpsize = cdf_header.formatted.ucmpsize;
|
||||
}
|
||||
# else
|
||||
/* CDF has the same data as local header, no need to read the latter */
|
||||
memcpy(&zip_header.formatted.version,
|
||||
&cdf_header.formatted.version_needed, ZIP_HEADER_LEN);
|
||||
xlseek(zip_fd,
|
||||
SWAP_LE32(cdf_header.formatted.relative_offset_of_local_header) + 4 + ZIP_HEADER_LEN,
|
||||
SEEK_SET);
|
||||
# endif
|
||||
if ((cdf_header.formatted.version_made_by >> 8) == 3) {
|
||||
/* This archive is created on Unix */
|
||||
dir_mode = file_mode = (cdf_header.formatted.external_file_attributes >> 16);
|
||||
}
|
||||
}
|
||||
if (cdf_offset == BAD_CDF_OFFSET
|
||||
&& (zip_header.formatted.zip_flags & SWAP_LE16(0x0008))
|
||||
) {
|
||||
/* If it's a streaming zip, we _require_ CDF */
|
||||
bb_error_msg_and_die("can't find file table");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (zip_header.formatted.zip_flags & SWAP_LE16(0x0001)) {
|
||||
/* 0x0001 - encrypted */
|
||||
bb_error_msg_and_die("zip flag 1 (encryption) is not supported");
|
||||
}
|
||||
dbg("File cmpsize:0x%x extra_len:0x%x ucmpsize:0x%x",
|
||||
(unsigned)zip_header.formatted.cmpsize,
|
||||
(unsigned)zip_header.formatted.extra_len,
|
||||
|
@ -751,7 +796,7 @@ int unzip_main(int argc, char **argv)
|
|||
overwrite = O_ALWAYS;
|
||||
case 'y': /* Open file and fall into unzip */
|
||||
unzip_create_leading_dirs(dst_fn);
|
||||
#if ENABLE_DESKTOP
|
||||
#if ENABLE_FEATURE_UNZIP_CDF
|
||||
dst_fd = xopen3(dst_fn, O_WRONLY | O_CREAT | O_TRUNC, file_mode);
|
||||
#else
|
||||
dst_fd = xopen(dst_fn, O_WRONLY | O_CREAT | O_TRUNC);
|
||||
|
|
|
@ -31,11 +31,10 @@ rmdir foo
|
|||
rm foo.zip
|
||||
|
||||
# File containing some damaged encrypted stream
|
||||
optional FEATURE_UNZIP_CDF
|
||||
testing "unzip (bad archive)" "uudecode; unzip bad.zip 2>&1; echo \$?" \
|
||||
"Archive: bad.zip
|
||||
inflating: ]3j½r«IK-%Ix
|
||||
unzip: corrupted data
|
||||
unzip: inflate error
|
||||
unzip: short read
|
||||
1
|
||||
" \
|
||||
"" "\
|
||||
|
@ -49,6 +48,7 @@ BDYAAAAMAAEADQAAADIADQAAAEEAAAASw73Ct1DKokohPXQiNzA+FAI1HCcW
|
|||
NzITNFBLBQUKAC4JAA04Cw0EOhZQSwUGAQAABAIAAgCZAAAAeQAAAAIALhM=
|
||||
====
|
||||
"
|
||||
SKIP=
|
||||
|
||||
rm *
|
||||
|
||||
|
|
Loading…
Reference in New Issue