Patch summary (descriptive preamble; patch tools skip text that precedes the
first "diff --git" header):
  * compress.deflate: document compress_zlib, make decompress_zlib and
    decompress_gzip public, and make huff_decode return
    'inflate: unexpected end of stream' when a code needs more bits than
    are currently buffered; add a regression test for a truncated payload.
  * compress.zlib: implement compress/decompress via compress.deflate, remove
    the miniz name-collision header and its test, and extend the test suite.
  * compress.zlib/interop: add a V script plus C (libz) and Python helpers
    that cross-check zlib streams between the three implementations.

diff --git a/vlib/compress/deflate/deflate.v b/vlib/compress/deflate/deflate.v
index 4e4900f947f6ef..865b2ef74b55b1 100644
--- a/vlib/compress/deflate/deflate.v
+++ b/vlib/compress/deflate/deflate.v
@@ -26,6 +26,7 @@ pub fn compress(data []u8, format CompressParams) ![]u8 {
 	}
 }
 
+// compress_zlib compresses data into a zlib stream (RFC 1950).
 pub fn compress_zlib(data []u8) ![]u8 {
 	payload := deflate_compress_fixed(data)
 	cksum := adler32(data)
@@ -71,7 +72,9 @@ pub fn decompress(data []u8) ![]u8 {
 	return inflate(data)
 }
 
-fn decompress_zlib(data []u8) ![]u8 {
+// decompress_zlib decompresses a zlib stream (RFC 1950) and returns the
+// decompressed bytes in a new array.
+pub fn decompress_zlib(data []u8) ![]u8 {
 	if data.len < 6 {
 		return error('invalid zlib stream: too short')
 	}
@@ -93,7 +96,9 @@ fn decompress_zlib(data []u8) ![]u8 {
 	return decoded
 }
 
-fn decompress_gzip(data []u8) ![]u8 {
+// decompress_gzip decompresses a gzip stream (RFC 1952) and returns the
+// decompressed bytes in a new array.
+pub fn decompress_gzip(data []u8) ![]u8 {
 	if data.len < 18 {
 		return error('invalid gzip stream: too short')
 	}
diff --git a/vlib/compress/deflate/deflate_inflate.v b/vlib/compress/deflate/deflate_inflate.v
index 9d159d9d89e856..6abf4f33fe7324 100644
--- a/vlib/compress/deflate/deflate_inflate.v
+++ b/vlib/compress/deflate/deflate_inflate.v
@@ -144,6 +144,9 @@ fn (mut r BitReader) huff_decode(t HuffTree) !u32 {
 		return error('inflate: invalid Huffman code')
 	}
 	len_ := int(entry & 0x1f)
+	if len_ > r.nbits {
+		return error('inflate: unexpected end of stream')
+	}
 	sym := entry >> 5
 	r.bits >>= u32(len_)
 	r.nbits -= len_
diff --git a/vlib/compress/deflate/deflate_test.v b/vlib/compress/deflate/deflate_test.v
index 06bcbedafec148..0b69332b98025d 100644
--- a/vlib/compress/deflate/deflate_test.v
+++ b/vlib/compress/deflate/deflate_test.v
@@ -61,3 +61,11 @@ fn test_corrupt_checksum_fails() {
 	}
 	assert false
 }
+
+fn test_truncated_zlib_payload_fails() {
+	decompress([u8(0x78), 0x9c, 0x03, 0x00, 0x00, 0x00, 0x01]) or {
+		assert err.msg().contains('unexpected end of stream')
+		return
+	}
+	assert false
+}
diff --git a/vlib/compress/zlib/README.md b/vlib/compress/zlib/README.md
index 9436ae3f4ade5b..7e6db1e045b9d7 100644
--- a/vlib/compress/zlib/README.md
+++ b/vlib/compress/zlib/README.md
@@ -1,7 +1,7 @@
 ## Description
 
-`compress.zlib` is a module that assists in the compression and
-decompression of binary data using `zlib` compression
+`compress.zlib` is a small wrapper around `compress.deflate` for working with zlib streams
+(RFC 1950).
 
 ## Examples
 
@@ -9,9 +9,9 @@ decompression of binary data using `zlib` compression
 import compress.zlib
 
 fn main() {
-	uncompressed := 'Hello world!'
-	compressed := zlib.compress(uncompressed.bytes())!
-	decompressed := zlib.decompress(compressed)!
-	assert decompressed == uncompressed.bytes()
+	data := 'Hello world!'.bytes()
+	compressed := zlib.compress(data) or { panic(err) }
+	decompressed := zlib.decompress(compressed) or { panic(err) }
+	assert decompressed == data
 }
 ```
diff --git a/vlib/compress/zlib/interop/README.md b/vlib/compress/zlib/interop/README.md
new file mode 100644
index 00000000000000..99cf6ea94c7674
--- /dev/null
+++ b/vlib/compress/zlib/interop/README.md
@@ -0,0 +1,16 @@
+# zlib interop checker
+This helper verifies interoperability between:
+- V module: `compress.zlib`
+- C helper linked with `libz`
+- Python stdlib: `zlib`
+It runs deterministic test vectors, compresses each vector with all three producers, then
+cross-decompresses every produced stream with all three consumers.
+A case passes only if every decompressed output is byte-identical to the original input.
+## Run
+```bash
+./vnew run vlib/compress/zlib/interop/zlib_interop.vsh
+```
+## Requirements
+- `python3` with the stdlib `zlib` module
+- a C compiler (`cc`, `gcc`, or `clang`)
+- `libz` development headers and linker support (`-lz`)
diff --git a/vlib/compress/zlib/interop/zlib_interop.vsh b/vlib/compress/zlib/interop/zlib_interop.vsh
new file mode 100644
index 00000000000000..85ff5ed81243b1
--- /dev/null
+++ b/vlib/compress/zlib/interop/zlib_interop.vsh
@@ -0,0 +1,185 @@
+#!/usr/bin/env -S v
+
+import compress.zlib
+
+struct TestVector {
+	name string
+	data []u8
+}
+
+fn main() {
+	tmp_root := join_path(temp_dir(), 'v_zlib_interop_${getpid()}')
+	mkdir_all(tmp_root) or { panic(err) }
+	defer {
+		rmdir_all(tmp_root) or {}
+	}
+
+	c_bin := compile_c_helper(tmp_root) or {
+		eprintln('SKIP: ${err.msg()}')
+		exit(2)
+	}
+	py_script := join_path(@DIR, 'zlib_ref.py')
+	ensure_python(py_script) or {
+		eprintln('SKIP: ${err.msg()}')
+		exit(2)
+	}
+
+	vectors := make_test_vectors()
+	mut total_checks := 0
+	for i, vec in vectors {
+		total_checks += run_case(tmp_root, c_bin, py_script, i, vec) or {
+			eprintln('FAIL: ${vec.name}: ${err.msg()}')
+			exit(1)
+			0
+		}
+		println('ok ${i + 1}/${vectors.len}: ${vec.name} (${vec.data.len} bytes)')
+	}
+	println('PASS: ${vectors.len} vectors, ${total_checks} cross-checks')
+}
+
+fn compile_c_helper(tmp_root string) !string {
+	cc := choose_cc()
+	if cc == '' {
+		return error('no C compiler found (tried cc, gcc, clang)')
+	}
+	src := join_path(@DIR, 'zlib_ref.c')
+	bin := join_path(tmp_root, 'zlib_interop_ref')
+	must_succeed('${cc} -O2 ${shell_quote(src)} -lz -o ${shell_quote(bin)}',
+		'C zlib helper build failed')!
+	return bin
+}
+
+fn choose_cc() string {
+	for cc in ['cc', 'gcc', 'clang'] {
+		if execute('${cc} --version >/dev/null 2>&1').exit_code == 0 {
+			return cc
+		}
+	}
+	return ''
+}
+
+fn ensure_python(py_script string) ! {
+	must_succeed("python3 -c 'import zlib' >/dev/null 2>&1",
+		'python3 with zlib module is not available')!
+	if !exists(py_script) {
+		return error('missing Python helper: ${py_script}')
+	}
+}
+
+fn make_test_vectors() []TestVector {
+	mut vectors := []TestVector{}
+	vectors << TestVector{'empty', []u8{}}
+	vectors << TestVector{'ascii_text', 'The quick brown fox jumps over the lazy dog.\n'.repeat(64).bytes()}
+	vectors << TestVector{'repeated_byte', []u8{len: 10000, init: `A`}}
+	vectors << TestVector{'all_bytes_x4', all_bytes_repeated(4)}
+	vectors << TestVector{'lcg_64k', lcg_bytes(65536)}
+	return vectors
+}
+
+fn run_case(tmp_root string, c_bin string, py_script string, case_idx int, vec TestVector) !int {
+	case_dir := join_path(tmp_root, 'case_${case_idx:02}_${vec.name}')
+	mkdir_all(case_dir)!
+
+	v_z := zlib.compress(vec.data)!
+	c_z := c_compress(case_dir, c_bin, vec.data)!
+	py_z := py_compress(case_dir, py_script, vec.data)!
+
+	mut checks := 0
+	producers := {
+		'v': v_z
+		'c': c_z
+		'py': py_z
+	}
+	for producer, compressed in producers {
+		v_plain := zlib.decompress(compressed)!
+		assert_equal_bytes('v.decompress(${producer}.compress)', vec.data, v_plain)!
+		checks++
+
+		c_plain := c_decompress(case_dir, c_bin, producer, compressed)!
+		assert_equal_bytes('c.decompress(${producer}.compress)', vec.data, c_plain)!
+		checks++
+
+		py_plain := py_decompress(case_dir, py_script, producer, compressed)!
+		assert_equal_bytes('python.decompress(${producer}.compress)', vec.data, py_plain)!
+		checks++
+	}
+	return checks
+}
+
+fn c_compress(case_dir string, c_bin string, plain []u8) ![]u8 {
+	in_path := join_path(case_dir, 'plain.in')
+	out_path := join_path(case_dir, 'c.zlib')
+	write_file_array(in_path, plain)!
+	must_succeed('${shell_quote(c_bin)} compress ${shell_quote(in_path)} ${shell_quote(out_path)}',
+		'C zlib compression failed')!
+	return read_bytes(out_path)!
+}
+
+fn c_decompress(case_dir string, c_bin string, producer string, compressed []u8) ![]u8 {
+	in_path := join_path(case_dir, '${producer}.for_c.zlib')
+	out_path := join_path(case_dir, '${producer}.from_c.out')
+	write_file_array(in_path, compressed)!
+	must_succeed('${shell_quote(c_bin)} decompress ${shell_quote(in_path)} ${shell_quote(out_path)}',
+		'C zlib decompression failed')!
+	return read_bytes(out_path)!
+}
+
+fn py_compress(case_dir string, py_script string, plain []u8) ![]u8 {
+	in_path := join_path(case_dir, 'plain_py.in')
+	out_path := join_path(case_dir, 'py.zlib')
+	write_file_array(in_path, plain)!
+	must_succeed('python3 ${shell_quote(py_script)} compress ${shell_quote(in_path)} ${shell_quote(out_path)}',
+		'Python zlib compression failed')!
+	return read_bytes(out_path)!
+}
+
+fn py_decompress(case_dir string, py_script string, producer string, compressed []u8) ![]u8 {
+	in_path := join_path(case_dir, '${producer}.for_py.zlib')
+	out_path := join_path(case_dir, '${producer}.from_py.out')
+	write_file_array(in_path, compressed)!
+	must_succeed('python3 ${shell_quote(py_script)} decompress ${shell_quote(in_path)} ${shell_quote(out_path)}',
+		'Python zlib decompression failed')!
+	return read_bytes(out_path)!
+}
+
+fn all_bytes_repeated(times int) []u8 {
+	mut out := []u8{cap: 256 * times}
+	for _ in 0 .. times {
+		for i in 0 .. 256 {
+			out << u8(i)
+		}
+	}
+	return out
+}
+
+fn lcg_bytes(n int) []u8 {
+	mut out := []u8{len: n}
+	mut x := u32(0x12345678)
+	for i in 0 .. n {
+		x = x * u32(1664525) + u32(1013904223)
+		out[i] = u8((x >> 16) & u32(0xff))
+	}
+	return out
+}
+
+fn assert_equal_bytes(label string, expected []u8, got []u8) ! {
+	if expected.len != got.len {
+		return error('${label}: length mismatch expected=${expected.len} got=${got.len}')
+	}
+	for i in 0 .. expected.len {
+		if expected[i] != got[i] {
+			return error('${label}: byte mismatch at offset ${i}')
+		}
+	}
+}
+
+fn must_succeed(command string, context string) ! {
+	res := execute(command)
+	if res.exit_code != 0 {
+		return error('${context}\ncommand: ${command}\nexit_code: ${res.exit_code}\n${res.output}')
+	}
+}
+
+fn shell_quote(s string) string {
+	return "'${s.replace("'", "'\\''")}'"
+}
diff --git a/vlib/compress/zlib/interop/zlib_ref.c b/vlib/compress/zlib/interop/zlib_ref.c
new file mode 100644
index 00000000000000..4c644bd3bcdb4a
--- /dev/null
+++ b/vlib/compress/zlib/interop/zlib_ref.c
@@ -0,0 +1,150 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+
+static int rf(const char* p, unsigned char** o, size_t* n)
+{
+	FILE* f = fopen(p, "rb");
+	if (!f) return 1;
+	if (fseek(f, 0, SEEK_END) != 0)
+	{
+		fclose(f);
+		return 1;
+	}
+	long s = ftell(f);
+	if (s < 0)
+	{
+		fclose(f);
+		return 1;
+	}
+	if (fseek(f, 0, SEEK_SET) != 0)
+	{
+		fclose(f);
+		return 1;
+	}
+	*n = (size_t)s;
+	*o = *n ? (unsigned char*)malloc(*n) : NULL;
+	if (*n && !*o)
+	{
+		fclose(f);
+		return 1;
+	}
+	if (*n && fread(*o, 1, *n, f) != *n)
+	{
+		free(*o);
+		fclose(f);
+		return 1;
+	}
+	fclose(f);
+	return 0;
+}
+
+static int wf(const char* p, const unsigned char* b, size_t n)
+{
+	FILE* f = fopen(p, "wb");
+	if (!f) return 1;
+	if (n && fwrite(b, 1, n, f) != n)
+	{
+		fclose(f);
+		return 1;
+	}
+	fclose(f);
+	return 0;
+}
+
+int main(int argc, char** argv)
+{
+	static const unsigned char dummy = 0;
+	if (argc != 4)
+	{
+		fputs("usage: zlib_ref compress|decompress in out\n", stderr);
+		return 2;
+	}
+	unsigned char* in = NULL;
+	size_t in_n = 0;
+	if (rf(argv[2], &in, &in_n))
+	{
+		fputs("read error\n", stderr);
+		return 1;
+	}
+	const unsigned char* in_ptr = in_n ? in : &dummy;
+	if (strcmp(argv[1], "compress") == 0)
+	{
+		uLongf out_n = compressBound((uLong)in_n);
+		unsigned char* out = (unsigned char*)malloc(out_n ? out_n : 1);
+		if (!out)
+		{
+			free(in);
+			return 1;
+		}
+		if (compress2(out, &out_n, in_ptr, (uLong)in_n, Z_DEFAULT_COMPRESSION) != Z_OK)
+		{
+			fputs("compress2 failed\n", stderr);
+			free(in);
+			free(out);
+			return 1;
+		}
+		if (wf(argv[3], out, (size_t)out_n))
+		{
+			fputs("write error\n", stderr);
+			free(in);
+			free(out);
+			return 1;
+		}
+		free(out);
+	}
+	else if (strcmp(argv[1], "decompress") == 0)
+	{
+		uLongf out_n = in_n * 8 + 64;
+		if (out_n < 256) out_n = 256;
+		unsigned char* out = NULL;
+		int rc = Z_BUF_ERROR;
+		while (rc == Z_BUF_ERROR)
+		{
+			unsigned char* next = (unsigned char*)realloc(out, out_n);
+			if (!next)
+			{
+				free(in);
+				free(out);
+				return 1;
+			}
+			out = next;
+			uLongf cap = out_n;
+			rc = uncompress(out, &cap, in_ptr, (uLong)in_n);
+			if (rc == Z_OK)
+			{
+				out_n = cap;
+				break;
+			}
+			if (rc == Z_BUF_ERROR)
+			{
+				out_n *= 2;
+				if (out_n < 256) out_n = 256;
+			}
+		}
+		if (rc != Z_OK)
+		{
+			fputs("uncompress failed\n", stderr);
+			free(in);
+			free(out);
+			return 1;
+		}
+		if (wf(argv[3], out, (size_t)out_n))
+		{
+			fputs("write error\n", stderr);
+			free(in);
+			free(out);
+			return 1;
+		}
+		free(out);
+	}
+	else
+	{
+		fputs("unknown mode\n", stderr);
+		free(in);
+		return 2;
+	}
+	free(in);
+	return 0;
+}
diff --git a/vlib/compress/zlib/interop/zlib_ref.py b/vlib/compress/zlib/interop/zlib_ref.py
new file mode 100644
index 00000000000000..0235cfa54cea47
--- /dev/null
+++ b/vlib/compress/zlib/interop/zlib_ref.py
@@ -0,0 +1,19 @@
+import sys, zlib
+def rf(path):
+    with open(path, "rb") as f:
+        return f.read()
+def wf(path, data):
+    with open(path, "wb") as f:
+        f.write(data)
+if len(sys.argv) != 4:
+    print("usage: zlib_ref.py compress|decompress in out", file=sys.stderr)
+    sys.exit(2)
+mode, inp, outp = sys.argv[1], sys.argv[2], sys.argv[3]
+data = rf(inp)
+if mode == "compress":
+    wf(outp, zlib.compress(data))
+elif mode == "decompress":
+    wf(outp, zlib.decompress(data))
+else:
+    print("unknown mode", file=sys.stderr)
+    sys.exit(2)
diff --git a/vlib/compress/zlib/zlib.v b/vlib/compress/zlib/zlib.v
index dc1bada94659c3..68dd901bb2be4b 100644
--- a/vlib/compress/zlib/zlib.v
+++ b/vlib/compress/zlib/zlib.v
@@ -1,17 +1,13 @@
 module zlib
 
-import compress as compr
+import compress.deflate
 
-// compresses an array of bytes using zlib and returns the compressed bytes in a new array
-// Example: b := 'abcdefgh'.repeat(1000).bytes(); cmpr := zlib.compress(b)!; assert cmpr.len < b.len; dc := zlib.decompress(cmpr)!; assert b == dc
+// compress compresses data using the zlib container format.
 pub fn compress(data []u8) ![]u8 {
-	// flags = TDEFL_WRITE_ZLIB_HEADER (0x01000)
-	return compr.compress(data, 0x01000)
+	return deflate.compress_zlib(data)
 }
 
-// decompresses an array of bytes using zlib and returns the decompressed bytes in a new array
-// Example: b := 'abcdefgh'.repeat(1000).bytes(); cmpr := zlib.compress(b)!; assert cmpr.len < b.len; dc := zlib.decompress(cmpr)!; assert b == dc
+// decompress decompresses zlib-compressed data.
 pub fn decompress(data []u8) ![]u8 {
-	// flags = TINFL_FLAG_PARSE_ZLIB_HEADER (0x1)
-	return compr.decompress(data, 0x1)
+	return deflate.decompress_zlib(data)
 }
diff --git a/vlib/compress/zlib/zlib_miniz_compat_names_collision.h b/vlib/compress/zlib/zlib_miniz_compat_names_collision.h
deleted file mode 100644
index 850471081fd26a..00000000000000
--- a/vlib/compress/zlib/zlib_miniz_compat_names_collision.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef ISSUE_18378_ZLIB_COMPAT_H
-#define ISSUE_18378_ZLIB_COMPAT_H
-
-typedef void *voidpf;
-typedef unsigned int uInt;
-
-typedef struct issue_18378_z_stream_s {
-	int dummy;
-} z_stream;
-
-typedef voidpf (*alloc_func)(voidpf opaque, uInt items, uInt size);
-
-#endif
diff --git a/vlib/compress/zlib/zlib_miniz_compat_names_collision_test.c.v b/vlib/compress/zlib/zlib_miniz_compat_names_collision_test.c.v
deleted file mode 100644
index 101ac73ae7b57c..00000000000000
--- a/vlib/compress/zlib/zlib_miniz_compat_names_collision_test.c.v
+++ /dev/null
@@ -1,7 +0,0 @@
-module zlib
-
-#include "@DIR/zlib_miniz_compat_names_collision.h"
-
-fn test_issue_18378_multiple_headers_do_not_conflict() {
-	assert true
-}
diff --git a/vlib/compress/zlib/zlib_test.v b/vlib/compress/zlib/zlib_test.v
index 73338ae17c152b..af3cf67aa17255 100644
--- a/vlib/compress/zlib/zlib_test.v
+++ b/vlib/compress/zlib/zlib_test.v
@@ -1,8 +1,67 @@
 module zlib
 
-fn test_zlib() {
-	uncompressed := 'Hello world!'
-	compressed := compress(uncompressed.bytes())!
+import encoding.hex
+
+fn must_decode_hex(s string) []u8 {
+	return hex.decode(s) or { panic(err) }
+}
+
+fn assert_decompress_error(data []u8, reason string) ! {
+	decompress(data) or {
+		assert err.msg() == reason
+		return
+	}
+	return error('did not error')
+}
+
+fn test_zlib_roundtrip_text() {
+	data := 'Hello world!'.bytes()
+	compressed := compress(data)!
+	decompressed := decompress(compressed)!
+	assert decompressed == data
+}
+
+fn test_zlib_roundtrip_empty() {
+	data := []u8{}
+	compressed := compress(data)!
+	decompressed := decompress(compressed)!
+	assert decompressed == data
+}
+
+fn test_zlib_roundtrip_binary() {
+	data := [u8(0), 1, 2, 3, 127, 128, 254, 255]
+	compressed := compress(data)!
 	decompressed := decompress(compressed)!
-	assert decompressed == uncompressed.bytes()
+	assert decompressed == data
+}
+
+fn test_zlib_roundtrip_large() {
+	data := 'abcdefgh'.repeat(1000).bytes()
+	compressed := compress(data)!
+	assert compressed.len < data.len
+	decompressed := decompress(compressed)!
+	assert decompressed == data
+}
+
+fn test_zlib_decompress_known_python_vector() {
+	compressed := must_decode_hex('789ccb48cdc9c95728cf2fca49e102001e720467')
+	decompressed := decompress(compressed)!
+	assert decompressed == 'hello world\n'.bytes()
+}
+
+fn test_zlib_invalid_too_short() {
+	assert_decompress_error([]u8{}, 'invalid zlib stream: too short')!
+}
+
+fn test_zlib_invalid_header_checksum() {
+	assert_decompress_error([u8(0x78), 0x9d, 0x00, 0x00, 0x00, 0x01],
+		'invalid zlib stream: bad header checksum')!
+}
+
+fn test_zlib_invalid_truncated_payload() {
+	decompress([u8(0x78), 0x9c, 0x03, 0x00, 0x00, 0x00, 0x01]) or {
+		assert err.msg().contains('unexpected end of stream')
+		return
+	}
+	assert false
 }