From e9db04292cd3c049775d04071b87fc74a8fc77a3 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Tue, 10 Dec 2019 19:27:58 +0000 Subject: [PATCH] refactor: change simple8b API to use binary encoding --- src/encoders/mod.rs | 2 ++ src/encoders/rle/mod.rs | 20 +++++++++--------- src/encoders/simple8b/mod.rs | 38 ++++++++++++++++++++-------------- src/encoders/simple8b/tests.rs | 23 ++++++++++---------- 4 files changed, 46 insertions(+), 37 deletions(-) diff --git a/src/encoders/mod.rs b/src/encoders/mod.rs index 0863ededfa..8a0a5bce3f 100644 --- a/src/encoders/mod.rs +++ b/src/encoders/mod.rs @@ -1 +1,3 @@ +mod rle; mod simple8b; +mod timestamp; diff --git a/src/encoders/rle/mod.rs b/src/encoders/rle/mod.rs index 65063bc579..f3633130d9 100644 --- a/src/encoders/rle/mod.rs +++ b/src/encoders/rle/mod.rs @@ -1,11 +1,11 @@ use integer_encoding::*; -/// encode encodes the value v, delta and count into dst. +/// encode_all encodes the value v, delta and count into dst. /// /// v should be the first element of a sequence, delta the difference that each /// value in the sequence differs by, and count the total number of values in the /// sequence. -fn encode(v: u64, delta: u64, count: u64, dst: &mut Vec) { +pub fn encode_all(v: u64, delta: u64, count: u64, dst: &mut Vec) { let max_var_int_size = 10; // max number of bytes needed to store var int // Keep a byte back for the scaler. @@ -46,8 +46,8 @@ fn encode(v: u64, delta: u64, count: u64, dst: &mut Vec) { dst.truncate(n); } -/// decode decodes an RLE encoded slice into the destination vector. -fn decode(src: &[u8], dst: &mut Vec) -> Result<(), &'static str> { +/// decode_all decodes an RLE encoded slice into the destination vector. 
+pub fn decode_all(src: &[u8], dst: &mut Vec) -> Result<(), &'static str> { if src.len() < 9 { return Err("not enough data to decode using RLE"); } @@ -92,10 +92,10 @@ mod tests { fn test_encode_rle() { let exp = vec![100, 2100, 4100, 6100, 8100, 10100, 12100, 14100]; let mut dst = Vec::with_capacity(100); - encode(100, 2000, 8, &mut dst); + encode_all(100, 2000, 8, &mut dst); let mut got = Vec::with_capacity(0); - decode(&dst, &mut got).expect("failed to RLE decode"); + decode_all(&dst, &mut got).expect("failed to RLE decode"); assert_eq!(got, exp); } @@ -103,10 +103,10 @@ mod tests { fn test_encode_rle_no_cap() { let exp = vec![100, 2100, 4100, 6100, 8100, 10100, 12100, 14100]; let mut dst = vec![]; - encode(100, 2000, 8, &mut dst); + encode_all(100, 2000, 8, &mut dst); let mut got = Vec::with_capacity(0); - decode(&dst, &mut got).expect("failed to RLE decode"); + decode_all(&dst, &mut got).expect("failed to RLE decode"); assert_eq!(got, exp); } @@ -114,10 +114,10 @@ mod tests { fn test_encode_rle_small() { let exp = vec![22222222, 22222222]; let mut dst = vec![]; - encode(22222222, 0, 2, &mut dst); + encode_all(22222222, 0, 2, &mut dst); let mut got = Vec::with_capacity(0); - decode(&dst, &mut got).expect("failed to RLE decode"); + decode_all(&dst, &mut got).expect("failed to RLE decode"); assert_eq!(got, exp); } } diff --git a/src/encoders/simple8b/mod.rs b/src/encoders/simple8b/mod.rs index c719039e16..47fac7e1c0 100644 --- a/src/encoders/simple8b/mod.rs +++ b/src/encoders/simple8b/mod.rs @@ -1,9 +1,13 @@ +use std::error::Error; + // -// Original Author: Stuart Carnie -// https://github.com/stuartcarnie/rust-encoding +// Adapted from https://github.com/stuartcarnie/rust-encoding // const S8B_BIT_SIZE: usize = 60; +// maximum value that can be encoded. 
+pub const MAX_VALUE: u64 = (1 << 60) - 1; + const NUM_BITS: [[u8; 2]; 14] = [ [60, 1], [30, 2], @@ -21,8 +25,9 @@ const NUM_BITS: [[u8; 2]; 14] = [ [1, 60], ]; -pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec) -> Result<(), &'static str> { - let mut j = 0; +/// encode_all packs and binary encodes the provided slice of u64 values using +/// simple8b into the provided vector. +pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec) -> Result<(), Box> { let mut i = 0; 'next_value: while i < src.len() { // try to pack a run of 240 or 120 1s @@ -38,13 +43,11 @@ pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec) -> Result<(), &'static let k = a.iter().take_while(|x| **x == 1).count(); if k == 240 { i += 240; - dst.push(0); - j += 1; + dst.resize(dst.len() + 8, 0); continue; } else if k >= 120 { i += 120; - dst.push(1 << 60); - j += 1; + dst.extend_from_slice(&(1u64 << 60).to_be_bytes()); continue; } } @@ -67,27 +70,30 @@ pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec) -> Result<(), &'static break; } } - dst.push(val); - j += 1; + dst.extend_from_slice(&val.to_be_bytes()); i += int_n; continue 'next_value; } - return Err("value out of bounds"); + return Err(From::from("value out of bounds")); } - dst.truncate(j); Ok(()) } -pub fn decode_all<'a>(src: &[u64], dst: &'a mut Vec) -> Result<(), &'static str> { +/// decode_all decodes and unpacks the binary-encoded values stored in src into +/// dst.
+pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec) { + let mut i = 0; let mut j = 0; - for (_, v) in src.iter().enumerate() { + let mut buf: [u8; 8] = [0; 8]; + while i < src.len() { if dst.len() < j + 240 { dst.resize(j + 240, 0); // may need 240 capacity } - j += decode(*v, &mut dst[j..]) + buf.copy_from_slice(&src[i..i + 8]); + j += decode(u64::from_be_bytes(buf), &mut dst[j..]); + i += 8; } dst.truncate(j); - Ok(()) } pub fn decode(v: u64, dst: &mut [u64]) -> usize { diff --git a/src/encoders/simple8b/tests.rs b/src/encoders/simple8b/tests.rs index dcb2883de5..4bea86fcec 100644 --- a/src/encoders/simple8b/tests.rs +++ b/src/encoders/simple8b/tests.rs @@ -5,13 +5,13 @@ use rand::{Rng, SeedableRng}; #[test] fn test_encode_no_values() { let src = vec![]; - let mut dst = vec![0u64; 1000]; + let mut dst = vec![]; // check for error encode_all(&src, &mut dst).expect("failed to encode src"); // verify encoded no values. - assert_eq!(dst, src) + assert_eq!(dst.len(), src.len()) } #[test] @@ -21,9 +21,8 @@ fn test_encode_mixed_sizes() { let mut encoded = vec![]; let mut decoded = vec![]; encode_all(&src, &mut encoded).expect("failed to encode"); - assert_eq!(encoded.len(), 2); // verify vector is truncated. - // - decode_all(&encoded, &mut decoded).expect("failed to decode"); + assert_eq!(encoded.len(), 16); // verify vector is truncated. 
+ decode_all(&encoded, &mut decoded); assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes"); } @@ -32,8 +31,10 @@ fn test_encode_too_big() { let src = vec![7, 6, 2 << 61 - 1, 4, 3, 2, 1]; let mut encoded = vec![]; - let e = encode_all(&src, &mut encoded).expect_err("encoding did not fail"); - assert_eq!(e, "value out of bounds"); + match encode_all(&src, &mut encoded) { + Ok(_) => assert!(false), // TODO(edd): fix this silly assertion + Err(_) => (), + } } #[test] @@ -112,7 +113,7 @@ fn test_encode_all() { let mut encoded = vec![]; encode_all(&test.input, &mut encoded).expect("failed to encode"); let mut decoded = vec![]; - decode_all(&encoded, &mut decoded).expect("failed to decode"); + decode_all(&encoded, &mut decoded); assert_eq!(decoded.to_vec(), test.input, "{}", test.name); } @@ -125,7 +126,7 @@ fn test_encode_all() { let mut encoded = vec![]; encode_all(&input, &mut encoded).expect("failed to encode"); let mut decoded = vec![]; - decode_all(&encoded, &mut decoded).expect("failed to decode"); + decode_all(&encoded, &mut decoded); assert_eq!(decoded.to_vec(), input, "{}", "120 ones"); input = ones(240)(); @@ -134,7 +135,7 @@ fn test_encode_all() { let mut encoded = vec![]; encode_all(&input, &mut encoded).expect("failed to encode"); let mut decoded = vec![]; - decode_all(&encoded, &mut decoded).expect("failed to decode"); + decode_all(&encoded, &mut decoded); assert_eq!(decoded.to_vec(), input, "{}", "119 ones"); input = ones(241)(); @@ -143,7 +144,7 @@ fn test_encode_all() { let mut encoded = vec![]; encode_all(&input, &mut encoded).expect("failed to encode"); let mut decoded = vec![]; - decode_all(&encoded, &mut decoded).expect("failed to decode"); + decode_all(&encoded, &mut decoded); assert_eq!(decoded.to_vec(), input, "{}", "239 ones"); }