refactor: change simple8b API to use binary encoding

pull/24376/head
Edd Robinson 2019-12-10 19:27:58 +00:00
parent fb83e9c7fa
commit e9db04292c
4 changed files with 46 additions and 37 deletions

View File

@ -1 +1,3 @@
mod rle;
mod simple8b; mod simple8b;
mod timestamp;

View File

@ -1,11 +1,11 @@
use integer_encoding::*; use integer_encoding::*;
/// encode encodes the value v, delta and count into dst. /// encode_all encodes the value v, delta and count into dst.
/// ///
/// v should be the first element of a sequence, delta the difference that each /// v should be the first element of a sequence, delta the difference that each
/// value in the sequence differs by, and count the total number of values in the /// value in the sequence differs by, and count the total number of values in the
/// sequence. /// sequence.
fn encode(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) { pub fn encode_all(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
let max_var_int_size = 10; // max number of bytes needed to store var int let max_var_int_size = 10; // max number of bytes needed to store var int
// Keep a byte back for the scaler. // Keep a byte back for the scaler.
@ -46,8 +46,8 @@ fn encode(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
dst.truncate(n); dst.truncate(n);
} }
/// decode decodes an RLE encoded slice into the destination vector. /// decode_all decodes an RLE encoded slice into the destination vector.
fn decode(src: &[u8], dst: &mut Vec<u64>) -> Result<(), &'static str> { pub fn decode_all(src: &[u8], dst: &mut Vec<u64>) -> Result<(), &'static str> {
if src.len() < 9 { if src.len() < 9 {
return Err("not enough data to decode using RLE"); return Err("not enough data to decode using RLE");
} }
@ -92,10 +92,10 @@ mod tests {
fn test_encode_rle() { fn test_encode_rle() {
let exp = vec![100, 2100, 4100, 6100, 8100, 10100, 12100, 14100]; let exp = vec![100, 2100, 4100, 6100, 8100, 10100, 12100, 14100];
let mut dst = Vec::with_capacity(100); let mut dst = Vec::with_capacity(100);
encode(100, 2000, 8, &mut dst); encode_all(100, 2000, 8, &mut dst);
let mut got = Vec::with_capacity(0); let mut got = Vec::with_capacity(0);
decode(&dst, &mut got).expect("failed to RLE decode"); decode_all(&dst, &mut got).expect("failed to RLE decode");
assert_eq!(got, exp); assert_eq!(got, exp);
} }
@ -103,10 +103,10 @@ mod tests {
fn test_encode_rle_no_cap() { fn test_encode_rle_no_cap() {
let exp = vec![100, 2100, 4100, 6100, 8100, 10100, 12100, 14100]; let exp = vec![100, 2100, 4100, 6100, 8100, 10100, 12100, 14100];
let mut dst = vec![]; let mut dst = vec![];
encode(100, 2000, 8, &mut dst); encode_all(100, 2000, 8, &mut dst);
let mut got = Vec::with_capacity(0); let mut got = Vec::with_capacity(0);
decode(&dst, &mut got).expect("failed to RLE decode"); decode_all(&dst, &mut got).expect("failed to RLE decode");
assert_eq!(got, exp); assert_eq!(got, exp);
} }
@ -114,10 +114,10 @@ mod tests {
fn test_encode_rle_small() { fn test_encode_rle_small() {
let exp = vec![22222222, 22222222]; let exp = vec![22222222, 22222222];
let mut dst = vec![]; let mut dst = vec![];
encode(22222222, 0, 2, &mut dst); encode_all(22222222, 0, 2, &mut dst);
let mut got = Vec::with_capacity(0); let mut got = Vec::with_capacity(0);
decode(&dst, &mut got).expect("failed to RLE decode"); decode_all(&dst, &mut got).expect("failed to RLE decode");
assert_eq!(got, exp); assert_eq!(got, exp);
} }
} }

View File

@ -1,9 +1,13 @@
use std::error::Error;
// //
// Original Author: Stuart Carnie // Adapted from https://github.com/stuartcarnie/rust-encoding
// https://github.com/stuartcarnie/rust-encoding
// //
const S8B_BIT_SIZE: usize = 60; const S8B_BIT_SIZE: usize = 60;
// maximum value that can be encoded.
pub const MAX_VALUE: u64 = (1 << 60) - 1;
const NUM_BITS: [[u8; 2]; 14] = [ const NUM_BITS: [[u8; 2]; 14] = [
[60, 1], [60, 1],
[30, 2], [30, 2],
@ -21,8 +25,9 @@ const NUM_BITS: [[u8; 2]; 14] = [
[1, 60], [1, 60],
]; ];
pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u64>) -> Result<(), &'static str> { /// encode_all packs and binary encodes the provided slice of u64 values using
let mut j = 0; /// simple8b into the provided vector.
pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u8>) -> Result<(), Box<Error>> {
let mut i = 0; let mut i = 0;
'next_value: while i < src.len() { 'next_value: while i < src.len() {
// try to pack a run of 240 or 120 1s // try to pack a run of 240 or 120 1s
@ -38,13 +43,11 @@ pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u64>) -> Result<(), &'static
let k = a.iter().take_while(|x| **x == 1).count(); let k = a.iter().take_while(|x| **x == 1).count();
if k == 240 { if k == 240 {
i += 240; i += 240;
dst.push(0); dst.resize(dst.len() + 8, 0);
j += 1;
continue; continue;
} else if k >= 120 { } else if k >= 120 {
i += 120; i += 120;
dst.push(1 << 60); dst.extend_from_slice(&(1u64 << 60).to_be_bytes());
j += 1;
continue; continue;
} }
} }
@ -67,27 +70,30 @@ pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u64>) -> Result<(), &'static
break; break;
} }
} }
dst.push(val); dst.extend_from_slice(&val.to_be_bytes());
j += 1;
i += int_n; i += int_n;
continue 'next_value; continue 'next_value;
} }
return Err("value out of bounds"); return Err(From::from("value out of bounds"));
} }
dst.truncate(j);
Ok(()) Ok(())
} }
pub fn decode_all<'a>(src: &[u64], dst: &'a mut Vec<u64>) -> Result<(), &'static str> { /// decode_all decodes and unpacks the binary-encoded values stored in src into
/// dst.
pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<u64>) {
let mut i = 0;
let mut j = 0; let mut j = 0;
for (_, v) in src.iter().enumerate() { let mut buf: [u8; 8] = [0; 8];
while i < src.len() {
if dst.len() < j + 240 { if dst.len() < j + 240 {
dst.resize(j + 240, 0); // may need 240 capacity dst.resize(j + 240, 0); // may need 240 capacity
} }
j += decode(*v, &mut dst[j..]) buf.copy_from_slice(&src[i..i + 8]);
j += decode(u64::from_be_bytes(buf), &mut dst[j..]);
i += 8;
} }
dst.truncate(j); dst.truncate(j);
Ok(())
} }
pub fn decode(v: u64, dst: &mut [u64]) -> usize { pub fn decode(v: u64, dst: &mut [u64]) -> usize {

View File

@ -5,13 +5,13 @@ use rand::{Rng, SeedableRng};
#[test] #[test]
fn test_encode_no_values() { fn test_encode_no_values() {
let src = vec![]; let src = vec![];
let mut dst = vec![0u64; 1000]; let mut dst = vec![];
// check for error // check for error
encode_all(&src, &mut dst).expect("failed to encode src"); encode_all(&src, &mut dst).expect("failed to encode src");
// verify encoded no values. // verify encoded no values.
assert_eq!(dst, src) assert_eq!(dst.len(), src.len())
} }
#[test] #[test]
@ -21,9 +21,8 @@ fn test_encode_mixed_sizes() {
let mut encoded = vec![]; let mut encoded = vec![];
let mut decoded = vec![]; let mut decoded = vec![];
encode_all(&src, &mut encoded).expect("failed to encode"); encode_all(&src, &mut encoded).expect("failed to encode");
assert_eq!(encoded.len(), 2); // verify vector is truncated. assert_eq!(encoded.len(), 16); // verify vector is truncated.
// decode_all(&encoded, &mut decoded);
decode_all(&encoded, &mut decoded).expect("failed to decode");
assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes"); assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes");
} }
@ -32,8 +31,10 @@ fn test_encode_too_big() {
let src = vec![7, 6, 2 << 61 - 1, 4, 3, 2, 1]; let src = vec![7, 6, 2 << 61 - 1, 4, 3, 2, 1];
let mut encoded = vec![]; let mut encoded = vec![];
let e = encode_all(&src, &mut encoded).expect_err("encoding did not fail"); match encode_all(&src, &mut encoded) {
assert_eq!(e, "value out of bounds"); Ok(_) => assert!(false), // TODO(edd): fix this silly assertion
Err(_) => (),
}
} }
#[test] #[test]
@ -112,7 +113,7 @@ fn test_encode_all() {
let mut encoded = vec![]; let mut encoded = vec![];
encode_all(&test.input, &mut encoded).expect("failed to encode"); encode_all(&test.input, &mut encoded).expect("failed to encode");
let mut decoded = vec![]; let mut decoded = vec![];
decode_all(&encoded, &mut decoded).expect("failed to decode"); decode_all(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), test.input, "{}", test.name); assert_eq!(decoded.to_vec(), test.input, "{}", test.name);
} }
@ -125,7 +126,7 @@ fn test_encode_all() {
let mut encoded = vec![]; let mut encoded = vec![];
encode_all(&input, &mut encoded).expect("failed to encode"); encode_all(&input, &mut encoded).expect("failed to encode");
let mut decoded = vec![]; let mut decoded = vec![];
decode_all(&encoded, &mut decoded).expect("failed to decode"); decode_all(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), input, "{}", "120 ones"); assert_eq!(decoded.to_vec(), input, "{}", "120 ones");
input = ones(240)(); input = ones(240)();
@ -134,7 +135,7 @@ fn test_encode_all() {
let mut encoded = vec![]; let mut encoded = vec![];
encode_all(&input, &mut encoded).expect("failed to encode"); encode_all(&input, &mut encoded).expect("failed to encode");
let mut decoded = vec![]; let mut decoded = vec![];
decode_all(&encoded, &mut decoded).expect("failed to decode"); decode_all(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), input, "{}", "119 ones"); assert_eq!(decoded.to_vec(), input, "{}", "119 ones");
input = ones(241)(); input = ones(241)();
@ -143,7 +144,7 @@ fn test_encode_all() {
let mut encoded = vec![]; let mut encoded = vec![];
encode_all(&input, &mut encoded).expect("failed to encode"); encode_all(&input, &mut encoded).expect("failed to encode");
let mut decoded = vec![]; let mut decoded = vec![];
decode_all(&encoded, &mut decoded).expect("failed to decode"); decode_all(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), input, "{}", "239 ones"); assert_eq!(decoded.to_vec(), input, "{}", "239 ones");
} }