refactor: change simple8b API to use binary encoding

pull/24376/head
Edd Robinson 2019-12-10 19:27:58 +00:00
parent fb83e9c7fa
commit e9db04292c
4 changed files with 46 additions and 37 deletions

View File

@ -1 +1,3 @@
mod rle;
mod simple8b;
mod timestamp;

View File

@ -1,11 +1,11 @@
use integer_encoding::*;
/// encode encodes the value v, delta and count into dst.
/// encode_all encodes the value v, delta and count into dst.
///
/// v should be the first element of a sequence, delta the difference that each
/// value in the sequence differs by, and count the total number of values in the
/// sequence.
fn encode(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
pub fn encode_all(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
let max_var_int_size = 10; // max number of bytes needed to store var int
// Keep a byte back for the scaler.
@ -46,8 +46,8 @@ fn encode(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
dst.truncate(n);
}
/// decode decodes an RLE encoded slice into the destination vector.
fn decode(src: &[u8], dst: &mut Vec<u64>) -> Result<(), &'static str> {
/// decode_all decodes an RLE encoded slice into the destination vector.
pub fn decode_all(src: &[u8], dst: &mut Vec<u64>) -> Result<(), &'static str> {
if src.len() < 9 {
return Err("not enough data to decode using RLE");
}
@ -92,10 +92,10 @@ mod tests {
fn test_encode_rle() {
let exp = vec![100, 2100, 4100, 6100, 8100, 10100, 12100, 14100];
let mut dst = Vec::with_capacity(100);
encode(100, 2000, 8, &mut dst);
encode_all(100, 2000, 8, &mut dst);
let mut got = Vec::with_capacity(0);
decode(&dst, &mut got).expect("failed to RLE decode");
decode_all(&dst, &mut got).expect("failed to RLE decode");
assert_eq!(got, exp);
}
@ -103,10 +103,10 @@ mod tests {
fn test_encode_rle_no_cap() {
let exp = vec![100, 2100, 4100, 6100, 8100, 10100, 12100, 14100];
let mut dst = vec![];
encode(100, 2000, 8, &mut dst);
encode_all(100, 2000, 8, &mut dst);
let mut got = Vec::with_capacity(0);
decode(&dst, &mut got).expect("failed to RLE decode");
decode_all(&dst, &mut got).expect("failed to RLE decode");
assert_eq!(got, exp);
}
@ -114,10 +114,10 @@ mod tests {
fn test_encode_rle_small() {
let exp = vec![22222222, 22222222];
let mut dst = vec![];
encode(22222222, 0, 2, &mut dst);
encode_all(22222222, 0, 2, &mut dst);
let mut got = Vec::with_capacity(0);
decode(&dst, &mut got).expect("failed to RLE decode");
decode_all(&dst, &mut got).expect("failed to RLE decode");
assert_eq!(got, exp);
}
}

View File

@ -1,9 +1,13 @@
use std::error::Error;
//
// Original Author: Stuart Carnie
// https://github.com/stuartcarnie/rust-encoding
// Adapted from https://github.com/stuartcarnie/rust-encoding
//
const S8B_BIT_SIZE: usize = 60;
// maximum value that can be encoded.
pub const MAX_VALUE: u64 = (1 << 60) - 1;
const NUM_BITS: [[u8; 2]; 14] = [
[60, 1],
[30, 2],
@ -21,8 +25,9 @@ const NUM_BITS: [[u8; 2]; 14] = [
[1, 60],
];
pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u64>) -> Result<(), &'static str> {
let mut j = 0;
/// encode_all packs and binary encodes the provided slice of u64 values using
/// simple8b into the provided vector.
pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u8>) -> Result<(), Box<Error>> {
let mut i = 0;
'next_value: while i < src.len() {
// try to pack a run of 240 or 120 1s
@ -38,13 +43,11 @@ pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u64>) -> Result<(), &'static
let k = a.iter().take_while(|x| **x == 1).count();
if k == 240 {
i += 240;
dst.push(0);
j += 1;
dst.resize(dst.len() + 8, 0);
continue;
} else if k >= 120 {
i += 120;
dst.push(1 << 60);
j += 1;
dst.extend_from_slice(&(1u64 << 60).to_be_bytes());
continue;
}
}
@ -67,27 +70,30 @@ pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u64>) -> Result<(), &'static
break;
}
}
dst.push(val);
j += 1;
dst.extend_from_slice(&val.to_be_bytes());
i += int_n;
continue 'next_value;
}
return Err("value out of bounds");
return Err(From::from("value out of bounds"));
}
dst.truncate(j);
Ok(())
}
pub fn decode_all<'a>(src: &[u64], dst: &'a mut Vec<u64>) -> Result<(), &'static str> {
/// decode_all decodes and unpacks the binary-encoded values stored in src into
/// dst.
pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<u64>) {
let mut i = 0;
let mut j = 0;
for (_, v) in src.iter().enumerate() {
let mut buf: [u8; 8] = [0; 8];
while i < src.len() {
if dst.len() < j + 240 {
dst.resize(j + 240, 0); // may need 240 capacity
}
j += decode(*v, &mut dst[j..])
buf.copy_from_slice(&src[i..i + 8]);
j += decode(u64::from_be_bytes(buf), &mut dst[j..]);
i += 8;
}
dst.truncate(j);
Ok(())
}
pub fn decode(v: u64, dst: &mut [u64]) -> usize {

View File

@ -5,13 +5,13 @@ use rand::{Rng, SeedableRng};
#[test]
fn test_encode_no_values() {
let src = vec![];
let mut dst = vec![0u64; 1000];
let mut dst = vec![];
// check for error
encode_all(&src, &mut dst).expect("failed to encode src");
// verify encoded no values.
assert_eq!(dst, src)
assert_eq!(dst.len(), src.len())
}
#[test]
@ -21,9 +21,8 @@ fn test_encode_mixed_sizes() {
let mut encoded = vec![];
let mut decoded = vec![];
encode_all(&src, &mut encoded).expect("failed to encode");
assert_eq!(encoded.len(), 2); // verify vector is truncated.
//
decode_all(&encoded, &mut decoded).expect("failed to decode");
assert_eq!(encoded.len(), 16); // verify vector is truncated.
decode_all(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes");
}
@ -32,8 +31,10 @@ fn test_encode_too_big() {
let src = vec![7, 6, 2 << 61 - 1, 4, 3, 2, 1];
let mut encoded = vec![];
let e = encode_all(&src, &mut encoded).expect_err("encoding did not fail");
assert_eq!(e, "value out of bounds");
match encode_all(&src, &mut encoded) {
Ok(_) => assert!(false), // TODO(edd): fix this silly assertion
Err(_) => (),
}
}
#[test]
@ -112,7 +113,7 @@ fn test_encode_all() {
let mut encoded = vec![];
encode_all(&test.input, &mut encoded).expect("failed to encode");
let mut decoded = vec![];
decode_all(&encoded, &mut decoded).expect("failed to decode");
decode_all(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), test.input, "{}", test.name);
}
@ -125,7 +126,7 @@ fn test_encode_all() {
let mut encoded = vec![];
encode_all(&input, &mut encoded).expect("failed to encode");
let mut decoded = vec![];
decode_all(&encoded, &mut decoded).expect("failed to decode");
decode_all(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), input, "{}", "120 ones");
input = ones(240)();
@ -134,7 +135,7 @@ fn test_encode_all() {
let mut encoded = vec![];
encode_all(&input, &mut encoded).expect("failed to encode");
let mut decoded = vec![];
decode_all(&encoded, &mut decoded).expect("failed to decode");
decode_all(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), input, "{}", "119 ones");
input = ones(241)();
@ -143,7 +144,7 @@ fn test_encode_all() {
let mut encoded = vec![];
encode_all(&input, &mut encoded).expect("failed to encode");
let mut decoded = vec![];
decode_all(&encoded, &mut decoded).expect("failed to decode");
decode_all(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), input, "{}", "239 ones");
}