refactor: change simple8b API to use binary encoding
parent
fb83e9c7fa
commit
e9db04292c
|
@ -1 +1,3 @@
|
||||||
|
mod rle;
|
||||||
mod simple8b;
|
mod simple8b;
|
||||||
|
mod timestamp;
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
use integer_encoding::*;
|
use integer_encoding::*;
|
||||||
|
|
||||||
/// encode encodes the value v, delta and count into dst.
|
/// encode_all encodes the value v, delta and count into dst.
|
||||||
///
|
///
|
||||||
/// v should be the first element of a sequence, delta the difference that each
|
/// v should be the first element of a sequence, delta the difference that each
|
||||||
/// value in the sequence differs by, and count the total number of values in the
|
/// value in the sequence differs by, and count the total number of values in the
|
||||||
/// sequence.
|
/// sequence.
|
||||||
fn encode(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
|
pub fn encode_all(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
|
||||||
let max_var_int_size = 10; // max number of bytes needed to store var int
|
let max_var_int_size = 10; // max number of bytes needed to store var int
|
||||||
|
|
||||||
// Keep a byte back for the scaler.
|
// Keep a byte back for the scaler.
|
||||||
|
@ -46,8 +46,8 @@ fn encode(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
|
||||||
dst.truncate(n);
|
dst.truncate(n);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// decode decodes an RLE encoded slice into the destination vector.
|
/// decode_all decodes an RLE encoded slice into the destination vector.
|
||||||
fn decode(src: &[u8], dst: &mut Vec<u64>) -> Result<(), &'static str> {
|
pub fn decode_all(src: &[u8], dst: &mut Vec<u64>) -> Result<(), &'static str> {
|
||||||
if src.len() < 9 {
|
if src.len() < 9 {
|
||||||
return Err("not enough data to decode using RLE");
|
return Err("not enough data to decode using RLE");
|
||||||
}
|
}
|
||||||
|
@ -92,10 +92,10 @@ mod tests {
|
||||||
fn test_encode_rle() {
|
fn test_encode_rle() {
|
||||||
let exp = vec![100, 2100, 4100, 6100, 8100, 10100, 12100, 14100];
|
let exp = vec![100, 2100, 4100, 6100, 8100, 10100, 12100, 14100];
|
||||||
let mut dst = Vec::with_capacity(100);
|
let mut dst = Vec::with_capacity(100);
|
||||||
encode(100, 2000, 8, &mut dst);
|
encode_all(100, 2000, 8, &mut dst);
|
||||||
|
|
||||||
let mut got = Vec::with_capacity(0);
|
let mut got = Vec::with_capacity(0);
|
||||||
decode(&dst, &mut got).expect("failed to RLE decode");
|
decode_all(&dst, &mut got).expect("failed to RLE decode");
|
||||||
assert_eq!(got, exp);
|
assert_eq!(got, exp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,10 +103,10 @@ mod tests {
|
||||||
fn test_encode_rle_no_cap() {
|
fn test_encode_rle_no_cap() {
|
||||||
let exp = vec![100, 2100, 4100, 6100, 8100, 10100, 12100, 14100];
|
let exp = vec![100, 2100, 4100, 6100, 8100, 10100, 12100, 14100];
|
||||||
let mut dst = vec![];
|
let mut dst = vec![];
|
||||||
encode(100, 2000, 8, &mut dst);
|
encode_all(100, 2000, 8, &mut dst);
|
||||||
|
|
||||||
let mut got = Vec::with_capacity(0);
|
let mut got = Vec::with_capacity(0);
|
||||||
decode(&dst, &mut got).expect("failed to RLE decode");
|
decode_all(&dst, &mut got).expect("failed to RLE decode");
|
||||||
assert_eq!(got, exp);
|
assert_eq!(got, exp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -114,10 +114,10 @@ mod tests {
|
||||||
fn test_encode_rle_small() {
|
fn test_encode_rle_small() {
|
||||||
let exp = vec![22222222, 22222222];
|
let exp = vec![22222222, 22222222];
|
||||||
let mut dst = vec![];
|
let mut dst = vec![];
|
||||||
encode(22222222, 0, 2, &mut dst);
|
encode_all(22222222, 0, 2, &mut dst);
|
||||||
|
|
||||||
let mut got = Vec::with_capacity(0);
|
let mut got = Vec::with_capacity(0);
|
||||||
decode(&dst, &mut got).expect("failed to RLE decode");
|
decode_all(&dst, &mut got).expect("failed to RLE decode");
|
||||||
assert_eq!(got, exp);
|
assert_eq!(got, exp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,13 @@
|
||||||
|
use std::error::Error;
|
||||||
|
|
||||||
//
|
//
|
||||||
// Original Author: Stuart Carnie
|
// Adapted from https://github.com/stuartcarnie/rust-encoding
|
||||||
// https://github.com/stuartcarnie/rust-encoding
|
|
||||||
//
|
//
|
||||||
const S8B_BIT_SIZE: usize = 60;
|
const S8B_BIT_SIZE: usize = 60;
|
||||||
|
|
||||||
|
// maximum value that can be encoded.
|
||||||
|
pub const MAX_VALUE: u64 = (1 << 60) - 1;
|
||||||
|
|
||||||
const NUM_BITS: [[u8; 2]; 14] = [
|
const NUM_BITS: [[u8; 2]; 14] = [
|
||||||
[60, 1],
|
[60, 1],
|
||||||
[30, 2],
|
[30, 2],
|
||||||
|
@ -21,8 +25,9 @@ const NUM_BITS: [[u8; 2]; 14] = [
|
||||||
[1, 60],
|
[1, 60],
|
||||||
];
|
];
|
||||||
|
|
||||||
pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u64>) -> Result<(), &'static str> {
|
/// encode_all packs and binary encodes the provided slice of u64 values using
|
||||||
let mut j = 0;
|
/// simple8b into the provided vector.
|
||||||
|
pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u8>) -> Result<(), Box<Error>> {
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
'next_value: while i < src.len() {
|
'next_value: while i < src.len() {
|
||||||
// try to pack a run of 240 or 120 1s
|
// try to pack a run of 240 or 120 1s
|
||||||
|
@ -38,13 +43,11 @@ pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u64>) -> Result<(), &'static
|
||||||
let k = a.iter().take_while(|x| **x == 1).count();
|
let k = a.iter().take_while(|x| **x == 1).count();
|
||||||
if k == 240 {
|
if k == 240 {
|
||||||
i += 240;
|
i += 240;
|
||||||
dst.push(0);
|
dst.resize(dst.len() + 8, 0);
|
||||||
j += 1;
|
|
||||||
continue;
|
continue;
|
||||||
} else if k >= 120 {
|
} else if k >= 120 {
|
||||||
i += 120;
|
i += 120;
|
||||||
dst.push(1 << 60);
|
dst.extend_from_slice(&(1u64 << 60).to_be_bytes());
|
||||||
j += 1;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -67,27 +70,30 @@ pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u64>) -> Result<(), &'static
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
dst.push(val);
|
dst.extend_from_slice(&val.to_be_bytes());
|
||||||
j += 1;
|
|
||||||
i += int_n;
|
i += int_n;
|
||||||
continue 'next_value;
|
continue 'next_value;
|
||||||
}
|
}
|
||||||
return Err("value out of bounds");
|
return Err(From::from("value out of bounds"));
|
||||||
}
|
}
|
||||||
dst.truncate(j);
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn decode_all<'a>(src: &[u64], dst: &'a mut Vec<u64>) -> Result<(), &'static str> {
|
/// decode_all decodes and unpacks the binary-encoded values stored in src into
|
||||||
|
/// dst.
|
||||||
|
pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<u64>) {
|
||||||
|
let mut i = 0;
|
||||||
let mut j = 0;
|
let mut j = 0;
|
||||||
for (_, v) in src.iter().enumerate() {
|
let mut buf: [u8; 8] = [0; 8];
|
||||||
|
while i < src.len() {
|
||||||
if dst.len() < j + 240 {
|
if dst.len() < j + 240 {
|
||||||
dst.resize(j + 240, 0); // may need 240 capacity
|
dst.resize(j + 240, 0); // may need 240 capacity
|
||||||
}
|
}
|
||||||
j += decode(*v, &mut dst[j..])
|
buf.copy_from_slice(&src[i..i + 8]);
|
||||||
|
j += decode(u64::from_be_bytes(buf), &mut dst[j..]);
|
||||||
|
i += 8;
|
||||||
}
|
}
|
||||||
dst.truncate(j);
|
dst.truncate(j);
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn decode(v: u64, dst: &mut [u64]) -> usize {
|
pub fn decode(v: u64, dst: &mut [u64]) -> usize {
|
||||||
|
|
|
@ -5,13 +5,13 @@ use rand::{Rng, SeedableRng};
|
||||||
#[test]
|
#[test]
|
||||||
fn test_encode_no_values() {
|
fn test_encode_no_values() {
|
||||||
let src = vec![];
|
let src = vec![];
|
||||||
let mut dst = vec![0u64; 1000];
|
let mut dst = vec![];
|
||||||
|
|
||||||
// check for error
|
// check for error
|
||||||
encode_all(&src, &mut dst).expect("failed to encode src");
|
encode_all(&src, &mut dst).expect("failed to encode src");
|
||||||
|
|
||||||
// verify encoded no values.
|
// verify encoded no values.
|
||||||
assert_eq!(dst, src)
|
assert_eq!(dst.len(), src.len())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -21,9 +21,8 @@ fn test_encode_mixed_sizes() {
|
||||||
let mut encoded = vec![];
|
let mut encoded = vec![];
|
||||||
let mut decoded = vec![];
|
let mut decoded = vec![];
|
||||||
encode_all(&src, &mut encoded).expect("failed to encode");
|
encode_all(&src, &mut encoded).expect("failed to encode");
|
||||||
assert_eq!(encoded.len(), 2); // verify vector is truncated.
|
assert_eq!(encoded.len(), 16); // verify vector is truncated.
|
||||||
//
|
decode_all(&encoded, &mut decoded);
|
||||||
decode_all(&encoded, &mut decoded).expect("failed to decode");
|
|
||||||
assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes");
|
assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,8 +31,10 @@ fn test_encode_too_big() {
|
||||||
let src = vec![7, 6, 2 << 61 - 1, 4, 3, 2, 1];
|
let src = vec![7, 6, 2 << 61 - 1, 4, 3, 2, 1];
|
||||||
|
|
||||||
let mut encoded = vec![];
|
let mut encoded = vec![];
|
||||||
let e = encode_all(&src, &mut encoded).expect_err("encoding did not fail");
|
match encode_all(&src, &mut encoded) {
|
||||||
assert_eq!(e, "value out of bounds");
|
Ok(_) => assert!(false), // TODO(edd): fix this silly assertion
|
||||||
|
Err(_) => (),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -112,7 +113,7 @@ fn test_encode_all() {
|
||||||
let mut encoded = vec![];
|
let mut encoded = vec![];
|
||||||
encode_all(&test.input, &mut encoded).expect("failed to encode");
|
encode_all(&test.input, &mut encoded).expect("failed to encode");
|
||||||
let mut decoded = vec![];
|
let mut decoded = vec![];
|
||||||
decode_all(&encoded, &mut decoded).expect("failed to decode");
|
decode_all(&encoded, &mut decoded);
|
||||||
assert_eq!(decoded.to_vec(), test.input, "{}", test.name);
|
assert_eq!(decoded.to_vec(), test.input, "{}", test.name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -125,7 +126,7 @@ fn test_encode_all() {
|
||||||
let mut encoded = vec![];
|
let mut encoded = vec![];
|
||||||
encode_all(&input, &mut encoded).expect("failed to encode");
|
encode_all(&input, &mut encoded).expect("failed to encode");
|
||||||
let mut decoded = vec![];
|
let mut decoded = vec![];
|
||||||
decode_all(&encoded, &mut decoded).expect("failed to decode");
|
decode_all(&encoded, &mut decoded);
|
||||||
assert_eq!(decoded.to_vec(), input, "{}", "120 ones");
|
assert_eq!(decoded.to_vec(), input, "{}", "120 ones");
|
||||||
|
|
||||||
input = ones(240)();
|
input = ones(240)();
|
||||||
|
@ -134,7 +135,7 @@ fn test_encode_all() {
|
||||||
let mut encoded = vec![];
|
let mut encoded = vec![];
|
||||||
encode_all(&input, &mut encoded).expect("failed to encode");
|
encode_all(&input, &mut encoded).expect("failed to encode");
|
||||||
let mut decoded = vec![];
|
let mut decoded = vec![];
|
||||||
decode_all(&encoded, &mut decoded).expect("failed to decode");
|
decode_all(&encoded, &mut decoded);
|
||||||
assert_eq!(decoded.to_vec(), input, "{}", "119 ones");
|
assert_eq!(decoded.to_vec(), input, "{}", "119 ones");
|
||||||
|
|
||||||
input = ones(241)();
|
input = ones(241)();
|
||||||
|
@ -143,7 +144,7 @@ fn test_encode_all() {
|
||||||
let mut encoded = vec![];
|
let mut encoded = vec![];
|
||||||
encode_all(&input, &mut encoded).expect("failed to encode");
|
encode_all(&input, &mut encoded).expect("failed to encode");
|
||||||
let mut decoded = vec![];
|
let mut decoded = vec![];
|
||||||
decode_all(&encoded, &mut decoded).expect("failed to decode");
|
decode_all(&encoded, &mut decoded);
|
||||||
assert_eq!(decoded.to_vec(), input, "{}", "239 ones");
|
assert_eq!(decoded.to_vec(), input, "{}", "239 ones");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue