mirror of https://github.com/milvus-io/milvus.git
Remove unused dependency (#12534)
Signed-off-by: Xiangyu Wang <xiangyu.wang@zilliz.com>pull/12584/head
parent
474f468897
commit
a37a1062e1
1
Makefile
1
Makefile
|
@ -100,7 +100,6 @@ build-cpp:
|
|||
@echo "Building Milvus cpp library ..."
|
||||
@(env bash $(PWD)/scripts/core_build.sh -f "$(CUSTOM_THIRDPARTY_PATH)")
|
||||
@(env bash $(PWD)/scripts/cwrapper_build.sh -t Release -f "$(CUSTOM_THIRDPARTY_PATH)")
|
||||
@(env bash $(PWD)/scripts/cwrapper_dablooms_build.sh -t Release -f "$(CUSTOM_THIRDPARTY_PATH)")
|
||||
@(env bash $(PWD)/scripts/cwrapper_rocksdb_build.sh -t Release -f "$(CUSTOM_THIRDPARTY_PATH)")
|
||||
|
||||
build-cpp-with-unittest:
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
output
|
||||
cmake-build-debug
|
||||
.idea
|
||||
cmake_build
|
|
@ -1,34 +0,0 @@
|
|||
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
# or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
cmake_minimum_required(VERSION 3.18)
project(dablooms LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# When the user did not pass -DCMAKE_INSTALL_PREFIX, install into the build
# tree instead of the system default. CMake always pre-populates
# CMAKE_INSTALL_PREFIX, so testing the variable for emptiness never fires;
# CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT is the documented way to detect
# "not set by the user", and FORCE is required to replace the cached default.
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}"
      CACHE PATH "Install location for the dablooms static library" FORCE)
endif()

# Static library built from the two translation units. The sources are listed
# exactly once; they were previously repeated through a PUBLIC target_sources()
# call, which also propagated them to every consumer of the target.
add_library(dablooms STATIC
  dablooms.cpp
  murmur.cpp
)

# Headers live next to the sources; consumers inherit the include path.
target_include_directories(dablooms
  PUBLIC
    "${CMAKE_CURRENT_SOURCE_DIR}"
)

# Place libdablooms.a directly in the install prefix (relative destination).
install(TARGETS dablooms ARCHIVE DESTINATION ".")
|
|
@ -1,17 +0,0 @@
|
|||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
|
@ -1,74 +0,0 @@
|
|||
#!/bin/bash

# Copyright (C) 2019-2020 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License.

# Configures, builds and installs the dablooms static library.
#   -t <type>  CMake build type (default: Debug)
#   -f <path>  custom thirdparty path, forwarded as CUSTOM_THIRDPARTY_DOWNLOAD_PATH
#   -h         print help and exit
# The environment variable `jobs` overrides the make parallelism.

# Abort on the first failing command so a broken configure step is not
# followed by a build against stale files.
set -e

# Resolve the directory containing this script, following symlinks.
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
    DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
    SOURCE="$(readlink "$SOURCE")"
    # A relative symlink is resolved relative to the directory holding the link.
    if [[ $SOURCE != /* ]]; then
        SOURCE="$DIR/$SOURCE"
    fi
done
DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"

CMAKE_BUILD="${DIR}/cmake_build"
OUTPUT_LIB="${DIR}/output"

BUILD_TYPE="Debug"
CUSTOM_THIRDPARTY_PATH=""

# "h" takes no argument; declaring it as "h:" made a bare -h an error.
while getopts "t:f:h" arg; do
    case $arg in
    f)
        CUSTOM_THIRDPARTY_PATH=$OPTARG
        ;;
    t)
        BUILD_TYPE=$OPTARG # BUILD_TYPE
        ;;
    h) # help
        echo "-t: build type(default: Debug)
-f: custom thirdparty path(default:)
-h: help
"
        exit 0
        ;;
    *)
        echo "ERROR! unknown argument"
        exit 1
        ;;
    esac
done
echo "BUILD_TYPE: " "$BUILD_TYPE"
echo "CUSTOM_THIRDPARTY_PATH: " "$CUSTOM_THIRDPARTY_PATH"

# Prepare directories only after argument parsing, so -h or a bad option
# does not wipe a previous build's output.
mkdir -p "${CMAKE_BUILD}"
rm -rf "${OUTPUT_LIB}"
mkdir "${OUTPUT_LIB}"

pushd "${CMAKE_BUILD}"

# Keep the command in an array so paths containing spaces stay single arguments.
CMAKE_CMD=(
    cmake
    "-DCMAKE_INSTALL_PREFIX=${OUTPUT_LIB}"
    "-DCMAKE_BUILD_TYPE=${BUILD_TYPE}"
    "-DCUSTOM_THIRDPARTY_DOWNLOAD_PATH=${CUSTOM_THIRDPARTY_PATH}"
    ..
)
echo "${CMAKE_CMD[@]}"
"${CMAKE_CMD[@]}"

# Honour a caller-provided job count; otherwise use the number of online CPUs.
if [[ -z "${jobs:-}" ]]; then
    jobs=$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)
fi
make -j "${jobs}" && make install
|
|
@ -1,442 +0,0 @@
|
|||
/* Copyright @2012 by Justin Hines at Bitly under a very liberal license. See LICENSE in the source distribution. */
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <fcntl.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "murmur.h"
|
||||
#include "dablooms.h"
|
||||
|
||||
#define DABLOOMS_VERSION "0.9.1"
|
||||
|
||||
#define ERROR_TIGHTENING_RATIO 0.5
|
||||
#define SALT_CONSTANT 0x97c29b3a
|
||||
|
||||
const char *dablooms_version(void)
|
||||
{
|
||||
return DABLOOMS_VERSION;
|
||||
}
|
||||
|
||||
/* Releases a bitmap together with its backing array. A null bitmap is ignored. */
void free_bitmap(bitmap_t *bitmap)
{
    if (bitmap == nullptr) {
        return;
    }
    free(bitmap->array);
    free(bitmap);
}
|
||||
|
||||
/* Resizes the backing array of `bitmap` to `new_size` bytes and zeroes any
 * newly added tail. If realloc fails the bitmap is left untouched (same array,
 * same `bytes`); the same bitmap pointer is returned in either case, so callers
 * that need to detect failure must compare `bitmap->bytes` with `new_size`. */
bitmap_t *bitmap_resize(bitmap_t *bitmap, size_t new_size)
{
    const size_t prev_size = (bitmap->array != nullptr) ? bitmap->bytes : 0;
    char *resized = (char *)realloc(bitmap->array, new_size);

    if (resized != nullptr) {
        bitmap->array = resized;
        bitmap->bytes = new_size;
        if (prev_size < new_size) {
            memset(resized + prev_size, 0, new_size - prev_size);
        }
    }
    /* Todo: malloc error -- currently reported only through unchanged bytes. */

    return bitmap;
}
|
||||
|
||||
/* Create a new bitmap, not full featured, simple to give
|
||||
* us a means of interacting with the 4 bit counters */
|
||||
bitmap_t *new_bitmap(size_t bytes)
|
||||
{
|
||||
bitmap_t *bitmap;
|
||||
if ((bitmap = (bitmap_t *)malloc(sizeof(bitmap_t))) == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if ((bitmap->array = (char*)malloc(bytes)) == nullptr) {
|
||||
free(bitmap);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
memset(bitmap->array, 0, bytes);
|
||||
bitmap->bytes = bytes;
|
||||
return bitmap;
|
||||
}
|
||||
|
||||
/* Increments the 4-bit counter number `index`, stored two per byte starting at
 * byte `offset` of the bitmap array (even indices use the high nibble, odd
 * indices the low nibble). Returns 0 on success, or -1 without modifying the
 * bitmap when the counter is already at its maximum of 15. */
int bitmap_increment(bitmap_t *bitmap, unsigned int index, long offset)
{
    const long pos = index / 2 + offset;
    const uint8_t current = bitmap->array[pos];
    const bool low_nibble = (index % 2 != 0);
    const uint8_t counter = low_nibble ? (current & 0x0f) : ((current & 0xf0) >> 4);

    if (counter == 0x0f) {
        // fprintf(stderr, "Error, 4 bit int Overflow\n");
        return -1;
    }

    /* The counter is below 15, so adding one unit to its nibble cannot carry. */
    bitmap->array[pos] = (uint8_t)(current + (low_nibble ? 0x01 : 0x10));
    return 0;
}
|
||||
|
||||
/* increments the four bit counter */
|
||||
int bitmap_decrement(bitmap_t *bitmap, unsigned int index, long offset)
|
||||
{
|
||||
long access = index / 2 + offset;
|
||||
uint8_t temp;
|
||||
uint8_t n = bitmap->array[access];
|
||||
|
||||
if (index % 2 != 0) {
|
||||
temp = (n & 0x0f);
|
||||
n = (n & 0xf0) + ((n & 0x0f) - 0x01);
|
||||
} else {
|
||||
temp = (n & 0xf0) >> 4;
|
||||
n = (n & 0x0f) + ((n & 0xf0) - 0x10);
|
||||
}
|
||||
|
||||
if (temp == 0x00) {
|
||||
// fprintf(stderr, "Error, Decrementing zero\n");
|
||||
// fprintf(stderr, "Bloom filter Error: you have deleted the same id more than 15 times!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
bitmap->array[access] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* decrements the four bit counter */
|
||||
int bitmap_check(bitmap_t *bitmap, unsigned int index, long offset)
|
||||
{
|
||||
long access = index / 2 + offset;
|
||||
if (index % 2 != 0 ) {
|
||||
return bitmap->array[access] & 0x0f;
|
||||
} else {
|
||||
return bitmap->array[access] & 0xf0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform the actual hashing for `key`
|
||||
*
|
||||
* Only call the hash once to get a pair of initial values (h1 and
|
||||
* h2). Use these values to generate all hashes in a quick loop.
|
||||
*
|
||||
* See paper by Kirsch, Mitzenmacher [2006]
|
||||
* http://www.eecs.harvard.edu/~michaelm/postscripts/rsa2008.pdf
|
||||
*/
|
||||
void hash_func(counting_bloom_t *bloom, const char *key, size_t key_len, uint32_t *hashes)
|
||||
{
|
||||
int i;
|
||||
uint32_t checksum[4];
|
||||
|
||||
MurmurHash3_x64_128(key, key_len, SALT_CONSTANT, checksum);
|
||||
uint32_t h1 = checksum[0];
|
||||
uint32_t h2 = checksum[1];
|
||||
|
||||
for (i = 0; i < bloom->nfuncs; i++) {
|
||||
hashes[i] = (h1 + i * h2) % bloom->counts_per_func;
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocates a counting bloom descriptor and derives its sizing from `capacity`
 * and `error_rate`.
 *
 * capacity   - number of elements the filter is sized for; must be non-zero
 * error_rate - target false-positive rate; must lie strictly between 0 and 1
 * offset     - byte offset of this filter's header inside the shared bitmap;
 *              the stored offset points just past that header
 *
 * The `bitmap` and `header` members are left null for the caller to attach.
 * Returns nullptr on invalid parameters or allocation failure. */
counting_bloom_t *counting_bloom_init(unsigned int capacity, double error_rate, long offset)
{
    /* Outside these ranges nfuncs or counts_per_func would come out as zero or
     * undefined, and hashing would later take a modulus by zero. */
    if (capacity == 0 || !(error_rate > 0.0 && error_rate < 1.0)) {
        return nullptr;
    }

    counting_bloom_t *bloom = (counting_bloom_t *)malloc(sizeof(counting_bloom_t));
    if (bloom == nullptr) {
        return nullptr;
    }

    bloom->bitmap = nullptr;
    bloom->header = nullptr;
    bloom->capacity = capacity;
    bloom->error_rate = error_rate;
    bloom->offset = offset + sizeof(counting_bloom_header_t);
    bloom->nfuncs = (size_t) ceil(log(1 / error_rate) / log(2));
    bloom->counts_per_func = (unsigned int) ceil(capacity * fabs(log(error_rate)) / (bloom->nfuncs * pow(log(2), 2)));
    bloom->size = bloom->nfuncs * bloom->counts_per_func;
    /* rounding-up integer divide by 2 of bloom->size */
    bloom->num_bytes = ((bloom->size + 1) / 2) + sizeof(counting_bloom_header_t);

    /* Scratch buffer reused by every add/remove/check on this filter. */
    bloom->hashes = (uint32_t *)calloc(bloom->nfuncs, sizeof(uint32_t));
    if (bloom->hashes == nullptr) {
        free(bloom);
        return nullptr;
    }

    return bloom;
}
|
||||
|
||||
/* Adds the `len`-byte key `s` by incrementing one counter per hash function
 * and bumping the header's element count. Returns -1 if any counter was
 * already saturated (the remaining counters are still incremented), else 0. */
int counting_bloom_add(counting_bloom_t *bloom, const char *s, size_t len)
{
    unsigned int *slots = bloom->hashes;
    hash_func(bloom, s, len, slots);

    int status = 0;
    for (unsigned int fn = 0; fn < bloom->nfuncs; fn++) {
        /* Each hash function owns its own contiguous range of counters. */
        const unsigned int base = fn * bloom->counts_per_func;
        const unsigned int counter = slots[fn] + base;
        if (bitmap_increment(bloom->bitmap, counter, bloom->offset) == -1) {
            status = -1;
        }
    }
    bloom->header->count++;

    return status;
}
|
||||
|
||||
/* Removes the `len`-byte key `s` by decrementing one counter per hash function
 * and lowering the header's element count. Returns -1 if any counter was
 * already zero (the remaining counters are still decremented), else 0. */
int counting_bloom_remove(counting_bloom_t *bloom, const char *s, size_t len)
{
    unsigned int *slots = bloom->hashes;
    hash_func(bloom, s, len, slots);

    int status = 0;
    for (unsigned int fn = 0; fn < bloom->nfuncs; fn++) {
        /* Each hash function owns its own contiguous range of counters. */
        const unsigned int base = fn * bloom->counts_per_func;
        const unsigned int counter = slots[fn] + base;
        if (bitmap_decrement(bloom->bitmap, counter, bloom->offset) == -1) {
            status = -1;
        }
    }
    bloom->header->count--;

    return status;
}
|
||||
|
||||
/* Tests whether the `len`-byte key `s` may be present: returns 1 when every
 * counter selected by the hash functions is non-zero, 0 as soon as one is zero. */
int counting_bloom_check(counting_bloom_t *bloom, const char *s, size_t len)
{
    unsigned int *slots = bloom->hashes;
    hash_func(bloom, s, len, slots);

    for (unsigned int fn = 0; fn < bloom->nfuncs; fn++) {
        const unsigned int base = fn * bloom->counts_per_func;
        const unsigned int counter = slots[fn] + base;
        if (bitmap_check(bloom->bitmap, counter, bloom->offset) == 0) {
            return 0;
        }
    }
    return 1;
}
|
||||
|
||||
/* Releases a scaling bloom filter: every counting sub-filter and its hash
 * scratch buffer, the sub-filter table, the bitmap, and the filter itself.
 * Null `bloom`, a null table and null table entries are tolerated so that
 * partially constructed filters can be torn down safely. Always returns 0.
 * NOTE(review): the bitmap is freed even when it was supplied by the caller
 * via new_scaling_bloom_from_bitmap -- confirm callers expect to hand over
 * ownership. */
int free_scaling_bloom(scaling_bloom_t *bloom)
{
    if (bloom == nullptr) {
        return 0;
    }

    if (bloom->blooms != nullptr) {
        /* Walk from the newest sub-filter down to the first one. */
        for (unsigned int i = bloom->num_blooms; i-- > 0;) {
            counting_bloom_t *cur = bloom->blooms[i];
            if (cur != nullptr) {
                free(cur->hashes);
                free(cur);
            }
        }
        free(bloom->blooms);
    }

    free_bitmap(bloom->bitmap);
    free(bloom);
    return 0;
}
|
||||
|
||||
/* creates a new counting bloom filter from a given scaling bloom filter, with count and id */
|
||||
counting_bloom_t *new_counting_bloom_from_scale(scaling_bloom_t *bloom, bool extern_bitmap = false)
|
||||
{
|
||||
int i;
|
||||
long offset;
|
||||
double error_rate;
|
||||
counting_bloom_t *cur_bloom;
|
||||
|
||||
error_rate = bloom->error_rate * (pow(ERROR_TIGHTENING_RATIO, bloom->num_blooms + 1));
|
||||
|
||||
if ((bloom->blooms = (counting_bloom_t **)realloc(bloom->blooms, (bloom->num_blooms + 1) * sizeof(counting_bloom_t *))) == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
cur_bloom = counting_bloom_init(bloom->capacity, error_rate, bloom->num_bytes);
|
||||
bloom->blooms[bloom->num_blooms] = cur_bloom;
|
||||
bloom->num_blooms++;
|
||||
|
||||
if (!extern_bitmap) {
|
||||
bloom->bitmap = bitmap_resize(bloom->bitmap, bloom->num_bytes + cur_bloom->num_bytes);
|
||||
/* reset header pointer, as realloc may have moved */
|
||||
bloom->header = (scaling_bloom_header_t *) bloom->bitmap->array;
|
||||
/* Set the pointers for these header structs to the right location since realloc may have moved */
|
||||
for (i = 0; i < bloom->num_blooms; i++) {
|
||||
offset = bloom->blooms[i]->offset - sizeof(counting_bloom_header_t);
|
||||
bloom->blooms[i]->header = (counting_bloom_header_t *) (bloom->bitmap->array + offset);
|
||||
}
|
||||
} else {
|
||||
offset = cur_bloom->offset - sizeof(counting_bloom_header_t);
|
||||
cur_bloom->header = (counting_bloom_header_t *) (bloom->bitmap->array + offset);
|
||||
}
|
||||
|
||||
bloom->num_bytes += cur_bloom->num_bytes;
|
||||
cur_bloom->bitmap = bloom->bitmap;
|
||||
|
||||
return cur_bloom;
|
||||
}
|
||||
|
||||
/* Resets the header's mem_seqnum to 0 and returns the value it held before. */
uint64_t scaling_bloom_clear_seqnums(scaling_bloom_t *bloom)
{
    scaling_bloom_header_t *hdr = bloom->header;
    const uint64_t previous = hdr->mem_seqnum;
    hdr->mem_seqnum = 0;
    return previous;
}
|
||||
|
||||
/* Adds the `len`-byte key `s` with identifier `id` to the scaling filter.
 *
 * The key goes into the newest sub-filter whose starting id is <= `id` (or the
 * oldest sub-filter if none qualifies). When `id` exceeds the largest id seen
 * so far and the chosen sub-filter has reached its capacity, a new sub-filter
 * is appended and used instead. mem_seqnum is zeroed for the duration of the
 * update and set to its previous value plus one afterwards.
 *
 * Returns 1 on success, or -1 when a counter overflowed, the filter has no
 * sub-filters, or a new sub-filter could not be allocated (nothing is added
 * and mem_seqnum is restored in the latter case). */
int scaling_bloom_add(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id)
{
    counting_bloom_t *cur_bloom = nullptr;
    for (int i = bloom->num_blooms - 1; i >= 0; i--) {
        cur_bloom = bloom->blooms[i];
        if (id >= cur_bloom->header->id) {
            break;
        }
    }
    if (cur_bloom == nullptr) {
        /* No sub-filter exists, so there is nowhere to store the key. */
        return -1;
    }

    const uint64_t seqnum = scaling_bloom_clear_seqnums(bloom);

    if ((id > bloom->header->max_id) && (cur_bloom->header->count >= cur_bloom->capacity)) {
        counting_bloom_t *fresh = new_counting_bloom_from_scale(bloom);
        if (fresh == nullptr) {
            /* Nothing was modified; put the sequence number back. */
            bloom->header->mem_seqnum = seqnum;
            return -1;
        }
        cur_bloom = fresh;
        cur_bloom->header->count = 0;
        cur_bloom->header->id = bloom->header->max_id + 1;
    }
    if (bloom->header->max_id < id) {
        bloom->header->max_id = id;
    }

    const bool error = (counting_bloom_add(cur_bloom, s, len) == -1);

    bloom->header->mem_seqnum = seqnum + 1;

    return error ? -1 : 1;
}
|
||||
|
||||
/* Removes the `len`-byte key `s` with identifier `id` from the newest
 * sub-filter whose starting id is <= `id`. mem_seqnum is zeroed during the
 * update and set to its previous value plus one afterwards.
 * Returns 1 on success, -1 if a counter was already zero, and 0 when no
 * sub-filter covers `id` (nothing is modified). */
int scaling_bloom_remove(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id)
{
    for (int i = bloom->num_blooms - 1; i >= 0; i--) {
        counting_bloom_t *candidate = bloom->blooms[i];
        if (id < candidate->header->id) {
            continue;
        }

        const uint64_t seqnum = scaling_bloom_clear_seqnums(bloom);
        const int rc = counting_bloom_remove(candidate, s, len);
        bloom->header->mem_seqnum = seqnum + 1;

        return (rc == -1) ? -1 : 1;
    }
    return 0;
}
|
||||
|
||||
/* Returns 1 if any sub-filter, searched newest first, reports the `len`-byte
 * key `s` as possibly present; returns 0 otherwise. */
int scaling_bloom_check(scaling_bloom_t *bloom, const char *s, size_t len)
{
    int idx = bloom->num_blooms - 1;
    while (idx >= 0) {
        if (counting_bloom_check(bloom->blooms[idx], s, len) != 0) {
            return 1;
        }
        idx--;
    }
    return 0;
}
|
||||
|
||||
/* Allocates a scaling bloom descriptor with no sub-filters yet.
 * When `bitmap` is null a fresh bitmap just large enough for the scaling
 * header is created; otherwise the given bitmap is used as-is. The header
 * pointer is aimed at the start of the bitmap array.
 * Returns nullptr if an allocation fails. */
scaling_bloom_t *scaling_bloom_init(unsigned int capacity, double error_rate, bitmap_t* bitmap = nullptr)
{
    scaling_bloom_t *created = (scaling_bloom_t *)malloc(sizeof(scaling_bloom_t));
    if (created == nullptr) {
        return nullptr;
    }

    bitmap_t *backing = bitmap;
    if (backing == nullptr) {
        backing = new_bitmap(sizeof(scaling_bloom_header_t));
        if (backing == nullptr) {
            free(created);
            return nullptr;
        }
    }

    created->bitmap = backing;
    created->header = (scaling_bloom_header_t *) backing->array;
    created->capacity = capacity;
    created->error_rate = error_rate;
    created->num_blooms = 0;
    created->num_bytes = sizeof(scaling_bloom_header_t);
    created->blooms = nullptr;

    return created;
}
|
||||
|
||||
/* Creates an empty scaling bloom filter with one initial counting sub-filter
 * (count 0, starting id 0) and mem_seqnum set to 1.
 * Returns nullptr if any allocation fails; nothing is leaked in that case. */
scaling_bloom_t *new_scaling_bloom(unsigned int capacity, double error_rate)
{
    scaling_bloom_t *bloom = scaling_bloom_init(capacity, error_rate);
    if (bloom == nullptr) {
        /* Previously the null result was passed on and dereferenced. */
        return nullptr;
    }

    counting_bloom_t *cur_bloom = new_counting_bloom_from_scale(bloom);
    if (cur_bloom == nullptr) {
        free_scaling_bloom(bloom);
        return nullptr;
    }
    cur_bloom->header->count = 0;
    cur_bloom->header->id = 0;

    bloom->header->mem_seqnum = 1;
    return bloom;
}
|
||||
|
||||
/* Rebuilds a scaling bloom filter on top of an existing `bitmap`, creating
 * sub-filter descriptors until their sizes account for every byte after the
 * scaling header. Header counts and ids are left as stored in the bitmap.
 * `capacity` and `error_rate` must match the values the bitmap was built with,
 * since they determine each sub-filter's size.
 *
 * Returns nullptr when `bitmap` is null, when an allocation fails, or when the
 * bitmap length does not match a whole number of sub-filters.
 * NOTE(review): on every failure after the descriptor has been allocated,
 * free_scaling_bloom also frees the caller's bitmap (as before this change) --
 * confirm that callers expect to give up ownership. */
scaling_bloom_t *new_scaling_bloom_from_bitmap(unsigned int capacity, double error_rate, bitmap_t* bitmap)
{
    if (bitmap == nullptr) {
        /* scaling_bloom_init would silently allocate a fresh bitmap and the
         * size computation below would dereference the null pointer. */
        return nullptr;
    }

    scaling_bloom_t *bloom = scaling_bloom_init(capacity, error_rate, bitmap);
    if (bloom == nullptr) {
        return nullptr;
    }

    if (bitmap->bytes < sizeof(scaling_bloom_header_t)) {
        free_scaling_bloom(bloom);
        return nullptr;
    }

    /* Track the remaining bytes as size_t so large bitmaps are not truncated. */
    size_t remaining = bitmap->bytes - sizeof(scaling_bloom_header_t);
    while (remaining > 0) {
        counting_bloom_t *cur_bloom = new_counting_bloom_from_scale(bloom, true);
        // leave count and id as they were set in the file
        if (cur_bloom == nullptr || cur_bloom->num_bytes > remaining) {
            free_scaling_bloom(bloom);
            return nullptr;
        }
        remaining -= cur_bloom->num_bytes;
    }

    return bloom;
}
|
||||
|
||||
/* Estimates the memory used by a scaling bloom filter in bytes: the
 * descriptor, the bitmap bytes, one descriptor plus one table pointer per
 * sub-filter, and each sub-filter's hash scratch buffer. Returns 0 for null. */
size_t bloom_size(scaling_bloom_t *bloom) {
    if (bloom == nullptr) {
        return 0;
    }

    size_t total = sizeof(scaling_bloom_t);
    total += bloom->num_bytes;
    total += bloom->num_blooms * (sizeof(counting_bloom_t) + sizeof(void*));

    unsigned int idx = 0;
    while (idx < bloom->num_blooms) {
        total += bloom->blooms[idx]->nfuncs * sizeof(uint32_t);
        idx++;
    }
    return total;
}
|
|
@ -1,81 +0,0 @@
|
|||
/* Copyright @2012 by Justin Hines at Bitly under a very liberal license. See LICENSE in the source distribution. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef __BLOOM_H__
|
||||
#define __BLOOM_H__
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
const char *dablooms_version(void);
|
||||
|
||||
typedef struct {
|
||||
size_t bytes;
|
||||
char *array;
|
||||
} bitmap_t;
|
||||
|
||||
|
||||
/* Resizes the bitmap's array to new_size bytes, zero-filling any added bytes.
 * The prototype now matches the two-parameter definition in dablooms.cpp; the
 * former three-parameter declaration named a function that was never defined. */
bitmap_t *bitmap_resize(bitmap_t *bitmap, size_t new_size);
|
||||
bitmap_t *new_bitmap(size_t bytes);
|
||||
|
||||
int bitmap_increment(bitmap_t *bitmap, unsigned int index, long offset);
|
||||
int bitmap_decrement(bitmap_t *bitmap, unsigned int index, long offset);
|
||||
int bitmap_check(bitmap_t *bitmap, unsigned int index, long offset);
|
||||
|
||||
void free_bitmap(bitmap_t *bitmap);
|
||||
|
||||
typedef struct {
|
||||
uint64_t id;
|
||||
uint32_t count;
|
||||
uint32_t _pad;
|
||||
} counting_bloom_header_t;
|
||||
|
||||
typedef struct {
|
||||
counting_bloom_header_t *header;
|
||||
unsigned int capacity;
|
||||
long offset;
|
||||
unsigned int counts_per_func;
|
||||
uint32_t *hashes;
|
||||
size_t nfuncs;
|
||||
size_t size;
|
||||
size_t num_bytes;
|
||||
double error_rate;
|
||||
bitmap_t *bitmap;
|
||||
} counting_bloom_t;
|
||||
|
||||
int counting_bloom_add(counting_bloom_t *bloom, const char *s, size_t len);
|
||||
int counting_bloom_remove(counting_bloom_t *bloom, const char *s, size_t len);
|
||||
int counting_bloom_check(counting_bloom_t *bloom, const char *s, size_t len);
|
||||
|
||||
typedef struct {
|
||||
uint64_t max_id;
|
||||
uint64_t mem_seqnum;
|
||||
uint64_t reserved;
|
||||
} scaling_bloom_header_t;
|
||||
|
||||
typedef struct {
|
||||
scaling_bloom_header_t *header;
|
||||
unsigned int capacity;
|
||||
unsigned int num_blooms;
|
||||
size_t num_bytes;
|
||||
double error_rate;
|
||||
counting_bloom_t **blooms;
|
||||
bitmap_t *bitmap;
|
||||
} scaling_bloom_t;
|
||||
|
||||
scaling_bloom_t *new_scaling_bloom(unsigned int capacity, double error_rate);
|
||||
scaling_bloom_t *new_scaling_bloom_from_bitmap(unsigned int capacity, double error_rate, bitmap_t* bitmap);
|
||||
int free_scaling_bloom(scaling_bloom_t *bloom);
|
||||
int scaling_bloom_add(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id);
|
||||
int scaling_bloom_remove(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id);
|
||||
int scaling_bloom_check(scaling_bloom_t *bloom, const char *s, size_t len);
|
||||
size_t bloom_size(scaling_bloom_t *bloom);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -1,120 +0,0 @@
|
|||
//-----------------------------------------------------------------------------
|
||||
// MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||
// domain. The author hereby disclaims copyright to this source code.
|
||||
|
||||
// Note - The x86 and x64 versions do _not_ produce the same results, as the
|
||||
// algorithms are optimized for their respective platforms. You can still
|
||||
// compile and run any of them on any platform, but your performance with the
|
||||
// non-native version will be less than optimal.
|
||||
|
||||
#include "murmur.h"
|
||||
|
||||
#define FORCE_INLINE inline static
|
||||
|
||||
/* Rotates the 64-bit value `x` left by `r` bit positions. */
FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r )
{
    const uint64_t shifted_up = x << r;
    const uint64_t wrapped = x >> (64 - r);
    return shifted_up | wrapped;
}
|
||||
|
||||
#define ROTL64(x,y) rotl64(x,y)
|
||||
|
||||
#define BIG_CONSTANT(x) (x##LLU)
|
||||
|
||||
#define getblock(x, i) (x[i])
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Finalization mix - force all bits of a hash block to avalanche
|
||||
|
||||
/* Final mixing step: three xor-shifts by 33 bits interleaved with two
 * multiplications by fixed constants. */
FORCE_INLINE uint64_t fmix64(uint64_t k)
{
    uint64_t mixed = k;

    mixed = (mixed ^ (mixed >> 33)) * BIG_CONSTANT(0xff51afd7ed558ccd);
    mixed = (mixed ^ (mixed >> 33)) * BIG_CONSTANT(0xc4ceb9fe1a85ec53);
    mixed = mixed ^ (mixed >> 33);

    return mixed;
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
void MurmurHash3_x64_128 ( const void * key, const int len,
|
||||
const uint32_t seed, void * out )
|
||||
{
|
||||
const uint8_t * data = (const uint8_t*)key;
|
||||
const int nblocks = len / 16;
|
||||
|
||||
uint64_t h1 = seed;
|
||||
uint64_t h2 = seed;
|
||||
|
||||
uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
|
||||
uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
|
||||
|
||||
int i;
|
||||
|
||||
//----------
|
||||
// body
|
||||
|
||||
const uint64_t * blocks = (const uint64_t *)(data);
|
||||
|
||||
for(i = 0; i < nblocks; i++) {
|
||||
uint64_t k1 = getblock(blocks,i*2+0);
|
||||
uint64_t k2 = getblock(blocks,i*2+1);
|
||||
|
||||
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
|
||||
|
||||
h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
|
||||
|
||||
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
|
||||
|
||||
h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
|
||||
}
|
||||
|
||||
//----------
|
||||
// tail
|
||||
|
||||
const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
|
||||
|
||||
uint64_t k1 = 0;
|
||||
uint64_t k2 = 0;
|
||||
|
||||
switch(len & 15) {
|
||||
case 15: k2 ^= ((uint64_t)tail[14]) << 48;
|
||||
case 14: k2 ^= ((uint64_t)tail[13]) << 40;
|
||||
case 13: k2 ^= ((uint64_t)tail[12]) << 32;
|
||||
case 12: k2 ^= ((uint64_t)tail[11]) << 24;
|
||||
case 11: k2 ^= ((uint64_t)tail[10]) << 16;
|
||||
case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
|
||||
case 9: k2 ^= ((uint64_t)tail[ 8]) << 0;
|
||||
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
|
||||
|
||||
case 8: k1 ^= ((uint64_t)tail[ 7]) << 56;
|
||||
case 7: k1 ^= ((uint64_t)tail[ 6]) << 48;
|
||||
case 6: k1 ^= ((uint64_t)tail[ 5]) << 40;
|
||||
case 5: k1 ^= ((uint64_t)tail[ 4]) << 32;
|
||||
case 4: k1 ^= ((uint64_t)tail[ 3]) << 24;
|
||||
case 3: k1 ^= ((uint64_t)tail[ 2]) << 16;
|
||||
case 2: k1 ^= ((uint64_t)tail[ 1]) << 8;
|
||||
case 1: k1 ^= ((uint64_t)tail[ 0]) << 0;
|
||||
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
|
||||
}
|
||||
|
||||
//----------
|
||||
// finalization
|
||||
|
||||
h1 ^= len; h2 ^= len;
|
||||
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
|
||||
h1 = fmix64(h1);
|
||||
h2 = fmix64(h2);
|
||||
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
|
||||
((uint64_t*)out)[0] = h1;
|
||||
((uint64_t*)out)[1] = h2;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
|
@ -1,12 +0,0 @@
|
|||
//-----------------------------------------------------------------------------
|
||||
// MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||
// domain. The author hereby disclaims copyright to this source code.
|
||||
|
||||
#ifndef _MURMURHASH3_H_
|
||||
#define _MURMURHASH3_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
|
||||
|
||||
#endif // _MURMURHASH3_H_
|
|
@ -1,61 +0,0 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
package dablooms
|
||||
|
||||
/*
|
||||
#cgo CFLAGS: -I${SRCDIR}/cwrapper
|
||||
|
||||
#cgo LDFLAGS: -L${SRCDIR}/cwrapper/output -ldablooms -lstdc++ -lm
|
||||
#include <stdlib.h>
|
||||
#include <dablooms.h>
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// ScalingBloom is a scaling bloom filter that supports remove elements after added
|
||||
type ScalingBloom struct {
|
||||
cfilter *C.scaling_bloom_t
|
||||
}
|
||||
|
||||
// NewScalingBloom returns a ScalingBloom object
|
||||
func NewScalingBloom(capacity uint64, errorRate float64) *ScalingBloom {
|
||||
sb := &ScalingBloom{
|
||||
cfilter: C.new_scaling_bloom(C.uint(capacity), C.double(errorRate)),
|
||||
}
|
||||
return sb
|
||||
}
|
||||
|
||||
// Destroy is used to free memory of this object
|
||||
func (sb *ScalingBloom) Destroy() {
|
||||
C.free_scaling_bloom(sb.cfilter)
|
||||
}
|
||||
|
||||
// Add is used to add an element to this bloom filter
|
||||
func (sb *ScalingBloom) Add(key []byte, id int64) bool {
|
||||
cKey := (*C.char)(unsafe.Pointer(&key[0]))
|
||||
return C.scaling_bloom_add(sb.cfilter, cKey, C.size_t(len(key)), C.uint64_t(id)) == 1
|
||||
}
|
||||
|
||||
// Remove is used to remove an element from this bloom filter
|
||||
func (sb *ScalingBloom) Remove(key []byte, id int64) bool {
|
||||
cKey := (*C.char)(unsafe.Pointer(&key[0]))
|
||||
return C.scaling_bloom_remove(sb.cfilter, cKey, C.size_t(len(key)), C.uint64_t(id)) == 1
|
||||
}
|
||||
|
||||
// Check returns whether a key may exist in this bloom filter
|
||||
func (sb *ScalingBloom) Check(key []byte) bool {
|
||||
cKey := (*C.char)(unsafe.Pointer(&key[0]))
|
||||
return C.scaling_bloom_check(sb.cfilter, cKey, C.size_t(len(key))) == 1
|
||||
}
|
|
@ -1,91 +0,0 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
package dablooms
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
type stats struct {
|
||||
TruePositives int64
|
||||
TrueNegatives int64
|
||||
FalsePositives int64
|
||||
FalseNegatives int64
|
||||
}
|
||||
|
||||
var Capacity uint64 = 1000000
|
||||
var ErrorRate float64 = .05
|
||||
|
||||
func PrintResults(stats *stats) {
|
||||
falsePositiveRate := float64(stats.FalsePositives) / float64(stats.FalsePositives+stats.TrueNegatives)
|
||||
fmt.Printf("True positives: %7d\n", stats.TruePositives)
|
||||
fmt.Printf("True negatives: %7d\n", stats.TrueNegatives)
|
||||
fmt.Printf("False positives: %7d\n", stats.FalsePositives)
|
||||
fmt.Printf("False negatives: %7d\n", stats.FalseNegatives)
|
||||
fmt.Printf("False positive rate: %f\n", falsePositiveRate)
|
||||
|
||||
if falsePositiveRate > ErrorRate {
|
||||
fmt.Printf("False positive rate too high\n")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDablooms_Correctness(t *testing.T) {
|
||||
sb := NewScalingBloom(Capacity, ErrorRate)
|
||||
assert.NotNil(t, sb)
|
||||
|
||||
start := time.Now().UnixNano()
|
||||
for i := 0; i < int(Capacity*2); i++ {
|
||||
if i%2 == 0 {
|
||||
key := strconv.Itoa(i)
|
||||
sb.Add([]byte(key), int64(i))
|
||||
}
|
||||
}
|
||||
end := time.Now().UnixNano()
|
||||
|
||||
seconds := float64((end - start) / 1e9)
|
||||
fmt.Printf("The time cost for add: %fs\n", seconds)
|
||||
|
||||
results := &stats{
|
||||
TruePositives: 0,
|
||||
TrueNegatives: 0,
|
||||
FalsePositives: 0,
|
||||
FalseNegatives: 0,
|
||||
}
|
||||
|
||||
start = time.Now().UnixNano()
|
||||
for i := 0; i < int(Capacity*2); i++ {
|
||||
if i%2 == 1 {
|
||||
key := strconv.Itoa(i)
|
||||
positive := sb.Check([]byte(key))
|
||||
if positive {
|
||||
results.FalsePositives++
|
||||
} else {
|
||||
results.TrueNegatives++
|
||||
}
|
||||
}
|
||||
}
|
||||
end = time.Now().UnixNano()
|
||||
seconds = float64((end - start) / 1e9)
|
||||
fmt.Printf("Time cost for check: %fs\n", seconds)
|
||||
|
||||
sb.Destroy()
|
||||
|
||||
PrintResults(results)
|
||||
|
||||
// False negatives means that there should
|
||||
assert.False(t, results.FalseNegatives > 0)
|
||||
}
|
Loading…
Reference in New Issue