milvus/core/thirdparty/dablooms/dablooms.cpp

597 lines
16 KiB
C++
Raw Normal View History

Delete and WAL feature branch merge (#1436) * add read/write lock * change compact to ddl queue * add api to get vector data * add flush / merge / compact lock * add api to get vector data * add data size for table info * add db recovery test * add data_size check * change file name to uppercase Signed-off-by: jinhai <hai.jin@zilliz.com> * update wal flush_merge_compact_mutex_ * update wal flush_merge_compact_mutex_ * change requirement * change requirement * upd requirement * add logging * add logging * add logging * add logging * add logging * add logging * add logging * add logging * add logging * delete part * add all size checks * fix bug * update faiss get_vector_by_id * add get_vector case * update get vector by id * update server * fix DBImpl * attempting to fix #1268 * lint * update unit test * fix #1259 * issue 1271 fix wal config * update * fix cases Signed-off-by: del.zhenwu <zhenxiang.li@zilliz.com> * update read / write error message * update read / write error message * [skip ci] get vectors by id from raw files instead faiss * [skip ci] update FilesByType meta * update * fix ci error * update * lint * Hide partition_name parameter * Remove douban pip source Signed-off-by: zhenwu <zw@zilliz.com> * Update epsilon value in test cases Signed-off-by: zhenwu <zw@zilliz.com> * Add default partition * Caiyd crud (#1313) * fix clang format Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * fix unittest build error Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * add faiss_bitset_test Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * avoid user directly operate partition table * fix has table bug * Caiyd crud (#1323) * fix clang format Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * fix unittest build error Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * use compile option -O3 Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * update faiss_bitset_test.cpp Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * change open flags * change 
OngoingFileChecker to static instance * mark ongoing files when applying deletes * update clean up with ttl * fix centos ci * update * lint * update partition Signed-off-by: zhenwu <zw@zilliz.com> * update delete and flush to include partitions * update * Update cases Signed-off-by: zhenwu <zw@zilliz.com> * Fix test cases crud (#1350) * fix order * add wal case Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix wal case Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix wal case Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix wal case Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix invalid operation issue Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix invalid operation issue Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix bug Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix bug Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * crud fix Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * crud fix Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * add table info test cases Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix cases Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix cases Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix cases Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix cases Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix cases Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> Signed-off-by: JinHai-CN <hai.jin@zilliz.com> * merge cases Signed-off-by: zhenwu <zw@zilliz.com> * Shengjun (#1349) * Add GPU sharing solution on native Kubernetes (#1102) * run hadolint with reviewdog * add LINCENSE in Dockerfile * run hadolint with reviewdog * Reporter of reviewdog command is "github-pr-check" * format 
Dockerfile * ignore DL3007 in hadolint * clean up old docker images * Add GPU sharing solution on native Kubernetes * nightly test mailer * Fix http server bug (#1096) * refactoring(create_table done) * refactoring * refactor server delivery (insert done) * refactoring server module (count_table done) * server refactor done * cmake pass * refactor server module done. * set grpc response status correctly * format done. * fix redefine ErrorMap() * optimize insert reducing ids data copy * optimize grpc request with reducing data copy * clang format * [skip ci] Refactor server module done. update changlog. prepare for PR * remove explicit and change int32_t to int64_t * add web server * [skip ci] add license in web module * modify header include & comment oatpp environment config * add port configure & create table in handler * modify web url * simple url complation done & add swagger * make sure web url * web functionality done. debuging * add web unittest * web test pass * add web server port * add web server port in template * update unittest cmake file * change web server default port to 19121 * rename method in web module & unittest pass * add search case in unittest for web module * rename some variables * fix bug * unittest pass * web prepare * fix cmd bug(check server status) * update changlog * add web port validate & default set * clang-format pass * add web port test in unittest * add CORS & redirect root to swagger ui * add web status * web table method func cascade test pass * add config url in web module * modify thirdparty cmake to avoid building oatpp test * clang format * update changlog * add constants in web module * reserve Config.cpp * fix constants reference bug * replace web server with async module * modify component to support async * format * developing controller & add test clent into unittest * add web port into demo/server_config * modify thirdparty cmake to allow build test * remove unnecessary comment * add endpoint info in controller * 
finish web test(bug here) * clang format * add web test cpp to lint exclusions * check null field in GetConfig * add macro RETURN STATUS DTo * fix cmake conflict * fix crash when exit server * remove surplus comments & add http param check * add uri /docs to direct swagger * format * change cmd to system * add default value & unittest in web module * add macros to judge if GPU supported * add macros in unit & add default in index dto & print error message when bind http port fail * format (fix #788) * fix cors bug (not completed) * comment cors * change web framework to simple api * comments optimize * change to simple API * remove comments in controller.hpp * remove EP_COMMON_CMAKE_ARGS in oatpp and oatpp-swagger * add ep cmake args to sqlite * clang-format * change a format * test pass * change name to * fix compiler issue(oatpp-swagger depend on oatpp) * add & in start_server.h * specify lib location with oatpp and oatpp-swagger * add comments * add swagger definition * [skip ci] change http method options status code * remove oatpp swagger(fix #970) * remove comments * check Start web behavior * add default to cpu_cache_capacity * remove swagger component.hpp & /docs url * remove /docs info * remove /docs in unittest * remove space in test rpc * remove repeate info in CHANGLOG * change cache_insert_data default value as a constant * [skip ci] Fix some broken links (#960) * [skip ci] Fix broken link * [skip ci] Fix broken link * [skip ci] Fix broken link * [skip ci] Fix broken links * fix issue 373 (#964) * fix issue 373 * Adjustment format * Adjustment format * Adjustment format * change readme * #966 update NOTICE.md (#967) * remove comments * check Start web behavior * add default to cpu_cache_capacity * remove swagger component.hpp & /docs url * remove /docs info * remove /docs in unittest * remove space in test rpc * remove repeate info in CHANGLOG * change cache_insert_data default value as a constant * adjust web port cofig place * rename web_port 
variable * change gpu resources invoke way to cmd() * set advanced config name add DEFAULT * change config setting to cmd * modify .. * optimize code * assign TableDto' count default value 0 (fix #995) * check if table exists when show partitions (fix #1028) * check table exists when drop partition (fix #1029) * check if partition name is legal (fix #1022) * modify status code when partition tag is illegal * update changlog * add info to /system url * add binary index and add bin uri & handler method(not completed) * optimize http insert and search time(fix #1066) | add binary vectors support(fix #1067) * fix test partition bug * fix test bug when check insert records * add binary vectors test * add default for offset and page_size * fix uinttest bug * [skip ci] remove comments * optimize web code for PR comments * add new folder named utils * check offset and pagesize (fix #1082) * improve error message if offset or page_size is not legal (fix #1075) * add log into web module * update changlog * check gpu sources setting when assign repeated value (fix #990) * update changlog * clang-format pass * add default handler in http handler * [skip ci] improve error msg when check gpu resources * change check offset way * remove func IsIntStr * add case * change int32 to int64 when check number str * add log in we module(doing) * update test case * add log in web controller Co-authored-by: jielinxu <52057195+jielinxu@users.noreply.github.com> Co-authored-by: JackLCL <53512883+JackLCL@users.noreply.github.com> Co-authored-by: Cai Yudong <yudong.cai@zilliz.com> * Filtering for specific paths in Jenkins CI (#1107) * run hadolint with reviewdog * add LINCENSE in Dockerfile * run hadolint with reviewdog * Reporter of reviewdog command is "github-pr-check" * format Dockerfile * ignore DL3007 in hadolint * clean up old docker images * Add GPU sharing solution on native Kubernetes * nightly test mailer * Filtering for specific paths in Jenkins CI * Filtering for specific paths in 
Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Fix Filtering for specific paths in Jenkins CI bug (#1109) * run hadolint with reviewdog * add LINCENSE in Dockerfile * run hadolint with reviewdog * Reporter of reviewdog command is "github-pr-check" * format Dockerfile * ignore DL3007 in hadolint * clean up old docker images * Add GPU sharing solution on native Kubernetes * nightly test mailer * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Fix Filtering for specific paths in Jenkins CI bug (#1110) * run hadolint with reviewdog * add LINCENSE in Dockerfile * run hadolint with reviewdog * 
Reporter of reviewdog command is "github-pr-check" * format Dockerfile * ignore DL3007 in hadolint * clean up old docker images * Add GPU sharing solution on native Kubernetes * nightly test mailer * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Don't skip ci when triggered by a time (#1113) * run hadolint with reviewdog * add LINCENSE in Dockerfile * run hadolint with reviewdog * Reporter of reviewdog command is "github-pr-check" * format Dockerfile * ignore DL3007 in hadolint * clean up old docker images * Add GPU sharing solution on native Kubernetes * nightly test mailer * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in 
Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Don't skip ci when triggered by a time * Don't skip ci when triggered by a time * Set default sending to Milvus Dev mail group (#1121) * run hadolint with reviewdog * add LINCENSE in Dockerfile * run hadolint with reviewdog * Reporter of reviewdog command is "github-pr-check" * format Dockerfile * ignore DL3007 in hadolint * clean up old docker images * Add GPU sharing solution on native Kubernetes * nightly test mailer * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Test filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * Filtering for specific paths in Jenkins CI * No skip ci when triggered by a time * Don't skip ci when triggered by a time * Set default sending to Milvus Dev * Support hnsw (#1131) * add hnsw * add config * format... * format.. 
* Remove test.template (#1129) * Update framework * remove files * Remove files * Remove ann-acc cases && Update java-sdk cases * change cn to en * [skip ci] remove doc test * [skip ci] change cn to en * Case stability * Add mail notification when test failed * Add main notification * Add main notification * gen milvus instance from utils * Distable case with multiprocess * Add mail notification when nightly test failed * add milvus handler param * add http handler * Remove test.template Co-authored-by: quicksilver <zhifeng.zhang@zilliz.com> * Add doc for the RESTful API / Update contributor number in Milvus readme (#1100) * [skip ci] Update contributor number. * [skip ci] Add RESTful API doc. * [skip ci] Some updates. * [skip ci] Change port to 19121. * [skip ci] Update README.md. Update the descriptions for OPTIONS. * Update README.md Fix a typo. * #1105 update error message when creating IVFSQ8H index without GPU resources (#1117) * [skip ci] Update README (#1104) * remove Nvidia owned files from faiss (#1136) * #1135 remove Nvidia owned files from faiss * Revert "#1135 remove Nvidia owned files from faiss" This reverts commit 3bc007c28c8df5861fdd0452fd64c0e2e719eda2. * #1135 remove Nvidia API implementation * #1135 remove Nvidia owned files from faiss * Update CODE_OF_CONDUCT.md (#1163) * Improve codecov (#1095) * Optimize config test. 
Dir src/config 99% lines covered * add unittest coverage * optimize cache&config unittest * code format * format * format code * fix merge conflict * cover src/utils unittest * '#831 fix exe_path judge error' * #831 fix exe_path judge error * add some unittest coverage * add some unittest coverage * improve coverage of src/wrapper * improve src/wrapper coverage * *test optimize db/meta unittest * fix bug * *test optimize mysqlMetaImpl unittest * *style: format code * import server& scheduler unittest coverage * handover next work * *test: add some test_meta test case * *format code * *fix: fix typo * feat(codecov): improve code coverage for src/db(#872) * feat(codecov): improve code coverage for src/db/engine(#872) * feat(codecov): improve code coverage(#872) * fix config unittest bug * feat(codecov): improve code coverage core/db/engine(#872) * feat(codecov): improve code coverage core/knowhere * feat(codecov): improve code coverage core/knowhere * feat(codecov): improve code coverage * feat(codecov): fix cpu test some error * feat(codecov): improve code coverage * feat(codecov): rename some fiu * fix(db/meta): fix switch/case default action * feat(codecov): improve code coverage(#872) * fix error caused by merge code * format code * feat(codecov): improve code coverage & format code(#872) * feat(codecov): fix test error(#872) * feat(codecov): fix unittest test_mem(#872) * feat(codecov): fix unittest(#872) * feat(codecov): fix unittest for resource manager(#872) * feat(codecov): code format (#872) * feat(codecov): trigger ci(#872) * fix(RequestScheduler): remove a wrong sleep statement * test(test_rpc): fix rpc test * Fix format issue * Remove unused comments * Fix unit test error Co-authored-by: ABNER-1 <ABNER-1@users.noreply.github.com> Co-authored-by: Jin Hai <hai.jin@zilliz.com> * Support run dev test with http handler in python SDK (#1116) * refactoring(create_table done) * refactoring * refactor server delivery (insert done) * refactoring server module 
(count_table done) * server refactor done * cmake pass * refactor server module done. * set grpc response status correctly * format done. * fix redefine ErrorMap() * optimize insert reducing ids data copy * optimize grpc request with reducing data copy * clang format * [skip ci] Refactor server module done. update changlog. prepare for PR * remove explicit and change int32_t to int64_t * add web server * [skip ci] add license in web module * modify header include & comment oatpp environment config * add port configure & create table in handler * modify web url * simple url complation done & add swagger * make sure web url * web functionality done. debuging * add web unittest * web test pass * add web server port * add web server port in template * update unittest cmake file * change web server default port to 19121 * rename method in web module & unittest pass * add search case in unittest for web module * rename some variables * fix bug * unittest pass * web prepare * fix cmd bug(check server status) * update changlog * add web port validate & default set * clang-format pass * add web port test in unittest * add CORS & redirect root to swagger ui * add web status * web table method func cascade test pass * add config url in web module * modify thirdparty cmake to avoid building oatpp test * clang format * update changlog * add constants in web module * reserve Config.cpp * fix constants reference bug * replace web server with async module * modify component to support async * format * developing controller & add test clent into unittest * add web port into demo/server_config * modify thirdparty cmake to allow build test * remove unnecessary comment * add endpoint info in controller * finish web test(bug here) * clang format * add web test cpp to lint exclusions * check null field in GetConfig * add macro RETURN STATUS DTo * fix cmake conflict * fix crash when exit server * remove surplus comments & add http param check * add uri /docs to direct swagger * format * 
change cmd to system * add default value & unittest in web module * add macros to judge if GPU supported * add macros in unit & add default in index dto & print error message when bind http port fail * format (fix #788) * fix cors bug (not completed) * comment cors * change web framework to simple api * comments optimize * change to simple API * remove comments in controller.hpp * remove EP_COMMON_CMAKE_ARGS in oatpp and oatpp-swagger * add ep cmake args to sqlite * clang-format * change a format * test pass * change name to * fix compiler issue(oatpp-swagger depend on oatpp) * add & in start_server.h * specify lib location with oatpp and oatpp-swagger * add comments * add swagger definition * [skip ci] change http method options status code * remove oatpp swagger(fix #970) * remove comments * check Start web behavior * add default to cpu_cache_capacity * remove swagger component.hpp & /docs url * remove /docs info * remove /docs in unittest * remove space in test rpc * remove repeate info in CHANGLOG * change cache_insert_data default value as a constant * [skip ci] Fix some broken links (#960) * [skip ci] Fix broken link * [skip ci] Fix broken link * [skip ci] Fix broken link * [skip ci] Fix broken links * fix issue 373 (#964) * fix issue 373 * Adjustment format * Adjustment format * Adjustment format * change readme * #966 update NOTICE.md (#967) * remove comments * check Start web behavior * add default to cpu_cache_capacity * remove swagger component.hpp & /docs url * remove /docs info * remove /docs in unittest * remove space in test rpc * remove repeate info in CHANGLOG * change cache_insert_data default value as a constant * adjust web port cofig place * rename web_port variable * change gpu resources invoke way to cmd() * set advanced config name add DEFAULT * change config setting to cmd * modify .. 
* optimize code * assign TableDto' count default value 0 (fix #995) * check if table exists when show partitions (fix #1028) * check table exists when drop partition (fix #1029) * check if partition name is legal (fix #1022) * modify status code when partition tag is illegal * update changlog * add info to /system url * add binary index and add bin uri & handler method(not completed) * optimize http insert and search time(fix #1066) | add binary vectors support(fix #1067) * fix test partition bug * fix test bug when check insert records * add binary vectors test * add default for offset and page_size * fix uinttest bug * [skip ci] remove comments * optimize web code for PR comments * add new folder named utils * check offset and pagesize (fix #1082) * improve error message if offset or page_size is not legal (fix #1075) * add log into web module * update changlog * check gpu sources setting when assign repeated value (fix #990) * update changlog * clang-format pass * add default handler in http handler * [skip ci] improve error msg when check gpu resources * change check offset way * remove func IsIntStr * add case * change int32 to int64 when check number str * add log in we module(doing) * update test case * add log in web controller * remove surplus dot * add preload into /system/ * change get_milvus() to get_milvus(args['handler']) * support load table into memory with http server (fix #1115) * [skip ci] comment surplus dto in VectorDto Co-authored-by: jielinxu <52057195+jielinxu@users.noreply.github.com> Co-authored-by: JackLCL <53512883+JackLCL@users.noreply.github.com> Co-authored-by: Cai Yudong <yudong.cai@zilliz.com> * Fix #1140 (#1162) * fix Signed-off-by: Nicky <nicky.xj.lin@gmail.com> * update... 
Signed-off-by: Nicky <nicky.xj.lin@gmail.com> * fix2 Signed-off-by: Nicky <nicky.xj.lin@gmail.com> * fix3 Signed-off-by: Nicky <nicky.xj.lin@gmail.com> * update changelog Signed-off-by: Nicky <nicky.xj.lin@gmail.com> * Update INSTALL.md (#1175) * Update INSTALL.md 1. Change image tag and Milvus source code to latest. 2. Fix a typo Signed-off-by: Lu Wang <yamasite@qq.com> * Update INSTALL.md Signed-off-by: lu.wang <yamasite@qq.com> * add Tanimoto ground truth (#1138) * add milvus ground truth * add milvus groundtruth * [skip ci] add milvus ground truth * [skip ci]add tanimoto ground truth * fix mix case bug (#1208) * fix mix case bug Signed-off-by: del.zhenwu <zhenxiang.li@zilliz.com> * Remove case.md Signed-off-by: del.zhenwu <zhenxiang.li@zilliz.com> * Update README.md (#1206) Add LFAI mailing lists. Signed-off-by: Lutkin Wang <yamasite@qq.com> * Add design.md to store links to design docs (#1219) * Update README.md Add link to Milvus design docs Signed-off-by: Lutkin Wang <yamasite@qq.com> * Create design.md Signed-off-by: Lutkin Wang <yamasite@qq.com> * Update design.md Signed-off-by: Lutkin Wang <yamasite@qq.com> * Add troubleshooting info about libmysqlpp.so.3 error (#1225) * Update INSTALL.md Signed-off-by: Lutkin Wang <yamasite@qq.com> * Update INSTALL.md Signed-off-by: Lutkin Wang <yamasite@qq.com> * Update README.md (#1233) Signed-off-by: Lutkin Wang <yamasite@qq.com> * #1240 Update license declaration of each file (#1241) * #1240 Update license declaration of each files Signed-off-by: jinhai <hai.jin@zilliz.com> * #1240 Update CHANGELOG Signed-off-by: jinhai <hai.jin@zilliz.com> * Update README.md (#1258) Add Jenkins master badge. Signed-off-by: Lutkin Wang <yamasite@qq.com> * Update INSTALL.md (#1265) Fix indentation. 
* support CPU profiling (#1251) * #1250 support CPU profiling Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * #1250 fix code coverage Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * Fix HNSW crash (#1262) * fix Signed-off-by: xiaojun.lin <xiaojun.lin@zilliz.com> * update. Signed-off-by: xiaojun.lin <xiaojun.lin@zilliz.com> * Add troubleshooting information for INSTALL.md and enhance readability (#1274) * Update INSTALL.md 1. Add new troubleshooting message; 2. Enhance readability. Signed-off-by: Lutkin Wang <yamasite@qq.com> * Update INSTALL.md Signed-off-by: Lutkin Wang <yamasite@qq.com> * Update INSTALL.md Signed-off-by: Lutkin Wang <yamasite@qq.com> * Update INSTALL.md Add CentOS link. Signed-off-by: Lutkin Wang <yamasite@qq.com> * Create COMMUNITY.md (#1292) Signed-off-by: Lutkin Wang <yamasite@qq.com> * fix gtest * add copyright * fix gtest * MERGE_NOT_YET * fix lint Co-authored-by: quicksilver <zhifeng.zhang@zilliz.com> Co-authored-by: BossZou <40255591+BossZou@users.noreply.github.com> Co-authored-by: jielinxu <52057195+jielinxu@users.noreply.github.com> Co-authored-by: JackLCL <53512883+JackLCL@users.noreply.github.com> Co-authored-by: Cai Yudong <yudong.cai@zilliz.com> Co-authored-by: Tinkerrr <linxiaojun.cn@outlook.com> Co-authored-by: del-zhenwu <56623710+del-zhenwu@users.noreply.github.com> Co-authored-by: Lutkin Wang <yamasite@qq.com> Co-authored-by: shengjh <46514371+shengjh@users.noreply.github.com> Co-authored-by: ABNER-1 <ABNER-1@users.noreply.github.com> Co-authored-by: Jin Hai <hai.jin@zilliz.com> Co-authored-by: shiyu22 <cshiyu22@gmail.com> * #1302 Get all record IDs in a segment by given a segment id * Remove query time ranges Signed-off-by: zhenwu <zw@zilliz.com> * #1295 let wal enable by default * fix cases Signed-off-by: zhenwu <zw@zilliz.com> * fix partition cases Signed-off-by: zhenwu <zw@zilliz.com> * [skip ci] update test_db * update * fix case bug Signed-off-by: zhenwu <zw@zilliz.com> * lint * fix test case failures * remove 
some code * Caiyd crud 1 (#1377) * fix clang format Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * fix unittest build error Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * fix build issue when enable profiling Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * fix hastable bug * update bloom filter * update * benchmark * update benchmark * update * update * remove wal record size Signed-off-by: shengjun.li <shengjun.li@zilliz.com> * remove wal record size config Signed-off-by: shengjun.li <shengjun.li@zilliz.com> * update apply deletes: switch to binary search * update sdk_simple Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * update apply deletes: switch to binary search * add test_search_by_id Signed-off-by: zhenwu <zw@zilliz.com> * add more log * flush error with multi same ids Signed-off-by: zhenwu <zw@zilliz.com> * modify wal config Signed-off-by: shengjun.li <shengjun.li@zilliz.com> * update * add binary search_by_id * fix case bug Signed-off-by: zhenwu <zw@zilliz.com> * update cases Signed-off-by: zhenwu <zw@zilliz.com> * fix unit test #1395 * improve merge performance * add uids_ for VectorIndex to improve search performance Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * fix error Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * update * fix search * fix record num Signed-off-by: shengjun.li <shengjun.li@zilliz.com> * refine code * refine code * Add get_vector_ids test cases (#1407) * fix order * add wal case Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix wal case Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix wal case Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix wal case Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix invalid operation issue Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix invalid operation issue Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix bug Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * fix bug 
Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * crud fix Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * crud fix Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au> * add table info test cases Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix cases Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix cases Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix cases Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix cases Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix cases Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> Signed-off-by: JinHai-CN <hai.jin@zilliz.com> * add to compact case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * add to compact case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * add to compact case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * add case and debug compact Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * test pdb Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * test pdb Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * test pdb Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix cases Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * update table_info case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * update table_info case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * update table_info case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * update get vector ids case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * update get vector ids case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * update get vector ids case Signed-off-by: 
Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * update get vector ids case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * update case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * update case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * update case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * update case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * update case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * pdb test Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * pdb test Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * add tests for get_vector_ids Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix case Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * add binary and ip Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix binary index Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * fix pdb Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> * #1408 fix search result in-correct after DeleteById Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * add one case * delete failed segment * update serialize * update serialize * fix case Signed-off-by: zhenwu <zw@zilliz.com> * update * update case assertion Signed-off-by: zhenwu <zw@zilliz.com> * [skip ci] update config * change bloom filter msync flag to async * #1319 add more timing debug info Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * update * update * add normalize Signed-off-by: zhenwu <zw@zilliz.com> * add normalize Signed-off-by: zhenwu <zw@zilliz.com> * add normalize Signed-off-by: zhenwu <zw@zilliz.com> * Fix compiling error Signed-off-by: jinhai <hai.jin@zilliz.com> * support ip (#1383) * support ip Signed-off-by: xiaojun.lin <xiaojun.lin@zilliz.com> * IP result distance sort by descend Signed-off-by: Nicky <nicky.xj.lin@gmail.com> * update Signed-off-by: 
Nicky <nicky.xj.lin@gmail.com> * format Signed-off-by: xiaojun.lin <xiaojun.lin@zilliz.com> * get table lsn * Remove unused third party Signed-off-by: jinhai <hai.jin@zilliz.com> * Refine code Signed-off-by: jinhai <hai.jin@zilliz.com> * #1319 fix clang format Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * fix wal applied lsn Signed-off-by: shengjun.li <shengjun.li@zilliz.com> * validate partition tag * #1319 improve search performance Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * build error Co-authored-by: Zhiru Zhu <youny626@hotmail.com> Co-authored-by: groot <yihua.mo@zilliz.com> Co-authored-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au> Co-authored-by: shengjh <46514371+shengjh@users.noreply.github.com> Co-authored-by: del-zhenwu <56623710+del-zhenwu@users.noreply.github.com> Co-authored-by: shengjun.li <49774184+shengjun1985@users.noreply.github.com> Co-authored-by: Cai Yudong <yudong.cai@zilliz.com> Co-authored-by: quicksilver <zhifeng.zhang@zilliz.com> Co-authored-by: BossZou <40255591+BossZou@users.noreply.github.com> Co-authored-by: jielinxu <52057195+jielinxu@users.noreply.github.com> Co-authored-by: JackLCL <53512883+JackLCL@users.noreply.github.com> Co-authored-by: Tinkerrr <linxiaojun.cn@outlook.com> Co-authored-by: Lutkin Wang <yamasite@qq.com> Co-authored-by: ABNER-1 <ABNER-1@users.noreply.github.com> Co-authored-by: shiyu22 <cshiyu22@gmail.com>
2020-02-29 08:11:31 +00:00
/* Copyright @2012 by Justin Hines at Bitly under a very liberal license. See LICENSE in the source distribution. */
#define _GNU_SOURCE
#include <sys/stat.h>
#include <stdint.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <fcntl.h>
#include <math.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include "murmur.h"
#include "dablooms.h"
#define DABLOOMS_VERSION "0.9.1"
#define ERROR_TIGHTENING_RATIO 0.5
#define SALT_CONSTANT 0x97c29b3a
/* Returns the dablooms version string (the DABLOOMS_VERSION literal). */
const char *dablooms_version(void)
{
    const char *version = DABLOOMS_VERSION;

    return version;
}
/*
 * Releases a bitmap: unmaps its memory mapping, closes its file descriptor
 * and frees the bitmap structure itself.
 *
 * A NULL bitmap is ignored. A bitmap without a live mapping (array is NULL
 * or MAP_FAILED) skips munmap(), so the function can be used on error paths
 * where the mapping was never established.
 */
void free_bitmap(bitmap_t *bitmap)
{
    if (bitmap == NULL) {
        return;
    }

    if (bitmap->array != NULL && bitmap->array != MAP_FAILED) {
        if (munmap(bitmap->array, bitmap->bytes) < 0) {
            perror("Error, unmapping memory");
        }
    }
    close(bitmap->fd);
    free(bitmap);
}
/*
 * Grows the backing file if it is smaller than `new_size` and (re)maps
 * `new_size` bytes of it into memory.
 *
 * bitmap:   bitmap whose fd is the backing file; array is NULL when nothing
 *           has been mapped yet.
 * old_size: length of the current mapping, used when remapping.
 * new_size: requested mapping length in bytes.
 *
 * Returns `bitmap` on success. On any failure the bitmap is handed to
 * free_bitmap(), which unmaps it, closes its descriptor and frees it, and
 * NULL is returned; the caller must not use `bitmap` or its fd afterwards.
 * The descriptor is deliberately not closed a second time here.
 */
bitmap_t *bitmap_resize(bitmap_t *bitmap, size_t old_size, size_t new_size)
{
    int fd = bitmap->fd;
    struct stat file_stat;
    void *mapping;

    if (fstat(fd, &file_stat) < 0) {
        perror("Error reading file size with fstat");
        free_bitmap(bitmap);
        return NULL;
    }

    /* grow file if necessary */
    if ((size_t)file_stat.st_size < new_size) {
        if (ftruncate(fd, new_size) < 0) {
            perror("Error increasing file size with ftruncate");
            free_bitmap(bitmap);
            return NULL;
        }
    }
    lseek(fd, 0, SEEK_SET);

    /* resize if mmap exists and possible on this os, else new mmap */
    if (bitmap->array != NULL) {
#if defined(__linux__) || defined(__linux)
        mapping = mremap(bitmap->array, old_size, new_size, MREMAP_MAYMOVE);
        if (mapping == MAP_FAILED) {
            perror("Error resizing mmap");
            /* bitmap->array still refers to the old, intact mapping, so
             * free_bitmap() can unmap it. */
            free_bitmap(bitmap);
            return NULL;
        }
        bitmap->array = (char *)mapping;
#else
        if (munmap(bitmap->array, bitmap->bytes) < 0) {
            perror("Error unmapping memory");
            free_bitmap(bitmap);
            return NULL;
        }
        bitmap->array = NULL;
#endif
    }

    if (bitmap->array == NULL) {
        mapping = mmap(0, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (mapping == MAP_FAILED) {
            perror("Error init mmap");
            /* array is left NULL rather than MAP_FAILED for the cleanup */
            free_bitmap(bitmap);
            return NULL;
        }
        bitmap->array = (char *)mapping;
    }

    bitmap->bytes = new_size;
    return bitmap;
}
/*
 * Create a new bitmap, not full featured, simple to give
 * us a means of interacting with the 4 bit counters.
 *
 * fd:    open descriptor of the backing file.
 * bytes: number of bytes of the file to map.
 *
 * Returns the new bitmap, or NULL on failure. The descriptor is consumed on
 * every failure path: bitmap_resize() releases the bitmap and closes fd when
 * sizing or mapping fails, and fd is closed here when the allocation fails,
 * so callers never have to guess whether fd is still open.
 */
bitmap_t *new_bitmap(int fd, size_t bytes)
{
    bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t));

    if (bitmap == NULL) {
        close(fd);
        return NULL;
    }

    bitmap->bytes = bytes;
    bitmap->fd = fd;
    bitmap->array = NULL;

    /* bitmap_resize() cleans up the bitmap itself when it returns NULL */
    return bitmap_resize(bitmap, 0, bytes);
}
/*
 * Increments the four bit counter at `index`.
 *
 * Two counters share the byte at `index / 2 + offset`: an even index uses
 * the high nibble, an odd index the low nibble.
 *
 * Returns 0 on success, or -1 (byte left untouched) when the counter is
 * already at 15 and would overflow.
 */
int bitmap_increment(bitmap_t *bitmap, unsigned int index, long offset)
{
    const long pos = index / 2 + offset;
    const int shift = (index % 2 != 0) ? 0 : 4;
    const uint8_t byte = bitmap->array[pos];
    const uint8_t counter = (byte >> shift) & 0x0f;

    if (counter == 0x0f) {
        return -1;
    }

    /* keep the neighbouring nibble, store counter + 1 in ours */
    bitmap->array[pos] = (uint8_t)((byte & ~(0x0f << shift)) | ((counter + 1) << shift));
    return 0;
}
/*
 * Decrements the four bit counter at `index`.
 *
 * Two counters share the byte at `index / 2 + offset`: an even index uses
 * the high nibble, an odd index the low nibble.
 *
 * Returns 0 on success, or -1 (byte left untouched) when the counter is
 * already zero.
 */
int bitmap_decrement(bitmap_t *bitmap, unsigned int index, long offset)
{
    const long pos = index / 2 + offset;
    const int shift = (index % 2 != 0) ? 0 : 4;
    const uint8_t byte = bitmap->array[pos];
    const uint8_t counter = (byte >> shift) & 0x0f;

    if (counter == 0x00) {
        return -1;
    }

    /* keep the neighbouring nibble, store counter - 1 in ours */
    bitmap->array[pos] = (uint8_t)((byte & ~(0x0f << shift)) | ((counter - 1) << shift));
    return 0;
}
/*
 * Tests the four bit counter at `index`.
 *
 * Returns 0 when the counter is zero and a non-zero value otherwise. The
 * value is the masked nibble in place (not shifted down), so for even
 * indexes it is not the count itself.
 */
int bitmap_check(bitmap_t *bitmap, unsigned int index, long offset)
{
    const long pos = index / 2 + offset;
    const int mask = (index % 2 != 0) ? 0x0f : 0xf0;

    return bitmap->array[pos] & mask;
}
/*
 * Asks the kernel to write the mapped bitmap back to its file, using
 * msync() with MS_ASYNC over the whole mapping.
 *
 * Returns 0 on success, -1 when msync() fails (reported through perror).
 */
int bitmap_flush(bitmap_t *bitmap)
{
    const int rc = msync(bitmap->array, bitmap->bytes, MS_ASYNC);

    if (rc >= 0) {
        return 0;
    }

    perror("Error, flushing bitmap to disk");
    return -1;
}
/*
 * Computes the bloom->nfuncs bucket positions for `key`.
 *
 * The key is hashed once with MurmurHash3_x64_128; the first two 32-bit
 * words of the digest (h1, h2) then generate every position as
 * (h1 + i * h2) % counts_per_func, evaluated in 32-bit unsigned arithmetic.
 * Results are written to hashes[0 .. nfuncs-1].
 *
 * See paper by Kirsch, Mitzenmacher [2006]
 * http://www.eecs.harvard.edu/~michaelm/postscripts/rsa2008.pdf
 */
void hash_func(counting_bloom_t *bloom, const char *key, size_t key_len, uint32_t *hashes)
{
    uint32_t digest[4];
    uint32_t step;
    uint32_t combined;
    int k;

    MurmurHash3_x64_128(key, key_len, SALT_CONSTANT, digest);

    combined = digest[0];
    step = digest[1];
    for (k = 0; k < bloom->nfuncs; k++) {
        hashes[k] = combined % bloom->counts_per_func;
        /* running sum equals h1 + (k + 1) * h2 modulo 2^32 */
        combined += step;
    }
}
/*
 * Releases a counting bloom filter: its hash buffer, its bitmap (when one
 * is attached) and the structure itself.
 *
 * A NULL bloom is ignored. counting_bloom_init() creates filters with
 * bitmap == NULL, so a filter that never had a bitmap attached is freed
 * without calling free_bitmap().
 *
 * Always returns 0.
 */
int free_counting_bloom(counting_bloom_t *bloom)
{
    if (bloom == NULL) {
        return 0;
    }

    free(bloom->hashes);
    bloom->hashes = NULL;

    if (bloom->bitmap != NULL) {
        free_bitmap(bloom->bitmap);
        bloom->bitmap = NULL;
    }

    free(bloom);
    return 0;
}
/*
 * Allocates a counting bloom filter descriptor and derives its geometry;
 * no bitmap is attached (bitmap and header are left NULL for the caller).
 *
 * capacity:   number of elements the filter is sized for.
 * error_rate: target false-positive rate; must lie strictly between 0 and 1,
 *             otherwise the size computations below are not defined.
 * offset:     byte offset of this filter's region; the stored offset
 *             additionally skips the counting_bloom_header_t in front of
 *             the counters.
 *
 * Derived fields:
 *   nfuncs          = ceil(log2(1 / error_rate))
 *   counts_per_func = ceil(capacity * |ln(error_rate)| / (nfuncs * ln(2)^2))
 *   size            = nfuncs * counts_per_func   (number of 4 bit counters)
 *   num_bytes       = ceil(size / 2) + sizeof(counting_bloom_header_t)
 *
 * Returns the new filter, or NULL when error_rate is out of range or an
 * allocation fails.
 */
counting_bloom_t *counting_bloom_init(unsigned int capacity, double error_rate, long offset)
{
    counting_bloom_t *bloom;

    /* written so that NaN is rejected as well */
    if (!(error_rate > 0.0 && error_rate < 1.0)) {
        fprintf(stderr, "Error, bloom filter error rate must be between 0 and 1\n");
        return NULL;
    }

    if ((bloom = (counting_bloom_t *)malloc(sizeof(counting_bloom_t))) == NULL) {
        fprintf(stderr, "Error, could not realloc a new bloom filter\n");
        return NULL;
    }

    bloom->bitmap = NULL;
    bloom->header = NULL;
    bloom->capacity = capacity;
    bloom->error_rate = error_rate;
    bloom->offset = offset + sizeof(counting_bloom_header_t);
    bloom->nfuncs = (int) ceil(log(1 / error_rate) / log(2));
    bloom->counts_per_func = (int) ceil(capacity * fabs(log(error_rate)) / (bloom->nfuncs * pow(log(2), 2)));
    bloom->size = bloom->nfuncs * bloom->counts_per_func;
    /* rounding-up integer divide by 2 of bloom->size */
    bloom->num_bytes = ((bloom->size + 1) / 2) + sizeof(counting_bloom_header_t);

    bloom->hashes = (uint32_t *)calloc(bloom->nfuncs, sizeof(uint32_t));
    if (bloom->hashes == NULL) {
        fprintf(stderr, "Error, could not allocate hash buffer for bloom filter\n");
        free(bloom);
        return NULL;
    }

    return bloom;
}
/*
 * Creates a new file-backed counting bloom filter.
 *
 * capacity:   number of elements the filter is sized for.
 * error_rate: target false-positive rate.
 * filename:   backing file; created if missing and truncated if present
 *             (opened O_RDWR | O_CREAT | O_TRUNC, mode 0600).
 *
 * Returns the filter, whose header points at the start of the mapping, or
 * NULL if the file cannot be opened, the filter cannot be initialised or
 * the bitmap cannot be created.
 */
counting_bloom_t *new_counting_bloom(unsigned int capacity, double error_rate, const char *filename)
{
    counting_bloom_t *cur_bloom;
    int fd;

    if ((fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600)) < 0) {
        perror("Error, Opening File Failed");
        fprintf(stderr, " %s \n", filename);
        return NULL;
    }

    cur_bloom = counting_bloom_init(capacity, error_rate, 0);
    if (cur_bloom == NULL) {
        close(fd);
        return NULL;
    }

    cur_bloom->bitmap = new_bitmap(fd, cur_bloom->num_bytes);
    if (cur_bloom->bitmap == NULL) {
        fprintf(stderr, "Error, Could not create bitmap with file\n");
        /* fd is not closed here: when mapping fails, new_bitmap() has
         * already closed it through bitmap_resize()/free_bitmap(), and a
         * second close could hit an unrelated descriptor. */
        free(cur_bloom->hashes);
        free(cur_bloom);
        return NULL;
    }

    cur_bloom->header = (counting_bloom_header_t *)(cur_bloom->bitmap->array);
    return cur_bloom;
}
/*
 * Adds the key `s` (`len` bytes) to the filter by incrementing one counter
 * per hash function; hash k addresses the k-th slice of counts_per_func
 * counters.
 *
 * The header's element count is incremented even when a counter could not
 * be incremented.
 *
 * Returns 0 on success, -1 if any counter was already saturated.
 */
int counting_bloom_add(counting_bloom_t *bloom, const char *s, size_t len)
{
    unsigned int *slots = bloom->hashes;
    unsigned int k;
    int status = 0;

    hash_func(bloom, s, len, slots);

    for (k = 0; k < bloom->nfuncs; k++) {
        const unsigned int slice_start = k * bloom->counts_per_func;
        const unsigned int counter = slots[k] + slice_start;

        if (bitmap_increment(bloom->bitmap, counter, bloom->offset) == -1) {
            status = -1;
        }
    }

    bloom->header->count++;
    return status;
}
/*
 * Removes the key `s` (`len` bytes) from the filter by decrementing one
 * counter per hash function; hash k addresses the k-th slice of
 * counts_per_func counters.
 *
 * The header's element count is decremented even when a counter could not
 * be decremented.
 *
 * Returns 0 on success, -1 if any counter was already zero.
 */
int counting_bloom_remove(counting_bloom_t *bloom, const char *s, size_t len)
{
    unsigned int *slots = bloom->hashes;
    unsigned int k;
    int status = 0;

    hash_func(bloom, s, len, slots);

    for (k = 0; k < bloom->nfuncs; k++) {
        const unsigned int slice_start = k * bloom->counts_per_func;
        const unsigned int counter = slots[k] + slice_start;

        if (bitmap_decrement(bloom->bitmap, counter, bloom->offset) == -1) {
            status = -1;
        }
    }

    bloom->header->count--;
    return status;
}
/*
 * Tests whether the key `s` (`len` bytes) may be present in the filter.
 *
 * Returns 1 when every counter addressed by the key is non-zero, and 0 as
 * soon as one of them is zero.
 */
int counting_bloom_check(counting_bloom_t *bloom, const char *s, size_t len)
{
    unsigned int *slots = bloom->hashes;
    unsigned int k = 0;

    hash_func(bloom, s, len, slots);

    while (k < bloom->nfuncs) {
        const unsigned int slice_start = k * bloom->counts_per_func;
        const unsigned int counter = slots[k] + slice_start;

        if (bitmap_check(bloom->bitmap, counter, bloom->offset) == 0) {
            return 0;
        }
        k++;
    }
    return 1;
}
/*
 * Releases a scaling bloom filter: every counting sub-filter (hash buffer
 * and structure), the sub-filter array, the bitmap and the scaling filter
 * itself.
 *
 * Sub-filters share the scaling filter's bitmap, so it is released exactly
 * once here and never through the sub-filters.
 *
 * NULL is tolerated for the filter itself, for its sub-filter array, for
 * individual entries and for the bitmap, so the function can be used on
 * partially constructed filters.
 *
 * Always returns 0.
 */
int free_scaling_bloom(scaling_bloom_t *bloom)
{
    int i;

    if (bloom == NULL) {
        return 0;
    }

    if (bloom->blooms != NULL) {
        for (i = bloom->num_blooms - 1; i >= 0; i--) {
            if (bloom->blooms[i] == NULL) {
                continue;
            }
            free(bloom->blooms[i]->hashes);
            bloom->blooms[i]->hashes = NULL;
            free(bloom->blooms[i]);
            bloom->blooms[i] = NULL;
        }
        free(bloom->blooms);
        bloom->blooms = NULL;
    }

    if (bloom->bitmap != NULL) {
        free_bitmap(bloom->bitmap);
        bloom->bitmap = NULL;
    }

    free(bloom);
    return 0;
}
/*
 * Creates a new counting bloom filter from a given scaling bloom filter and
 * appends it as the filter's newest sub-filter.
 *
 * The sub-filter's error rate is the scaling filter's rate tightened by
 * ERROR_TIGHTENING_RATIO^(num_blooms + 1). The shared bitmap is resized to
 * make room for it, and because the mapping may move, the scaling header
 * and every sub-filter header pointer are recomputed afterwards.
 *
 * The caller is responsible for setting the new sub-filter's header count
 * and id; they are not touched here.
 *
 * Returns the new sub-filter, or NULL on failure. If resizing the bitmap
 * fails, bitmap_resize() has already released the mapping, so bloom->bitmap,
 * bloom->header and the existing sub-filters' bitmap/header pointers are
 * reset to NULL; the scaling filter is then only fit to be freed.
 */
counting_bloom_t *new_counting_bloom_from_scale(scaling_bloom_t *bloom)
{
    int i;
    long offset;
    double error_rate;
    counting_bloom_t *cur_bloom;
    counting_bloom_t **blooms;

    error_rate = bloom->error_rate * (pow(ERROR_TIGHTENING_RATIO, bloom->num_blooms + 1));

    /* grow through a temporary so the existing array survives a failure */
    blooms = (counting_bloom_t **)realloc(bloom->blooms, (bloom->num_blooms + 1) * sizeof(counting_bloom_t *));
    if (blooms == NULL) {
        fprintf(stderr, "Error, could not realloc a new bloom filter\n");
        return NULL;
    }
    bloom->blooms = blooms;

    cur_bloom = counting_bloom_init(bloom->capacity, error_rate, bloom->num_bytes);
    if (cur_bloom == NULL) {
        return NULL;
    }

    bloom->bitmap = bitmap_resize(bloom->bitmap, bloom->num_bytes, bloom->num_bytes + cur_bloom->num_bytes);
    if (bloom->bitmap == NULL) {
        /* the mapping is gone: drop every pointer that referred to it */
        bloom->header = NULL;
        for (i = 0; i < bloom->num_blooms; i++) {
            bloom->blooms[i]->bitmap = NULL;
            bloom->blooms[i]->header = NULL;
        }
        free(cur_bloom->hashes);
        free(cur_bloom);
        return NULL;
    }

    bloom->blooms[bloom->num_blooms] = cur_bloom;

    /* reset header pointer, as mmap may have moved */
    bloom->header = (scaling_bloom_header_t *) bloom->bitmap->array;

    /* Set the pointers for these header structs to the right location since mmap may have moved */
    bloom->num_blooms++;
    for (i = 0; i < bloom->num_blooms; i++) {
        /* each sub-filter's stored offset points just past its own header */
        offset = bloom->blooms[i]->offset - sizeof(counting_bloom_header_t);
        bloom->blooms[i]->header = (counting_bloom_header_t *) (bloom->bitmap->array + offset);
    }

    bloom->num_bytes += cur_bloom->num_bytes;
    cur_bloom->bitmap = bloom->bitmap;

    return cur_bloom;
}
/*
 * Opens an existing counting bloom filter file.
 *
 * capacity / error_rate: parameters the file was created with; they
 *             determine the expected file size.
 * filename:   existing backing file, opened O_RDWR.
 *
 * Returns the filter, whose header points at the start of the mapping, or
 * NULL if the file cannot be opened, is empty, does not have exactly the
 * expected size, or cannot be mapped.
 */
counting_bloom_t *new_counting_bloom_from_file(unsigned int capacity, double error_rate, const char *filename)
{
    int fd;
    off_t size;
    counting_bloom_t *bloom;

    if ((fd = open(filename, O_RDWR, (mode_t)0600)) < 0) {
        fprintf(stderr, "Error, Could not open file %s: %s\n", filename, strerror(errno));
        return NULL;
    }

    if ((size = lseek(fd, 0, SEEK_END)) < 0) {
        perror("Error, calling lseek() to tell file size");
        close(fd);
        return NULL;
    }

    if (size == 0) {
        fprintf(stderr, "Error, File size zero\n");
        close(fd);
        return NULL;
    }

    bloom = counting_bloom_init(capacity, error_rate, 0);
    if (bloom == NULL) {
        close(fd);
        return NULL;
    }

    if (size != bloom->num_bytes) {
        fprintf(stderr, "Error, Actual filesize and expected filesize are not equal\n");
        /* no bitmap is attached yet, so only the hash buffer and the
         * struct need releasing; the descriptor is still ours to close */
        free(bloom->hashes);
        free(bloom);
        close(fd);
        return NULL;
    }

    if ((bloom->bitmap = new_bitmap(fd, size)) == NULL) {
        fprintf(stderr, "Error, Could not create bitmap with file\n");
        /* fd is not closed here: when mapping fails, new_bitmap() has
         * already closed it through bitmap_resize()/free_bitmap() */
        free(bloom->hashes);
        free(bloom);
        return NULL;
    }

    bloom->header = (counting_bloom_header_t *)(bloom->bitmap->array);
    return bloom;
}
/*
 * Zeroes both sequence numbers in the scaling filter's header ahead of a
 * modification.
 *
 * A non-zero disk_seqnum is zeroed and flushed first, so that it is cleared
 * on disk before any other change is made. mem_seqnum is then zeroed.
 *
 * Returns the value mem_seqnum held before it was cleared.
 */
uint64_t scaling_bloom_clear_seqnums(scaling_bloom_t *bloom)
{
    uint64_t previous;
    const int disk_marked = (bloom->header->disk_seqnum != 0);

    if (disk_marked) {
        // disk_seqnum cleared on disk before any other changes
        bloom->header->disk_seqnum = 0;
        bitmap_flush(bloom->bitmap);
    }

    previous = bloom->header->mem_seqnum;
    bloom->header->mem_seqnum = 0;

    return previous;
}
/*
 * Adds the key `s` (`len` bytes) with identifier `id` to the scaling filter.
 *
 * The target is the newest sub-filter whose header id is <= `id`; when none
 * qualifies, the oldest sub-filter is used. If `id` is above the recorded
 * max_id and the target has reached capacity - 1 elements, a new sub-filter
 * is appended (starting at id max_id + 1) and receives the key instead.
 * max_id is raised to `id` when needed, and mem_seqnum is advanced by one
 * relative to its value before the call.
 *
 * Returns 1 on success and -1 on error: a counter saturated, the filter has
 * no sub-filter at all, or a new sub-filter could not be created.
 */
int scaling_bloom_add(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id)
{
    int i;
    int status = 1;
    uint64_t seqnum;
    counting_bloom_t *cur_bloom = NULL;

    for (i = bloom->num_blooms - 1; i >= 0; i--) {
        cur_bloom = bloom->blooms[i];
        if (id >= cur_bloom->header->id) {
            break;
        }
    }

    if (cur_bloom == NULL) {
        /* no sub-filter exists, so there is nothing to add the key to */
        return -1;
    }

    seqnum = scaling_bloom_clear_seqnums(bloom);

    if ((id > bloom->header->max_id) && (cur_bloom->header->count >= cur_bloom->capacity - 1)) {
        cur_bloom = new_counting_bloom_from_scale(bloom);
        if (cur_bloom == NULL) {
            /* nothing was added; restore the sequence number if the header
             * is still reachable */
            if (bloom->header != NULL) {
                bloom->header->mem_seqnum = seqnum;
            }
            return -1;
        }
        cur_bloom->header->count = 0;
        cur_bloom->header->id = bloom->header->max_id + 1;
    }

    if (bloom->header->max_id < id) {
        bloom->header->max_id = id;
    }

    if (counting_bloom_add(cur_bloom, s, len) == -1) {
        status = -1;
    }

    bloom->header->mem_seqnum = seqnum + 1;

    return status;
}
/*
 * Removes the key `s` (`len` bytes) with identifier `id` from the scaling
 * filter.
 *
 * The key is removed from the newest sub-filter whose header id is <= `id`,
 * and mem_seqnum is advanced by one relative to its value before the call.
 *
 * Returns 1 when the removal succeeded, -1 when the sub-filter reported an
 * error, and 0 when no sub-filter covers `id` (nothing is modified).
 */
int scaling_bloom_remove(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id)
{
    counting_bloom_t *target;
    uint64_t seqnum;
    int status;
    int k = bloom->num_blooms - 1;

    while (k >= 0) {
        target = bloom->blooms[k];
        if (id >= target->header->id) {
            seqnum = scaling_bloom_clear_seqnums(bloom);
            status = (counting_bloom_remove(target, s, len) == -1) ? -1 : 1;
            bloom->header->mem_seqnum = seqnum + 1;
            return status;
        }
        k--;
    }

    return 0;
}
/*
 * Tests whether the key `s` (`len` bytes) may be present in any sub-filter,
 * searching from the newest sub-filter to the oldest.
 *
 * Returns 1 on the first sub-filter that reports the key, 0 if none does.
 */
int scaling_bloom_check(scaling_bloom_t *bloom, const char *s, size_t len)
{
    int k = bloom->num_blooms - 1;

    while (k >= 0) {
        if (counting_bloom_check(bloom->blooms[k], s, len)) {
            return 1;
        }
        k--;
    }

    return 0;
}
/*
 * Flushes the scaling filter to its file and then records the sequence
 * number.
 *
 * The bitmap is flushed first; only if disk_seqnum is still zero is it set
 * to mem_seqnum and the bitmap flushed a second time, so that all changes
 * are written before disk_seqnum is set.
 *
 * Returns 0 on success, -1 if a flush fails.
 */
int scaling_bloom_flush(scaling_bloom_t *bloom)
{
    if (bitmap_flush(bloom->bitmap) != 0) {
        return -1;
    }

    if (bloom->header->disk_seqnum != 0) {
        return 0;
    }

    // all changes written to disk before disk_seqnum set
    bloom->header->disk_seqnum = bloom->header->mem_seqnum;
    return bitmap_flush(bloom->bitmap);
}
/* Returns the mem_seqnum field of the scaling filter's header. */
uint64_t scaling_bloom_mem_seqnum(scaling_bloom_t *bloom)
{
    const uint64_t seqnum = bloom->header->mem_seqnum;

    return seqnum;
}
/* Returns the disk_seqnum field of the scaling filter's header. */
uint64_t scaling_bloom_disk_seqnum(scaling_bloom_t *bloom)
{
    const uint64_t seqnum = bloom->header->disk_seqnum;

    return seqnum;
}
/*
 * Allocates a scaling bloom filter with no sub-filters and maps just the
 * scaling header from `fd`.
 *
 * capacity:   capacity given to each sub-filter created later.
 * error_rate: base false-positive rate.
 * filename:   not used by this function.
 * fd:         open descriptor of the backing file.
 *
 * Returns the filter, or NULL if the allocation or the bitmap creation
 * fails. On bitmap failure only the freshly allocated struct is released;
 * nothing else has been set up at that point.
 */
scaling_bloom_t *scaling_bloom_init(unsigned int capacity, double error_rate, const char *filename, int fd)
{
    scaling_bloom_t *bloom;

    if ((bloom = (scaling_bloom_t *)malloc(sizeof(scaling_bloom_t))) == NULL) {
        return NULL;
    }

    /* put the struct into a well-defined empty state before anything can fail */
    bloom->header = NULL;
    bloom->capacity = capacity;
    bloom->error_rate = error_rate;
    bloom->num_blooms = 0;
    bloom->num_bytes = sizeof(scaling_bloom_header_t);
    bloom->fd = fd;
    bloom->blooms = NULL;

    if ((bloom->bitmap = new_bitmap(fd, sizeof(scaling_bloom_header_t))) == NULL) {
        fprintf(stderr, "Error, Could not create bitmap with file\n");
        free(bloom);
        return NULL;
    }

    bloom->header = (scaling_bloom_header_t *) bloom->bitmap->array;

    return bloom;
}
/*
 * Creates a new file-backed scaling bloom filter with one empty sub-filter.
 *
 * capacity:   capacity of each sub-filter.
 * error_rate: base false-positive rate.
 * filename:   backing file; created if missing and truncated if present
 *             (opened O_RDWR | O_CREAT | O_TRUNC, mode 0600).
 *
 * The first sub-filter starts with count 0 and id 0, and mem_seqnum is set
 * to 1.
 *
 * Returns the filter, or NULL if the file cannot be opened or any part of
 * the filter cannot be created.
 */
scaling_bloom_t *new_scaling_bloom(unsigned int capacity, double error_rate, const char *filename)
{
    scaling_bloom_t *bloom;
    counting_bloom_t *cur_bloom;
    int fd;

    if ((fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600)) < 0) {
        perror("Error, Opening File Failed");
        fprintf(stderr, " %s \n", filename);
        return NULL;
    }

    bloom = scaling_bloom_init(capacity, error_rate, filename, fd);
    if (bloom == NULL) {
        /* fd is not closed here: if the failure came from mapping the file,
         * bitmap_resize()/free_bitmap() has already closed it */
        fprintf(stderr, "Error, Could not create scaling bloom\n");
        return NULL;
    }

    if (!(cur_bloom = new_counting_bloom_from_scale(bloom))) {
        fprintf(stderr, "Error, Could not create counting bloom\n");
        free_scaling_bloom(bloom);
        return NULL;
    }

    cur_bloom->header->count = 0;
    cur_bloom->header->id = 0;

    bloom->header->mem_seqnum = 1;

    return bloom;
}
/*
 * Opens an existing scaling bloom filter file.
 *
 * capacity / error_rate: parameters the file was created with; they
 *             determine the size of each sub-filter region.
 * filename:   existing backing file, opened O_RDWR.
 *
 * After the scaling header, sub-filters are re-created one by one until
 * their sizes account for the whole file; their header count and id are
 * left as stored in the file.
 *
 * Returns the filter, or NULL if the file cannot be opened, is empty or
 * shorter than the scaling header, does not match the expected layout, or
 * a sub-filter cannot be created. Empty and short files are rejected
 * before anything is mapped, so they are not grown as a side effect.
 */
scaling_bloom_t *new_scaling_bloom_from_file(unsigned int capacity, double error_rate, const char *filename)
{
    int fd;
    off_t size;

    scaling_bloom_t *bloom;
    counting_bloom_t *cur_bloom;

    if ((fd = open(filename, O_RDWR, (mode_t)0600)) < 0) {
        fprintf(stderr, "Error, Could not open file %s: %s\n", filename, strerror(errno));
        return NULL;
    }

    if ((size = lseek(fd, 0, SEEK_END)) < 0) {
        perror("Error, calling lseek() to tell file size");
        close(fd);
        return NULL;
    }

    if (size == 0) {
        fprintf(stderr, "Error, File size zero\n");
        close(fd);
        return NULL;
    }

    if ((size_t)size < sizeof(scaling_bloom_header_t)) {
        fprintf(stderr, "Error, Actual filesize and expected filesize are not equal\n");
        close(fd);
        return NULL;
    }

    bloom = scaling_bloom_init(capacity, error_rate, filename, fd);
    if (bloom == NULL) {
        /* fd is not closed here: if the failure came from mapping the file,
         * bitmap_resize()/free_bitmap() has already closed it */
        return NULL;
    }

    size -= sizeof(scaling_bloom_header_t);
    while (size) {
        cur_bloom = new_counting_bloom_from_scale(bloom);
        if (cur_bloom == NULL) {
            fprintf(stderr, "Error, Could not create counting bloom\n");
            free_scaling_bloom(bloom);
            return NULL;
        }
        // leave count and id as they were set in the file
        size -= cur_bloom->num_bytes;
        if (size < 0) {
            free_scaling_bloom(bloom);
            fprintf(stderr, "Error, Actual filesize and expected filesize are not equal\n");
            return NULL;
        }
    }
    return bloom;
}