from time import time, sleep

import pytest
from pymilvus.grpc_gen.common_pb2 import SegmentState

from base.client_base import TestcaseBase
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_log import test_log as log

prefix = "compact"
tmp_nb = 100


# @pytest.mark.skip(reason="Ci failed")
class TestCompactionParams(TestcaseBase):

    @pytest.mark.tags(CaseLabel.L2)
    def test_compact_without_connection(self):
        """
        target: test compact without connection
        method: compact after remove connection
        expected: raise exception
        """
        # init collection with tmp_nb default data
        collection_w = self.init_collection_general(prefix, nb=tmp_nb, insert_data=True)[0]

        # remove connection and delete
        self.connection_wrap.remove_connection(ct.default_alias)
        res_list, _ = self.connection_wrap.list_connections()
        assert ct.default_alias not in res_list
        error = {ct.err_code: 0, ct.err_msg: "should create connect first"}
        collection_w.compact(check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_compact_twice(self):
        """
        target: test compact twice
        method: 1.create with shard_num=1
                2.insert and flush twice (two segments)
                3.compact
                4.insert new data
                5.compact
        expected: Merge into one segment
        """
        # init collection with one shard, insert into two segments
        collection_w = self.collection_insert_multi_segments_one_shard(prefix, nb_of_segment=tmp_nb)

        # first compact two segments
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans1 = collection_w.get_compaction_plans()[0]
        target_1 = c_plans1.plans[0].target

        # insert new data
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)
        log.debug(collection_w.num_entities)

        # second compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_state()
        c_plans2 = collection_w.get_compaction_plans()[0]
        assert target_1 in c_plans2.plans[0].sources
        log.debug(c_plans2.plans[0].target)

    @pytest.mark.tags(CaseLabel.L1)
    def test_compact_partition(self):
        """
        target: test compact partition
        method: compact partition
        expected: Verify partition segments merged
        """
        # create collection with shard_num=1, and create partition
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
        partition_w = self.init_partition_wrap(collection_wrap=collection_w)

        # insert flush twice
        for i in range(2):
            df = cf.gen_default_dataframe_data(tmp_nb)
            partition_w.insert(df)
            assert partition_w.num_entities == tmp_nb * (i + 1)

        # compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans = collection_w.get_compaction_plans()[0]
        assert len(c_plans.plans) == 1
        assert len(c_plans.plans[0].sources) == 2
        target = c_plans.plans[0].target

        # verify queryNode load the compacted segments
        collection_w.load()
        segment_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
        assert target == segment_info[0].segmentID

    @pytest.mark.tags(CaseLabel.L2)
    def test_compact_only_growing_segment(self):
        """
        target: test compact growing data
        method: 1.insert into multi segments without flush
                2.compact
        expected: No compaction (compact just for sealed data)
        """
        # create and insert without flush
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)

        # compact when only growing segment
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans = collection_w.get_compaction_plans()[0]
        assert len(c_plans.plans) == 0

        collection_w.load()
        segments_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
        for segment_info in segments_info:
            assert segment_info.state == SegmentState.Growing
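
    # Compaction only considers sealed (flushed) segments, which is why the
    # growing-only case above yields an empty plan list. In this test framework,
    # reading `collection_w.num_entities` triggers a flush that seals growing
    # segments; a minimal sketch of forcing a sealed segment before compacting:
    #
    #     collection_w.insert(df)
    #     assert collection_w.num_entities == tmp_nb  # flush -> segment sealed
    #     collection_w.compact()                      # plans can now cover the segment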

    @pytest.mark.tags(CaseLabel.L2)
    def test_compact_empty_collection(self):
        """
        target: test compact an empty collection
        method: compact an empty collection
        expected: No exception
        """
        # init collection and empty
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))

        # compact
        collection_w.compact()
        c_plans, _ = collection_w.get_compaction_plans()
        assert len(c_plans.plans) == 0

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("delete_pos", [1, tmp_nb // 2])
    def test_compact_after_delete(self, delete_pos):
        """
        target: test delete one entity and compact
        method: 1.create with shard_num=1
                2.delete one sealed entity or half of the entities
                3.compact
        expected: Verify compact result
        """
        # create, insert without flush
        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix))
        df = cf.gen_default_dataframe_data(tmp_nb)
        insert_res, _ = collection_w.insert(df)

        # delete single entity, flush
        single_expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys[:delete_pos]}'
        collection_w.delete(single_expr)
        assert collection_w.num_entities == tmp_nb

        # compact, get plan
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans = collection_w.get_compaction_plans()[0]
        # Delete type compaction just merges the insert log and delta log of one segment
        # todo assert len(c_plans.plans[0].sources) == 1

        collection_w.load()
        collection_w.query(single_expr, check_task=CheckTasks.check_query_empty)

        res = df.iloc[-1:, :1].to_dict('records')
        collection_w.query(f'{ct.default_int64_field_name} in {insert_res.primary_keys[-1:]}',
                           check_task=CheckTasks.check_query_results, check_items={'exp_res': res})

    @pytest.mark.tags(CaseLabel.L1)
    def test_compact_delete_ratio(self):
        """
        target: test delete entities reaches ratio and auto-compact
        method: 1.create with shard_num=1
                2.insert (compact load delta log, not from dmlChannel)
                3.delete 20% of nb, flush
        expected: Verify auto compaction, merge insert log and delta log
        """
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
        df = cf.gen_default_dataframe_data(tmp_nb)
        insert_res, _ = collection_w.insert(df)

        # delete 20% entities
        ratio_expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys[:tmp_nb // ct.compact_delta_ratio_reciprocal]}'
        collection_w.delete(ratio_expr)
        assert collection_w.num_entities == tmp_nb

        # auto_compact
        sleep(1)
        # Delete type compaction just merges the insert log and delta log of one segment
        # todo assert len(c_plans.plans[0].sources) == 1

        collection_w.load()
        collection_w.query(ratio_expr, check_task=CheckTasks.check_query_empty)

        res = df.iloc[-1:, :1].to_dict('records')
        collection_w.query(f'{ct.default_int64_field_name} in {insert_res.primary_keys[-1:]}',
                           check_task=CheckTasks.check_query_results, check_items={'exp_res': res})
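
    # The delete-ratio trigger above is plain arithmetic: per the docstring, 20% of
    # nb is deleted, so ct.compact_delta_ratio_reciprocal is assumed to be 5 here:
    #
    #     deleted = tmp_nb // ct.compact_delta_ratio_reciprocal  # 100 // 5 == 20
    #     ratio = deleted / tmp_nb                               # 0.2 -> auto-compaction
    #
    # The next test deletes only 10% (reciprocal 10), staying below the ratio, so no
    # compaction is expected.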

    @pytest.mark.tags(CaseLabel.L2)
    def test_compact_delete_less_ratio(self):
        """
        target: test delete entities less than ratio and no compact
        method: 1.create collection shard_num=1
                2.insert without flush
                3.delete 10% entities and flush
        expected: Verify no compact (ratio not reached), delete successfully
        """
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
        df = cf.gen_default_dataframe_data(tmp_nb)
        insert_res, _ = collection_w.insert(df)

        # delete 10% entities, ratio = 0.1
        less_ratio_reciprocal = 10
        ratio_expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys[:tmp_nb // less_ratio_reciprocal]}'
        collection_w.delete(ratio_expr)
        assert collection_w.num_entities == tmp_nb

        collection_w.load()
        collection_w.query(ratio_expr, check_task=CheckTasks.check_query_empty)

    @pytest.mark.tags(CaseLabel.L0)
    def test_compact_after_delete_all(self):
        """
        target: test delete all and compact
        method: 1.create with shard_num=1
                2.delete all sealed data
                3.compact
        expected: collection num_entities is close to 0
        """
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
        df = cf.gen_default_dataframe_data()
        res, _ = collection_w.insert(df)

        expr = f'{ct.default_int64_field_name} in {res.primary_keys}'
        collection_w.delete(expr)
        assert collection_w.num_entities == ct.default_nb

        # currently no way to verify whether it is compacted after delete,
        # because the merge compact plan is generated first
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans()
        log.debug(collection_w.num_entities)

        collection_w.load()
        collection_w.query(expr, check_task=CheckTasks.check_query_empty)

    @pytest.mark.skip(reason="TODO")
    @pytest.mark.tags(CaseLabel.L2)
    def test_compact_delete_max_delete_size(self):
        """
        target: test compact delta log reaches max delete size 10MiB
        method: todo
        expected: auto merge single segment
        """
        pass

    @pytest.mark.xfail(reason="Issue 12344")
    @pytest.mark.tags(CaseLabel.L2)
    def test_compact_max_time_interval(self):
        """
        target: test auto compact with max interval 60s
        method: 1.create with shard_num=1
                2.insert flush twice (two segments)
                3.wait max_compaction_interval (60s)
        expected: Verify compaction results
        """
        # create collection shard_num=1, insert 2 segments, each with tmp_nb entities
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
        collection_w.compact()

        for i in range(2):
            df = cf.gen_default_dataframe_data(tmp_nb)
            collection_w.insert(df)
            assert collection_w.num_entities == tmp_nb * (i + 1)

        sleep(61)

        # verify queryNode load the compacted segments
        collection_w.load()
        segment_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]


class TestCompactionOperation(TestcaseBase):

    @pytest.mark.tags(CaseLabel.L2)
    def test_compact_both_delete_merge(self):
        """
        target: test compact both delete and merge
        method: 1.create collection with shard_num=1
                2.insert data into two segments
                3.delete and flush (new insert)
                4.compact
                5.load and search
        expected: Verify search result excludes the deleted ids
        """
        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), shards_num=1)
        ids = []
        for i in range(2):
            df = cf.gen_default_dataframe_data(tmp_nb, start=i * tmp_nb)
            insert_res, _ = collection_w.insert(df)
            assert collection_w.num_entities == (i + 1) * tmp_nb
            ids.extend(insert_res.primary_keys)

        expr = f'{ct.default_int64_field_name} in {[0, 2 * tmp_nb - 1]}'
        collection_w.delete(expr)
        collection_w.insert(cf.gen_default_dataframe_data(1, start=2 * tmp_nb))
        assert collection_w.num_entities == 2 * tmp_nb + 1

        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans()

        # search
        sleep(5)
        ids.pop(0)
        ids.pop(-1)
        collection_w.load()
        search_res, _ = collection_w.search(cf.gen_vectors(ct.default_nq, ct.default_dim),
                                            ct.default_float_vec_field_name,
                                            ct.default_search_params, ct.default_limit,
                                            check_task=CheckTasks.check_search_results,
                                            check_items={"nq": ct.default_nq,
                                                         "ids": ids,
                                                         "limit": ct.default_limit})
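
    # The case above combines both compaction flavors: the two flushed segments are
    # merge-compacted while the delta log for the two deleted ids (0 and 2*tmp_nb-1)
    # is applied, so the post-compaction search is expected to return only the
    # surviving ids that remain in `ids` after the two pops.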

    @pytest.mark.tags(CaseLabel.L1)
    def test_compact_after_index(self):
        """
        target: test compact after create index
        method: 1.insert data into two segments
                2.create index
                3.compact
                4.search
        expected: Verify segment info and index info
        """
        collection_w = self.collection_insert_multi_segments_one_shard(prefix, nb_of_segment=ct.default_nb,
                                                                       is_dup=False)

        # create index
        collection_w.create_index(ct.default_float_vec_field_name, ct.default_index)
        log.debug(collection_w.index())

        # compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans()

        # search
        collection_w.load()
        search_res, _ = collection_w.search(cf.gen_vectors(ct.default_nq, ct.default_dim),
                                            ct.default_float_vec_field_name,
                                            ct.default_search_params, ct.default_limit)
        assert len(search_res) == ct.default_nq
        for hits in search_res:
            assert len(hits) == ct.default_limit

    @pytest.mark.tags(CaseLabel.L1)
    def test_compact_after_binary_index(self):
        """
        target: test compact after create binary index
        method: 1.insert binary data into two segments
                2.create binary index
                3.compact
                4.search
        expected: Verify segment info and index info
        """
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1,
                                                 schema=cf.gen_default_binary_collection_schema())
        for i in range(2):
            df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
            collection_w.insert(data=df)
            assert collection_w.num_entities == (i + 1) * ct.default_nb

        # create index
        collection_w.create_index(ct.default_binary_vec_field_name, ct.default_binary_index)
        log.debug(collection_w.index())

        collection_w.load()
        search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
        vectors = cf.gen_binary_vectors(ct.default_nq, ct.default_dim)[1]
        search_res_one, _ = collection_w.search(vectors, ct.default_binary_vec_field_name,
                                                search_params, ct.default_limit)
        assert len(search_res_one) == ct.default_nq
        for hits in search_res_one:
            assert len(hits) == ct.default_limit

        # compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans()

        # verify index re-build and re-load
        search_params = {"metric_type": "L1", "params": {"nprobe": 10}}
        search_res_two, _ = collection_w.search(vectors, ct.default_binary_vec_field_name,
                                                search_params, ct.default_limit,
                                                check_task=CheckTasks.err_res,
                                                check_items={ct.err_code: 1,
                                                             ct.err_msg: "Metric type of field index isn't "
                                                                         "the same with search info"})

        # verify search result
        search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
        search_res_two, _ = collection_w.search(vectors, ct.default_binary_vec_field_name,
                                                search_params, ct.default_limit)
        assert len(search_res_two) == ct.default_nq
        for hits in search_res_two:
            assert len(hits) == ct.default_limit

    @pytest.mark.tags(CaseLabel.L1)
    def test_compact_and_index(self):
        """
        target: test compact and create index
        method: 1.insert data into two segments
                2.compact
                3.create index
                4.load and search
        expected: Verify search result and index info
        """
        collection_w = self.collection_insert_multi_segments_one_shard(prefix, nb_of_segment=ct.default_nb)

        # compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans()

        # create index
        collection_w.create_index(ct.default_float_vec_field_name, ct.default_index)
        log.debug(collection_w.index())

        # search
        collection_w.load()
        search_res, _ = collection_w.search(cf.gen_vectors(ct.default_nq, ct.default_dim),
                                            ct.default_float_vec_field_name,
                                            ct.default_search_params, ct.default_limit)
        assert len(search_res) == ct.default_nq
        for hits in search_res:
            assert len(hits) == ct.default_limit
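
    # Ordering note for the three index tests above: whether the index is created
    # before or after compaction, search is expected to succeed afterwards, since
    # the index is assumed to be rebuilt (or built fresh) on the compacted target
    # segment. The mismatched "L1" metric search verifies the rebuilt index still
    # enforces its original metric type.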

    @pytest.mark.tags(CaseLabel.L1)
    def test_compact_delete_and_search(self):
        """
        target: test delete and compact segment, and search
        method: 1.create collection and insert
                2.delete part of the entities
                3.compact
                4.load and search
        expected: Verify search result
        """
        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), shards_num=1)
        df = cf.gen_default_dataframe_data()
        insert_res, _ = collection_w.insert(df)

        expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys[:ct.default_nb // 2]}'
        collection_w.delete(expr)
        assert collection_w.num_entities == ct.default_nb
        collection_w.compact()

        # search
        sleep(2)
        collection_w.load()
        search_res, _ = collection_w.search(cf.gen_vectors(ct.default_nq, ct.default_dim),
                                            ct.default_float_vec_field_name,
                                            ct.default_search_params, ct.default_limit,
                                            check_task=CheckTasks.check_search_results,
                                            check_items={"nq": ct.default_nq,
                                                         "ids": insert_res.primary_keys[ct.default_nb // 2:],
                                                         "limit": ct.default_limit})

    @pytest.mark.tags(CaseLabel.L0)
    def test_compact_merge_and_search(self):
        """
        target: test compact and search
        method: 1.insert data into two segments
                2.compact
                3.load and search
        expected: Verify search result
        """
        collection_w = self.collection_insert_multi_segments_one_shard(prefix, nb_of_segment=ct.default_nb)

        # compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans()

        # search
        collection_w.load()
        search_res, _ = collection_w.search(cf.gen_vectors(ct.default_nq, ct.default_dim),
                                            ct.default_float_vec_field_name,
                                            ct.default_search_params, ct.default_limit)
        assert len(search_res) == ct.default_nq
        for hits in search_res:
            assert len(hits) == ct.default_limit

    # @pytest.mark.skip(reason="Todo")
    @pytest.mark.tags(CaseLabel.L2)
    def test_compact_search_after_delete_channel(self):
        """
        target: test search after compact, while queryNode gets delete requests from
                the channel rather than from the compacted delta log
        method: 1.insert, flush and load
                2.delete half
                3.compact
                4.search
        expected: No compaction triggered; the delete still takes effect on search
        """
        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), shards_num=1)

        df = cf.gen_default_dataframe_data()
        insert_res, _ = collection_w.insert(df)
        assert collection_w.num_entities == ct.default_nb

        collection_w.load()

        expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys[:ct.default_nb // 2]}'
        collection_w.delete(expr)

        collection_w.compact()
        c_plans = collection_w.get_compaction_plans()[0]
        assert len(c_plans.plans) == 0

        # search
        sleep(2)
        collection_w.load()
        search_res, _ = collection_w.search(cf.gen_vectors(ct.default_nq, ct.default_dim),
                                            ct.default_float_vec_field_name,
                                            ct.default_search_params, ct.default_limit,
                                            check_task=CheckTasks.check_search_results,
                                            check_items={"nq": ct.default_nq,
                                                         "ids": insert_res.primary_keys[ct.default_nb // 2:],
                                                         "limit": ct.default_limit})
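
    # The two tests below rely on Milvus time travel: a hybrid timestamp taken at
    # insert time lets a search observe data as of that point, even after later
    # deletes. A minimal sketch of deriving a travel timestamp from an insert result:
    #
    #     from pymilvus import utility
    #     tt = utility.mkts_from_hybridts(insert_res.timestamp, milliseconds=0.)
    #     collection_w.search(vectors, field, params, limit, travel_timestamp=tt)
    #
    # While the deletes are inside the retention window, a travel search at the
    # pre-delete timestamp can still see the deleted entities; once the window
    # (compact_retention_duration) passes and compaction runs, the delta data is
    # dropped and the same travel search comes back empty.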

    @pytest.mark.tags(CaseLabel.L1)
    def test_compact_delete_inside_time_travel(self):
        """
        target: test compact inside time_travel range
        method: 1.insert data and get ts
                2.delete all ids
                3.compact
                4.search with ts
        expected: Verify search result
        """
        from pymilvus import utility
        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), shards_num=1)

        # insert and get tt
        df = cf.gen_default_dataframe_data(tmp_nb)
        insert_res, _ = collection_w.insert(df)
        tt = utility.mkts_from_hybridts(insert_res.timestamp, milliseconds=0.)

        # delete all
        expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys}'
        delete_res, _ = collection_w.delete(expr)
        log.debug(collection_w.num_entities)

        collection_w.compact()

        collection_w.load()
        search_one, _ = collection_w.search(df[ct.default_float_vec_field_name][:1].to_list(),
                                            ct.default_float_vec_field_name,
                                            ct.default_search_params, ct.default_limit,
                                            travel_timestamp=tt)
        assert 0 in search_one[0].ids

    @pytest.mark.xfail(reason="Issue 12450")
    @pytest.mark.tags(CaseLabel.L3)
    def test_compact_delete_outside_time_travel(self):
        """
        target: test compact outside time_travel range
        method: 1.create and insert
                2.get time stamp
                3.delete
                4.compact after compact_retention_duration
                5.load and search with travel time tt
        expected: Empty search result
        """
        from pymilvus import utility
        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), shards_num=1)

        # insert
        df = cf.gen_default_dataframe_data(tmp_nb)
        insert_res, _ = collection_w.insert(df)
        tt = utility.mkts_from_hybridts(insert_res.timestamp, milliseconds=0.)

        expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys}'
        delete_res, _ = collection_w.delete(expr)
        log.debug(collection_w.num_entities)

        # ensure compact removes delta data for deletes outside the retention range
        # sleep(ct.compact_retention_duration)
        sleep(60)

        collection_w.compact()
        collection_w.load()

        # search with travel_time tt
        search_res, _ = collection_w.search(df[ct.default_float_vec_field_name][:1].to_list(),
                                            ct.default_float_vec_field_name,
                                            ct.default_search_params, ct.default_limit,
                                            travel_timestamp=tt)
        log.debug(search_res[0].ids)
        assert len(search_res[0]) == 0

    @pytest.mark.tags(CaseLabel.L0)
    def test_compact_merge_two_segments(self):
        """
        target: test compact merge two segments
        method: 1.create with shard_num=1
                2.insert and flush
                3.insert and flush again
                4.compact
                5.load
        expected: Verify segments are merged
        """
        num_of_segment = 2
        # create collection shard_num=1, insert 2 segments, each with tmp_nb entities
        collection_w = self.collection_insert_multi_segments_one_shard(prefix, num_of_segment, tmp_nb)

        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans = collection_w.get_compaction_plans()[0]

        # verify the two segments are merged into one
        assert len(c_plans.plans) == 1
        assert len(c_plans.plans[0].sources) == 2
        target = c_plans.plans[0].target

        # verify queryNode load the compacted segments
        collection_w.load()
        segment_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
        assert target == segment_info[0].segmentID

    @pytest.mark.tags(CaseLabel.L2)
    def test_compact_no_merge(self):
        """
        target: test compact when no segments merge
        method: 1.create with shard_num=1
                2.insert and flush
                3.compact and search
        expected: No exception and no compact plans
        """
        # create collection
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)
        assert collection_w.num_entities == tmp_nb

        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans, _ = collection_w.get_compaction_plans()
        assert len(c_plans.plans) == 0
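
    # The next three tests exercise the auto-merge threshold: segments smaller than
    # half of the max segment size are merged once their count reaches
    # ct.compact_segment_num_threshold (10, per the docstrings), e.g.:
    #
    #     num_of_segment = ct.compact_segment_num_threshold + 1  # 11 -> merge expected
    #     num_of_segment = ct.compact_segment_num_threshold - 1  # 9  -> no auto-merge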

    @pytest.mark.tags(CaseLabel.L1)
    def test_compact_merge_multi_segments(self):
        """
        target: test compact and merge multi small segments
        method: 1.create with shard_num=1
                2.insert one entity and flush (multi times)
                3.compact
                4.load and search
        expected: Verify segments info
        """
        # greater than auto-merge threshold 10
        num_of_segment = ct.compact_segment_num_threshold + 1

        # create collection shard_num=1, insert 11 segments, each with one entity
        collection_w = self.collection_insert_multi_segments_one_shard(prefix, num_of_segment=num_of_segment)

        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans = collection_w.get_compaction_plans()[0]
        assert len(c_plans.plans[0].sources) == 2
        target = c_plans.plans[0].target

        collection_w.load()
        segments_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
        assert len(segments_info) == 1
        assert segments_info[0].segmentID == target

    @pytest.mark.tags(CaseLabel.L2)
    def test_compact_merge_inside_time_travel(self):
        """
        target: test compact and merge segments inside time_travel range
        method: search with time travel after merge compact
        expected: Verify segments inside time_travel merged
        """
        from pymilvus import utility
        # create collection shard_num=1, insert 2 segments, each with tmp_nb entities
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)

        # insert twice
        df1 = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df1)[0]
        assert collection_w.num_entities == tmp_nb

        df2 = cf.gen_default_dataframe_data(tmp_nb, start=tmp_nb)
        insert_two = collection_w.insert(df2)[0]
        assert collection_w.num_entities == tmp_nb * 2

        tt = utility.mkts_from_hybridts(insert_two.timestamp, milliseconds=0.1)

        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans()[0]

        collection_w.load()
        search_res, _ = collection_w.search(df2[ct.default_float_vec_field_name][:1].to_list(),
                                            ct.default_float_vec_field_name,
                                            ct.default_search_params, ct.default_limit,
                                            travel_timestamp=tt)
        assert tmp_nb in search_res[0].ids
        assert len(search_res[0]) == ct.default_limit

    @pytest.mark.tags(CaseLabel.L2)
    def test_compact_threshold_auto_merge(self):
        """
        target: test segment num (each segment_size < 1/2 of the max) reaches auto-merge threshold 10
        method: 1.create with shard_num=1
                2.insert flush 10 times (merge threshold 10)
                3.wait for compaction, load
        expected: Get query segment info to verify segments are auto-merged into one
        """
        threshold = ct.compact_segment_num_threshold

        # create collection shard_num=1, insert 10 segments, each with one entity
        collection_w = self.collection_insert_multi_segments_one_shard(prefix, num_of_segment=threshold)

        # Estimated auto-merging takes 30s
        cost = 60
        collection_w.load()
        start = time()
        while True:
            sleep(5)
            segments_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]

            # verify segments reach the threshold and ten segments auto-merge into one
            if len(segments_info) == 1:
                break
            end = time()
            if end - start > cost:
                raise BaseException(1, "Compact auto-merge took more than 60s")

    @pytest.mark.tags(CaseLabel.L2)
    def test_compact_less_threshold_no_merge(self):
        """
        target: test compact when the num of segments (each size less than 1/2 of the max)
                does not reach the threshold
        method: 1.create collection with shard_num = 1
                2.insert flush 9 times (segments threshold 10)
                3.after a while, load
        expected: Verify segments are not merged
        """
        less_threshold = ct.compact_segment_num_threshold - 1

        # create collection shard_num=1, insert 9 segments, each with one entity
        collection_w = self.collection_insert_multi_segments_one_shard(prefix, num_of_segment=less_threshold)

        sleep(3)
        # load and verify no auto-merge
        collection_w.load()
        segments_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
        assert len(segments_info) == less_threshold

    @pytest.mark.skip(reason="Todo")
    @pytest.mark.tags(CaseLabel.L2)
    def test_compact_multi_collections(self):
        """
        target: test compact multi collections with merge
        method: create 50 collections, add entities into them and compact in turn
        expected: No exception
        """
        pass

    @pytest.mark.tags(CaseLabel.L1)
    def test_compact_and_insert(self):
        """
        target: test insert after compact
        method: 1.create and insert with flush
                2.delete and compact
                3.insert new data
                4.load and search
        expected: Verify search result and segment info
        """
        # create collection shard_num=1, insert 2 segments, each with tmp_nb entities
        collection_w = self.collection_insert_multi_segments_one_shard(prefix, nb_of_segment=tmp_nb)

        # compact two segments
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans()

        # insert new data, verify insert flush successfully
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)
        assert collection_w.num_entities == tmp_nb * 3

    @pytest.mark.tags(CaseLabel.L1)
    def test_compact_and_delete(self):
        """
        target: test delete after compact
        method: 1.delete half and compact
                2.load and query
                3.delete and query
        expected: Verify deleted ids
        """
        # init collection with one shard, insert into two segments
        collection_w = self.collection_insert_multi_segments_one_shard(prefix, is_dup=False)

        # compact and complete
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans()

        # delete and query
        expr = f'{ct.default_int64_field_name} in {[0]}'
        collection_w.delete(expr)
        collection_w.load()
        collection_w.query(expr, check_task=CheckTasks.check_query_empty)

        expr_1 = f'{ct.default_int64_field_name} in {[1]}'
        collection_w.query(expr_1, check_task=CheckTasks.check_query_results,
                           check_items={'exp_res': [{'int64': 1}]})

    @pytest.mark.tags(CaseLabel.L1)
    def test_compact_cross_shards(self):
        """
        target: test compact cross shards
        method: 1.create with shard_num=2
                2.insert once and flush (two segments, belonging to two shards)
                3.compact and wait for completion
        expected: Verify no compact
        """
        # insert into two segments with two shards
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=2)
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)
        assert collection_w.num_entities == tmp_nb

        # compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed(timeout=1)
        c_plans = collection_w.get_compaction_plans()[0]

        # Actually no merge happened
        assert len(c_plans.plans) == 0

    @pytest.mark.tags(CaseLabel.L1)
    def test_compact_cross_partition(self):
        """
        target: test compact cross partitions
        method: 1.create with shard_num=1
                2.create partition and insert, flush
                3.insert into _default partition and flush
                4.compact
        expected: Verify no compact
        """
        # create collection and partition
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1)
        partition_w = self.init_partition_wrap(collection_wrap=collection_w)

        # insert
        df = cf.gen_default_dataframe_data(tmp_nb)
        collection_w.insert(df)
        assert collection_w.num_entities == tmp_nb
        partition_w.insert(df)
        assert collection_w.num_entities == tmp_nb * 2

        # compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans = collection_w.get_compaction_plans()[0]

        # Actually no merge happened
        assert len(c_plans.plans) == 0

        collection_w.load()
        segments_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
        assert segments_info[0].partitionID != segments_info[-1].partitionID
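

# ---------------------------------------------------------------------------
# Illustrative sketch (not a test case): the manual compaction workflow that the
# cases above exercise, written against the public pymilvus ORM. The connection
# parameters and collection name below are assumptions for illustration only.
# ---------------------------------------------------------------------------
def _manual_compact_sketch(name="compact_demo", host="localhost", port="19530"):
    from pymilvus import Collection, connections

    connections.connect(alias="default", host=host, port=port)
    collection = Collection(name)  # an existing collection with sealed segments
    collection.compact()  # trigger a manual compaction
    collection.wait_for_compaction_completed()
    plans = collection.get_compaction_plans()
    # each plan records the merged source segment ids and the new target segment id
    return plans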